再介绍一种用 dreamweaver 清除HTML样式的方法。
有时候制作网页,直接从网上复制的网页内容,或从WORD、PS等软件生成的HTML都有许多不必要的CSS代码,影响加载速度,想彻底清除,用dreamweaver再加工,但找不到类似的功能,可以用以下方法:
使用dreamweaver的搜索替换功能,利用正则表达式,可以轻松清除许多垃圾代码;
如清除内联样式:只需要搜索: style="[^"]*"
,勾选“使用正则表达式”,替换内容留空,替换全部即可。
清除其它的代码也一样,如 alt="……"
之类
搜索: alt="[^"]*"
(不要用 ".*",它是贪婪匹配,会一直匹配到该行最后一个引号),替换内容留空,替换全部即可。
下面是一个用 C# 删除 HTML 标签和脚本的函数,可以参考:
/// <summary>
/// Reduces an HTML fragment to plain text and returns it HTML-encoded and trimmed.
/// </summary>
/// <remarks>
/// Processing order: script elements are removed with their contents, all remaining
/// tags are stripped, line breaks followed by whitespace are collapsed, leftover
/// comment delimiters are dropped, a fixed set of character entities is decoded,
/// any other numeric character reference is deleted, angle brackets and CR/LF pairs
/// are removed, and finally the text is HTML-encoded.
/// </remarks>
/// <param name="Htmlstring">The HTML to clean. May be null or empty.</param>
/// <returns>The encoded plain text; an empty string when the input is null or empty.</returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Script bodies normally span several lines, so '.' has to match newlines here.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Strip every remaining tag.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Drop a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove comment delimiters that survived tag stripping
    // (an unterminated comment is cut up to the end of its line).
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Entity pattern -> replacement character, applied in this order.
    string[,] entities =
    {
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
    };
    for (int i = 0; i < entities.GetLength(0); i++)
    {
        Htmlstring = Regex.Replace(Htmlstring, entities[i, 0], entities[i, 1],
            RegexOptions.IgnoreCase);
    }

    // Delete any other numeric character reference (e.g. "&#8364;").
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // String.Replace returns a new string; the result must be assigned back.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // HttpUtility.HtmlEncode is the encoder behind Server.HtmlEncode and does not
    // need an active HttpContext, so this also works outside a web request.
    return HttpUtility.HtmlEncode(Htmlstring).Trim();
}
下面是快速去除样式、获得纯净的text文本的代码。
这种去除样式的代码超级简单,直接拿过来用就可以,方便快捷。
代码如下(此代码可以随意放到该页面的任意位置):
<%
' Returns the text content of an HTML fragment.
' <script>, <style> and <object> elements are removed together with their
' contents, then every remaining tag is stripped.
'   sOut - the HTML to clean; Null yields an empty string.
Public Function TextOnly(ByVal sOut)
    ' Guard against Null (callers pass content read from a database),
    ' which RegExp.Replace cannot process.
    If IsNull(sOut) Then
        TextOnly = ""
        Exit Function
    End If

    Dim re:Set re=New RegExp
    re.IgnoreCase=True
    re.Global=True

    ' [\s\S]*? is used instead of .* for two reasons: "." does not match line
    ' breaks, and a greedy match would swallow everything between the first
    ' opening tag and the last closing tag.
    re.Pattern="<script[^>]*>[\s\S]*?</script\s*>"
    sOut=re.Replace(sOut,"")
    re.Pattern="<style[^>]*>[\s\S]*?</style\s*>"
    sOut=re.Replace(sOut,"")
    re.Pattern="<object[^>]*>[\s\S]*?</object\s*>"
    sOut=re.Replace(sOut,"")

    ' Strip all remaining tags.
    re.Pattern="(<(.[^>]*)>)"
    sOut=re.Replace(sOut,"")

    Set re=Nothing
    TextOnly=sOut
End Function
%>
用法:TextOnly(从数据库提出来的信息内容)
// Remove <script> elements together with their contents (lazy, may span lines).
Htmlstring = Regex.Replace(Htmlstring, @"<script(\s[^>]*?)?>[\s\S]*?</script\s*>", "", RegexOptions.IgnoreCase);
// Remove <style> elements together with their contents; attributes such as type="text/css" are allowed.
Htmlstring = Regex.Replace(Htmlstring, @"<style(\s[^>]*?)?>[\s\S]*?</style\s*>", "", RegexOptions.IgnoreCase);
// Remove all remaining HTML tags.
Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
/// <summary>
/// Strips scripts, frames, images and all other markup from an HTML fragment and
/// returns the remaining text with every space character removed.
/// </summary>
/// <param name="html">The HTML to clean. May be null or empty.</param>
/// <returns>The cleaned text; an empty string when the input is null or empty.</returns>
public string checkStr(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Element bodies are matched lazily so that two separate <script> (or
    // <iframe>/<frameset>) elements do not swallow the text between them.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<script[\s\S]+?</script *>", "", options);

    // Remove "href=...script:" URLs; the match is confined to a single tag.
    html = System.Text.RegularExpressions.Regex.Replace(html, @" href *= *[^<>]*?script *:", "", options);

    // Rename inline event-handler attributes (onclick=, onload=, ...).
    // NOTE(review): the previous pattern started with " no"; given the
    // "_disibledevent" replacement it is assumed to have meant " on" - confirm.
    html = System.Text.RegularExpressions.Regex.Replace(html, @" on\w+\s*=", " _disibledevent=", options);

    html = System.Text.RegularExpressions.Regex.Replace(html, @"<iframe[\s\S]+?</iframe *>", "", options);
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<frameset[\s\S]+?</frameset *>", "", options);
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<img[^>]+>", "", options);
    html = System.Text.RegularExpressions.Regex.Replace(html, @"</p>", "", options);
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<p>", "", options);

    // Strip every remaining tag. Nothing of the form <...> survives this step,
    // so no separate <strong> handling is needed afterwards.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<[^>]*>", "", options);

    // NOTE(review): this removes every space character from the text; it may
    // originally have targeted "&nbsp;" - confirm before changing.
    html = html.Replace(" ", "");
    return html;
}
下载App
关注公众号
Copyright©2023 w3cschool编程狮|闽ICP备15016281号-3|闽公网安备35020302033924号
违法和不良信息举报电话:173-0602-2364|举报邮箱:[email protected]
联系方式: