/// <summary>
/// Uses a regular expression to strip the interference markup found in scraped
/// web pages: every <c>&lt;span style="display:none"&gt;...&lt;/span&gt;</c> element,
/// including its content, is removed.
/// </summary>
/// <param name="html">The scraped HTML text to clean.</param>
/// <returns><paramref name="html"/> with all hidden spans removed.</returns>
public string Cleardistrub(string html)
{
    // Lazy ".*?" stops at the first closing tag, so each hidden span is removed
    // on its own and the visible text between two hidden spans is kept.
    const string hiddenSpanPattern = @"<span style=""display:none"">.*?</span>";

    // Singleline lets '.' match line breaks; without it a hidden span whose
    // content wraps over several lines would be left in the output.
    return Regex.Replace(
        html,
        hiddenSpanPattern,
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);
}

// Next example: extract only the string between two delimiters.
/// <summary>
/// Replaces every <c>&lt;%= ... %&gt;</c> tag in <paramref name="input"/> with the
/// text that <c>RefineCodeTag</c> produces for that tag.
/// </summary>
/// <remarks>
/// Example input: <c>public &lt;%=classname%&gt;Extension : IExt</c>.
/// Goal: match the <c>classname</c> between <c>&lt;%=</c> and <c>%&gt;</c> and replace the tag.
/// Expression: <c>&lt;%=.*?%&gt;</c>.
/// </remarks>
/// <param name="input">The template text containing zero or more tags.</param>
/// <returns>The text with each tag replaced.</returns>
private string Replace(string input)
{
    // The quantifier is lazy on purpose: a greedy ".*" combined with Singleline
    // would run from the first "<%=" to the LAST "%>", merging several tags
    // (and all text between them) into a single match.
    return Regex.Replace(
        input,
        @"<%=.*?%>",
        new MatchEvaluator(RefineCodeTag),
        RegexOptions.Singleline);
}
/// <summary>
/// Match evaluator: removes every <c>&lt;%=</c> and <c>%&gt;</c> marker from the
/// matched text, trims the surrounding whitespace and appends a comma.
/// </summary>
/// <param name="m">The match whose text is refined.</param>
/// <returns>The trimmed text without tag markers, followed by ",".</returns>
string RefineCodeTag(Match m)
{
    // Both markers are plain literals, so ordinal string replacement is enough.
    string inner = m.Value
        .Replace("<%=", "")
        .Replace("%>", "");

    return inner.Trim() + ",";
}
|