提取HTML代码中文字的C#函数|神德数码|网站空间|网络空间|域名注册|网站推广|虚拟主机|网通空间|电信空间|ftp空间|wap空间|asp空间|aspx空间|.net空间|php空间|数据库空间

提取HTML代码中文字的C#函数
发布时间：2006-10-14 3:53:12		收集提供：gaoqian


/// <summary>
/// Removes HTML markup from a string and decodes a small set of character entities.
/// </summary>
/// <param name="strHtml">Source text that may contain HTML markup.</param>
/// <returns>
/// The input with script blocks, tags and comment markers removed and the listed
/// entities decoded. Any '&lt;', '&gt;' or CR/LF pair still present afterwards
/// (including ones produced by decoding) is dropped as well.
/// Returns an empty string when <paramref name="strHtml"/> is null or empty.
/// </returns>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Script blocks usually span several lines, so '.' has to match '\n' for this
    // pattern only; the remaining patterns keep the default single-line semantics.
    string strOutput = Regex.Replace(
        strHtml,
        @"<script[^>]*?>.*?</script>",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Patterns are applied in order; aryRep[i] is the replacement for aryReg[i].
    // NOTE(review): the tag pattern nests quantifiers and may backtrack heavily on
    // malformed input - consider a match timeout if the input is untrusted.
    string[] aryReg =
    {
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };

    string[] aryRep =
    {
        "",         // tags
        "",         // line break followed by whitespace
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\u00A1",   // inverted exclamation mark, chr(161)
        "\u00A2",   // cent sign, chr(162)
        "\u00A3",   // pound sign, chr(163)
        "\u00A9",   // copyright sign, chr(169)
        "",         // any other numeric entity is discarded
        "\r\n",     // comment end becomes a line break so the next pattern can consume the comment
        ""          // comment start up to the end of its line
    };

    for (int i = 0; i < aryReg.Length; i++)
    {
        strOutput = Regex.Replace(strOutput, aryReg[i], aryRep[i], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the result must be assigned to take effect.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");
    strOutput = strOutput.Replace("\r\n", "");

    return strOutput;
}

|网站空间|网络空间|域名注册|网站推广|虚拟主机|网通空间|电信空间|ftp空间|wap空间| |asp空间|aspx空间|.net空间|php空间|数据库空间|html5空间|