代码如下:
// Scans the directory typed into textBox1 for .htm files, pulls the alarm cause
// (the <h4> heading) and the remaining page text (minus the <title> and the first
// <ul><li>...</ul> list) out of each one, and appends the result to
// "告警经验库信息.txt" inside that same directory.
// Any failure is reported to the user through a message box.
try
{
    string dirPath = this.textBox1.Text.Trim();
    if (dirPath.Length == 0)
    {
        MessageBox.Show("请输入HTML文件路径!");
    }
    else
    {
        int totalFile = 0;
        DirectoryInfo dirInfo = new DirectoryInfo(dirPath);
        FileInfo[] files = dirInfo.GetFiles();

        // Path.Combine inserts the directory separator when the user left it off,
        // which the original trailing-separator check failed to do.
        string filename = Path.Combine(dirPath, "告警经验库信息.txt");

        // Loop-invariant helpers, built once instead of once per file.
        Encoding encode = Encoding.GetEncoding("gb2312");
        Regex tagRegex = new Regex(@"<[^>]+>|</[^>]+>"); // strips HTML tags
        Regex listRegex = new Regex("<ul><li>(?<key>.*?)</ul>", RegexOptions.None);
        Regex lineBreakRegex = new Regex(@"[\n\r\t]"); // newline, carriage return, tab
        RegexOptions headingOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

        // File.AppendText creates the file when it does not exist yet, and the
        // using block guarantees the writer is closed even if a file fails to parse.
        using (StreamWriter sw = File.AppendText(filename))
        {
            foreach (FileInfo fileinfo in files)
            {
                // Only .htm files are processed; the comparison ignores case so
                // that ".HTM" files are not silently skipped.
                if (!string.Equals(fileinfo.Extension, ".htm", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                totalFile = totalFile + 1;

                // The files are local, so read them directly instead of going
                // through WebRequest.
                string strhtml = File.ReadAllText(fileinfo.FullName, encode);

                // Alarm cause: text between <h4> and </h4>. The capture group is
                // used so that a page without an <h4> yields an empty cause
                // instead of an out-of-range Substring call.
                Match headingMatch = Regex.Match(strhtml, "<h4>([^<]*)</h4>", headingOptions);
                string cause = headingMatch.Success
                    ? lineBreakRegex.Replace(headingMatch.Groups[1].Value, " ").Trim()
                    : string.Empty;

                // Full <title>...</title> element, with line breaks and tabs removed
                // so it can be located in the flattened source below.
                Match titleMatch = Regex.Match(strhtml, "<title>([^<]*)</title>", headingOptions);
                string titlename = lineBreakRegex.Replace(titleMatch.Value, "").Trim();

                // Flatten the page first: '.' in listRegex does not match newlines.
                strhtml = lineBreakRegex.Replace(strhtml, "");

                // Drop the first <ul><li>...</ul> list from the page.
                Match listMatch = listRegex.Match(strhtml);
                if (listMatch.Success)
                {
                    strhtml = strhtml.Replace(listMatch.Value, "");
                }

                // string.Replace throws on an empty search string, so only remove
                // the title element when one was actually found.
                if (titlename.Length > 0)
                {
                    strhtml = strhtml.Replace(titlename, "");
                }

                strhtml = tagRegex.Replace(strhtml, " ");
                // NOTE(review): the literal below may have been "&nbsp;" before the
                // snippet was published; as written it removes ordinary spaces — confirm.
                strhtml = strhtml.Replace(" ", "");

                // The heading is space-separated; the last token is taken as the
                // cause text (for a heading like "ALM-1 <name>" this keeps "<name>").
                string[] arr = cause.Split(' ');
                string zhCause = this.chinaString(arr[arr.Length - 1]);

                sw.WriteLine("第" + totalFile + "个文件:" + fileinfo.Name);
                sw.WriteLine("-----告警原因------:");
                sw.WriteLine(zhCause);
                sw.WriteLine("-----处理经验------:");
                sw.WriteLine(strhtml);
                sw.WriteLine();
                sw.Flush();
            }
        }

        MessageBox.Show("操作成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
}
catch (Exception ee)
{
    MessageBox.Show("操作失败:" + ee.Message);
}