C#读取HTML文件内容写入记事本的代码

发布时间:2019-07-18 编辑:脚本学堂
C#读取Html文件内容并写入记事本的代码一例,供大家学习参考。

代码如下:

// Reads every .htm file in the directory typed into textBox1, extracts the alarm
// cause (the <h4> heading) and the handling notes (the page text with the <title>,
// the first <ul><li>...</ul> list and all HTML tags removed), and appends the result
// to "告警经验库信息.txt" inside that same directory.
// Any failure is reported to the user through a message box.
try
{
    string dirPath = this.textBox1.Text.Trim();
    if (dirPath.Length == 0)
    {
        MessageBox.Show("请输入HTML文件路径!");
    }
    else
    {
        int totalFile = 0;

        // Enumerate the input files before the output file is opened, so a missing
        // directory fails before anything is created.
        DirectoryInfo dirInfo = new DirectoryInfo(dirPath);
        FileInfo[] files = dirInfo.GetFiles();

        // Path.Combine inserts the directory separator when the user omitted it.
        string filename = Path.Combine(dirPath, "告警经验库信息.txt");

        // Loop-invariant helpers, built once.
        Encoding encode = System.Text.Encoding.GetEncoding("gb2312");
        Regex tagRegex = new Regex(@"<[^>]+>|</[^>]+>");      // matches any HTML tag
        Regex lineBreakRegex = new Regex(@"[\n\r\t]");        // newline, carriage return, tab
        Regex listRegex = new Regex("<ul><li>(?<key>.*?)</ul>", RegexOptions.None);
        RegexOptions matchOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

        // File.AppendText creates the file when it does not exist yet, and the using
        // statement closes the writer even if processing a file throws.
        using (StreamWriter sw = File.AppendText(filename))
        {
            foreach (FileInfo fileinfo in files)
            {
                // Only .htm files are processed (extension compared case-insensitively).
                if (!fileinfo.Extension.Equals(".htm", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                totalFile = totalFile + 1;
                string strhtml = File.ReadAllText(fileinfo.FullName, encode);

                // Alarm cause: text between <h4> and </h4>, e.g. "ALM-1 网元启动".
                // The capture group is empty when the page has no <h4>, so no
                // out-of-range Substring can occur.
                Match headingMatch = Regex.Match(strhtml, "<h4>([^<]*)</h4>", matchOptions);
                string cause = lineBreakRegex.Replace(headingMatch.Groups[1].Value, " ").Trim();

                // Whole <title>...</title> element with line breaks and tabs removed, so it
                // can be located in the equally stripped page source below.
                Match titleMatch = Regex.Match(strhtml, "<title>([^<]*)</title>", matchOptions);
                string titlename = lineBreakRegex.Replace(titleMatch.Value, "").Trim();

                // Strip line breaks and tabs so the single-line list regex can span the list.
                strhtml = lineBreakRegex.Replace(strhtml, "");

                // Rebuild the complete <ul><li>...</ul> source and cut it out of the page.
                string dataStr = "<ul><li>" + listRegex.Match(strhtml).Groups["key"].Value + "</ul>";
                strhtml = strhtml.Replace(dataStr, "");

                // string.Replace rejects an empty search value, so skip pages without a title.
                if (titlename.Length > 0)
                {
                    strhtml = strhtml.Replace(titlename, "");
                }

                strhtml = tagRegex.Replace(strhtml, " ");   // drop the remaining HTML tags
                strhtml = strhtml.Replace("&nbsp;", "");    // drop non-breaking-space entities

                // The last space-separated token of the heading is the cause text ("网元启动").
                string[] arr = cause.Split(' ');
                string zhCause = this.chinaString(arr[arr.Length - 1]);

                sw.WriteLine("第" + totalFile + "个文件:" + fileinfo.Name);
                sw.WriteLine("-----告警原因------:");
                sw.WriteLine(zhCause);
                sw.WriteLine("-----处理经验------:");
                sw.WriteLine(strhtml);
                sw.WriteLine();
                sw.Flush();
            }
        }

        MessageBox.Show("操作成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
}
catch (Exception ee)
{
    MessageBox.Show("操作失败:" + ee.Message);
}