C#下载网页HTML源码的多种方式(HttpWebRequest与WebClient方式)

发布时间:2019-09-29编辑:脚本学堂
本文介绍C#下载网页HTML源码的多种方法,包括HttpWebRequest方式与WebClient方式下载网页源码,以及下载网页源码时获取HTTP状态码的实现代码。

c#如何下载网页源码?

这里分享三种实现方法,供大家学习参考。

一、c#下载网页html源码的实现代码。
 

复制代码 代码示例:
/// <summary>
/// Downloads the HTML source of a web page, retrying when a download fails.
/// </summary>
public static class DownLoad_HTML
{
    // Number of consecutive failed download attempts; reset to 0 after a successful download.
    private static int FailCount = 0;

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and returns its body decoded as gb2312.
    /// </summary>
    /// <remarks>
    /// A failed attempt is retried automatically. Once more than five consecutive attempts
    /// have failed, a message box asks before each further retry; answering "No" shows the
    /// error in a second message box and rethrows it to the caller.
    /// </remarks>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The decoded response body.</returns>
    /// <exception cref="System.Exception">
    /// The error of the last attempt, rethrown when the user declines to retry.
    /// </exception>
    public static string GetHtml(string url)
    {
        // Retry in a loop instead of recursing from the catch block, so a long run of
        // failures cannot grow the call stack.
        while (true)
        {
            try
            {
                System.Net.WebRequest request = System.Net.WebRequest.Create(url);
                request.Timeout = 10000; // download timeout, in milliseconds
                request.Headers.Set("Pragma", "no-cache");

                // Text encoding of the page; switch to utf-8 for UTF-8 encoded pages.
                Encoding encoding = Encoding.GetEncoding("gb2312");

                // using blocks release the connection and streams even when reading fails.
                using (System.Net.WebResponse response = request.GetResponse())
                using (System.IO.Stream streamReceive = response.GetResponseStream())
                using (System.IO.StreamReader streamReader = new System.IO.StreamReader(streamReceive, encoding))
                {
                    string str = streamReader.ReadToEnd();
                    FailCount = 0; // reaching this point means the download succeeded
                    return str;
                }
            }
            catch (Exception ex)
            {
                FailCount++;

                if (FailCount > 5)
                {
                    var result = System.Windows.Forms.MessageBox.Show("已下载失败" + FailCount + "次,是否要继续尝试?" + Environment.NewLine + ex.ToString(), "数据下载异常", System.Windows.Forms.MessageBoxButtons.YesNo, System.Windows.Forms.MessageBoxIcon.Error);
                    if (result != System.Windows.Forms.DialogResult.Yes)
                    {
                        System.Windows.Forms.MessageBox.Show("下载HTML失败" + Environment.NewLine + ex.ToString(), "下载HTML失败", System.Windows.Forms.MessageBoxButtons.OK, System.Windows.Forms.MessageBoxIcon.Error);
                        throw; // plain rethrow keeps the original stack trace
                    }
                }
                // Otherwise fall through and let the loop try again.
            }
        }
    }
}

二、c#下载网页源码及获取http状态码

c#下载网页源码 获取http状态码

复制代码 代码示例:

// Requests a page, shows its HTTP status code and character set, and saves the body to a file.
HttpWebRequest hwr = (HttpWebRequest)WebRequest.Create("http://www.dgjs123.com");
hwr.AllowAutoRedirect = false; // do not follow redirects

hwr.Timeout = 10000; // request timeout, in milliseconds
hwr.Method = "GET"; // HTTP method: GET, HEAD, POST, PUT, DELETE, TRACE or OPTIONS

try
{
    // using blocks close the response, reader and writer even if reading or writing fails.
    using (HttpWebResponse hwrs = (HttpWebResponse)hwr.GetResponse())
    {
        MessageBox.Show(((int)hwrs.StatusCode).ToString()); // HTTP status code, e.g. 200

        MessageBox.Show(hwrs.CharacterSet); // character set reported by the response

        // Read and write with the same encoding so the saved text is not corrupted.
        Encoding encoding = Encoding.GetEncoding(hwrs.CharacterSet);

        // NOTE(review): "c:b.html" is a drive-relative path (resolved against the current
        // directory of drive C:); the article may have meant c:\b.html - confirm.
        using (Stream stream = hwrs.GetResponseStream())
        using (StreamReader sr = new StreamReader(stream, encoding))
        using (StreamWriter sw = new StreamWriter("c:b.html", false, encoding))
        {
            sw.Write(sr.ReadToEnd());
        }
    }
}
catch (WebException ex)
{
    // GetResponse throws for error statuses such as 404, so the status code has to be
    // read from the response attached to the exception.
    HttpWebResponse errorResponse = ex.Response as HttpWebResponse;
    if (errorResponse != null)
    {
        MessageBox.Show(((int)errorResponse.StatusCode).ToString());
        errorResponse.Close();
    }

    MessageBox.Show(ex.ToString());
}
catch (Exception ex)
{
    MessageBox.Show(ex.ToString());
}

三、C#下载网页源码的两种方法

1、HttpWebRequest方式下载网页源码。
 

复制代码 代码示例:

/// <summary>
/// HTTP helper whose requests all share one cookie container and a fixed set of
/// browser-like request headers.
/// </summary>
static class WebFunc
{
    // Shared by every request, so cookies set by one response are sent with later requests.
    private static readonly CookieContainer cookie = new CookieContainer();
    private static readonly string contentType = "application/x-www-form-urlencoded";
    private static readonly string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
    private static readonly string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

    /// <summary>
    /// Downloads the page at <paramref name="url"/> with a GET request and returns its body
    /// decoded with <paramref name="encoding"/>.
    /// </summary>
    /// <param name="url">Address of the page.</param>
    /// <param name="encoding">Encoding used to decode the response body.</param>
    /// <returns>The decoded response body.</returns>
    public static string GetHtmlEx(string url, Encoding encoding)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.UserAgent = userAgent;
        request.ContentType = contentType;
        request.CookieContainer = cookie;
        request.Accept = accept;
        request.Method = "GET"; // HTTP method tokens are case-sensitive; the standard form is upper-case

        // using blocks release the connection and streams even when reading throws.
        using (WebResponse response = request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, encoding))
        {
            return reader.ReadToEnd();
        }
    }
}
 

2、WebClient方式下载网页源码
 

复制代码 代码示例:
// Download the raw bytes of the page, then decode them into a string.
Byte[] pageData;
using (System.Net.WebClient wc = new System.Net.WebClient()) // WebClient is IDisposable
{
    pageData = wc.DownloadData("网页地址");
}

// Decodes with the system default encoding.
string s = System.Text.Encoding.Default.GetString(pageData);
// If Chinese text comes out garbled, decode as UTF-8 instead:
// s = System.Text.Encoding.UTF8.GetString(pageData);