// Page load handler: opens "index.html" under the application's physical root
// (append = false, so an existing file is overwritten) using the gb2312 encoding,
// and writes the text returned by getHtml into it. The using block disposes the
// writer, which flushes and closes the file.
// NOTE(review): the getHtml(...) call below is truncated in this copy of the source:
// its string argument (presumably the URL to fetch), any second argument and the
// closing of the call are missing, so the method does not compile as shown.
// Restore the call from the original before relying on this handler.
protected void Page_Load(object sender, EventArgs e)
{
using(StreamWriter sw = new StreamWriter(Request.PhysicalApplicationPath+"index.html",false,System.Text.Encoding.GetEncoding("gb2312")))
{
sw.Write(getHtml(" }
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the encoding used to decode the page. When null or empty, the encoding is
/// taken from the first charset declaration found in a meta tag of the page; if none is
/// found, or the declared name is not a known encoding, the system default encoding is used.
/// </param>
/// <returns>The downloaded page as a string.</returns>
/// <exception cref="ArgumentException">
/// A non-empty <paramref name="charSet"/> was supplied by the caller and is not a valid encoding name.
/// </exception>
private string getHtml(string url, string charSet)
{
    byte[] myDataBuffer;

    // WebClient is IDisposable; release it as soon as the download has finished.
    using (WebClient myWebClient = new WebClient())
    {
        // Some pages cannot be fetched without extra request setup (cookies, etc.).
        // Handle those case by case, e.g. myWebClient.Headers.Add("Cookie", cookie).
        // If the server requires a user name and password, assign a NetworkCredential
        // built from them instead of the default credentials used here.
        myWebClient.Credentials = CredentialCache.DefaultCredentials;

        // Download the resource as raw bytes so it can be decoded with the right encoding.
        myDataBuffer = myWebClient.DownloadData(url);
    }

    // First pass: decode with the system default encoding. This is the fallback result
    // and is also the text in which the charset declaration is searched.
    string strWebData = Encoding.Default.GetString(myDataBuffer);

    bool charSetWasDetected = string.IsNullOrEmpty(charSet);
    if (charSetWasDetected)
    {
        // Capture only the encoding name: stop at a quote, whitespace, ';', '/' or '>' so that
        // both the http-equiv/content form and the short meta charset form yield a clean name,
        // regardless of quoting style or of attributes that follow the declaration.
        Match charSetMatch = Regex.Match(
            strWebData,
            "<meta[^>]*?charset\\s*=\\s*[\"']?([^\"'\\s/>;]+)",
            RegexOptions.IgnoreCase);

        if (!charSetMatch.Success)
        {
            return strWebData;
        }

        charSet = charSetMatch.Groups[1].Value;
    }

    Encoding targetEncoding;
    try
    {
        targetEncoding = Encoding.GetEncoding(charSet);
    }
    catch (ArgumentException)
    {
        // A name passed in by the caller is a caller error and keeps throwing as before;
        // a bad name sniffed from the page must not fail the whole download.
        if (!charSetWasDetected)
        {
            throw;
        }

        return strWebData;
    }

    // Re-decode only when the target differs from the encoding already applied.
    if (!targetEncoding.Equals(Encoding.Default))
    {
        strWebData = targetEncoding.GetString(myDataBuffer);
    }

    return strWebData;
}
posted on 2010-08-22 15:03
sanmao 閱讀(87)
評論(0) 編輯 收藏