using System; using System.Net; using System.Text; using System.Text.RegularExpressions;
class Program { // 获取网页的HTML内容,根据网页的charset自动判断Encoding static string GetHtml(string url) { return GetHtml(url, null); }
// 获取网页的HTML内容,指定Encoding static string GetHtml(string url, Encoding encoding) { byte[] buf = new WebClient().DownloadData(url); if (encoding != null) return encoding.GetString(buf); string html = Encoding.UTF8.GetString(buf); encoding = GetEncoding(html); if (encoding == null || encoding == Encoding.UTF8) return html; return encoding.GetString(buf); }
// 根据网页的HTML内容提取网页的Encoding static Encoding GetEncoding(string html) { string pattern = @"(?i)bcharset=(?<charset>[-a-zA-Z_0-9]+)"; string charset = Regex.Match(html, pattern).Groups["charset"].Value; try { return Encoding.GetEncoding(charset); } catch (ArgumentException) { return null; } }
// 程序入口 static void Main() { Console.WriteLine(GetHtml(//www.jb51.net));
Console.Read(); } }
(编辑:焦作站长网)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!
|