C#源代码-查询alexa全球,中国排名,google pr,google收录和百度收录及快照

2012-7-16更新
1)更新百度html代码获取的编码,从gb2312变更为utf-8

2012-4-24更新
1)修改Alexa API正则
2)修改百度收录查询代码,去掉url中的协议部分,否则百度无法处理"site:"后带协议的url地址查询

2011-12-7更新:
1)更新了google pagerank 查询地址
2)更新google收录正则表达式
3)更新alexa国内排名正则表达式

  发一个C#源代码,可以查询alexa全球,中国排名,google pr,google收录和百度收录及快照,方便友链时查询这个网站是否被google或者百度惩罚过。

示例效果:查看alexa排名,google page rank,google收录,百度收录和百度快照
源代码下载:ASP.NET百度权重,alexa排名,google page rank, google收录,百度收录和百度快照查询源代码

源代码如下

using System;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace BLL
{
    /// <summary>
    /// Helpers for building URIs and downloading the HTML content of a URL.
    /// Every download method is best-effort: any failure yields <c>null</c> instead of an exception.
    /// </summary>
    public class Net
    {
        /// <summary>User-Agent header sent when a caller asks to identify as a crawler.</summary>
        private const string CrawlerUserAgent = "Googlebot|Feedfetcher-Google|Baiduspider";

        /// <summary>
        /// Builds an absolute <see cref="Uri"/> from a user-supplied address.
        /// </summary>
        /// <param name="u">
        /// Host name or URL. Surrounding '/' and space characters are trimmed, the text is lower-cased,
        /// and "http://" is prepended when it does not already start with "http://" or "https://".
        /// </param>
        /// <returns>The parsed URI, or <c>null</c> when the input is null/empty or cannot be parsed.</returns>
        public static Uri CreatUri(string u)
        {
            if (string.IsNullOrEmpty(u)) return null;

            // Invariant lower-casing: the culture-sensitive ToLower() rewrites 'I' as a dotless i
            // under cultures such as tr-TR, which would corrupt the host name.
            u = u.Trim('/', ' ').ToLowerInvariant();
            if (!u.StartsWith("http://", StringComparison.Ordinal)
                && !u.StartsWith("https://", StringComparison.Ordinal))
            {
                u = "http://" + u;
            }

            Uri parsed;
            return Uri.TryCreate(u, UriKind.Absolute, out parsed) ? parsed : null;
        }

        /// <summary>
        /// Downloads a page with <see cref="WebClient"/> using its default encoding.
        /// </summary>
        /// <param name="url">Address to download.</param>
        /// <returns>The response body, or <c>null</c> when the download fails for any reason.</returns>
        public static string GetHtml(string url)
        {
            try
            {
                using (WebClient client = new WebClient())
                {
                    return client.DownloadString(url);
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort: callers treat null as "could not download".
                return null;
            }
        }

        /// <summary>
        /// Downloads a page with <see cref="HttpWebRequest"/> and decodes it with the given encoding.
        /// </summary>
        /// <param name="url">Address to download.</param>
        /// <param name="encoding">
        /// Encoding used to decode the body. When <c>null</c>, the encoding is derived from the
        /// response Content-Type instead.
        /// </param>
        /// <returns>The response body, or <c>null</c> when the download fails for any reason.</returns>
        public static string GetHtml(string url, Encoding encoding)
        {
            return Fetch(url, null, encoding);
        }

        /// <summary>
        /// Maps a Content-Type header value to a text encoding.
        /// </summary>
        /// <param name="contenttype">Raw Content-Type header value; may be null or empty.</param>
        /// <returns>
        /// Code page 936 when the value mentions "gb2312" or "gbk", code page 950 when it mentions
        /// "big5", otherwise UTF-8.
        /// </returns>
        private static Encoding GetEncoding(string contenttype)
        {
            if (!string.IsNullOrEmpty(contenttype))
            {
                contenttype = contenttype.ToLowerInvariant();
                if (contenttype.Contains("gb2312") || contenttype.Contains("gbk")) return Encoding.GetEncoding(936);
                if (contenttype.Contains("big5")) return Encoding.GetEncoding(950);
            }
            return Encoding.UTF8;
        }

        /// <summary>
        /// Downloads a page with <see cref="HttpWebRequest"/>, choosing the encoding from the
        /// response Content-Type.
        /// </summary>
        /// <param name="url">Address to download.</param>
        /// <param name="addUseragent">
        /// When <c>true</c>, the crawler User-Agent header is sent; when <c>false</c>, no User-Agent is set.
        /// </param>
        /// <returns>The response body, or <c>null</c> when the download fails for any reason.</returns>
        public static string GetHtml(string url, bool addUseragent)
        {
            return Fetch(url, addUseragent ? CrawlerUserAgent : null, null);
        }

        /// <summary>
        /// Shared implementation of the <see cref="HttpWebRequest"/>-based downloads.
        /// </summary>
        /// <param name="url">Address to download.</param>
        /// <param name="userAgent">User-Agent header to send, or <c>null</c> to leave it unset.</param>
        /// <param name="encoding">Fixed body encoding, or <c>null</c> to derive it from the Content-Type.</param>
        /// <returns>The response body, or <c>null</c> on any failure.</returns>
        private static string Fetch(string url, string userAgent, Encoding encoding)
        {
            try
            {
                // Request creation is inside the try so that a malformed or null URL yields null,
                // matching the WebClient-based overload.
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                if (userAgent != null) request.UserAgent = userAgent;

                // using blocks release the connection even when reading the body throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (StreamReader reader = new StreamReader(
                    response.GetResponseStream(), encoding ?? GetEncoding(response.ContentType)))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort: callers treat null as "could not download".
                return null;
            }
        }
    }
    /// <summary>
    /// Queries the Google toolbar endpoint for the PageRank of a URL.
    /// </summary>
    public class GooglePR
    {
        /// <summary>
        /// Performs one PageRank lookup for the given address.
        /// </summary>
        /// <param name="strUrl">Host, host plus path, or full URL to look up.</param>
        /// <returns>
        /// The third ':'-separated field of the response when the trimmed response splits into exactly
        /// three fields; otherwise "0" (including when the download returns null).
        /// </returns>
        private static string tryMore(string strUrl)
        {
            string sURL = "http://toolbarqueries.google.com/tbr?client=navclient-auto&features=Rank&ch=" + GetPR(strUrl) + "&q=info:" + System.Web.HttpUtility.UrlEncode(strUrl);
            string result = Net.GetHtml(sURL);
            if (result != null)
            {
                result = result.Trim();
                if (result.IndexOf(':') > 0)
                {
                    string[] pr = result.Split(':');
                    if (pr.Length == 3) return pr[2].ToString();
                }
            }
            return "0";
        }

        /// <summary>
        /// Removes a leading "www." label from a host name; other hosts are returned unchanged.
        /// </summary>
        private static string StripLeadingWww(string host)
        {
            // Only the leading label is removed. A plain Replace("www.", "") would also eat
            // occurrences inside later labels (e.g. "www.awww.com" would become "acom").
            return host.StartsWith("www.", StringComparison.Ordinal) ? host.Substring(4) : host;
        }

        /// <summary>
        /// Looks up the PageRank of a site or page, retrying with alternative spellings of the
        /// address while the result is "0".
        /// </summary>
        /// <param name="strUrl">Host name or URL; normalised through <c>Net.CreatUri</c>.</param>
        /// <returns>The PageRank as text, or "0" when the input is empty/unparseable or no lookup succeeds.</returns>
        public static string GetPageRank(string strUrl)
        {
            if (string.IsNullOrEmpty(strUrl)) return "0";

            Uri u = Net.CreatUri(strUrl);
            if (u == null) return "0";

            string host = u.Host, path = u.PathAndQuery;
            string pr;

            if (path != "/")
            {
                // A specific path was requested: try it without, then with, the scheme.
                pr = tryMore(host + path);
                if (pr == "0") pr = tryMore("http://" + host + path);
                return pr;
            }

            // Root of a domain. Hosts that are neither a two-label domain nor a "www" host
            // are only tried as-is (without and with the scheme).
            string[] arr = host.Split('.');
            bool twoLabels = arr.Length == 2;
            if (arr[0] != "www" && !twoLabels)
            {
                pr = tryMore(host);
                if (pr == "0") pr = tryMore("http://" + host);
                return pr;
            }

            // For a two-label domain or a "www" host, a "0" result triggers a retry with the
            // counterpart host (www added or removed), then both again with "http://".
            string counterpart = twoLabels ? "www." + host : StripLeadingWww(host);
            pr = tryMore(host);
            if (pr == "0") pr = tryMore(counterpart);
            if (pr == "0") pr = tryMore("http://" + host);
            if (pr == "0") pr = tryMore("http://" + counterpart);
            return pr;
        }

        /// <summary>
        /// Builds the value of the "ch" query parameter: the character '6' followed by the
        /// checksum of "info:" + <paramref name="url"/>.
        /// </summary>
        private static string GetPR(string url)
        {
            url = "info:" + url;
            string ch = GoogleCH(str_asc(url)).ToString();
            ch = "6" + ch;
            return ch;
        }

        /// <summary>
        /// Converts a string to an array of its character codes.
        /// </summary>
        /// <returns>One int per character, or <c>null</c> for a null or empty string.</returns>
        private static int[] str_asc(string str)
        {
            // The only caller always prefixes "info:", so the null branch is not reached from GetPR.
            if (str == null || str == string.Empty) return null;
            int[] result = new int[str.Length];
            for (int i = 0; i < str.Length; i++) result[i] = (int)str[i];
            return result;
        }

        /// <summary>
        /// Right-shifts <paramref name="a"/> by <paramref name="b"/> bits, with special handling when
        /// bit 31 is set: the value is first shifted by one, bit 31 is cleared and bit 30 set, and the
        /// remaining b - 1 bits are then shifted as a 32-bit int.
        /// NOTE(review): this looks like an emulation of a 32-bit zero-fill (logical) right shift - confirm.
        /// </summary>
        private static long yiweitwo(long a, long b)
        {
            long z = 0x80000000;
            if ((z & a) != 0)
            {
                a = (a >> 1);
                a &= (~z);
                a |= 0x40000000;
                a = ((int)a >> (int)(b - 1));
            }
            else
            {
                a = ((int)a >> (int)b);
            }
            return a;
        }

        /// <summary>
        /// Mixes three state words through nine subtract/xor/shift steps and returns them
        /// truncated to 32-bit ints.
        /// NOTE(review): the shift sequence (13, 8, 13, 12, 16, 5, 3, 10, 15) resembles the mix step
        /// of Bob Jenkins' lookup2 hash - confirm. Statement order is significant.
        /// </summary>
        /// <returns>A three-element array { a, b, c } after mixing.</returns>
        private static int[] yiwei(long a, long b, long c)
        {
            a -= b; a -= c; a ^= (yiweitwo(c, 13));
            b -= c; b -= a; b ^= (a << 8);
            c -= a; c -= b; c ^= (yiweitwo(b, 13));
            a -= b; a -= c; a ^= (yiweitwo(c, 12));
            b -= c; b -= a; b ^= (a << 16);
            c -= a; c -= b; c ^= (yiweitwo(b, 5));
            a -= b; a -= c; a ^= (yiweitwo(c, 3));
            b -= c; b -= a; b ^= (a << 10);
            c -= a; c -= b; c ^= (yiweitwo(b, 15));
            return new int[] { (int)a, (int)b, (int)c };
        }

        /// <summary>
        /// Computes the checksum of a sequence of character codes.
        /// </summary>
        /// <param name="url">Character codes of the text to hash; must not be null.</param>
        /// <returns>The third state word after the final mixing round.</returns>
        private static int GoogleCH(int[] url)
        {
            int length = url.Length;
            long a = 0x9E3779B9;
            long b = 0x9E3779B9;
            long c = 0xE6359A60;
            int k = 0;
            int len = length;
            int[] mid;

            // Consume the input twelve codes at a time: four packed into each state word.
            while (len >= 12)
            {
                a += (url[k + 0] + (url[k + 1] << 8) + (url[k + 2] << 16) + (url[k + 3] << 24));
                b += (url[k + 4] + (url[k + 5] << 8) + (url[k + 6] << 16) + (url[k + 7] << 24));
                c += (url[k + 8] + (url[k + 9] << 8) + (url[k + 10] << 16) + (url[k + 11] << 24));
                mid = yiwei(a, b, c);
                a = mid[0]; b = mid[1]; c = mid[2];
                k += 12;
                len -= 12;
            }

            c += length;

            // Fold in the remaining 0..11 codes. Codes 0-3 go to a and 4-7 to b at shifts
            // 0/8/16/24; codes 8-10 go to c at shifts 8/16/24. Each term is added on its own,
            // so the order of the additions does not affect the result.
            for (int i = 0; i < len; i++)
            {
                int code = url[k + i];
                if (i < 4) a += code << (8 * i);
                else if (i < 8) b += code << (8 * (i - 4));
                else c += code << (8 * (i - 7));
            }

            mid = yiwei(a, b, c);
            return mid[2];
        }
    }
    /// <summary>
    /// Retrieves Alexa traffic ranks.
    /// </summary>
    public class Alexa
    {
        // Patterns are built once instead of on every call.
        private static readonly Regex PopularityPattern = new Regex(
            "<POPULARITY URL=\"[^\"]+\" TEXT=\"(\\d+)\"",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        private static readonly Regex GlobalRankPattern = new Regex(
            "<th>3\\s*month</th>\\s*<td class=\"avg\\s*\">\\s*([\\d,]+)\\s*</td>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        private static readonly Regex ChinaRankPattern = new Regex(
            "class=\"data\">\\s*<img[^>]+>\\s*([\\d,]+)\\s*</div>",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Fetches the global rank through the Alexa data API (global rank only).
        /// </summary>
        /// <param name="u">Site address to look up; it is escaped before being placed in the query string.</param>
        /// <returns>The rank digits, or "NaN" when the download fails or no rank is present in the response.</returns>
        public static string API(string u)
        {
            try
            {
                // Escape the address so characters such as '&' or '#' cannot break the query string.
                string xmlString = BLL.Net.GetHtml("http://data.alexa.com/data/+wQ411en8000lA?cli=10&dat=snba&ver=7.0&cdt=alx_vw=20&"
                    + "wid=12206&act=00000000000&ss=1680x1050&bw=964&t=0&ttl=35371&vis=1&rq=4&url=" + Uri.EscapeDataString(u ?? string.Empty));

                // A failed download is reported as null; handle it explicitly rather than
                // letting the regex throw on a null input.
                if (xmlString == null) return "NaN";

                Match m = PopularityPattern.Match(xmlString);
                return m.Success ? m.Groups[1].Value : "NaN";
            }
            catch (Exception)
            {
                // Best-effort contract: this method never throws.
                return "NaN";
            }
        }

        /// <summary>
        /// Downloads the Alexa site-info page and extracts the global and China ranks.
        /// </summary>
        /// <param name="u">Site address appended to the site-info URL.</param>
        /// <returns>
        /// A JSON-like string <c>{rank:'...',cnrank:'...'}</c>. A rank that is not found on the page is
        /// emitted as an empty string; when the page cannot be downloaded both ranks are 'NaN'.
        /// </returns>
        public static string Latest(string u)
        {
            const string failed = "{rank:'NaN',cnrank:'NaN'}";
            try
            {
                string htmlString = BLL.Net.GetHtml("http://www.alexa.com/siteinfo/" + u);
                if (htmlString == null) return failed;

                string globalRank = GlobalRankPattern.Match(htmlString).Groups[1].Value;
                string chinaRank = ChinaRankPattern.Match(htmlString).Groups[1].Value;
                return "{rank:'" + globalRank + "',cnrank:'" + chinaRank + "'}";
            }
            catch (Exception)
            {
                // Best-effort contract: this method never throws.
                return failed;
            }
        }
    }
    /// <summary>
    /// Retrieves the number of pages indexed by Google or Baidu; for Baidu the snapshot date is
    /// returned as well.
    /// </summary>
    public class GoogleBaiduIndex
    {
        /// <summary>
        /// Runs a "site:" query for the host of <paramref name="u"/> and extracts the result count.
        /// </summary>
        /// <param name="u">Host name or URL of the site to check.</param>
        /// <param name="isGG"><c>true</c> to query Google (google.com.hk), <c>false</c> to query Baidu.</param>
        /// <param name="isJson">
        /// <c>true</c> to return a JSON-like string; <c>false</c> to return the raw count
        /// (for Baidu: count and snapshot date separated by '|').
        /// </param>
        /// <returns>
        /// The formatted result; an empty string when <paramref name="u"/> cannot be turned into a URI;
        /// "{success:false}" when the search page cannot be downloaded (regardless of <paramref name="isJson"/>).
        /// </returns>
        public static string IndexGoogleBaidu(string u, bool isGG, bool isJson)
        {
            Uri site = BLL.Net.CreatUri(u);
            if (site == null) return "";

            string searchBase;
            Encoding pageEncoding;
            string countPattern;
            if (isGG)
            {
                searchBase = "http://www.google.com.hk/search?q=";
                pageEncoding = Encoding.GetEncoding(950);
                countPattern = @"約有\s*([\d,]+)\s*項結果";
            }
            else
            {
                searchBase = "http://www.baidu.com/s?wd=";
                pageEncoding = Encoding.UTF8;
                countPattern = @"找到相关结果数([\d,]+)个";
            }

            string page = BLL.Net.GetHtml(searchBase + "site%3A" + site.Host, pageEncoding);
            if (page == null) return "{success:false}";

            // The result pages state the count in a sentence such as "about 4,210 results".
            string count = new Regex(countPattern, RegexOptions.Compiled).Match(page).Groups[1].Value;
            if (isGG)
            {
                if (isJson) return "{gg:'" + count + "'}";
                return count;
            }

            // Baidu only: the snapshot date appears as "/ yyyy-m-d" on the result page.
            string snapshot = Regex.Match(page, @"/\s*(\d{4}(-\d{1,2}){2})", RegexOptions.Compiled).Groups[1].Value;
            if (isJson) return "{bd:'" + count + "',bdTime:'" + snapshot + "'}";
            return count + "|" + snapshot;
        }
    }
    /// <summary>
    /// Retrieves the Baidu weight of a site from aizhan.com.
    /// </summary>
    public class BaiduWeight
    {
        /// <summary>
        /// Looks up the Baidu weight for the host of <paramref name="u"/>.
        /// </summary>
        /// <param name="u">Host name or URL of the site to check.</param>
        /// <returns>
        /// The single character (a digit or 'n') captured from the response, or "n" when the input is
        /// empty or unparseable, the download fails, or nothing is captured.
        /// </returns>
        public static string Weight(string u)
        {
            const string unknown = "n";

            if (string.IsNullOrEmpty(u)) return unknown;

            Uri site = Net.CreatUri(u);
            if (site == null) return unknown;

            string page = Net.GetHtml("http://www.aizhan.com/getbr.php?url=" + site.Host + "&style=1");
            if (page == null) return unknown;

            Match hit = Regex.Match(page, @">([n\d])</a>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
            string weight = hit.Groups[1].Value;
            return weight == "" ? unknown : weight;
        }
    }
}

 


原创文章,转载请注明出处:C#源代码-查询alexa全球,中国排名,google pr,google收录和百度收录及快照
评论(20)Web开发网
阅读(69)喜欢(0)不喜欢(0)Asp.Net/C#/WCF