Asp.Net/asp检查百度搜索引擎是否收录网址

  使用asp.net或者asp检查某个url地址(例如某篇文章的网址)是否已被百度、谷歌、搜狗等搜索引擎收录。

  实现原理:直接在搜索引擎中搜索那篇文章的url地址(可以不带协议,带上协议也行,代码会自动去掉协议部分),如果该地址已被索引,搜索引擎会返回搜索结果,否则会提示找不到相关信息。

 

  Asp.Net检查百度,谷歌,搜狗搜索引擎是否收录文章网址源代码

using System;
using System.Net;
using System.Text;
using System.IO;
using System.Web;
/// <summary>
/// Checks whether a URL has been indexed by a search engine. The URL is submitted as a
/// search query and the result page is scanned for the engine's "no results" phrase.
/// </summary>
public class SearchEngineIndex
{
    /// <summary>
    /// Search URL prefixes, indexed by engine id (0: Baidu, 1: Google, 2: Sogou).
    /// The URL-encoded query is appended directly to the prefix.
    /// </summary>
    public static string[] urls =
    {
        "http://www.baidu.com/s?ie=utf-8&wd=",      // Baidu
        "https://www.google.com.hk/search?q=",      // Google
        "http://www.sogou.com/web?ie=utf8&query="   // Sogou
    };

    /// <summary>
    /// Phrases that appear on a result page when the engine has NOT indexed the queried URL.
    /// Indexed by the same engine id as <see cref="urls"/>.
    /// </summary>
    public static string[] noFindKeyword = { "抱歉,没有找到与", "找不到和您的查询", "未收录?" };

    /// <summary>
    /// Picks the text encoding to decode a response with, based on its Content-Type header.
    /// </summary>
    /// <param name="contenttype">Value of the Content-Type header; may be null or empty.</param>
    /// <returns>
    /// Code page 936 for gb2312/gbk, code page 950 for big5, otherwise UTF-8.
    /// </returns>
    private static Encoding GetEncoding(string contenttype)
    {
        if (!string.IsNullOrEmpty(contenttype))
        {
            // Ordinal, case-insensitive matching: charset names are protocol tokens,
            // so the current culture must not influence the comparison.
            if (contenttype.IndexOf("gb2312", StringComparison.OrdinalIgnoreCase) != -1
                || contenttype.IndexOf("gbk", StringComparison.OrdinalIgnoreCase) != -1)
            {
                return Encoding.GetEncoding(936);
            }
            if (contenttype.IndexOf("big5", StringComparison.OrdinalIgnoreCase) != -1)
            {
                return Encoding.GetEncoding(950);
            }
        }
        return Encoding.UTF8;
    }

    /// <summary>
    /// Downloads a page with <see cref="HttpWebRequest"/> and decodes it using the charset
    /// announced in the response's Content-Type header.
    /// </summary>
    /// <param name="url">Absolute URL to fetch.</param>
    /// <param name="addUseragent">
    /// When true, a crawler-like User-Agent is sent so the request is less likely to be blocked.
    /// </param>
    /// <returns>The response body as text, or null when the page could not be fetched or read.</returns>
    public static string GetHtml(string url, bool addUseragent)
    {
        HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
        if (addUseragent) request.UserAgent = "Googlebot|Feedfetcher-Google|Baiduspider";
        try
        {
            // using blocks guarantee the connection is released even if reading fails.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, GetEncoding(response.ContentType)))
            {
                return reader.ReadToEnd();
            }
        }
        catch (InvalidOperationException)
        {
            // Covers WebException (network / HTTP error status) and ProtocolViolationException.
            return null;
        }
        catch (IOException)
        {
            // The connection dropped while the body was being read.
            return null;
        }
        catch (NotSupportedException)
        {
            // The requested code page is not available on this runtime.
            return null;
        }
    }

    /// <summary>
    /// Removes a leading "http://" or "https://" from an already lower-cased URL.
    /// Only the prefix is removed, so URLs embedded in the query string stay intact.
    /// </summary>
    private static string StripProtocol(string url)
    {
        if (url.StartsWith("http://", StringComparison.Ordinal)) return url.Substring("http://".Length);
        if (url.StartsWith("https://", StringComparison.Ordinal)) return url.Substring("https://".Length);
        return url;
    }

    /// <summary>
    /// Checks whether a URL is indexed by the chosen search engine.
    /// </summary>
    /// <param name="url">URL to look up, with or without the http(s):// prefix.</param>
    /// <param name="engin">
    /// 0: Baidu, 1: Google, 2: Sogou. Any other value falls back to 0 (Baidu).
    /// </param>
    /// <returns>
    /// True when a result page was fetched and it does not contain the engine's
    /// "no results" phrase; false for an empty URL, a failed request, or a "no results" page.
    /// </returns>
    public static bool CheckIndex(string url, int engin)
    {
        if (string.IsNullOrEmpty(url)) return false;
        if (engin < 0 || engin > 2) engin = 0;

        // Invariant lower-casing keeps the query identical regardless of the server's culture.
        string query = StripProtocol(url.ToLowerInvariant());
        if (query.Length == 0) return false; // the input was nothing but a protocol prefix

        string html = GetHtml(urls[engin] + HttpUtility.UrlEncode(query), true);
        if (html == null) return false;

        // Ordinal search: the marker is a fixed phrase, not culture-dependent text.
        return html.IndexOf(noFindKeyword[engin], StringComparison.Ordinal) == -1;
    }
}



// Usage example: the second argument of CheckIndex selects the search engine.

        const int Baidu = 0, Google = 1, Sogou = 2;
        SearchEngineIndex.CheckIndex("www.w3dev.cn/article/20101014/2902.aspx", Baidu);  // look the URL up in Baidu
        SearchEngineIndex.CheckIndex("www.w3dev.cn/article/20101014/2902.aspx", Google); // look the URL up in Google
        SearchEngineIndex.CheckIndex("www.w3dev.cn/article/20101014/2902.aspx", Sogou);  // look the URL up in Sogou

  Asp检查百度,谷歌,搜狗搜索引擎是否收录文章网址源代码

<%
'Checks whether a URL has been indexed by a search engine: the URL is submitted as a
'search query and the result page is scanned for the engine's "no results" phrase.
class SearchEnginIndex
  'urls: search URL prefixes; noFindKeyword: "no results" phrases.
  'Both are indexed by engine id (0: Baidu, 1: Google, 2: Sogou).
  dim urls,noFindKeyword
  private sub Class_Initialize
    'Search URL prefixes for Baidu, Google and Sogou; the encoded query is appended to them
    urls=array("http://www.baidu.com/s?ie=utf-8&wd=","https://www.google.com.hk/search?q=","http://www.sogou.com/web?ie=utf8&query=")
    'Phrases shown on the result page when the engine has not indexed the URL
    NoFindKeyword=array("抱歉,没有找到与", "找不到和您的查询", "未收录?")
  End sub
  'Maps a Content-Type header value to the charset name used to decode the body.
  'Returns "gb2312" for gb2312/gbk, "big5" for big5, otherwise "utf-8".
  private function GetEncoding(ByVal contenttype)
    contenttype=lcase(contenttype & "")
    'Either charset name selects the GB decoder (the two names never appear together)
    if instr(contenttype,"gb2312")<>0 or instr(contenttype,"gbk")<>0 then
      GetEncoding="gb2312"
    elseif instr(contenttype,"big5")<>0 then
      GetEncoding="big5"
    else
      GetEncoding="utf-8"
    end if
  end function
  'Converts a binary response body to a string using the given charset name.
  private function BinToString(bin,encoding)
    dim obj
    set obj=Server.CreateObject("Adodb.Stream")
    'Write the bytes in binary mode, then rewind and re-read them as text
    obj.Type=1:obj.Mode=3:obj.Open
    obj.Write bin
    obj.Position=0:obj.Type=2:obj.Charset=encoding
    BinToString=obj.ReadText
    obj.Close:set obj=nothing
  end function
  'Downloads url synchronously and returns the body decoded with the charset announced
  'in the Content-Type header. Returns "" when the request or the decoding fails.
  'Side effect kept from the original: Response.CharSet is set to the detected charset.
  public function GetHtml(ByVal url)
    dim xhr,encoding
    GetHtml=""
    'A network failure must yield an empty result instead of aborting the whole page
    on error resume next
    set xhr=server.CreateObject("microsoft.xmlhttp")
    xhr.open "get",url,false
    xhr.send
    if err.number=0 then
      encoding=GetEncoding(xhr.getResponseHeader("content-type"))
      response.CharSet=encoding
      GetHtml=BinToString(xhr.responsebody,encoding)
    end if
    if err.number<>0 then
      GetHtml=""
      err.clear
    end if
    set xhr=nothing
    on error goto 0
  end function
  'Removes a leading http:// or https:// (any letter case); the rest of the URL is untouched.
  private function StripProtocol(ByVal url)
    if lcase(left(url,7))="http://" then
      url=mid(url,8)
    elseif lcase(left(url,8))="https://" then
      url=mid(url,9)
    end if
    StripProtocol=url
  end function
  'Checks whether url is indexed by the chosen engine.
  '  url   - URL to look up, with or without the http(s):// prefix
  '  engin - 0: Baidu, 1: Google, 2: Sogou; any other value falls back to 0 (Baidu),
  '          matching the ASP.NET version of this class
  'Returns True when a result page was fetched and it lacks the engine's "no results"
  'phrase; False for an empty URL, a failed request, or a "no results" page.
  'Arguments are ByVal so the caller's variables are never modified.
  public function CheckIndex(ByVal url,ByVal engin)
    CheckIndex=false
    url=StripProtocol(url & "")
    if len(url)=0 then exit function
    if engin<0 or engin>2 then engin=0
    dim html
    html=GetHtml(urls(engin)&server.URLEncode(url))
    'No page at all means the lookup failed; do not report that as "indexed"
    if len(html)=0 then exit function
    CheckIndex=(instr(html,NoFindKeyword(engin))=0)
  End function
end Class
'Usage example: the second argument of CheckIndex selects the search engine
const ENGINE_BAIDU=0
const ENGINE_GOOGLE=1
const ENGINE_SOGOU=2
set sei=new SearchEnginIndex
'Write the result of each lookup to the page, one engine after another
response.Write sei.CheckIndex("www.w3dev.cn/article/20101014/2902.aspx",ENGINE_BAIDU)
response.Write sei.CheckIndex("www.w3dev.cn/article/20101014/2902.aspx",ENGINE_GOOGLE)
response.Write sei.CheckIndex("www.w3dev.cn/article/20101014/2902.aspx",ENGINE_SOGOU)
set sei=nothing
 %>

 

加支付宝好友偷能量挖...


原创文章,转载请注明出处:Asp.Net/asp检查百度搜索引擎是否收录网址

评论(0)Web开发网
阅读(260)喜欢(0)Asp.Net/C#/WCF