# Python: automatically submit site URLs to Baidu through the Baidu link-submission (push) API

# -*- coding: utf-8 -*-
import json
import os
from time import sleep
from urllib import parse
import schedule
import time
import requests
import re


class Pusher(object):
    """Collect URLs from XML sitemaps and submit them to search-engine push APIs.

    Typical use is ``getSitemapIndex()`` to list the child sitemaps, then
    ``run()`` on each of them.  Every ``post*`` method keeps a plain-text
    record file (one URL per line) in the current working directory so that a
    URL is not submitted twice.

    The endpoints are read from module-level names: ``push_baidu``,
    ``push_shenma`` and ``push_sogou`` must be defined before the matching
    ``post*`` method is called.
    """

    # Seconds to wait for any single HTTP request; without a timeout
    # `requests` can block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Captures the text of every <loc> element; re.S lets a value span lines.
    _LOC_PATTERN = re.compile('<loc>(.*?)</loc>', re.S)

    def __init__(self):
        print('init_')
        # URLs found by the most recent getSitemap() call.  Initialised here
        # so the post* methods are safe to call before any sitemap is loaded.
        self.urls = []

    def _fetchLocs(self, sitemapUrl):
        """Download `sitemapUrl` and return ``(response, [<loc> values])``."""
        result = requests.get(sitemapUrl, timeout=self.REQUEST_TIMEOUT)
        return result, self._LOC_PATTERN.findall(result.content.decode('utf-8'))

    @staticmethod
    def _loadPushed(record):
        """Return the set of URLs listed in the open record file `record`.

        The file is opened in ``a+`` mode by the callers, which positions the
        cursor at the end, so rewind before reading.  Blank lines are ignored.
        """
        record.seek(0, 0)
        content = record.read()
        print("content:" + content)
        return {line.strip() for line in content.splitlines() if line.strip()}

    @staticmethod
    def _parseReply(body):
        """Parse `body` as a JSON object; return ``{}`` if it is not one."""
        try:
            reply = json.loads(body)
        except ValueError:
            return {}
        return reply if isinstance(reply, dict) else {}

    def getSitemapIndex(self, sitemapIndexUrl):
        """Return the list of sitemap URLs named in a sitemap index document.

        :param sitemapIndexUrl: URL of the sitemap index XML.
        :return: list of strings, one per ``<loc>`` element.
        :raises requests.RequestException: if the download fails or times out.
        """
        print('getSitemapIndex:' + sitemapIndexUrl)

        result, sitemapUrls = self._fetchLocs(sitemapIndexUrl)
        print(result)
        print(sitemapUrls)
        return sitemapUrls

    def getSitemap(self, sitemapUrl):
        """Download one sitemap and store its ``<loc>`` values in ``self.urls``.

        :param sitemapUrl: URL of the sitemap XML.
        :raises requests.RequestException: if the download fails or times out.
        """
        print("getSitemap:" + sitemapUrl)
        _, self.urls = self._fetchLocs(sitemapUrl)
        print(self.urls)

    def postBaidu(self):
        """Submit each not-yet-pushed URL in ``self.urls`` to ``push_baidu``.

        One URL is sent per request.  A URL is appended to ``push_baidu.txt``
        when the reply reports ``"success": 1``; the loop stops as soon as the
        reply reports ``"remain": 0``.
        """
        print("postBaidu:=================================")
        # No Content-Length here: a fixed value is wrong for every URL of a
        # different size, and `requests` computes the real one from the body.
        headers = {
            "User-Agent": "curl/7.12.1",
            "Host": "data.zz.baidu.com",
            "Content-Type": "text/plain",
        }

        with open('push_baidu.txt', 'a+') as record:
            # Exact line matching: a substring test would wrongly treat
            # ".../p1" as pushed once ".../p10" is in the file.
            pushed = self._loadPushed(record)

            for url in self.urls:
                if url in pushed:
                    print("已经推送过:" + url)
                    continue
                # Deliberately best-effort: one failing URL must not abort
                # the rest of the batch, so the error is reported and skipped.
                try:
                    result = requests.post(push_baidu, data=url.encode('utf-8'),
                                           headers=headers,
                                           timeout=self.REQUEST_TIMEOUT)
                    body = result.content.decode('utf-8')
                    print(url + body)
                    reply = self._parseReply(body)
                    if reply.get('success') == 1:
                        record.write(url + '\n')
                        record.flush()
                        pushed.add(url)  # also skips duplicates in self.urls

                    if reply.get('remain') == 0:
                        break
                except Exception as e:
                    print(e)
                sleep(1)  # throttle between consecutive requests

    def postShenma(self):
        """Submit all not-yet-pushed URLs in ``self.urls`` to ``push_shenma``.

        The URLs are sent in a single newline-separated request.  They are
        appended to ``push_shenma.txt`` only when the HTTP status is 200 and
        the JSON reply carries ``returnCode`` 200.  Nothing is sent when there
        is no new URL.
        """
        print("postShenma:=================================")
        headers = {
            "User-Agent": "curl/7.12.1",
            "Host": "data.zhanzhang.sm.cn",
            "Content-Type": "text/plain",
        }

        with open('push_shenma.txt', 'a+') as record:
            pushed = self._loadPushed(record)

            pending = []
            queued = set()
            for url in self.urls:
                if url in pushed:
                    print("已经推送过:" + url)
                elif url not in queued:
                    queued.add(url)
                    pending.append(url)
            if not pending:
                return  # nothing new; do not post an empty body

            data = ''.join(url + '\n' for url in pending)
            try:
                result = requests.post(push_shenma, data=data.encode('utf-8'),
                                       headers=headers,
                                       timeout=self.REQUEST_TIMEOUT)
                body = result.content.decode('utf-8')
                print("urls:" + data)
                print("status_code:" + str(result.status_code))
                print("content:" + body)

                if result.status_code == 200:
                    reply = self._parseReply(body)
                    print("returnCode " + str(reply.get('returnCode')))
                    if str(reply.get('returnCode')) == "200":
                        # `data` already ends with a newline.
                        record.write(data)
                        record.flush()
            except Exception as e:
                print(e)
                sleep(1)

    def postSougou(self):
        """Submit each not-yet-pushed URL in ``self.urls`` to ``push_sogou``.

        One form POST is sent per URL.  ``push_sogou.txt`` is only read here:
        no success criterion for this endpoint is visible in the code, so
        nothing is appended to it.
        TODO(review): decide what marks a successful submission and record it.
        """
        print("postSougou:=================================")
        headers = {
            "User-Agent": "Opera/9.80 (Windows NT 6.2; Win64; x64) Presto/2.12.388 Version/12.15",
            "Host": "sogou.com",
            "Content-Type": "application/x-www-form-urlencoded"
        }

        with open('push_sogou.txt', 'a+') as record:
            pushed = self._loadPushed(record)

        for url in self.urls:
            if url in pushed:
                print("已经推送过:" + url)
                continue
            # `requests` cannot form-encode a nested dict: it iterates the
            # inner dict and sends only its key names, so the URL itself was
            # never transmitted.  Flatten to bracketed field names instead.
            # NOTE(review): the bracket naming is assumed from the nested
            # structure of the original payload - confirm against the form.
            form = {
                "source": "1",
                "site_type": "1",
                "Shoulu[webAdr]": url,
                "Shoulu[email]": "[email protected]",
                "Shoulu[reason]": "网站收录不正常,恳请收录!",
            }
            try:
                result = requests.post(push_sogou, data=form, headers=headers,
                                       timeout=self.REQUEST_TIMEOUT)
                print(url + result.content.decode('utf-8'))
            except Exception as e:
                print(e)
            sleep(1)  # throttle between consecutive requests

    def get360Token(self, url, key):
        """Interleave the first 16 characters of `key` with the reversed `url`.

        Position ``i`` of the result is ``key[i]`` followed by the ``i``-th
        character of `url` read backwards; once the URL runs out only the key
        character is emitted.

        :param url: site URL (string).
        :param key: key string with at least 16 characters.
        :return: the interleaved token string.
        :raises IndexError: if `key` has fewer than 16 characters.
        """
        reversedUrl = url[::-1]
        pieces = []
        for pos in range(16):
            # Indexing past the end raises instead of yielding None, so the
            # length has to be checked explicitly.
            extra = reversedUrl[pos] if pos < len(reversedUrl) else ''
            pieces.append(key[pos] + extra)
        return ''.join(pieces)

    def run(self, sitemapUrl):
        """Load one sitemap and push its URLs to Baidu."""
        self.getSitemap(sitemapUrl)
        self.postBaidu()

# Legacy placeholders; not referenced anywhere in the visible code.
urlSitemap = ''  # Your sitemap url, like 'http://blog.kxrr.us/index.php/sitemap'
urlPost = ''  # Your Baidu API, like 'http://data.zz.baidu.com/urls?site=blog.kxrr.us&token=xxxxxxxxxxxx'

# Baidu push endpoint read by Pusher.postBaidu(); the token value goes after `token=`.
push_baidu = 'http://data.zz.baidu.com/urls?site=meishih.com&token='

# Endpoints read by Pusher.postShenma() and Pusher.postSougou().  They were
# referenced but never defined, so calling either method hit a NameError.
# Fill in the real push URLs before using those methods.
push_shenma = ''
push_sogou = ''


# The function that is meant to be run periodically.
def job():
    """Push every sitemap listed in the site's sitemap index, one after another."""
    print("I'm working...")
    worker = Pusher()
    index_url = "http://meishih.com/sitemap_index.xml"
    for child_sitemap in worker.getSitemapIndex(index_url):
        worker.run(child_sitemap)


# Register job() to run every day at 17:32.
# NOTE: registering alone does not run anything; the job only fires while
# something calls schedule.run_pending(), and that loop is commented out below.
schedule.every().day.at("17:32").do(job)

if __name__ == '__main__':
    # Periodic mode (disabled): poll the scheduler every 10 seconds.
    # while True:
    #     schedule.run_pending()  # run every task that is currently due
    #     time.sleep(10)
    # Current behavior: run a single pass immediately and exit.
    job()
    # Debugging leftovers (postBingQuota is not defined in the visible class):
    # pusher = Pusher()
    # pusher.postBingQuota()
    # pusher.getSitemapIndex('meishih.com/sitempa_index.xml')
    # print(pusher.get360Token("http://meishih.com/", "d182b3f28525f2db83acfaaf6e696db"))

# Related recommendations