用 Python 获取百度搜索结果链接
前言
近期有许多项目需要这个功能,由于用 Python 实现起来比较简单,就这么做了。
代码贴在下面,觉得好就点个赞吧~
代码
# coding: utf-8
"""Collect the real target URLs behind Baidu search result links.

The script asks for a keyword, a numeric suffix range and a page range,
builds one Baidu search URL per (page, suffix) pair, fetches them in a
thread pool, follows each result's redirect one hop to learn the real
address, and writes the collected addresses to a text file.
"""
import os
import sys
import time
import urllib.parse
from multiprocessing.pool import ThreadPool
from urllib.parse import urlparse

# NOTE: requests, bs4 and fake_useragent are imported inside get_page() so
# that the pure helpers in this module can be imported (and tested) without
# the scraping dependencies being installed.

# Maps the network location (host) of a resolved result to its full URL.
# Filled by get_page() from worker threads; a later hit on the same host
# overwrites the earlier one.
LOCATIONS = {}
# Upper bound on the number of worker threads.
GLOBAL_THREAD = 500
# Per-request timeout in seconds, passed to every requests.get() call.
GLOBAL_TIMEOUT = 50


def _parse_range(spec):
    """Turn an ``"A-B"`` string into the inclusive ``range(A, B + 1)``.

    Raises:
        IndexError: if *spec* contains no ``-`` separator.
        ValueError: if either bound is not an integer.
    """
    parts = spec.split("-")
    return range(int(parts[0]), int(parts[1]) + 1)


def get_links(keyword, generator, pages):
    """Build the list of Baidu search URLs to fetch.

    Args:
        keyword: Search keyword; each suffix number is appended to it.
        generator: Inclusive suffix range as ``"A-B"`` (e.g. ``"1-10"``).
        pages: Inclusive result-page range as ``"A-B"`` (e.g. ``"0-10"``).

    Returns:
        A list of URLs, ordered by page and then by suffix. The ``pn``
        query parameter is set to ``page * 10``.

    Raises:
        IndexError, ValueError: if a range string is malformed.
    """
    page_range = _parse_range(pages)
    suffix_range = _parse_range(generator)
    return [
        "http://www.baidu.com.cn/s?wd="
        + urllib.parse.quote(keyword + str(suffix))
        + "&pn=" + str(page * 10)
        for page in page_range
        for suffix in suffix_range
    ]


def get_page(url):
    """Fetch one search page and record the redirect target of each result.

    Every anchor matching ``div.result > h3.t > a`` is requested without
    following redirects. If the response carries a ``location`` header that
    contains ``=``, it is stored in ``LOCATIONS`` under its host name.
    Network failures are reported on stderr and skipped, so one bad request
    does not abort the whole run.

    Args:
        url: A search URL as produced by get_links().
    """
    import requests
    from bs4 import BeautifulSoup
    from fake_useragent import UserAgent

    headers = {"user-agent": UserAgent().chrome}
    try:
        page = requests.get(url, headers=headers, timeout=GLOBAL_TIMEOUT)
    except requests.RequestException as exc:
        print("Skip page %s: %s" % (url, exc), file=sys.stderr)
        return
    page.encoding = "utf-8"
    soup = BeautifulSoup(page.text, "lxml")
    for anchor in soup.select("div.result > h3.t > a"):
        href = anchor.get("href")
        if not href:
            continue
        try:
            # Do not follow the redirect: only its target address is needed.
            hop = requests.get(href, headers=headers, allow_redirects=False,
                               timeout=GLOBAL_TIMEOUT)
        except requests.RequestException as exc:
            print("Skip link %s: %s" % (href, exc), file=sys.stderr)
            continue
        # A result that answers directly has no location header at all.
        location = hop.headers.get("location")
        if location and "=" in location:
            LOCATIONS[urlparse(location).netloc] = location


def baidu_search():
    """Run the interactive search: prompt, crawl, save and print a summary.

    Exits with status 0 after printing ``Input Error!`` when the prompts
    cannot be read or the ranges are malformed. Errors while writing the
    result file are not caught here.
    """
    os.system("cls" if os.name == "nt" else "clear")
    print("-" * 56 + "\n")
    print("| BaiduSearch Engine By 美图博客[https://www.meitubk.com/] |\n")
    print("-" * 56 + "\n")
    try:
        keyword = input("Keyword: ")
        generator = input("Generator(1-10): ")
        pages = input("Pages(0-10): ")
        links = get_links(keyword, generator, pages)
    except (ValueError, IndexError, EOFError):
        print("\nInput Error!")
        sys.exit(0)

    start = time.time()
    if links:
        # Never start more threads than there are pages to fetch.
        pool = ThreadPool(processes=min(GLOBAL_THREAD, len(links)))
        try:
            pool.map(get_page, links)
        finally:
            pool.close()
            pool.join()
    end = time.time()

    path = r"D:\Desktop\result.txt"
    save_result(path)
    print("\nSava in %s" % path)
    print("Result count: %d" % len(LOCATIONS))
    print("Running time: %ds" % (end - start))


def save_result(path):
    """Write every collected URL in ``LOCATIONS`` to *path*, one per line.

    The file is overwritten and encoded as UTF-8.

    Raises:
        OSError: if *path* cannot be opened for writing.
    """
    with open(path, "w", encoding="utf-8") as file:
        file.writelines(url + "\n" for url in LOCATIONS.values())


if __name__ == "__main__":
    baidu_search()
使用
相关推荐
rojyang 2020-05-20
ELEMENTS爱乐小超 2020-05-07
baynkbtg 2019-11-02
ksjlhy 2019-10-28
geminihr 2015-03-19
chang00 2015-06-17
LinSeeker 2019-09-07
nextwhy 2012-09-09
tongjianru 2011-05-18
Jarvan 2016-09-25
fadacai0 2016-04-18
deusrwvf 2017-06-21
AIOps智能运维 2015-05-25
magvwiz 2013-10-18
AIOps智能运维 2010-08-17
linqiang 2010-02-04
XSxing 2019-06-30
XSxing 2019-06-30