Python爬虫实践 —— 3.利用爬虫提取返回值,模拟有道词典接口
有道词典的 web 接口可以用爬虫来模拟:输入关键词 key,将其拼接为有道词典接口所需的 Form data,再爬取返回值(即 Ajax 动态生成的 translation)。这样从外部来看就实现了对翻译接口的模拟,相当于爬虫模拟浏览器调用了有道词典的 web 接口。其实直接调用有道 web 接口、传入 JSON 参数就可以了,不必这么费事;但爬虫模拟了人登录 web、输入关键词、获得翻译结果的完整过程。
浏览器输入操作,解析有道词典翻译的web接口url和格式


# Simulate a browser call to the Youdao Dictionary web translation endpoint with a crawler
import json
import re
from urllib import parse
from urllib import request
class YoudaoTranslator:
    """Translate a keyword by posting it to the Youdao web translation endpoint.

    The instance stores the text to translate; ``Translator()`` sends the
    request, prints the first translated segment and returns all of them.
    """

    # Seconds to wait for the HTTP response so a stalled connection cannot
    # block the caller forever.
    _TIMEOUT = 10

    def __init__(self, key):
        """Store the text to translate.

        Args:
            key: Word or phrase sent as the ``i`` form field.
        """
        self.key = key

    def __getData(self):
        """Build the request body.

        Returns:
            bytes: The form fields, URL-encoded and then UTF-8 encoded.
        """
        # NOTE(review): salt/sign/ts/bv are fixed literals, presumably copied
        # from a captured browser request -- confirm the endpoint still
        # accepts them.
        formdata = {
            "i": self.key,
            "from": "AUTO",
            "to": "AUTO",
            "smartresult": "dict",
            "client": "fanyideskweb",
            "salt": "15763837022114",
            "sign": "2b12fd214e066f53bc3455a126d7a509",
            "ts": "1576383702211",
            "bv": "5575008ba9785f184b106838a72d6536",
            "doctype": "json",
            "version": "2.1",
            "keyfrom": "fanyi.web",
            "action": "FY_BY_REALTlME",
        }
        return parse.urlencode(formdata).encode(encoding="utf-8")

    def __getPage(self):
        """POST the form data and return the response body as text.

        Returns:
            str: The UTF-8 decoded response body.

        Raises:
            urllib.error.URLError: If the request fails or times out.
        """
        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"
        }
        url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"
        req = request.Request(url, data=self.__getData(), headers=header)
        # The context manager closes the connection even if read() raises.
        with request.urlopen(req, timeout=self._TIMEOUT) as res:
            return res.read().decode("utf-8")

    @staticmethod
    def _extract_translations(text):
        """Collect every ``tgt`` string from a JSON response body.

        Parsing the JSON (instead of regex-matching the raw text) keeps
        multi-segment results separate and decodes escape sequences.

        Args:
            text: Raw response body.

        Returns:
            list: The ``tgt`` values found under ``translateResult``, in
            document order; empty if the body is not JSON or has none.
        """
        try:
            payload = json.loads(text)
        except ValueError:
            # Not JSON (e.g. an HTML error page): nothing to extract.
            return []
        groups = payload.get("translateResult") if isinstance(payload, dict) else None
        translations = []
        for group in groups or []:
            for segment in group or []:
                tgt = segment.get("tgt") if isinstance(segment, dict) else None
                if tgt is not None:
                    translations.append(tgt)
        return translations

    def __Pat(self):
        """Fetch the response, print the first translation and return all.

        Returns:
            list: Translated segments; empty when none were returned.
        """
        result = self._extract_translations(self.__getPage())
        # Guard the index: an empty result previously raised IndexError.
        if result:
            print(result[0])
        return result

    def Translator(self):
        """Run the translation for ``self.key``.

        Returns:
            list: Translated segments (the first one is also printed).
        """
        return self.__Pat()
# Script entry point: translate a sample phrase and print the result.
if __name__ == '__main__':
    i = YoudaoTranslator("人格心理学")
    i.Translator()
# In the article, the output of running the script follows here.

相关推荐
Airuio 2020-02-13
AmbiRF 2019-11-19
SUDevops 2019-11-03
wordmhg 2019-10-19
sharkandshark 2019-05-07
ibatsiSpring 2013-06-01
冬冬阳光 2010-07-13
libinhai0 2019-06-30
齐北的小村 2013-06-11
luoj 2013-06-11
Dataleon 2013-01-14
jiahaowanhao 2015-12-10
努力练功不自宫 2014-07-02
bufsin 2019-04-23
zupzng 2019-03-15