python爬虫-requests与bs4获得所有炉石传说卡背
太简单了就当做个记录,直接贴代码了
"""Scrape every Hearthstone card back (image + how-to-obtain text) from 4399.

For each card back on the listing page a directory named after the card is
created; the card image is saved there as ``pic.jpg`` and its name plus
acquisition method are appended to ``info.txt``.
"""
import os
import time

import requests
from bs4 import BeautifulSoup


def send(url=None):
    """Fetch the listing page and hand its HTML to ``parseAndSave``.

    Args:
        url: Page to fetch. Defaults to the module-level ``base_url`` so the
             existing no-argument call ``send()`` keeps working.
    """
    r = requests.get(url=url or base_url)
    # Fail loudly on HTTP errors instead of parsing an error page as HTML.
    r.raise_for_status()
    # The site serves GBK-encoded HTML; set it explicitly to avoid mojibake.
    r.encoding = "GBK"
    parseAndSave(r.text)


def parseAndSave(html):
    """Parse the listing HTML and save each card back's image and info.

    Writes into a per-card subdirectory of the current working directory.
    Uses explicit joined paths instead of ``os.chdir`` so an exception
    mid-loop can never leave the process stranded in the wrong directory.
    """
    soup = BeautifulSoup(html, 'lxml')
    root_dir = os.getcwd()
    for ul in soup.find_all('ul', attrs={'class': 'kzlist'}):
        for item in ul.find_all('li'):
            name = item.find('img').next_sibling
            obtain_method = item.find('a').find('p').text
            # BUG FIX: the original only chdir'd into the directory when it
            # had just been created, so on a re-run (directory already
            # exists) every pic.jpg/info.txt landed in the root directory.
            # makedirs(exist_ok=True) + joined paths always hit the right
            # location, first run or not.
            card_dir = os.path.join(root_dir, name)
            os.makedirs(card_dir, exist_ok=True)
            src = item.find('a').find('img')['src']
            pic = requests.get(src)
            pic.raise_for_status()
            with open(os.path.join(card_dir, 'pic.jpg'), 'wb') as fw:
                fw.write(pic.content)
            # 'a+' matches the original behavior: repeated runs append.
            with open(os.path.join(card_dir, 'info.txt'), 'a+') as fw:
                fw.write(name + '\n')
                fw.write(obtain_method)


def main():
    """Run the scrape and print the elapsed wall-clock time."""
    start_time = time.time()
    send()
    end_time = time.time()
    print('程序用时:', (end_time - start_time))


if __name__ == '__main__':
    base_url = 'http://news.4399.com/gonglue/lscs/kabei/'
    cardList = []
    main()
相关推荐
夜斗不是神 2020-11-17
染血白衣 2020-11-16
ARCXIANG 2020-11-02
ARCXIANG 2020-10-28
CycloneKid 2020-10-27
荒谬小孩 2020-10-26
逍遥友 2020-10-26
snakeson 2020-10-09
meylovezn 2020-08-28
囧芝麻 2020-08-17
数据挖掘工人 2020-08-15
cxcxrs 2020-07-28
dashoumeixi 2020-07-20
sunzhihaofuture 2020-07-19
我欲疾风前行 2020-07-06
sunzhihaofuture 2020-07-04
Dimples 2020-06-28