Python 爬取文章后存储为 Excel 以及 CSV
import random

import openpyxl
import requests
from bs4 import BeautifulSoup

DOUBAN_TOP250_URL = 'https://movie.douban.com/top250'
DOUBAN_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'
}
DOUBAN_PAGE_SIZE = 25    # movies listed per page
DOUBAN_TOTAL = 250       # the list is 250 entries long -> 10 pages
DOUBAN_TIMEOUT = 10      # seconds; without it a stalled connection hangs forever


def _parse_douban_movie(item):
    """Turn one ``<li>`` of the list into ``[rank, title, rating, quote, link]``.

    Returns ``None`` when the element lacks any of the mandatory fields
    (rank, title, rating, link), so unrelated ``<li>`` elements are skipped.
    The one-line recommendation (``span.inq``) is optional: when it is absent
    an empty string is stored instead of discarding the whole movie.
    """
    rank = item.find('em', class_="")                 # ranking number
    name = item.find('span', class_="title")          # movie title
    rating = item.find('span', class_="rating_num")   # score
    link = item.find('a')                             # detail-page anchor
    if rank is None or name is None or rating is None or link is None:
        return None
    href = link.get('href')
    if href is None:
        return None
    quote = item.find('span', class_="inq")           # recommendation, may be missing
    quote_text = quote.text if quote is not None else ''
    return [rank.text, name.text, rating.text, quote_text, href]


def save_douban_top250(path='douban.xlsx'):
    """Scrape the Douban Top 250 list and write it to an Excel workbook.

    Each page is requested with ``start`` = 0, 25, ..., 225. Every parsed
    movie is printed and appended as one row below the header row
    (序号 / 名称 / 评分 / 推荐语 / 链接). The workbook is saved to *path* once
    all pages have been processed.

    Raises:
        requests.HTTPError: if a page request returns an error status.
        RuntimeError: if a page does not contain the ``grid_view`` list.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = 'movies'
    # On a fresh sheet the first append() lands in row 1, i.e. cells A1..E1.
    sheet.append(['序号', '名称', '评分', '推荐语', '链接'])

    for start in range(0, DOUBAN_TOTAL, DOUBAN_PAGE_SIZE):
        params = {'start': str(start), 'filter': ''}
        res = requests.get(
            DOUBAN_TOP250_URL,
            params=params,
            headers=DOUBAN_HEADERS,
            timeout=DOUBAN_TIMEOUT,
        )
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        grid = soup.find(class_="grid_view")
        if grid is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise RuntimeError(
                'grid_view list not found in response for start=' + str(start)
            )
        for item in grid.find_all('li'):
            row = _parse_douban_movie(item)
            if row is None:
                continue
            num, title, comment, tes, url_movie = row
            print(f'{num}.{title}——{comment}\n推荐语:{tes}\n{url_movie}')
            sheet.append(row)

    workbook.save(path)


if __name__ == '__main__':
    save_douban_top250()
存储为 CSV:
import csv
import random

import openpyxl
import requests
from bs4 import BeautifulSoup

ZHIHU_ARTICLES_URL = "https://www.zhihu.com/api/v4/members/zhang-jia-wei/articles"
ZHIHU_HEADERS = {
    'referer': 'https://www.zhihu.com/people/zhang-jia-wei/posts/posts_by_votes?page=1',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}
ZHIHU_INCLUDE = 'data[*].comment_count,suggest_edit,is_normal,thumbnail_extra_info,thumbnail,can_comment,comment_permission,admin_closed_comment,content,voteup_count,created,updated,upvoted_followees,voting,review_info,is_labeled,label_info;data[*].author.badge[?(type=best_answerer)].topics'
ZHIHU_PAGE_SIZE = 10     # value sent as ``limit``; the offset advances by this much
ZHIHU_TIMEOUT = 10       # seconds; without it a stalled connection hangs forever


def save_zhihu_articles(path='dazhangwei.csv'):
    """Download the article list from the Zhihu API and write it to a CSV file.

    Pages are requested with an increasing ``offset`` until the response
    reports ``paging.is_end`` (or returns an empty page). One CSV row is
    written per article, in the same order as the header row:
    title (标题), excerpt (简介), url (连接).

    Raises:
        requests.HTTPError: if a page request returns an error status.
        KeyError: if the JSON payload lacks the fields read here.
    """
    # The context manager closes the file even when a request or parse fails.
    with open(path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['标题', '简介', '连接'])

        offset = 0
        while True:
            params = {
                'include': ZHIHU_INCLUDE,
                'offset': str(offset),
                'limit': str(ZHIHU_PAGE_SIZE),
                'sort_by': 'voteups',
            }
            res = requests.get(
                ZHIHU_ARTICLES_URL,
                headers=ZHIHU_HEADERS,
                params=params,
                timeout=ZHIHU_TIMEOUT,
            )
            res.raise_for_status()
            payload = res.json()
            articles = payload['data']
            for article in articles:
                # Column order must follow the header: title, excerpt, link.
                writer.writerow(
                    [article['title'], article['excerpt'], article['url']]
                )
            # Also stop on an empty page so a response that never sets
            # is_end cannot keep the loop requesting forever.
            if payload['paging']['is_end'] or not articles:
                break
            offset += ZHIHU_PAGE_SIZE


if __name__ == '__main__':
    save_zhihu_articles()