scrapy抓取校花网图片
一:基础版(抓取首页图片)
爬虫py文件代码:
# -*- coding: utf-8 -*-
import scrapy
import sys
import io
from scrapy.selector import Selector
from scrapy.http import Request
from ..items import Day96XiaohuaItem
import re

# Force UTF-8 stdout so image titles print correctly on consoles whose
# default encoding differs (e.g. GBK on Chinese-locale Windows).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')


class XiaohuaSpider(scrapy.Spider):
    """Spider that scrapes image titles and URLs from the xueshengmai front page."""

    name = 'xiaohua'
    # BUG FIX: allowed_domains must hold bare domain names only -- the
    # original included the "/hua/" path, which makes OffsiteMiddleware
    # filter out every request.
    allowed_domains = ['www.xueshengmai.com']
    start_urls = ['http://www.xueshengmai.com/hua/']

    def parse(self, response):
        """Extract (title, src) for each image and yield a Day96XiaohuaItem."""
        # Raw "<img ...>" tag strings; attributes are pulled out with regex below.
        img_tags = Selector(response=response).xpath(
            "//div[@class='item_t']/div[@class='img']/a/img").extract()
        for tag in img_tags:
            # NOTE(review): regex-parsing the serialized tag is fragile --
            # selecting @alt/@src directly in the XPath would be more robust.
            title = re.findall("alt=(.*) src=", tag)[0].strip('"') + ".jpg"
            src = "http://www.xueshengmai.com%s" % re.findall(
                "src=(.*)>", tag)[0].strip('"')
            print(title, src)
            item_obj = Day96XiaohuaItem(title=title, src=src)
            # BUG FIX: in the original transcript "yield item_obj" was fused
            # with the following prose heading; each populated item must be
            # yielded so it reaches the item pipeline.
            yield item_obj

# items.py 代码 (items.py code):
import scrapy


class Day96XiaohuaItem(scrapy.Item):
    """Item carrying one scraped image: its target file name and absolute URL."""

    title = scrapy.Field()  # file name the pipeline saves to, e.g. "name.jpg"
    src = scrapy.Field()    # absolute URL of the image

# pipelines 代码 (pipelines.py code):
import os

import requests


class Day96XiaohuaPipeline(object):
    """Download each item's image URL into the local imgs/ directory."""

    def process_item(self, item, spider):
        # Robustness: the original crashed if imgs/ did not already exist.
        os.makedirs("imgs", exist_ok=True)
        file_path = "imgs/%s" % item["title"]
        # A timeout keeps a dead/slow server from hanging the crawl forever.
        response = requests.get(item["src"], timeout=30)
        # BUG FIX: use a context manager so the file handle is closed even
        # when the write raises (the original leaked the handle on error).
        with open(file_path, "wb") as f:
            f.write(response.content)
        # BUG FIX: process_item must return the item so later pipelines and
        # Scrapy's bookkeeping receive it; the original returned None.
        return item

# 二:分页抓取校花网图片 (Part 2: paginated scraping)
相关推荐
andrewwf 2020-11-11
Arvinzx 2020-10-28
CycloneKid 2020-10-27
paleyellow 2020-10-25
baifanwudi 2020-10-25
heyboz 2020-10-21
wumxiaozhu 2020-10-16
ZHANGRENXIANG00 2020-07-27
zhangll00 2020-07-05
javaraylu 2020-06-28
ZHANGRENXIANG00 2020-06-28
Catastrophe 2020-06-26
Catastrophe 2020-06-26
fangjack 2020-06-25
andrewwf 2020-06-16
qyf 2020-06-14
荒乱的没日没夜 2020-06-14