[Python crawler] Using encrypted proxy IPs and setting up a pool of sessions with custom request headers
1: Request proxy IPs and store them in Redis:
import redis
import requests

# Redis connection pool; adjust host/port/db to your environment
Pool = redis.ConnectionPool(host="127.0.0.1", port=6379, db=0, decode_responses=True)

# Request a batch of proxy IPs from the provider and refresh them in Redis
def proxy_redis():
    sr = redis.Redis(connection_pool=Pool)
    proxys_text = requests.get("URL of your proxy provider").text
    # Update the proxy list stored in Redis
    sr.set("proxy_list", proxys_text)

# Check whether the proxies are usable: return True if usable, False otherwise
def check_proxy(proxy_list):
    print("Checking proxy pool:", proxy_list)
    try:
        for proxy in proxy_list:
            requests.get(
                "https://www.baidu.com/",
                proxies={"https": "http://user:password@{}".format(proxy)},
                timeout=10,
            )
            print("Proxy is usable")
        return True
    except Exception:
        return False

# Return a validated proxy pool
def get_proxy_list():
    while True:
        sr = redis.Redis(connection_pool=Pool)
        ip_list = sr.get("proxy_list")  # read the raw list from Redis
        if ip_list is None:
            proxy_redis()
            continue
        proxy_list = ip_list.split("\r\n")  # format the raw text into a list
        if check_proxy(proxy_list):  # verify the proxies still work
            print("Proxy pool:", proxy_list)
            return proxy_list
        else:
            proxy_redis()
            print("Redis proxy pool refreshed")
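For reference, here is a minimal sketch of how the pool returned by get_proxy_list() might be consumed when fetching a page. Picking one proxy at random per request, the fetch_with_proxy() helper name, and the user:password placeholders are my own assumptions, not part of the original post.

import random
import requests

def fetch_with_proxy(url):
    # Pull the current pool from Redis (refreshing it if needed)
    proxy_list = get_proxy_list()
    # Assumption: rotate by choosing one proxy at random for each request
    proxy = random.choice(proxy_list)
    proxies = {
        "http": "http://user:password@{}".format(proxy),
        "https": "http://user:password@{}".format(proxy),
    }
    return requests.get(url, proxies=proxies, timeout=10).text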
2: Set up N sets of session request headers:
# Session 1
session = requests.Session()  # set up one session
session.proxies = {
    "http": "http://user:password@{}".format(proxy),
    "https": "http://user:password@{}".format(proxy),
}
headers = head()
session.headers.update(headers)  # apply custom request headers
# Set cookies by posting a delivery-address change
session.post("https://www.amazon.com/gp/delivery/ajax/address-change.html", data)
ret = session.get("https://www.amazon.com/dp/B0047CJZLM", headers=headers).text
IUSC.append(session)
# End of session 1

# Session 2
session = requests.Session()  # set up another session
session.proxies = {
    "http": "http://sellerbdata:{}".format(proxy),
    "https": "http://sellerbdata:{}".format(proxy),
}
headers = head()
session.headers.update(headers)
# Set the zip code for this session
session.post("https://www.amazon.com/gp/delivery/ajax/address-change.html", data)
IUSC.append(session)
# End of session 2
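The snippet above calls a head() helper and appends sessions to an IUSC list, neither of which is defined in the post. The sketch below shows one plausible shape for them, assuming head() returns randomized browser-like headers and the session pool is walked round-robin; the USER_AGENTS list and the crawl() function are my own illustrations, not the author's code. In practice IUSC would be initialized before the session setup above.

import random
from itertools import cycle

import requests

# Assumption: a small pool of User-Agent strings to randomize headers
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15",
]

def head():
    # Return one randomized set of request headers
    return {
        "User-Agent": random.choice(USER_AGENTS),
        "Accept-Language": "en-US,en;q=0.9",
    }

IUSC = []  # pool of pre-configured sessions

def crawl(urls):
    # Rotate through the prepared sessions, one request per session in turn
    sessions = cycle(IUSC)
    for url in urls:
        session = next(sessions)
        yield session.get(url).text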