【python爬虫】加密代理IP的使用与设置一套session请求头

1:请求代理IP,并将结果存入 Redis:

# Request the proxy-IP endpoint and refresh the proxy list stored in redis.
def proxy_redis():
    """Fetch the proxy list from the provider and store it in redis.

    The response body is written unmodified to the ``proxy_list`` key,
    overwriting any previous value. Returns ``None``.
    """
    sr = redis.Redis(connection_pool=Pool)
    proxys_text = requests.get("你请求代理IP的地址").text
    # Overwrite the cached list. The key must use plain ASCII quotes:
    # typographic quotes here are a SyntaxError.
    sr.set("proxy_list", proxys_text)


# Check whether the proxies work: True if usable, False otherwise.
def check_proxy(proxy_list, timeout=10):
    """Return True only if every proxy in *proxy_list* can fetch the probe URL.

    Args:
        proxy_list: iterable of ``host:port`` strings to probe.
        timeout: seconds to wait for each probe request before treating
            the proxy as dead.

    Returns:
        ``True`` when all probes complete without raising; ``False`` when
        the pool is empty/``None`` or any probe raises.
    """
    print("检测的ip池是", proxy_list)
    # An empty pool has nothing usable in it; do not report it as healthy.
    if not proxy_list:
        return False
    try:
        for proxy in proxy_list:
            # Without a timeout a dead proxy would block this call forever.
            requests.get(
                "https://www.baidu.com/",
                proxies={"https": "http://账号:密码@{}".format(proxy)},
                timeout=timeout,
            )
    except Exception as exc:
        # Any failure marks the whole pool as unusable; report the reason
        # instead of swallowing it silently.
        print("IP不可用:", exc)
        return False
    print("IP可用")
    return True

# Return a validated proxy pool.
def get_proxy_list():
    """Return the proxy list cached in redis once it passes ``check_proxy``.

    Loops until a usable list is found: whenever the ``proxy_list`` key is
    missing or the cached proxies fail the check, the list is refreshed via
    ``proxy_redis()`` and read again.

    Returns:
        list[str]: the non-empty proxy entries, one per line of the cached
        value.
    """
    sr = redis.Redis(connection_pool=Pool)
    while True:
        ip_list = sr.get("proxy_list")  # raw value cached in redis
        if ip_list is None:
            # Nothing cached yet: populate redis, then re-read. Falling
            # through would call .split on None.
            proxy_redis()
            continue
        if isinstance(ip_list, bytes):
            # redis-py returns bytes unless decode_responses=True is set.
            ip_list = ip_list.decode("utf-8")
        # One proxy per line; drop blank entries (e.g. from a trailing
        # newline), which could never pass the check.
        proxy_list = [
            line.strip() for line in ip_list.splitlines() if line.strip()
        ]
        if check_proxy(proxy_list):
            print("请求代理池:", proxy_list)
            return proxy_list
        proxy_redis()
        print("redis池更新成功")

2:创建 N 个 session,并为每个 session 设置一套请求头:

    # Session 1: build a requests.Session that sends http and https traffic
    # through `proxy`, then store it in IUSC.
    # NOTE(review): `proxy`, `data`, `head` and `IUSC` are defined outside
    # this fragment; presumably IUSC is a list of prepared sessions. Confirm.
        session = requests.Session()  # create one session
        session.proxies = {
            "http": "http://账号:密码@{}".format(proxy),
            "https": "http://账号:密码@{}".format(proxy),
        }
        headers = head()
        session.headers.update(headers)  # apply the custom request headers
        session.post("https://www.amazon.com/gp/delivery/ajax/address-change.html", data)      # per the original note: sets cookies
        # `ret` (the page text) is not used anywhere in the visible code.
        ret = session.get("https://www.amazon.com/dp/B0047CJZLM", headers=headers).text
        IUSC.append(session)
    
        # end of session 1

        # Session 2: same setup as session 1, without the product-page GET.
        session = requests.Session()  # create one session
        # NOTE(review): unlike session 1, these proxy URLs have no
        # `password@` part before the host. Confirm the credential format.
        session.proxies = {
            "http": "http://sellerbdata:{}".format(proxy),
            "https": "http://sellerbdata:{}".format(proxy),
        }
        headers = head()
        session.headers.update(headers)
        session.post("https://www.amazon.com/gp/delivery/ajax/address-change.html", data,)  # per the original note: sets the zip code
        IUSC.append(session)
        # end of session 2

相关推荐