Python 小白学习记录:多线程爬取 ts 片段
# Multi-threaded downloader for the .ts segments of an HLS video stream.
#
# A queue is filled with every segment URL, then a pool of worker threads
# drains it, saving each segment as its own file under ./video.

import os
import re
import threading
import time
from multiprocessing import pool
from queue import Empty, Queue
from urllib import request

import requests
from lxml import etree

# "%s" is replaced by the segment number, zero-padded to at least 3 digits.
TS_URL_TEMPLATE = "https://zy.512wx.com/20171106/vM1OOVna/1200kb/hls/ppvod1983%s.ts"
TS_SEGMENT_COUNT = 1342
VIDEO_DIR = "./video"
WORKER_COUNT = 16
REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection hangs a worker forever


def download(urls):
    """Put the URL of every ts segment into the queue *urls*.

    Args:
        urls: a ``queue.Queue``-like object; each generated URL is added
            with ``urls.put``.
    """
    for index in range(TS_SEGMENT_COUNT):
        # zfill(3) pads 0..999 to three digits and leaves longer numbers
        # untouched, so no special case is needed for index >= 1000.
        url = TS_URL_TEMPLATE % str(index).zfill(3)
        print("url", url)
        urls.put(url)


class Consumer(threading.Thread):
    """Worker thread that downloads queued segment URLs into ``./video``."""

    def __init__(self, urls, *args, **kwargs):
        """Create a worker.

        Args:
            urls: queue of segment URLs shared by all workers.
            *args, **kwargs: forwarded to ``threading.Thread``.
        """
        super().__init__(*args, **kwargs)
        self.urls = urls
        # Kept only so existing attribute access keeps working. It is not
        # used: Queue does its own locking, and a per-instance lock never
        # synchronised anything between workers anyway.
        self.queueLock = threading.Lock()

    def run(self):
        """Download segments until the queue is empty."""
        while True:
            # get_nowait() makes "is there work?" and "take it" one atomic
            # step. Checking empty() and then calling a blocking get() can
            # hang forever if another worker takes the last item in between.
            try:
                url = self.urls.get_nowait()
            except Empty:
                break

            name = url.split("/")[-1]
            print("name", name)
            path = os.path.join(VIDEO_DIR, name)

            # Test for an existing file BEFORE opening anything for writing:
            # open(path, "wb") creates the file, which would make this check
            # always succeed and skip every download.
            if os.path.exists(path):
                continue

            try:
                # NOTE(review): verify=False disables TLS certificate
                # checking; kept from the original, confirm it is required.
                resp = requests.get(url, verify=False, timeout=REQUEST_TIMEOUT)
                resp.raise_for_status()
            except requests.RequestException as exc:
                # Skip this segment rather than saving an error page as video
                # or killing the worker.
                print(name + " download failed: " + str(exc))
                continue

            # Write to a temporary name and rename when done, so that an
            # interrupted run never leaves a truncated file that a later run
            # would mistake for a finished segment.
            partial = path + ".part"
            with open(partial, "wb") as fp:
                fp.write(resp.content)
            os.replace(partial, path)
            print(name + "下载完成")


def get_ts(urls):
    """Download all *urls* in order and concatenate them into one file.

    Args:
        urls: iterable of segment URLs, in playback order.

    The result is written to ``./video/kuiba.ts``.
    """
    os.makedirs(VIDEO_DIR, exist_ok=True)
    with open("./video/kuiba.ts", "wb") as fp:
        for index, url in enumerate(urls):
            resp = requests.get(url, timeout=REQUEST_TIMEOUT)
            fp.write(resp.content)
            print(str(index) + "下载完成")


def main():
    """Queue every segment URL and download them with a pool of threads."""
    os.makedirs(VIDEO_DIR, exist_ok=True)

    # Unbounded queue: download() fills it completely before any worker
    # starts, so a size limit smaller than the segment count would deadlock.
    urls = Queue()
    download(urls)

    workers = [Consumer(urls) for _ in range(WORKER_COUNT)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()


if __name__ == "__main__":
    main()
说明:上面的代码在 main() 中启动了 16 个子线程(Consumer),并发地从队列中取出 ts 片段的地址进行下载。
相关推荐
farewellpoem 2020-11-09
lhtzbj 2020-08-13
learnpy 2020-07-19
kyelu 2020-07-09
举 2020-06-14
haokele 2020-05-31
fengling 2020-05-31
maimang00 2020-05-30
坚持是一种品质 2020-05-28
laityc 2020-05-27
jling 2020-05-19
YENCSDN 2020-05-14
singer 2020-04-30
举 2020-04-29
学习备忘录 2020-04-20
CloudXli 2020-04-07
chunjiekid 2020-08-16
小菜鸟的代码世界 2020-06-17
bizercsdn 2020-06-06