python小白学习记录 多线程爬取ts片段
from lxml import etree
import requests
from urllib import request
import time
import os
from queue import Queue
import threading
import re
from multiprocessing import pool
from urllib import request
def download(urls, count=1342,
             url_template="https://zy.512wx.com/20171106/vM1OOVna/1200kb/hls/ppvod1983%s.ts"):
    """Put the URL of every .ts segment onto the ``urls`` queue.

    Segment numbers run from 0 to ``count - 1``. Each number is left-padded
    with zeros to at least three digits ("000", "001", ..., "999", "1000",
    ...) and substituted into ``url_template``.

    Args:
        urls: Queue-like object exposing ``put()``; receives one URL string
            per segment, in ascending segment order.
        count: Number of segments to enqueue. Defaults to 1342.
        url_template: ``%``-style template with a single ``%s`` placeholder
            for the zero-padded segment number.
    """
    for index in range(count):
        # zfill() only pads and never truncates, so one call covers both
        # the three-digit and the four-digit segment numbers.
        url = url_template % str(index).zfill(3)
        print("url", url)
        urls.put(url)
class Consumer(threading.Thread):
    """Worker thread that downloads queued segment URLs into ``./video/``.

    Each worker repeatedly takes one URL from the shared queue, derives the
    file name from the last path component of the URL, and saves the
    response body under ``./video/<name>``. Segments whose file already
    exists are skipped. The thread ends once the queue is empty.
    """

    def __init__(self, urls, *args, **kwargs):
        """Store the shared URL queue; other arguments go to ``Thread``."""
        super(Consumer, self).__init__(*args, **kwargs)
        self.urls = urls
        # Kept so the attribute remains available to existing callers.
        # run() does not need it: the queue does its own locking, and a
        # lock created per thread cannot synchronise different threads.
        self.queueLock = threading.Lock()

    def run(self):
        """Drain the queue, downloading every segment not yet on disk."""
        # Local import: the file's import block only brings in Queue.
        from queue import Empty

        while True:
            # A separate empty() check followed by a blocking get() can
            # hang: another worker may take the last item in between.
            # get_nowait() makes "take one or stop" a single atomic step.
            try:
                url = self.urls.get_nowait()
            except Empty:
                break

            name = url.split('/')[-1]
            print("name", name)
            path = "./video/{}".format(name)

            # Test for an existing file BEFORE opening it: opening with
            # "wb" creates/truncates the file, which would make this check
            # always succeed and destroy segments already downloaded.
            if os.access(path, os.F_OK):
                continue

            os.makedirs("./video", exist_ok=True)
            # NOTE(review): verify=False disables TLS certificate checks;
            # kept as in the original, confirm it is really required.
            resp = requests.get(url, verify=False)
            with open(path, "wb") as fp:
                fp.write(resp.content)
            print(name + "下载完成")
def get_ts(urls):
    """Download all ``urls`` in order into the single file ./video/kuiba.ts.

    The response body of each URL is appended to the output file in the
    order the URLs are given, and a progress line carrying the zero-based
    position is printed after each one.

    Args:
        urls: Sequence of URL strings to fetch one after another.
    """
    with open("./video/kuiba.ts", "wb") as merged:
        for position, segment_url in enumerate(urls):
            response = requests.get(segment_url)
            merged.write(response.content)
            print("{}下载完成".format(position))
def main():
    """Queue up every segment URL, then start the download workers.

    Creates a queue bounded at 2000 entries, fills it via ``download``,
    and starts 16 ``Consumer`` threads that all read from that queue.
    """
    pending = Queue(2000)
    download(pending)

    worker_count = 16
    for _ in range(worker_count):
        worker = Consumer(pending)
        worker.start()
if __name__ == '__main__':
    # Entry point when run as a script; main() starts 16 worker threads.
    main()
相关推荐
lhtzbj 2020-08-13
learnpy 2020-07-19
kyelu 2020-07-09
举 2020-06-14
haokele 2020-05-31
fengling 2020-05-31
maimang00 2020-05-30
坚持是一种品质 2020-05-28
laityc 2020-05-27
jling 2020-05-19
YENCSDN 2020-05-14
singer 2020-04-30
举 2020-04-29
学习备忘录 2020-04-20
CloudXli 2020-04-07
chunjiekid 2020-08-16
小菜鸟的代码世界 2020-06-17
bizercsdn 2020-06-06