Python3.X下的爬虫实现
# coding:utf-8
"""Minimal Python 3 scraper: download a page and list the .jpg image URLs in it."""
import re
import urllib.request
from typing import List, Union

# Captures the quoted ``src`` value when it ends in ``.jpg`` and is immediately
# followed by a ``width`` attribute.  ``[^"]`` keeps the capture inside a single
# attribute value; a bare ``.+?`` could run past the closing quote of a non-jpg
# ``src`` and swallow markup up to the next ``.jpg" width``.
# Compiled once at import time instead of on every call.
_IMG_SRC_RE = re.compile(r'src="([^"]+\.jpg)" width')


def get_html(url: str) -> bytes:
    """Download *url* and return the raw (undecoded) response body.

    Args:
        url: Address to fetch; passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises for the given URL
        (for example ``urllib.error.URLError``); nothing is caught here.
    """
    # The ``with`` block closes the response even if ``read()`` fails.
    with urllib.request.urlopen(url) as page:
        return page.read()


def find_img_list(html_str: Union[bytes, str]) -> List[str]:
    """Return every ``.jpg`` image URL found in *html_str*.

    Only ``src="....jpg"`` attributes that are directly followed by a
    ``width`` attribute are matched.

    Args:
        html_str: Page content, either raw bytes (as returned by
            ``get_html``) or already-decoded text.

    Returns:
        The matched URLs in document order; an empty list if none match.
    """
    if isinstance(html_str, (bytes, bytearray)):
        # Python 3: the regex is a text pattern, so bytes must be decoded.
        # Undecodable bytes are replaced rather than raising, so a stray
        # non-UTF-8 byte in the page does not abort the whole extraction.
        html_str = html_str.decode('utf-8', errors='replace')
    return _IMG_SRC_RE.findall(html_str)


if __name__ == '__main__':
    # Guarded so that importing this module does not trigger a network request.
    imgList = find_img_list(get_html('http://tieba.baidu.com/p/1753935195'))
    for img in imgList:
        print(img)
由于不同版本的 Python 在部分代码写法上有所差异，故将代码修改为 Python 3.X 版本，留作备忘。