|
代码如下:
import re
import urllib.request
def craw(url,page):
htm11=urllib.request.urlopen(url).read()
htm11=str(htm11)
pat1='<div id="J_goodList".+?<div class="page clearfix">'
result1=re.compile(pat1).findall(htm11)
result1=result1[0]
pat2='<img width="220" height="220" class="err-product" data-img="1" src="//(.+?\.jpg)">'
imagelist=re.compile(pat2).findall(result1)
x=1
for imageurl in imagelist:
imagename="D:/Urllib库/图片爬虫集/"+str(page)+str(x)+".jpg"
imageurl="http://"+imageurl
try:
urllib.request.urlretrive(imageurl,filename=imagename)
except urllib.error.URLError as e:
if hasattr(e,"code"):
x+=1
if hasattr(e,"reason"):
x+=1
x+=1
for i in range(1,100):
url="https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page="+str(i)
craw(url,i)
错误如下:
Traceback (most recent call last):
File "<pyshell#71>", line 3, in <module>
craw(url,i)
File "<pyshell#65>", line 7, in craw
imagelist=re.compile(plat2).findall(result1)
TypeError: expected string or bytes-like object
新手刚上手,不知怎么解决,请求大神帮助
|
|