|
代码如下,请大神赐教!
爬取的图片全部是93字节,而且打不开。
#!/usr/bin/python
# -*- coding:utf-8 -*-
import os
import urllib
import urllib2
import urlparse

from bs4 import BeautifulSoup
x = 0  # number of images saved so far; keeps file names unique across pages

# Browser-like User-Agent sent with every request (page and images alike).
_USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
               '(KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36')
_IMAGE_DIR = 'image'


def _fetch(url, referer=None):
    """Return the raw response body of *url*, requested with browser-like headers."""
    headers = {'User-Agent': _USER_AGENT}
    if referer:
        headers['Referer'] = referer
    response = urllib2.urlopen(urllib2.Request(url, headers=headers), timeout=10)
    try:
        return response.read()
    finally:
        response.close()


def crawl(url):
    """Download every <img> referenced by the page at *url* into ``image/``.

    Files are named ``<n>.jpg`` where ``n`` is the module-level counter ``x``,
    which is incremented after each successful download.

    The images are fetched through urllib2 with the same User-Agent as the
    page request plus a Referer header, instead of ``urllib.urlretrieve``,
    which sends neither.
    NOTE(review): the 93-byte unreadable files this script used to produce
    are presumably the image host's rejection body for header-less requests;
    confirm against the live site.

    Args:
        url: address of the HTML page to scan for images.

    Raises:
        urllib2.URLError: if the page itself cannot be fetched. Failures on
            individual images are reported and skipped.
    """
    global x

    # The target directory is not created by open(); make sure it exists.
    if not os.path.isdir(_IMAGE_DIR):
        os.makedirs(_IMAGE_DIR)

    soup = BeautifulSoup(_fetch(url), 'html.parser')
    my_girl = soup.find_all('img')
    print(my_girl)

    for girl in my_girl:
        src = girl.get('src')
        if not src:
            # <img> without a src attribute: nothing to download.
            continue
        # Resolve relative and protocol-relative links against the page URL.
        link = urlparse.urljoin(url, src)
        print(link)
        try:
            data = _fetch(link, referer=url)
        except IOError as err:
            # urllib2.URLError/HTTPError and socket timeouts are IOErrors in
            # Python 2; one broken image should not abort the whole page.
            print("skipping %s: %s" % (link, err))
            continue
        # os.path.join keeps the path portable (the old 'image\...' literal
        # only worked on Windows); 'wb' avoids newline translation.
        with open(os.path.join(_IMAGE_DIR, '%s.jpg' % x), 'wb') as out:
            out.write(data)
        x += 1
        print("正在下载第%s张图片..." % x)
# Crawl the first three listing pages. The offset requested is the same number
# that is reported; previously the loop variable was bumped before use, so
# offsets 2-4 were fetched while pages 1-3 were reported and page 1 was skipped.
# NOTE(review): assumes pager_offset=1 is the first page -- confirm on the site.
for page in range(1, 4):
    url = 'http://www.dbmeinv.com/?pager_offset=%s' % page
    crawl(url)
    print("第 %s 页图片已下载完毕!" % page)
|
|