|
还请大神看看我的字符串与列表转化的代码,网上看了半天讲解也没弄明白。
下面的代码是爬取SJR网站数据,需要翻页,卡在了翻页这。
import urllib2
import re
from bs4 import BeautifulSoup
import xlwt
wbk=xlwt.Workbook()
sheet=wbk.add_sheet('sheet 1',cell_overwrite_ok=True)
sheet.write(0,0,'Ranking')
sheet.write(0,1,'Title')
sheet.write(0,2,'Type')
sheet.write(0,3,'SJR')
sheet.write(0,4,'H index')
sheet.write(0,5,'Total Docs.(2015)')
sheet.write(0,6,'Total Docs.(3years)')
sheet.write(0,7,'Total Refs')
sheet.write(0,8,'Total Cites(3years)')
sheet.write(0,9,'Citable Docs.(3years)')
sheet.write(0,10,'Cites/Doc. (2years)')
sheet.write(0,11,'Ref./Doc.')
url='http://www.scimagojr.com/journalrank.php?area=1700&year=2015&page=x&total_size=5171'
i=1
m=0
while i<=104:
a=list(url)
a[-17]=i
b=''.join(a)
webpage=urllib2.urlopen(url)
html_doc=webpage.read()
soup=BeautifulSoup(html_doc,'html.parser',from_encoding='utf-8')
links=soup.find_all('tr')
for link1 in links:
n=0
for link2 in link1.find_all('td'):
sheet.write(m,n,link2.get_text())
n +=1
m +=1
i +=1
wbk.save(r'C:\Users\123\Desktop/SJR.xls')
Spyder上显示的错误原因是:
runfile('C:/Users/123/Desktop/SJR.py', wdir='C:/Users/123/Desktop')
Traceback (most recent call last):
File "<ipython-input-11-94deea264665>", line 1, in <module>
runfile('C:/Users/123/Desktop/SJR.py', wdir='C:/Users/123/Desktop')
File "C:\Users\123\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 585, in runfile
execfile(filename, namespace)
File "C:/Users/123/Desktop/SJR.py", line 33, in <module>
b=''.join(a)
TypeError: sequence item 66: expected string, int found
|
|