|
下面代码提示 result = re.search(pattern, page)这几句 报 excepted string or buffer 错误 。大神指导一下是为什么?
import urllib
import urllib2
import re
class BDTB:
def __init__(self, baseURL, seeLZ):
self.baseURL = baseURL
self.seeLZ = 'seeLZ='+ str(seeLZ)
def getPage(self, pageNum):
try:
url = self.baseURL + '?' + self.seeLZ + '&pn' + str(pageNum)
request = urllib2.Request(url)
response = urllib2.urlopen(request)
#print response.read().decode('utf-8')
return response
except urllib2.URLError, e:
if hasattr(e, 'reason'):
print e.reason
return None
def getTitle(self):
page = self.getPage(1)
pattern = re.compile('<h3 class="core_title_txt.*?>(.*?)</h3>',re.S)
result = re.search(pattern, page)
if result:
print result.group[1]
baseURL = 'http://tieba.baidu.com/p/3138733512'
bdtb = BDTB(baseURL,1)
bdtb.getTitle()
|
|