from urllib import request


class Spider:
    """Download one joke-list page from neihanba.com.

    Attributes:
        page: Page number inserted into the list URL. Anything that
            ``str.format`` can render works (e.g. ``'3'`` or ``3``).
    """

    # URL of each joke-list page; ``{}`` is replaced by the page number.
    URL_TEMPLATE = 'http://www.neihanba.com/dz/list_{}.html'
    # Browser-like User-Agent sent with every request.
    USER_AGENT = (
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
    )

    def __init__(self, page):
        self.page = page

    def _build_opener(self):
        """Return an opener that sends ``USER_AGENT`` on every request."""
        opener = request.build_opener()
        # The attribute urllib reads is ``addheaders``. Assigning to any other
        # name (e.g. ``add_handlers``) only creates an unused attribute, and
        # the default "Python-urllib/x.y" User-Agent is sent instead.
        opener.addheaders = [("User-Agent", self.USER_AGENT)]
        return opener

    def load_page(self):
        """Fetch the page and return the raw response body as ``bytes``.

        Raises:
            urllib.error.URLError: If the request fails at the URL/HTTP level.
            OSError: On lower-level connection errors.
        """
        url = self.URL_TEMPLATE.format(self.page)
        # ``with`` closes the underlying connection even if ``read`` fails.
        with self._build_opener().open(url) as response:
            return response.read()
def main():
    """Prompt for a page number, fetch that page and print what was returned."""
    # Strip surrounding whitespace so a stray space or newline typed at the
    # prompt does not end up inside the request URL.
    page = input('请输入页码:').strip()
    myspider = Spider(page)
    the_page = myspider.load_page()
    print(the_page)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

# Execution result reported for this script:
#
# Traceback (most recent call last):
#   File "D:/python保存库/spider/daunzi_spider.py", line 44, in <module>
#     main()
#   File "D:/python保存库/spider/daunzi_spider.py", line 37, in main
#     the_page = myspider.load_page()
#   File "D:/python保存库/spider/daunzi_spider.py", line 22, in load_page
#     response = opener.open(url)
#   File "D:\python3.6.1\lib\urllib\request.py", line 526, in open
#     response = self._open(req, data)
#   File "D:\python3.6.1\lib\urllib\request.py", line 544, in _open
#     '_open', req)
#   File "D:\python3.6.1\lib\urllib\request.py", line 504, in _call_chain
#     result = func(*args)
#   File "D:\python3.6.1\lib\urllib\request.py", line 1346, in http_open
#     return self.do_open(http.client.HTTPConnection, req)
#   File "D:\python3.6.1\lib\urllib\request.py", line 1321, in do_open
#     r = h.getresponse()
#   File "D:\python3.6.1\lib\http\client.py", line 1331, in getresponse
#     response.begin()
#   File "D:\python3.6.1\lib\http\client.py", line 297, in begin
#     version, status, reason = self._read_status()
#   File "D:\python3.6.1\lib\http\client.py", line 258, in _read_status
#     line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
#   File "D:\python3.6.1\lib\socket.py", line 586, in readinto
#     return self._sock.recv_into(b)
# ConnectionResetError: [WinError 10054] 远程主机强迫关闭了一个现有的连接。
#   (English: "An existing connection was forcibly closed by the remote host.")
|