|
本帖最后由 SimaSmile 于 2019-5-10 15:09 编辑
from bs4 import BeautifulSoup
import requests
import pymongo

# The host must be spelled 'localhost'. The misspelling 'loacllhost' cannot be
# resolved by DNS; the failure only shows up on the first write (insert_one),
# where it is raised as
# ServerSelectionTimeoutError: "loacllhost:27017: [Errno 11001] getaddrinfo failed".
mongo_db = pymongo.MongoClient('localhost', 27017)
ku = mongo_db['tongcheng']      # database 'tongcheng'
ku_biao = ku['cate_urls']       # collection that receives one document per category
cate_url = 'https://xa.58.com'  # prefix prepended to every category href
start_url = 'https://xa.58.com/sale.shtml'


def get_cate_urls(url):
    """Fetch *url* and store every category link found on it in MongoDB.

    Each anchor matched by the CSS selector 'ul.ym-submnu > li > b > a' is
    written to the ``cate_urls`` collection as a document of the form
    ``{'cate_name': <anchor text>, 'page_url': cate_url + <href>}``.
    Anchors whose text is a single non-breaking space, or that carry no
    ``href`` attribute, are skipped.

    Args:
        url: Address of the page that lists the categories.

    Returns:
        None. The function works purely through its database side effect.
    """
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    links = soup.select('ul.ym-submnu > li > b > a')
    for link in links:
        cate_name = link.get_text()
        # An anchor whose text is only a non-breaking space is not stored.
        if cate_name == '\xa0':
            continue
        href = link.get('href')
        # Tag.get() returns None for a missing attribute; concatenating None
        # to the prefix would raise TypeError and abort the whole run.
        if href is None:
            continue
        data = {
            'cate_name': cate_name,
            'page_url': cate_url + href,
        }
        ku_biao.insert_one(data)


get_cate_urls(start_url)
复制代码 报错信息如下
- Traceback (most recent call last):
- File "F:/python/58/Cate_List.py", line 30, in <module>
- get_cate_urls(start_url)
- File "F:/python/58/Cate_List.py", line 27, in get_cate_urls
- ku_biao.insert_one(data)
- File "F:\python\58\venv\lib\site-packages\pymongo\collection.py", line 700, in insert_one
- session=session),
- File "F:\python\58\venv\lib\site-packages\pymongo\collection.py", line 614, in _insert
- bypass_doc_val, session)
- File "F:\python\58\venv\lib\site-packages\pymongo\collection.py", line 602, in _insert_one
- acknowledged, _insert_command, session)
- File "F:\python\58\venv\lib\site-packages\pymongo\mongo_client.py", line 1279, in _retryable_write
- with self._tmp_session(session) as s:
- File "D:\Python\lib\contextlib.py", line 112, in __enter__
- return next(self.gen)
- File "F:\python\58\venv\lib\site-packages\pymongo\mongo_client.py", line 1611, in _tmp_session
- s = self._ensure_session(session)
- File "F:\python\58\venv\lib\site-packages\pymongo\mongo_client.py", line 1598, in _ensure_session
- return self.__start_session(True, causal_consistency=False)
- File "F:\python\58\venv\lib\site-packages\pymongo\mongo_client.py", line 1551, in __start_session
- server_session = self._get_server_session()
- File "F:\python\58\venv\lib\site-packages\pymongo\mongo_client.py", line 1584, in _get_server_session
- return self._topology.get_server_session()
- File "F:\python\58\venv\lib\site-packages\pymongo\topology.py", line 429, in get_server_session
- None)
- File "F:\python\58\venv\lib\site-packages\pymongo\topology.py", line 200, in _select_servers_loop
- self._error_message(selector))
- pymongo.errors.ServerSelectionTimeoutError: loacllhost:27017: [Errno 11001] getaddrinfo failed
复制代码
|
-
-
|