import requests
from bs4 import BeautifulSoup
import re
import time
import pymysql

def stronger():
    # Browser-like request headers to disguise the scraper
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': 'ahpvno=1; historybbsName4=c-3217%7C%E5%A5%94%E5%A5%94; fvlid=14828219988978DxYXiUE; sessionip=119.126.168.250; sessionid=E8084B14-2C86-4A31-982F-8DEE8D00E35D%7C%7C2016-12-27+15%3A00%3A05.705%7C%7C0; __utma=1.550096830.1482822000.1482822000.1482822000.1; __utmb=1.0.10.1482822000; __utmc=1; __utmz=1.1482822000.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); ref=0%7C0%7C101061%7C0%7C2016-12-27+15%3A00%3A06.288%7C2016-12-27+15%3A00%3A05.705; sessionvid=3814CBB8-D047-4B42-8632-8B9C07EBFAFC; area=440699; sessionuid=E8084B14-2C86-4A31-982F-8DEE8D00E35D||2016-12-27+15%3A00%3A05.705||0',
        'Host': 'club.autohome.com.cn',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36'
    }
    return headers

def conndb(title, author, date):
    # Open a database connection; for UTF-8 data the charset must be given explicitly
    conn = pymysql.connect(host='localhost', user='root', passwd=None, db='analysis', port=3306, charset='utf8')
    # Get a cursor
    cur = conn.cursor()
    # Note: int values need to be converted with str() first
    sql = "insert into mes(mes_title,mes_author,mes_date) values(%s,%s,%s)"
    cur.execute(sql, (title, author, date))
    conn.commit()  # commit the data
    cur.close()    # close the cursor
    conn.close()   # release the database connection

if __name__ == "__main__":
    url = 'http://club.autohome.com.cn/bbs/forum-c-3217-2.html?qaType=-1#pvareaid=101061'
    connection = requests.get(url, headers=stronger()).content.decode("gbk")
    soup = BeautifulSoup(connection, "html.parser")
    content = soup.find_all('dl', {'class': 'list_dl'})
    title = []
    author = []
    date = []
    for each in content:
        str_title = str(each('a', {'class': 'a_topic'}))
        title = re.findall('(.*?)</a>', str_title)
        str_author = str(each.find_all('dd')[0].find_all('a', {'class': 'linkblack'}))
        author = re.findall('">(.*?)</a>', str_author)
        str_date = str(each.find_all('dd')[0].find_all('span', {'class': 'tdate'}))
        date = re.findall('">(.*?)</span>', str_date)
        conndb(title, author, date)  # fails here: title/author/date are lists
        time.sleep(2)
        title = []
        author = []
        date = []
Help needed: I'm trying to loop over page content parsed with bs4 and write it into a database, but I keep getting an error. I've spent a long time debugging it myself with no luck, so I'm turning to the experts here.
The rough idea is this:
first, request a site with requests;
then parse out the title, author, and post date of each article on that site;
then write those titles, authors, and post dates into a MySQL database together.
The first two steps already work; I'm now stuck on the third step, the insert fails.
If I pass plain arguments instead, e.g. conndb('asadasdas','bdafsf','2016-12-13'), the insert works fine. Stepping through with the debugger shows that title, author, and date are of type list.
I then tried forcing them to str and clearing the lists after each write to the database, but it still errors.
That's where I'm stuck. The database itself runs fine and there is nothing wrong with the SQL statement.
How can I write the data from these three lists into the database?
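For reference, here is a minimal sketch of one way the three parallel lists could be written row by row (assuming the same mes table and connection settings as above, and that the three lists always have equal length; conndb_rows is a hypothetical variant, not part of the original code):

import pymysql

def conndb_rows(titles, authors, dates):
    # Hypothetical variant of conndb() taking three parallel lists.
    conn = pymysql.connect(host='localhost', user='root', passwd=None,
                           db='analysis', port=3306, charset='utf8')
    cur = conn.cursor()
    sql = "insert into mes(mes_title,mes_author,mes_date) values(%s,%s,%s)"
    # zip() pairs the i-th title, author, and date into one row;
    # executemany() runs the INSERT once per row.
    cur.executemany(sql, list(zip(titles, authors, dates)))
    conn.commit()
    cur.close()
    conn.close()

With this, the loop body would call conndb_rows(title, author, date) in place of conndb(title, author, date). The error most likely comes from cur.execute() receiving whole lists where each %s placeholder expects a single scalar value; looping over zip(title, author, date) and calling cur.execute() once per row would work just as well.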