|
我已经搞了快两个星期了.....还是不能登录上去.......还请大神帮忙!
主要思路就是构造post然后提交表单记录cookie,代码如下:
出现的问题是提交后提示验证码错误。其实验证码的 seed 就在登录页面的 HTML 代码里,但是我的爬虫取回的页面中找不到这个 seed,我不知道为什么...
# -*- coding: utf-8 -*-
import requests
import cookielib
from bs4 import BeautifulSoup
import time
from selenium import webdriver
agent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Mobile Safari/537.36'
headers={'User-Agent':agent,
'Host':'login.zbj.com',
'Referer':'https://login.zbj.com/',
}
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='cookies')
try:
session.cookies.load(ignore_discard=True)
print 'Cookie成功加载'
except:
print "Cookie 未能加载"
def get_captcha(seed):
#t = str(int(time.time() * 1000))
captcha_url='https://login.zbj.com/login/verify?seed='+seed
r = session.get(captcha_url, headers=headers)
with open('captcha.jpg', 'wb') as f:
f.write(r.content)
f.close()
print '手动输入验证码'
captcha = raw_input("输入\n>")
return captcha
def login(username, password):
headers["X-Requested-With"] = "XMLHttpRequest"
post_url = 'https://login.zbj.com/login/dologin/'
postdata = {
'fromurl':'',
'ivkey':'',
'key': '',
'kid': '',
'password': password,
'username': username,
'l': 1,
'vf-id': '',
'vid': ''
}
seed = str(int(time.time() * 1000))
postdata["captcha"] = get_captcha(seed)
postdata['seed']=seed
print '提交表单...'
login_page = session.post(post_url, data=postdata, headers=headers).text
print login_page
print '保存cookie'
session.cookies.save()
def is_login():
    """Return True when the current session cookies are still logged in.

    Probes the account profile page and looks for an element that only
    appears when authenticated.
    """
    url = 'http://u.zbj.com/mydata/baseinfo'
    html = session.get(url, headers=headers).text
    # Name the parser explicitly: without it bs4 warns and may pick a
    # different parser per environment, changing results.
    soup = BeautifulSoup(html, 'html.parser')
    # ``is not None`` instead of ``!= None``; collapse the if/else that
    # just returned True/False.
    return soup.find("label", {"class": "control-username"}) is not None
def start():
name=raw_input('输入用户名:')
password=raw_input('输入密码:')
if is_login():
print '成功!'
else:
print '正在登录...'
login(name,password)
start()
|
|