|
爬虫模拟登录我校教务网站,遇到问题,各位大神帮忙看看,先上代码为敬:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Log in to yjsxt.xidian.edu.cn through the ids.xidian.edu.cn login form.

Flow:
  1. GET the site root and read the hidden ``lt`` / ``execution`` fields
     from the login form that comes back.
  2. POST the credentials plus those hidden fields to the form's action URL.
  3. GET the student index page and save both responses to disk.

Every request goes through the same cookie-aware ``opener`` so that the
session cookie issued with the login form is sent back with the POST.
"""
import urllib2
import urllib
import httplib
import cookielib
import urlparse
from bs4 import BeautifulSoup

# A single opener backed by one CookieJar is shared by ALL requests below.
cookie = cookielib.CookieJar()
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)


def hidden_value(soup, name):
    """Return the value of the hidden <input> called *name*.

    Raises RuntimeError with a readable message when the field is absent,
    instead of the TypeError produced by indexing None.
    """
    field = soup.find('input', {'type': 'hidden', 'name': name})
    value = field.get('value') if field is not None else None
    if value is None:
        raise RuntimeError('hidden field %r not found in the login page' % name)
    return value


# Fetch the login page and read the per-session hidden POST fields.
# The request MUST use `opener` (not urllib2.urlopen): otherwise the session
# cookie set while serving the form never reaches the CookieJar and the
# later POST is sent without it.
url = 'http://yjsxt.xidian.edu.cn'
res = opener.open(url)
# Final URL after any redirects; presumably the login page that carries the
# ?service=... query -- verify with print if the login still fails.
login_url = res.geturl()
login_res = res.read()
soup = BeautifulSoup(login_res, 'html.parser')
lt = hidden_value(soup, 'lt')
execution = hidden_value(soup, 'execution')
print(lt)
print(execution)

# Submit to the form's own action (it may embed ;jsessionid=... and the
# service query); fall back to the login page URL if the form is not found.
form = soup.find('form', {'id': 'casLoginForm'})
action = form.get('action') if form is not None else None
myurl = urlparse.urljoin(login_url, action) if action else login_url

postdata = urllib.urlencode({
    'username': '*****',
    'password': '********',
    'submit': '登录',
    '_eventId': 'submit',
    'rmShown': '1',
    'lt': lt,
    'execution': execution
})

# No explicit 'Host' header: urllib2 derives it from the URL. A hard-coded
# Host would be copied onto redirected requests aimed at a different host.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0',
    'Referer': login_url,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Content-Type': 'application/x-www-form-urlencoded'
}

req = urllib2.Request(myurl, postdata, headers=headers)
# Errors (urllib2.URLError / HTTPError) propagate with their full traceback.
result = opener.open(req)
text = result.read()
result1 = opener.open('http://yjsxt.xidian.edu.cn/student/index.jsp')
text1 = result1.read()

# Save both responses for inspection; `with` closes the files even on error.
with open('gr.html', 'w') as f:
    f.write(text)
with open('gr1.html', 'w') as f1:
    f1.write(text1)
复制代码 用 HttpFox 查看登录过程:
action 为提交的 url,其中含有 jsessionid(与 cookie 当中的 JSESSIONID 值一样),不知道如何处理。两次提交都返回到了登录页面,没法进入本人主页。各位爬虫大大给点意见,谢谢!
|
|