本帖最后由 wblblbl 于 2017-12-6 15:06 编辑
具体代码如下,是一段用来连接同义词查询网站 Thesaurus.com 的代码。我把它保存成了一个 py 文件,在 terminal 里用 python 命令运行,但是没有输出结果,不知道具体该怎么使用。代码是从一个博客上粘贴过来的,博客里也没有说明运行方法。求大神帮帮忙。
# -*- coding: utf-8 -*-
import time
import urllib2
from urllib2 import urlopen
import re
import cookielib, urllib2
from cookielib import CookieJar
import datetime
import sqlite3
from bs4 import BeautifulSoup
# --- Module-level setup -----------------------------------------------------
# Build a cookie-aware URL opener that sends a browser-like User-agent header.
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/5.0')]

# Open (or create) the SQLite file in the current working directory.
conn = sqlite3.connect('English_Dict.db')
# Single-argument print(...) behaves the same as the print statement.
print("Opened database successfully!!")
c = conn.cursor()

# Load the words to crawl. This query used to be commented out, which made
# fetchall() return an empty list, so both lists stayed empty and main() had
# nothing to process (the script printed its two status lines and exited).
# The column names are the ones used by the INSERT statements in main();
# selecting them explicitly avoids depending on the table's column order.
# If the EnglishDB table does not exist, sqlite3 raises OperationalError here.
c.execute("SELECT Word, SentimentValue FROM EnglishDB")
rows = c.fetchall()

# Parallel lists: SentimentArray[k] is the sentiment value of WordArray[k].
WordArray = [row[0] for row in rows]
SentimentArray = [row[1] for row in rows]
print('Got values from the Database and filled the Array!!')

# To try the crawler when the table has no rows yet, seed the lists by hand,
# e.g. WordArray = ['good'] and SentimentArray = [1].

# Module-level counter; main() uses its own local variable of the same name.
count = 0
def _store_related_words(section, relevance_pattern, sign, added_msg, present_msg):
    """Insert every word found in *section* that is not yet in EnglishDB.

    section           -- HTML text of one synonym or antonym section.
    relevance_pattern -- regex whose single group captures a word's relevance
                         digit inside *section*.
    sign              -- +1 or -1; multiplied with the relevance to obtain the
                         stored sentiment value.
    added_msg         -- text printed before a new word is inserted.
    present_msg       -- text printed when the word is already stored.

    New words are also appended to WordArray / SentimentArray so that main()
    visits them later. IndexError (fewer attributes than words) and ValueError
    (an empty capture passed to int) propagate to the caller.
    """
    words = re.findall(r'text\">(.*?)</span>', section)
    relevances = re.findall(relevance_pattern, section)
    complexities = re.findall(r'complexity\=\"(\d?)', section)
    lengths = re.findall(r'length\=\"(\d?)', section)

    for idx, raw_word in enumerate(words):
        word = raw_word.decode('utf-8')
        c.execute('SELECT *FROM EnglishDB WHERE Word =?', [word])
        if c.fetchone() is not None:
            print(present_msg)
            continue

        print(added_msg)
        # Index the per-word attribute lists; the synonym branch used to pass
        # the whole lists to int(), which raised TypeError for every new word.
        sentiment = sign * int(relevances[idx])
        c.execute(
            "INSERT INTO EnglishDB (Word, SentimentValue,ComplexityFactor, LengthFactor) VALUES (?,?,?,?)",
            (word, sentiment, int(complexities[idx]), int(lengths[idx])))
        conn.commit()
        WordArray.append(word)
        SentimentArray.append(sentiment)


def _crawl_word(word, sentiment):
    """Fetch the thesaurus page for *word* and store its synonyms/antonyms.

    Synonyms are stored with the sign of *sentiment*, antonyms with the
    opposite sign. Every failure is reported on stdout and skipped, so one bad
    word or page does not stop the crawl.
    """
    import urllib  # Python 2 module providing quote(); the file targets Python 2.

    try:
        startingWord = str(word).encode('utf-8')
        number = int(sentiment)
        # Reduce the sentiment to +1 / -1. A value of 0 raises
        # ZeroDivisionError and is skipped together with unconvertible words.
        val = abs(number) // number
    except Exception as e:
        print(str(e))
        print('Can not decode to aUnicode. Ignoring the word')
        # Actually ignore the word; execution used to continue with an
        # undefined (or stale) startingWord / val.
        return

    # Quote the word so that phrases containing spaces form a valid URL.
    page = 'http://thesaurus.com/browse/' + urllib.quote(startingWord)
    try:
        sourceCode = opener.open(page).read()
    except IOError as e:
        # In Python 2, urllib2.URLError / HTTPError derive from IOError.
        # A missing page or network error skips this word only.
        print(str(e))
        return

    # Element 0 is whatever precedes the first '<div id="synonyms-' marker.
    for fragment in sourceCode.split('<div id="synonyms-')[1:]:
        # Collapse each whitespace run to ONE space. Removing whitespace
        # entirely (the former ''.join) made the '\s' in the patterns below
        # impossible to match.
        string = ' '.join(str(BeautifulSoup(fragment, 'html.parser')).split())

        # NOTE(review): these patterns are tied to the page markup. The class
        # name 'heading-rowsynonyms-heading' may have lost a space when the
        # code was pasted -- verify against the actual page source.
        try:
            synoNymSplit = re.findall(
                r'<div\sclass=\"heading-rowsynonyms-heading\">(.*?)<div\sid=\"filter',
                string)[0]
            _store_related_words(
                synoNymSplit, r'relevant\-(\d?)', val,
                'Not in DB.Adding the Synonym to DB',
                'Synonymalready present')
        except Exception as e:
            # Deliberately broad (best-effort scraping): no matching section,
            # malformed attributes and database errors all land here.
            print(str(e))
            print("No Synonym onthis page")

        try:
            antoNymSplit = re.findall(
                r'\<section\sclass=\"container\-info\santonyms\">(.*?)<div\sclass=\"citation\">',
                string)[0]
            _store_related_words(
                antoNymSplit, r'relevant\-\-(\d?)', -val,
                'Not in DB.Adding the AntoNym to DB',
                'Antonymalready present')
        except Exception as e:
            print(str(e))
            print("No Antonymson this page")


def main():
    """Crawl thesaurus.com for every word in WordArray.

    WordArray and SentimentArray grow while the crawl runs (newly found words
    are appended to them), so the loop re-reads len(WordArray) on every pass
    and ends only when no unvisited word is left.
    """
    count = 0
    while count < len(WordArray):
        _crawl_word(WordArray[count], SentimentArray[count])
        count += 1
# Script entry point: start the crawl when the file is executed.
# (The stray '|' that followed this call was paste residue and a syntax error.)
main()