|
今天写了一个下载 MP3 的爬虫,显示歌曲名的时候出现乱码。求助各路大神,帮我看看是什么原因,谢谢!
其他地方输出都正常,就有一部分是乱码
代码在下面:
#!/usr/bin/env python
#coding:utf-8
__author__ = 'Administrator'
import urllib
import re
def getHtml(url):
    """Fetch *url* and return the raw response body.

    The body is returned exactly as ``read()`` delivers it; no decoding is
    performed here.

    :param url: address handed straight to ``urllib.urlopen``.
    :return: the value of ``read()`` on the opened response.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even when read() raises.
        page.close()
def get_mp3(html):
    """Print and return every ``.wav``/``.mp3`` URL found in *html*.

    A URL is reported when it is the value of a ``src="..."`` attribute that
    is immediately followed by a space and the text ``data``.

    :param html: page markup to scan.
    :return: list of the matched URLs in document order (empty if none).
    """
    # [^"\n]* keeps the match inside a single attribute value; a lazy ``.*?``
    # could begin at an earlier, unrelated src=" on the same line and swallow
    # everything up to the audio URL.  The extension group is non-capturing
    # so findall() yields plain strings instead of tuples.
    mp3re = re.compile(r'src="([^"\n]*\.(?:wav|mp3))" data')
    mp3list = mp3re.findall(html)
    for mp3url in mp3list:
        print(mp3url)
    return mp3list
def _decode_page(html, encoding=None):
    """Return *html* as text, decoding it first when it is a byte string."""
    if not isinstance(html, bytes):
        return html
    if encoding is None:
        # Use the charset the page declares for itself, if it declares one.
        declared = re.search(br'charset=["\']?([A-Za-z0-9_-]+)', html)
        encoding = declared.group(1).decode('ascii') if declared else 'utf-8'
    try:
        return html.decode(encoding, 'replace')
    except LookupError:
        # The declared charset is not a codec Python knows about.
        return html.decode('utf-8', 'replace')


def get_mp3_name(html, encoding=None):
    """Print and return the two name parts of every ``songinfo="..."`` value.

    For each attribute value the first four characters are printed on one
    line and the characters from index 4 up to 20 before the end on the next.

    The page is decoded to text before it is sliced.  Slicing the undecoded
    byte string at fixed offsets can cut through the middle of a multi-byte
    character, which is what shows up as garbled output.

    NOTE(review): the offsets 4 and -20 now count characters rather than
    bytes -- confirm them against the real ``songinfo`` format.
    NOTE: under Python 2, printing text to a pipe may need PYTHONIOENCODING
    to be set.

    :param html: page markup, either text or an encoded byte string.
    :param encoding: codec used for a byte string; when omitted, the page's
        own ``charset=`` declaration is used, falling back to UTF-8.
    :return: list of ``(first_four, middle)`` pairs in document order.
    """
    text = _decode_page(html, encoding)
    parts = []
    for info in re.findall(r'songinfo="(.*?)"', text):
        head, rest = info[0:4], info[4:-20]
        print(head)
        print(rest)
        parts.append((head, rest))
    return parts
# Script entry: download the music page once and keep it in ``html`` ...
html = getHtml('http://tx3.163.com/music/')
# ... then run both reports over that same downloaded page.
get_mp3(html)
get_mp3_name(html)
|
|