|
读取文件:
f = open('E:/python/data.json','r+')
f
<_io.TextIOWrapper name='E:/python/data.json' mode='r+' encoding='cp936'>
使用pandas处理显示错误:
a = pd.Series(f)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-10-de5589147c94> in <module>()
----> 1 a = pd.Series(f)
E:\Anaconda\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
235 if not is_list_like(data):
236 data = [data]
--> 237 index = _default_index(len(data))
238
239 # create/copy the manager
TypeError: object of type '_io.TextIOWrapper' has no len()
使用json模块读取显示错误:
import json
a = json.load(f)
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
<ipython-input-14-37f1afc8515a> in <module>()
----> 1 a = json.load(f)
E:\Anaconda\lib\json\__init__.py in load(fp, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
297 cls=cls, object_hook=object_hook,
298 parse_float=parse_float, parse_int=parse_int,
--> 299 parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
300
301
E:\Anaconda\lib\json\__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
352 parse_int is None and parse_float is None and
353 parse_constant is None and object_pairs_hook is None and not kw):
--> 354 return _default_decoder.decode(s)
355 if cls is None:
356 cls = JSONDecoder
E:\Anaconda\lib\json\decoder.py in decode(self, s, _w)
340 end = _w(s, end).end()
341 if end != len(s):
--> 342 raise JSONDecodeError("Extra data", s, end)
343 return obj
344
JSONDecodeError: Extra data: line 2 column 1 (char 226)
部分原始数据,使用spyder打开后的json文件显示如下:
{"msgid":"8280204259419051","msgpriority":0,"msgtext":"PTV-8698|8280204259419051|function () {}|1498722414739|257|!206!3041!0!0!1!CCTV-1 综合(高清)","receiverid":"data","senderid":"8280204259419051","subjectid":"data.stb.report"}
{"msgid":"0","msgpriority":0,"msgtext":"HC3100||ec-f4-bb-da-4c-e4|599925|257|1!5!501!0!0!2!CCTV-2 财经","receiverid":"data","senderid":"0","subjectid":"data.stb.report"}
{"msgid":"8280203295899516","msgpriority":0,"msgtext":"OTS_4K_SC|8280203295899516|00-23-b8-d6-9d-f1|139169|257|1!5!500!0!0!1!CCTV-1 综合","receiverid":"data","senderid":"8280203295899516","subjectid":"data.stb.report"}
{"msgid":"0","msgpriority":0,"msgtext":"HC3100||ec-f4-bb-da-4c-e4|747643|49|影视/剧场&logos=/poster/201705261441546372.jpg!index.html/second.html!剧场&logos=/poster/201705261441546372.jpg","receiverid":"data","senderid":"0","subjectid":"data.stb.report"}
{"msgid":"0","msgpriority":0,"msgtext":"HC3100||ec-f4-bb-da-4c-e4|751253|771|1!2591697!剧场/年度新剧!1!10","receiverid":"data","senderid":"0","subjectid":"data.stb.report"}
{"msgid":"0","msgpriority":0,"msgtext":"HC3100||ec-f4-bb-da-4c-e4|753289|772|_A1004457072!欢乐颂!01!0!0!1004457072!剧场/年度新剧!2582!1!0","receiverid":"data","senderid":"0","subjectid":"data.stb.report"}
{"msgid":"8280203295899516","msgpriority":0,"msgtext":"OTS_4K_SC|8280203295899516|00-23-b8-d6-9d-f1|182526|772|TVMA214976_A1003122726!回魂夜(香港 1995年!)!0!0!1003122726!!4734!1!0","receiverid":"data","senderid":"8280203295899516","subjectid":"data.stb.report"}
{"msgid":"0","msgpriority":0,"msgtext":"HC3100||ec-f4-bb-da-4c-e4|835216|49|影视/!index.html/!","receiverid":"data","senderid":"0","subjectid":"data.stb.report"}
{"msgid":"8510010615009789","msgpriority":0,"msgtext":"|8510010615009789|18-99-f5-ea-ed-61|1498722859743|774|1005273535!楚乔传:末路逢生(6)!湖南卫视高清!20170628!22:59:24!0!1","receiverid":"data","senderid":"8510010615009789","subjectid":"data.stb.report"}
{"msgid":"8280204143293241","msgpriority":0,"msgtext":"DVC-7078|8280204143293241||1498723024614|257|1!206!3041!0!0!1!CCTV-1 综合(高清)","receiverid":"data","senderid":"8280204143293241","subjectid":"data.stb.report"}
求助:如何处理这个json文件(每行是一个独立的JSON对象),提取msgtext字段的信息,并根据‘|’和‘!’进行拆分,最后导出为excel文件或者csv文件?
|
|