|
- #coding=utf-8
- import re
def seek(pattern, data):
    """Return the pieces of ``data`` that follow each match of ``pattern``.

    ``pattern`` is a regular expression acting as a repeated "header".
    Every piece runs from the end of one header match to the start of the
    next match; the last piece runs to the end of ``data``.

    Newline characters in both ``pattern`` and ``data`` are replaced by a
    single space before matching, so a header written across several lines
    matches the same text in ``data``; the returned pieces therefore also
    contain spaces where ``data`` had newlines.

    Example: with header 'ab' and data 'abaabcabmn' the pieces are
    'a', 'c' and 'mn'.

    >>> seek('ab', 'abaabcabmn')
    ['a', 'c', 'mn']

    Args:
        pattern: Regular expression describing the header.
        data: Text to split.

    Returns:
        A list of strings, one per header match, in order of appearance.
        The list is empty when the header never occurs.
    """
    # Normalise line breaks so multi-line headers and data compare equal.
    pattern = pattern.replace('\n', ' ')
    data = data.replace('\n', ' ')

    # Record where each header starts and ends. Using the real match end
    # (instead of guessing the header length from the pattern text) keeps
    # the slices correct for any regex, not only escaped literals.
    spans = [(m.start(), m.end()) for m in re.finditer(pattern, data, re.S)]
    if not spans:
        return []

    # Each piece stops where the next header starts; the final piece stops
    # at the end of the data.
    stops = [start for start, _ in spans[1:]] + [len(data)]
    return [data[end:stop] for (_, end), stop in zip(spans, stops)]
-
# Read the whole input file; the context manager closes the handle as soon
# as the contents are in memory.
with open('temp.txt', 'r') as input_file:
    text = input_file.read()

# Header that precedes every block of interest, written as a regex: the
# literal '.' and '+' characters in the numbers are escaped. seek() replaces
# the line breaks with spaces on both sides before matching.
pattern = r"""ITEM: TIMESTEP
50000
ITEM: NUMBER OF ATOMS
10
ITEM: BOX BOUNDS pp pp pp
5\.4707499220706715e\+01 1\.3968300077929459e\+02
8\.5261969777867606e\+01 1\.7334823022213180e\+02
-7\.9500500000000000e\+02 7\.9500500000000000e\+02
ITEM: ATOMS id type xs ys zs
"""

# Parenthesised single-argument form prints identically on Python 2 and 3.
print(seek(pattern, text))
复制代码 |
|