我正在尝试用 Python 从许多文本文件中提取数值。我需要的数字是科学记数法形式的。我的结果文本文件如下:
ADDITIONAL DATA
Tip Rotation (degrees)
Node , UR[x] , UR[y] , UR[z]
21 , 1.0744 , 1.2389 , -4.3271
22 , -1.0744 , -1.2389 , -4.3271
53 , 0.9670 , 1.0307 , -3.8990
54 , -0.0000 , -0.0000 , -3.5232
55 , -0.9670 , -1.0307 , -3.8990
Mean rotation variation along blade
Region , Rotation (degrees)
Partition line 0, 7.499739E-36
Partition line 1, -3.430092E-01
Partition line 2, -1.019287E+00
Partition line 3, -1.499808E+00
Partition line 4, -1.817651E+00
Partition line 5, -2.136372E+00
Partition line 6, -2.448321E+00
Partition line 7, -2.674414E+00
Partition line 8, -2.956737E+00
Partition line 9, -3.457806E+00
Partition line 10, -3.995106E+00
我过去一直成功地使用正则表达式,但这次它似乎无法匹配到这些数字。结果文件中的节点数会发生变化,因此无法按固定行号搜索。我的 Python 脚本如下:
import re
from pylab import *
from scipy import *
import matplotlib
from numpy import *
import numpy as np
from matplotlib import pyplot as plt
import csv
########################################
# Sweep configuration: one result file per theta value, named '<theta>.0.txt'.
minTheta = -90
maxTheta = 0
thetaIncrements = 10
numberOfPartitions = 10
########################################

# Matches one row of the "Mean rotation variation along blade" table, e.g.
# '  Partition line 3, -1.499808E+00'.  re.match() is anchored at the start of
# the line, so the pattern explicitly allows leading whitespace; the exponent
# is optional so plain decimals are accepted too.
PARTITION_LINE = re.compile(
    r'\s*Partition line\s+(\d+)\s*,\s*'
    r'([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)')


def parse_partition_rotations(lines):
    """Return {partition index: rotation} for every partition row in *lines*.

    *lines* is any iterable of text lines (an open file works).  Rows that
    are not partition rows are ignored, so the number of node rows ahead of
    the table does not matter.  If an index occurs more than once, the last
    occurrence wins.
    """
    rotations = {}
    for line in lines:
        found = PARTITION_LINE.match(line)
        if found:
            rotations[int(found.group(1))] = float(found.group(2))
    return rotations


if __name__ == '__main__':
    # Floor division keeps the count an integer on Python 2 and 3 alike;
    # np.linspace and np.zeros both need an integer here.
    numberOfThetas = ((maxTheta - minTheta) // thetaIncrements) + 1
    print('Number of thetas = ' + str(numberOfThetas))
    thetas = np.linspace(minTheta, maxTheta, numberOfThetas)
    print('Thetas = ' + str(thetas))
    part = np.linspace(1, numberOfPartitions, numberOfPartitions)
    print('Parts = ' + str(part))
    # Row i holds partition line i; column m holds the m-th theta.
    meanRotations = np.zeros((numberOfPartitions + 1, numberOfThetas))

    for m, theta in enumerate(range(minTheta, maxTheta + 1, thetaIncrements)):
        fileName = str(theta) + '.0.txt'
        # Read each result file once instead of once per partition line.
        with open(fileName) as f:
            rotations = parse_partition_rotations(f)
        for index in range(numberOfPartitions + 1):
            # Partition rows missing from the file leave the zero in place.
            if index in rotations:
                print(rotations[index])
                meanRotations[index, m] = rotations[index]

    print('Mean rotations on partition lines = ')
    print(meanRotations)
任何帮助都将非常感谢!!
答案 0 :(得分:4)
这种文件是固定的标准格式吗?如果是,您可以使用其他方法获取所有浮点值。代码如下:
# Sample result file; named `text` so the builtin `str` is not shadowed.
text = """ ADDITIONAL DATA
Tip Rotation (degrees)
Node , UR[x] , UR[y] , UR[z]
21 , 1.0744 , 1.2389 , -4.3271
22 , -1.0744 , -1.2389 , -4.3271
53 , 0.9670 , 1.0307 , -3.8990
54 , -0.0000 , -0.0000 , -3.5232
55 , -0.9670 , -1.0307 , -3.8990
Mean rotation variation along blade
Region , Rotation (degrees)
Partition line 0, 7.499739E-36
Partition line 1, -3.430092E-01
Partition line 2, -1.019287E+00
Partition line 3, -1.499808E+00
Partition line 4, -1.817651E+00
Partition line 5, -2.136372E+00
Partition line 6, -2.448321E+00
Partition line 7, -2.674414E+00
Partition line 8, -2.956737E+00
Partition line 9, -3.457806E+00
Partition line 10, -3.995106E+00
"""
# Whitespace-split the whole file into tokens.
arr = text.split()
for pair in enumerate(arr):
    print(pair)  # just to see the list
# Each partition row splits into 'Partition', 'line', '<n>,', '<value>', so
# the first value sits 3 tokens after the first 'Partition' token.  Locating
# it this way (instead of a fixed offset) keeps working when the number of
# node rows above the table changes.  With no partition rows, start past the
# end so the result is an empty list.
start = arr.index('Partition') + 3 if 'Partition' in arr else len(arr)
step = 4  # every fourth token from there on is the next value
ar = arr[start::step]
floatAr = [float(x) for x in ar]
print(floatAr)
最后,您将得到一个名为 floatAr 的列表,其中包含所有浮点值。您可以添加 try-except 块以提高健壮性。
或者,如果你想使用正则表达式,这里是代码:
<!--language:python -->
import re

# Sample result file; named `text` so the builtin `str` is not shadowed.
text = """ ADDITIONAL DATA
Tip Rotation (degrees)
Node , UR[x] , UR[y] , UR[z]
21 , 1.0744 , 1.2389 , -4.3271
22 , -1.0744 , -1.2389 , -4.3271
53 , 0.9670 , 1.0307 , -3.8990
54 , -0.0000 , -0.0000 , -3.5232
55 , -0.9670 , -1.0307 , -3.8990
Mean rotation variation along blade
Region , Rotation (degrees)
Partition line 0, 7.499739E-36
Partition line 1, -3.430092E-01
Partition line 2, -1.019287E+00
Partition line 3, -1.499808E+00
Partition line 4, -1.817651E+00
Partition line 5, -2.136372E+00
Partition line 6, -2.448321E+00
Partition line 7, -2.674414E+00
Partition line 8, -2.956737E+00
Partition line 9, -3.457806E+00
Partition line 10, -3.995106E+00"""
# Whitespace-delimited number in scientific notation (the exponent is
# mandatory, so the plain decimals in the node table are skipped).
#  - raw string: '\S', '\d' and '\.' reach the regex engine unchanged
#  - lookarounds check the surrounding whitespace without consuming it, so
#    values separated by a single space are all found
#  - the mantissa may start with 0 (e.g. 0.5E+00) and '.' is a literal dot
regex = r'(?<!\S)[-+]?(?:\d+\.?\d*|\.\d+)[eE][-+]?\d+(?!\S)'
values = re.findall(regex, text)
floatAr = [float(x) for x in values]
print(floatAr)
顺便说一句,pythex 是一个很好的 Python 在线正则表达式检查器。
答案 1 :(得分:0)
说实话,这里并不需要正则表达式。像下面这样的代码应该就能满足你的需要:
# Scan the result file once and convert the value on every partition row.
# `fileName` must already be set by the surrounding script.
with open(fileName) as f:
    for line in f:
        # Strip first so rows that are indented in the file are still found.
        row = line.strip()
        # Require a comma so a malformed row is skipped instead of raising
        # IndexError on the split below.
        if row.startswith('Partition line') and ',' in row:
            number = float(row.split(',')[1])
            print(number)  # or do whatever you want with it
        # read other file contents with different if clauses