有什么方法可以简化以下代码,使其看起来更清晰,更优雅?
以下代码使用Linq和regex返回在文本集合中找到的值的集合:
// For every page, yield the last value that appears before the first
// parenthesised value; 0.0 when nothing precedes it.
IEnumerable<double> _results = pages.Select(page =>
{
    Regex pattern = new Regex("<my regex here>", RegexOptions.None);
    double lastNumber = 0.0;

    foreach (Match match in pattern.Matches(page))
    {
        string text = match.Groups["value"].Value;

        // A value containing "(" marks the stop condition: keep what we have.
        if (text.Contains("("))
        {
            break;
        }

        // TryParse overwrites lastNumber on every call and sets it to 0
        // when the text is not a valid number.
        double.TryParse(text, out lastNumber);
    }

    return lastNumber;
});
如您所见,正则表达式很棘手,它基本上是在满足条件之前返回每个文本中找到的最后一个值,这就是期望的结果。
您如何简化先前的代码以求美观?内存和CPU利用率不是问题。
答案 0 :(得分:1)
我会这样做,如果我能正确理解您的代码,则此语法在C#7.0中具有内联import numpy as np
from numpy.lib.recfunctions import append_fields
import matplotlib.pyplot as plt
# Load the quarterly resale price index table. Column names come from the CSV
# header row; 'na' and '-' cells are treated as missing and filled with -1.
data_y = np.genfromtxt('data/housing-and-development-board-resale-price-index-1q2009-100-quarterly.csv',
                       names=True,
                       dtype=None,
                       delimiter=",",
                       missing_values='na,-',
                       filling_values=-1,
                       encoding=None)
# check if data load correctly
print(data_y)

# The text before the first '-' in each 'quarter' value is parsed as the year
# (assumes values look like '<year>-<quarter>' -- TODO confirm against the CSV).
years = np.array([int(quarter.split('-', 1)[0]) for quarter in data_y['quarter']])
data_y = append_fields(data_y, 'year', years)
print(data_y)

# Years shown on the plot, one box per year.
FIRST_YEAR = 1990
LAST_YEAR = 2018
plot_years = range(FIRST_YEAR, LAST_YEAR + 1)

# One vector of 'index' values per year. A list of vectors (rather than a
# pre-sized 2-D array filled row by row) does not depend on every year having
# the same number of quarters, and needs no hard-coded row count.
data_combined = [
    np.asarray(data_y[data_y['year'] == year]['index'], dtype=float)
    for year in plot_years
]

# Tick labels are generated from the same range so they cannot drift from the
# data or pick up stray whitespace.
labels = [str(year) for year in plot_years]

boxprops = dict(linestyle='-', linewidth=2, color='blue')
flierprops = dict(marker='o', markerfacecolor='green', markersize=8)
medianprops = dict(linewidth=2, color='red')

plt.figure(figsize=(60, 60))
plt.title('Movement of Resale Price Index (RPI)', fontsize=15, weight='bold')
plt.boxplot(data_combined, labels=labels, flierprops=flierprops, medianprops=medianprops, boxprops=boxprops)
plt.ylabel('Resale Price Index (RPI)', labelpad=20, fontsize=12)
plt.xlabel('Years', labelpad=20, fontsize=12)
plt.show()
变量声明是有效的:
out
答案 1 :(得分:1)
尽管 bobince 曾就“用正则表达式处理 HTML”给出过忠告 :),这里仍然给出一个基于正则表达式的解决方案:
.NET的正则表达式引擎可以向后移动,因此我们可以利用它并让rex在>中找到最后一个括号中的值最近的数字(使用悲观匹配器import UIKit
/// Collection-view cell whose content is a single image button with rounded corners.
class cell1: UICollectionViewCell {
    /// Corner radius, in points, applied to the button's layer.
    private static let buttonCornerRadius: CGFloat = 10

    /// Button outlet wired up in Interface Builder.
    @IBOutlet var BB: UIButton!

    override func awakeFromNib() {
        super.awakeFromNib()
        // Round the button once the outlet has been connected.
        BB.layer.cornerRadius = cell1.buttonCornerRadius
    }

    /// Shows the image named by `EditLater.LIMG` as the button's normal-state image.
    ///
    /// - Parameter EditLater: The item whose `LIMG` value is passed to `UIImage(named:)`.
    func congigureCell(EditLater question: Ques) {
        let image = UIImage(named: question.LIMG)
        BB.setImage(image, for: .normal)
    }
}
)
.*?
它的含义是:“匹配并命名 > 与 < 之间的数字,接着是尽可能短的任意字符序列,然后是 () 之间的数字”——请根据需要进行调整
>(?<v>[,.0-9]+)<.*?\([.0-9]+\)
例如:
答案 2 :(得分:1)
除了@ dan-d的答案外,这也许是最简单易读且更优雅的代码:
// Stage 1: every captured "value" string on each page.
// Stage 2: the last string seen before the first one containing "(".
// Stage 3: parse it; TryParse leaves 0 in the out variable on failure.
double[] _results = _pages
    .Select(page => _regex.Matches(page).Cast<Match>().Select(match => match.Groups["value"].Value))
    .Select(values => values.TakeWhile(text => !text.Contains("(")).Last())
    .Select(text =>
    {
        double.TryParse(text, out double parsed);
        return parsed;
    })
    .ToArray();
第一个 select 遍历所有数据页,并使用正则表达式返回每页中找到的全部值。第二个 select 在每个页面中取出满足条件(值中含有括号)之前的最后一个值;最后一个 select 对该值进行解析,最终得到一个 double 数组。
最后,按照 @caius-jard 的建议改进正则表达式之后,它现在每页只返回一个值,因此 LINQ 语句可以进一步简化为:
// The improved regex yields the wanted value as the first match on each page,
// so take that match's "value" group and parse it (0 when parsing fails).
double[] _results = _pages
    .Select(page => _regex.Matches(page).Cast<Match>().First().Groups["value"].Value)
    .Select(text =>
    {
        double.TryParse(text, out double parsed);
        return parsed;
    })
    .ToArray();