KeyError Traceback (most recent call last)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1420243200000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2524 try:
-> 2525 return self._engine.get_loc(key)
2526 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('2015-01-03 00:00:00')
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1420243200000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py in _get_predict_end(self, end)
172 try:
--> 173 end = self._get_dates_loc(dates, dtend)
174 except KeyError as err: # end is greater than dates[-1]...probably
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py in _get_dates_loc(self, dates, date)
94 def _get_dates_loc(self, dates, date):
---> 95 date = dates.get_loc(date)
96 return date
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/datetimes.py in get_loc(self, key, method, tolerance)
1425 key = Timestamp(key, tz=self.tz)
-> 1426 return Index.get_loc(self, key, method, tolerance)
1427
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2526 except KeyError:
-> 2527 return self._engine.get_loc(self._maybe_cast_indexer(key))
2528
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('2015-01-03 00:00:00')
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-206-505c74789333> in <module>()
3 ax = price.loc['2012-01-03':].plot(ax=ax, label='observed')
4
----> 5 fig = model_fit.plot_predict('2014-01-03','2015-01-03', dynamic=False, ax=ax, plot_insample=False)
6
7 plt.show()
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in plot_predict(self, start, end, exog, dynamic, alpha, plot_insample, ax)
1885
1886 # use predict so you set dates
-> 1887 forecast = self.predict(start, end, exog, 'levels', dynamic)
1888 # doing this twice. just add a plot keyword to predict?
1889 start = self.model._get_predict_start(start, dynamic=dynamic)
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in predict(self, start, end, exog, typ, dynamic)
1808 def predict(self, start=None, end=None, exog=None, typ='linear',
1809 dynamic=False):
-> 1810 return self.model.predict(self.params, start, end, exog, typ, dynamic)
1811 predict.__doc__ = _arima_results_predict
1812
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in predict(self, params, start, end, exog, typ, dynamic)
1184 if not dynamic:
1185 predict = super(ARIMA, self).predict(params, start, end, exog,
-> 1186 dynamic)
1187
1188 start = self._get_predict_start(start, dynamic)
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in predict(self, params, start, end, exog, dynamic)
732 # will return an index of a date
733 start = self._get_predict_start(start, dynamic)
--> 734 end, out_of_sample = self._get_predict_end(end, dynamic)
735 if out_of_sample and (exog is None and self.k_exog > 0):
736 raise ValueError("You must provide exog for ARMAX")
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in _get_predict_end(self, end, dynamic)
1062 Handling of inclusiveness should be done in the predict function.
1063 """
-> 1064 end, out_of_sample = super(ARIMA, self)._get_predict_end(end, dynamic)
1065 if 'mle' not in self.method and not dynamic:
1066 end -= self.k_ar
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in _get_predict_end(self, end, dynamic)
673 def _get_predict_end(self, end, dynamic=False):
674 # pass through so predict works for ARIMA and ARMA
--> 675 return super(ARMA, self)._get_predict_end(end)
676
677 def geterrors(self, params):
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/base/tsa_model.py in _get_predict_end(self, end)
177 freq = self.data.freq
178 out_of_sample = datetools._idx_from_dates(dates[-1], dtend,
--> 179 freq)
180 else:
181 if freq is None:
~/anaconda3/lib/python3.6/site-packages/statsmodels/tsa/base/datetools.py in _idx_from_dates(d1, d2, freq)
100 return len(DatetimeIndex(start=_maybe_convert_period(d1),
101 end=_maybe_convert_period(d2),
--> 102 freq=_freq_to_pandas[freq])) - 1
103
104
~/anaconda3/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
116 else:
117 kwargs[new_arg_name] = new_arg_value
--> 118 return func(*args, **kwargs)
119 return wrapper
120 return _deprecate_kwarg
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/datetimes.py in __new__(cls, data, freq, start, end, periods, copy, name, tz, verify_integrity, normalize, closed, ambiguous, dtype, **kwargs)
303
304 if data is None and freq is None:
--> 305 raise ValueError("Must provide freq argument if no data is "
306 "supplied")
307
ValueError: Must provide freq argument if no data is supplied
NaN
问题陈述:需要一个XSLT,将上面的Input XML转换为Output XML。输入XML的实际样本大约为10 MB。我编写了一个转换,对PayrollGroup进行了两次循环,时间复杂度为O(n²)。这对我的服务器来说负担太重,并且抛出了内存不足异常。
有人可以提供性能更好的XSLT吗?
编辑:
以下是我正在使用的两个XSLT,一个接一个
<PayrollGroup xmlns="http://www.example.org">
<Payroll>
<EmpID>1</EmpID>
<Name>Jacob</Name>
<WeekNumber>12</WeekNumber>
<HoursType>Regular</HoursType>
<Hours>80</Hours>
<EarningsType></EarningsType>
<Earnings></Earnings>
</Payroll>
<Payroll>
<EmpID>1</EmpID>
<Name>Jacob</Name>
<WeekNumber>12</WeekNumber>
<HoursType></HoursType>
<Hours></Hours>
<EarningsType>Regular</EarningsType>
<Earnings>800.00</Earnings>
</Payroll>
<Payroll>
<EmpID>2</EmpID>
<Name>John</Name>
<WeekNumber>12</WeekNumber>
<HoursType></HoursType>
<Hours></Hours>
<EarningsType>Regular</EarningsType>
<Earnings>1000.00</Earnings>
</Payroll>
<Payroll>
<EmpID>3</EmpID>
<Name>Augira</Name>
<WeekNumber>12</WeekNumber>
<HoursType>Other</HoursType>
<Hours>12</Hours>
<EarningsType></EarningsType>
<Earnings></Earnings>
</Payroll>
<Payroll>
<EmpID>4</EmpID>
<Name>Satya</Name>
<WeekNumber>12</WeekNumber>
<HoursType>SMT</HoursType>
<Hours>40</Hours>
<EarningsType></EarningsType>
<Earnings></Earnings>
</Payroll>
<Payroll>
<EmpID>4</EmpID>
<Name>Satya</Name>
<WeekNumber>12</WeekNumber>
<HoursType></HoursType>
<Hours></Hours>
<EarningsType>SMT</EarningsType>
<Earnings>600.00</Earnings>
</Payroll>
</PayrollGroup>
之后,下面的XSLT删除重复项
<PayrollGroup xmlns="http://www.example.org">
<Payroll>
<EmpID>1</EmpID>
<Name>Jacob</Name>
<WeekNumber>12</WeekNumber>
<HoursType>Regular</HoursType>
<Hours>80</Hours>
<EarningsType>Regular</EarningsType>
<Earnings>800.00</Earnings>
</Payroll>
<Payroll>
<EmpID>2</EmpID>
<Name>John</Name>
<WeekNumber>12</WeekNumber>
<HoursType></HoursType>
<Hours></Hours>
<EarningsType>Regular</EarningsType>
<Earnings>1000.00</Earnings>
</Payroll>
<Payroll>
<EmpID>3</EmpID>
<Name>Augira</Name>
<WeekNumber>12</WeekNumber>
<HoursType>Other</HoursType>
<Hours>12</Hours>
<EarningsType></EarningsType>
<Earnings></Earnings>
</Payroll>
<Payroll>
<EmpID>4</EmpID>
<Name>Satya</Name>
<WeekNumber>12</WeekNumber>
<HoursType>SMT</HoursType>
<Hours>40</Hours>
<EarningsType>SMT</EarningsType>
<Earnings>600.00</Earnings>
</Payroll>
</PayrollGroup>
答案 0 :(得分:0)
目前尚不清楚您使用的是一个还是两个单独的XSLT样式表。但是,您只需要一个:在XSLT 2.0中,删除重复项的方法是使用xsl:for-each-group。
所以,不要这样做...
<xsl:for-each select="/ns0:PayrollGroup/ns0:Payroll">
执行此操作...
<xsl:for-each-group select="/ns0:PayrollGroup/ns0:Payroll" group-by="ns0:EmpID">
除此之外,以下行(及类似内容)可能会影响性能:
<xsl:value-of select="/ns0:PayrollGroup/ns0:Payroll[ns0:EmpID = $empId and ns0:HoursType = $earningsType]/ns0:HoursType"/>
这是因为它们必须在整个XML文档中搜索匹配项。使用xsl:for-each-group时,您可以将搜索限制在当前组内(即具有相同EmpID的Payroll元素):
<xsl:value-of select="current-group()[ns0:HoursType = $earningsType]/ns0:HoursType"/>
尝试使用此XSLT
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:ns0="http://www.example.org"
                version="2.0">
  <!--
    Collapses the ns0:Payroll records of a ns0:PayrollGroup document so that
    exactly one ns0:Payroll element is emitted per distinct ns0:EmpID.

    The input is grouped once with xsl:for-each-group, and the lookup for
    the complementary record is restricted to current-group() rather than
    searching the whole document for every record.

    Requires an XSLT 2.0 processor (xsl:for-each-group / current-group()).
  -->

  <xsl:output method="xml" indent="yes" />

  <xsl:template match="/">
    <ns0:PayrollGroup>
      <xsl:for-each-group select="/ns0:PayrollGroup/ns0:Payroll" group-by="ns0:EmpID">
        <!--
          Within xsl:for-each-group the context item is the first Payroll of
          the current group, so the relative paths below (ns0:EmpID,
          ns0:HoursType, ...) all read from that first record.
        -->
        <xsl:variable name="earningsType" select="ns0:EarningsType"/>
        <xsl:variable name="hoursType" select="ns0:HoursType"/>
        <ns0:Payroll>
          <ns0:EmpID>
            <xsl:value-of select="ns0:EmpID"/>
          </ns0:EmpID>
          <ns0:Name>
            <xsl:value-of select="ns0:Name"/>
          </ns0:Name>
          <ns0:WeekNumber>
            <xsl:value-of select="ns0:WeekNumber"/>
          </ns0:WeekNumber>
          <xsl:choose>
            <!--
              First record carries earnings only: take the hours fields from
              the record in the same group whose HoursType equals this
              record's EarningsType (the fields stay empty if none exists).
            -->
            <xsl:when test="ns0:HoursType = '' and ns0:Hours ='' and ns0:EarningsType !='' and ns0:Earnings !='' ">
              <ns0:HoursType>
                <xsl:value-of select="current-group()[ns0:HoursType = $earningsType]/ns0:HoursType"/>
              </ns0:HoursType>
              <ns0:Hours>
                <xsl:value-of select="current-group()[ns0:HoursType = $earningsType]/ns0:Hours"/>
              </ns0:Hours>
              <ns0:EarningsType>
                <xsl:value-of select="ns0:EarningsType"/>
              </ns0:EarningsType>
              <ns0:Earnings>
                <xsl:value-of select="ns0:Earnings"/>
              </ns0:Earnings>
            </xsl:when>
            <!--
              First record carries hours only: take the earnings fields from
              the record in the same group whose EarningsType equals this
              record's HoursType (the fields stay empty if none exists).
            -->
            <xsl:when test="ns0:HoursType != '' and ns0:Hours !='' and ns0:EarningsType ='' and ns0:Earnings ='' ">
              <ns0:HoursType>
                <xsl:value-of select="ns0:HoursType"/>
              </ns0:HoursType>
              <ns0:Hours>
                <xsl:value-of select="ns0:Hours"/>
              </ns0:Hours>
              <ns0:EarningsType>
                <xsl:value-of select="current-group()[ns0:EarningsType = $hoursType]/ns0:EarningsType"/>
              </ns0:EarningsType>
              <ns0:Earnings>
                <xsl:value-of select="current-group()[ns0:EarningsType = $hoursType]/ns0:Earnings"/>
              </ns0:Earnings>
            </xsl:when>
            <!-- Any other combination: copy the first record's four fields unchanged. -->
            <xsl:otherwise>
              <ns0:HoursType>
                <xsl:value-of select="ns0:HoursType"/>
              </ns0:HoursType>
              <ns0:Hours>
                <xsl:value-of select="ns0:Hours"/>
              </ns0:Hours>
              <ns0:EarningsType>
                <xsl:value-of select="ns0:EarningsType"/>
              </ns0:EarningsType>
              <ns0:Earnings>
                <xsl:value-of select="ns0:Earnings"/>
              </ns0:Earnings>
            </xsl:otherwise>
          </xsl:choose>
        </ns0:Payroll>
      </xsl:for-each-group>
    </ns0:PayrollGroup>
  </xsl:template>
</xsl:stylesheet>