我正在尝试索引由3列代表的日期时间(年,年和2400小时)。
2014,323,1203,47.77,320.9
2014,323,1204,48.46,402.6
2014,323,1205,49.2,422.7
2014,323,1206,49.82,432.4
2014,323,1207,50.03,438.6
2014,323,1208,50.15,445.4
2014,323,1209,50.85,449.7
2014,323,1210,50.85,454.4
2014,323,1211,50.85,458.1
2014,323,1212,50.91,460.2
我使用以下代码:
In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
In [2]:
def parse(yr, yearday, hrmn):
date_string = ' '.join([yr, yearday, hrmn])
print(date_string)
return datetime.strptime(date_string,"%Y %j %H%M")
In [3]:
df = pd.read_csv('home_prepped.dat', parse_dates={'datetime':[0,1,2]},
date_parser=parse, index_col='datetime', header=None)
当数据存在缺陷(有关DST更改的额外数据)时,我已成功将其引入,现在它已修复(删除并重新拼接)我遇到此错误(完整):
2014 92 2355
2014 92 2356
2014 92 2357
2014 92 2358
2014 92 2359
2014 92 2400
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-9c710834ee23> in <module>()
1
----> 2 df = pd.read_csv('home_prepped.dat', parse_dates={'datetime':[0,1,2]}, date_parser=parse, index_col='datetime', header=None)
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
463 skip_blank_lines=skip_blank_lines)
464
--> 465 return _read(filepath_or_buffer, kwds)
466
467 parser_f.__name__ = name
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
249 return parser
250
--> 251 return parser.read()
252
253 _parser_defaults = {
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in read(self, nrows)
708 raise ValueError('skip_footer not supported for iteration')
709
--> 710 ret = self._engine.read(nrows)
711
712 if self.options.get('as_recarray'):
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in read(self, nrows)
1209 data = dict((k, v) for k, (i, v) in zip(names, data))
1210
-> 1211 names, data = self._do_date_conversions(names, data)
1212 index, names = self._make_index(data, alldata, names)
1213
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _do_date_conversions(self, names, data)
1033 data, names = _process_date_conversion(
1034 data, self._date_conv, self.parse_dates, self.index_col,
-> 1035 self.index_names, names, keep_date_col=self.keep_date_col)
1036
1037 return names, data
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _process_date_conversion(data_dict, converter, parse_spec, index_col, index_names, columns, keep_date_col)
2100
2101 _, col, old_names = _try_convert_dates(converter, colspec,
-> 2102 data_dict, orig_names)
2103
2104 new_data[new_name] = col
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _try_convert_dates(parser, colspec, data_dict, columns)
2132 to_parse = [data_dict[c] for c in colnames if c in data_dict]
2133
-> 2134 new_col = parser(*to_parse)
2135 return new_name, new_col, colnames
2136
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in converter(*date_cols)
2048 dayfirst=dayfirst)
2049 except Exception:
-> 2050 return generic_parser(date_parser, *date_cols)
2051
2052 return converter
/Volumes/anaconda/anaconda/lib/python2.7/site-packages/pandas/io/date_converters.pyc in generic_parser(parse_func, *cols)
36 for i in range(N):
37 args = [c[i] for c in cols]
---> 38 results[i] = parse_func(*args)
39
40 return results
<ipython-input-2-57e18ddd7deb> in parse(yr, yearday, hrmn)
1 def parse(yr, yearday, hrmn):
2 date_string = ' '.join([yr, yearday, hrmn])
----> 3 return datetime.strptime(date_string,"%Y %j %H%M")
/Volumes/anaconda/anaconda/python.app/Contents/lib/python2.7/_strptime.pyc in _strptime(data_string, format)
326 if len(data_string) != found.end():
327 raise ValueError("unconverted data remains: %s" %
--> 328 data_string[found.end():])
329
330 year = None
ValueError: unconverted data remains: 0
我正在寻找有关如何调试或解决此问题的建议。我已经浏览了数据,根据我在类似帖子中读到的内容,我应该寻找无关的时间数据,而不是那里。
感谢。