我正在尝试用 3 个数据帧(DataFrame)创建一个实体集(EntitySet),但运行时出现了错误:TypeError: unhashable type: 'Int64Index'
我已经在网上搜索过类似的问题,但没有找到与日期时间类型相关的问题。请注意,df_raw_view_logs 中没有任何一列的值是唯一的,因此这些列都不能用作索引,所以我使用了 dataframe.index。
下面给出出错的那个数据帧的 dtypes;当我把其中一列设为时间索引时,就会抛出上述错误。
df_raw_view_logs.dtypes
server_time datetime64[ns]
device_type int8
session_id int64
user_id int64
item_id int64
dtype: object
# Build a featuretools EntitySet from three DataFrames ("train", "viewlogs",
# "itemdata") and then relate them on user_id and item_id.
es = ft.EntitySet()
es = es.entity_from_dataframe(entity_id="train",
dataframe=df_es_train,
index=df_es_train.index,  # NOTE(review): passes the pandas Index object, same pattern as the failing call below
time_index="impression_time",
)
# This is the call the traceback points at: entity.py's _create_index evaluates
# `index not in df.columns`, which hashes the Int64Index and raises TypeError.
es = es.entity_from_dataframe(entity_id="viewlogs",
dataframe=df_es_view_logs,
index=df_es_view_logs.index,  # an Int64Index object, not the name of a column
time_index="server_time",
)
es = es.entity_from_dataframe(entity_id="itemdata",
dataframe=df_es_item_data,
index=df_es_item_data.index,  # NOTE(review): same pattern; never reached because the call above raises
)
# Relate train and viewlogs through their user_id variables.
# NOTE(review): presumably the first argument is the parent variable and must
# be the parent entity's index -- confirm against the featuretools docs.
new_relationship = ft.Relationship(es["train"]["user_id"],
es["viewlogs"]["user_id"])
es = es.add_relationship(new_relationship)
# Relate viewlogs and itemdata through their item_id variables.
new_relationship_1 = ft.Relationship(es["viewlogs"]["item_id"],
es["itemdata"]["item_id"])
es = es.add_relationship(new_relationship_1)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-32-81425e9b87c5> in <module>
9 dataframe=df_es_view_logs,
10 index=df_es_view_logs.index,
---> 11 time_index="server_time",
12 )
13
D:\Anaconda3\envs\fastai\lib\site-packages\featuretools\entityset\entityset.py in entity_from_dataframe(self, entity_id, dataframe, index, variable_types, make_index, time_index, secondary_time_index, already_sorted)
495 secondary_time_index=secondary_time_index,
496 already_sorted=already_sorted,
--> 497 make_index=make_index)
498 self.entity_dict[entity.id] = entity
499 self.reset_data_description()
D:\Anaconda3\envs\fastai\lib\site-packages\featuretools\entityset\entity.py in __init__(self, id, df, entityset, variable_types, index, time_index, secondary_time_index, last_time_index, already_sorted, make_index, verbose)
67 """
68 _validate_entity_params(id, df, time_index)
---> 69 created_index, index, df = _create_index(index, make_index, df)
70
71 self.id = id
D:\Anaconda3\envs\fastai\lib\site-packages\featuretools\entityset\entity.py in _create_index(index, make_index, df)
547 # Case 3: user wanted to make index but column already exists
548 raise RuntimeError("Cannot make index: index variable already present")
--> 549 elif index not in df.columns:
550 if not make_index:
551 # Case 4: user names index, it is not in df. does not specify
D:\Anaconda3\envs\fastai\lib\site-packages\pandas\core\indexes\base.py in __contains__(self, key)
3917 @Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
3918 def __contains__(self, key):
-> 3919 hash(key)
3920 try:
3921 return key in self._engine
D:\Anaconda3\envs\fastai\lib\site-packages\pandas\core\indexes\base.py in __hash__(self)
3932
3933 def __hash__(self):
-> 3934 raise TypeError("unhashable type: %r" % type(self).__name__)
3935
3936 def __setitem__(self, key, value):
TypeError: unhashable type: 'Int64Index'
答案 0 :(得分:0)
出现这个错误是因为 index 参数应该是一个字符串,即 DataFrame 中用作索引的列的名称,而不是索引值本身。如果 DataFrame 中没有取值唯一的列,可以给 index 传入一个新的列名并设置 make_index=True,让 featuretools 自动创建索引列。