我正在尝试开发一种图像模型,该模型采用分类嵌入和连续数据来帮助分类。 我添加了图像数据类来帮助提取其他数据,但现在可以使用了。
class image_Data_Dataset(Dataset):
'''
image class data set
'''
def __init__(self, data, cont, cat, transform = None):
'''
Args:
------------------------------------------------------------
data = dataframe
image = column in dataframe with absolute path to the image
cont = list of continuous data columns
cat = list of categorical data columns
policy = ID variable
'''
#data frame
self.image_frame = data
#transform
self.transform = transform
#categorical data
self.categorical = np.stack(pd.get_dummies(self.image_frame, columns=cat, drop_first=True).iloc[:, 15:].values)
#numerical data
self.features = self.image_frame[cont]
scaler = StandardScaler().fit(self.features.values)
self.numerical = scaler.transform(self.features.values)
def __len__(self):
return len(self.image_frame)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
label = self.image_frame.iloc[idx, 15]
pic = Path(self.image_frame.iloc[idx,18])
img = Image.open(pic)
policy = self.image_frame.iloc[idx, 0]
categorical_data = self.categorical_data[idx]
numerical_data = self.numerical_data[idx]
sample = {'image': img, 'policy': policy, 'label':label, 'categorical_data': categorical_data
,'numerical_data': numerical_data}
if self.transform:
image = self.transform(img)
#return image, label, policy
return categorical_data, numerical_data, label
这是错误:
TypeError Traceback (most recent call last)
in
1 roof = image_Data_Dataset(train_test_split, cont = [‘age’, ‘aoh’,‘lmt01’, ‘totl_sq_ft_cnt’]
----> 2 , cat = [‘liablmt’, ‘deductible’, ‘dw_style_class’, ‘fnclrspn’], transform = None)
in init(self, data, cont, cat, transform)
22
23 #categorical data
—> 24 self.categorical = np.stack(pd.get_dummies(self.image_frame, columns=cat, drop_first=True).iloc[:, 15:].values)
25
26 #numerical data
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in get_dummies(data, prefix, prefix_sep, dummy_na, columns, sparse, drop_first, dtype)
864 sparse=sparse,
865 drop_first=drop_first,
–> 866 dtype=dtype)
867 return result
868
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in _get_dummies_1d(data, prefix, prefix_sep, dummy_na, sparse, drop_first, dtype)
899 # if dummy_na, we just fake a nan level. drop_first will drop it again
900 if drop_first and len(levels) == 1:
–> 901 return get_empty_frame(data)
902
903 number_of_cols = len(levels)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in get_empty_frame(data)
885 index = data.index
886 else:
–> 887 index = np.arange(len(data))
888 return DataFrame(index=index)
889
TypeError: object of type ‘function’ has no len()
这个自定义数据类让我感到困惑。有人看到我在做什么错吗?