Pytorch自定义数据集不返回表格数据

时间:2019-11-16 23:58:50

标签: python pytorch

我正在尝试开发一种图像模型,该模型采用分类嵌入和连续数据来帮助分类。 我添加了图像数据类来帮助提取其他数据,但现在可以使用了。

class image_Data_Dataset(Dataset):
    '''
    image class data set   

    '''
    def __init__(self, data, cont, cat, transform = None):
        '''
        Args:
        ------------------------------------------------------------
            data = dataframe
            image = column in dataframe with absolute path to the image
            cont = list of continuous data columns
            cat = list of categorical data columns
            policy = ID variable

        '''
        #data frame
        self.image_frame = data

        #transform
        self.transform = transform

        #categorical data
        self.categorical = np.stack(pd.get_dummies(self.image_frame, columns=cat, drop_first=True).iloc[:, 15:].values)

        #numerical data
        self.features = self.image_frame[cont]
        scaler = StandardScaler().fit(self.features.values)
        self.numerical = scaler.transform(self.features.values)


    def __len__(self):
        return len(self.image_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()


        label = self.image_frame.iloc[idx, 15]
        pic = Path(self.image_frame.iloc[idx,18])
        img = Image.open(pic)
        policy = self.image_frame.iloc[idx, 0]
        categorical_data = self.categorical_data[idx]
        numerical_data = self.numerical_data[idx]
        sample = {'image': img, 'policy': policy, 'label':label, 'categorical_data': categorical_data
                  ,'numerical_data': numerical_data}


        if self.transform:
            image = self.transform(img)

        #return image, label, policy
        return categorical_data, numerical_data, label

这是错误:

TypeError Traceback (most recent call last)
in 
1 roof = image_Data_Dataset(train_test_split, cont = [‘age’, ‘aoh’,‘lmt01’, ‘totl_sq_ft_cnt’]
----> 2 , cat = [‘liablmt’, ‘deductible’, ‘dw_style_class’, ‘fnclrspn’], transform = None)
in init(self, data, cont, cat, transform)
22
23 #categorical data
—> 24 self.categorical = np.stack(pd.get_dummies(self.image_frame, columns=cat, drop_first=True).iloc[:, 15:].values)
25
26 #numerical data
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in get_dummies(data, prefix, prefix_sep, dummy_na, columns, sparse, drop_first, dtype)
864 sparse=sparse,
865 drop_first=drop_first,
–> 866 dtype=dtype)
867 return result
868
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in _get_dummies_1d(data, prefix, prefix_sep, dummy_na, sparse, drop_first, dtype)
899 # if dummy_na, we just fake a nan level. drop_first will drop it again
900 if drop_first and len(levels) == 1:
–> 901 return get_empty_frame(data)
902
903 number_of_cols = len(levels)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\reshape\reshape.py in get_empty_frame(data)
885 index = data.index
886 else:
–> 887 index = np.arange(len(data))
888 return DataFrame(index=index)
889
TypeError: object of type ‘function’ has no len()

这个自定义数据类让我感到困惑。有人看到我在做什么错吗?

0 个答案:

没有答案