Question

我需要绘制数值型（numeric）变量的直方图，以确定它们的分布是否偏斜。下面是相关函数的定义，以及调用这些函数的代码。

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys

def variable_type(df, nominal_level=3):
    """Split the columns of ``df`` into numeric, categorical and nominal groups.

    A column whose values have an integer or floating-point dtype is
    "nominal" when it holds at most ``nominal_level`` distinct non-missing
    values, and "numeric" otherwise. Every other column is "categorical".

    Args:
        df: pandas DataFrame whose columns are classified.
        nominal_level: Largest number of distinct values a number-typed
            column may have and still be treated as nominal.

    Returns:
        A ``(numeric, categorical, nominal)`` tuple of lists of column
        labels, each in the order the columns appear in ``df``.
    """
    numeric, categorical, nominal = [], [], []
    for variable in df.columns.values:
        column = df[variable]
        dtype = np.asarray(column).dtype
        # Compare against NumPy's abstract scalar types. The builtins ``int``
        # and ``float`` only match the platform-default widths, so int32,
        # float32 or unsigned columns would be misfiled as categorical.
        is_number = (np.issubdtype(dtype, np.integer)
                     or np.issubdtype(dtype, np.floating))
        if not is_number:
            categorical.append(variable)
        elif column.nunique() <= nominal_level:
            # nunique() skips NaN, so missing values never count as a level.
            nominal.append(variable)
        else:
            numeric.append(variable)
    return numeric, categorical, nominal
def draw_histograms(df, variables, n_rows, n_cols):
    """Draw a 20-bin histogram of each listed column on one figure and show it.

    The figure is laid out as an ``n_rows`` x ``n_cols`` grid of subplots
    that is filled in order. When ``variables`` has more entries than the
    grid has cells, only the first ``n_rows * n_cols`` of them are drawn.

    Args:
        df: pandas DataFrame holding the columns to plot.
        variables: Sequence of column labels of ``df`` to plot.
        n_rows: Number of subplot rows in the grid.
        n_cols: Number of subplot columns in the grid.
    """
    fig = plt.figure()
    n_plots = min(n_rows * n_cols, len(variables))
    for position, variable in zip(range(1, n_plots + 1), variables):
        # Pass the grid as three separate integers. The packed three-digit
        # form (rows*100 + cols*10 + position) only works while every value
        # is a single digit; from the 10th subplot on it spills into the
        # column digit and matplotlib raises
        # "ValueError: num must be 1 <= num <= N, not 0".
        ax = fig.add_subplot(n_rows, n_cols, position)
        df[variable].hist(bins=20, ax=ax)
        # Title the axes explicitly rather than via the implicit "current
        # axes"; str() keeps this working for non-string column labels.
        ax.set_title(str(variable) + ' distribution')
    # Keep titles and tick labels of neighbouring subplots from overlapping.
    fig.tight_layout()
    plt.show()

def main():
    """Load the data, classify its columns and plot the numeric ones.

    The numeric column labels returned by ``variable_type`` are handed to
    ``util.draw_histograms`` together with a 3 x 3 subplot grid.
    """
    # NOTE(review): `read_data` and `util` are not defined in the visible
    # snippet; presumably they come from elsewhere in the project. Confirm.
    frame = read_data()
    column_labels = frame.columns.tolist()  # not used further in this function
    numeric_vars, categorical_vars, nominal_vars = variable_type(frame)
    grid_rows, grid_cols = 3, 3
    util.draw_histograms(frame, numeric_vars, grid_rows, grid_cols)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

我的程序仅在调用函数时将 n_rows 和 n_cols 设为 3、3 才有效，这是一个问题，因为它只绘制了 20 个变量中的 9 个。如果我尝试其他任何数字，就会得到 ValueError: num must be 1 <= num <= 18, not 0 或其他范围的类似错误，具体取决于我选择的 n_rows 和 n_cols。如何将所有 20 个数值变量绘制为同一张图上的子图？或者我应该把它们拆分到不同的图（figure）中？这是我的数据框的示例。

   TARGET_B      ID  GiftCnt36  GiftCntAll  GiftCntCard36  GiftCntCardAll  \
0         0   14974          2           4              1               3   
1         0    6294          1           8              0               3   
2         1   46110          6          41              3              20   
3         1  185937          3          12              3               8   
4         0   29637          1           1              1               1   

   GiftAvgLast  GiftAvg36  GiftAvgAll  GiftAvgCard36      ...       \
0           17      13.50        9.25          17.00      ...        
1           20      20.00       15.88            NaN      ...        
2            6       5.17        3.73           5.00      ...        
3           10       8.67        8.50           8.67      ...        
4           20      20.00       20.00          20.00      ...        

   PromCntCardAll  StatusCat96NK  StatusCatStarAll  DemCluster  DemAge  \
0              13              A                 0           0     NaN   
1              24              A                 0          23      67   
2              22              S                 1           0     NaN   
3              16              E                 1           0     NaN   
4               6              F                 0          35      53   

   DemGender  DemHomeOwner  DemMedHomeValue DemPctVeterans  DemMedIncome  
0          F             U              $0               0           $0   
1          F             U        $186,800              85           $0   
2          M             U         $87,600              36      $38,750   
3          M             U        $139,200              27      $38,942   
4          M             U        $168,100              37      $71,509

Answer 1

您的第10个属性中有NaN。你的代码可以处理这个吗？你是否绘制了第10个属性？

难以显示每个变量的直方图

1 个答案: