在获取热图图时缺少数据的TypeError包括inf

时间:2018-12-30 02:50:39

标签: python dataframe typeerror seaborn missing-data

我编写了代码,以24x20矩阵形式通过HeatMap绘图演示了数据集(文本文件)中3个参数[速度,加速度,温度]的值。文本文件中的数据按以下顺序保存: 指针速度-加速温度 并重复了很多次,并在480次之后再次重复了第二个循环,依此类推。 所以我提取了这些主要参数并将它们放在列表中,并为每个循环中的每个参数打印并保存了csv文件,并尝试绘制每个值,还尝试绘制每个参数的缺失值(包括nan和inf),并尝试获取一个窗口中所有具有正确循环编号标题的图,并尝试计算缺失值(不仅是 nan ,还包括 inf )来提及下一个图在通过insnull获取图并用0替换缺失值的过程中(我更喜欢用上一个周期中的最后一个可用值替换Nan和Inf,或者在最佳情况下,用上一个和下一个可用周期的值替换而不是0 ),但我遇到了以下错误:

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

我想知道是否存在一种优雅的方法来分别计算列表中的inf和nan,并在每个循环中通过HeatMapisnull在绘图窗口中将它们打印出来,包括计数信息并将其保存在硬盘执行其他任务。 我的脚本如下:

import sys
import os
import numpy as np
import pandas as pd
#from sklearn.preprocessing import minmax_scale
import seaborn as sns
import matplotlib.pyplot as plt

def mkdf(ListOf480Numbers):
normalMatrix = np.array_split(ListOf480Numbers,8)
fixMatrix = []
for i in range(8):
    lines = np.array_split(normalMatrix[i],6)
    newMatrix = [0,0,0,0,0,0]
    for j in (1,3,5):
        newMatrix[j] = lines[j]
    for j in (0,2,4):
        newMatrix[j] = lines[j][::-1]
    fixMatrix.append(newMatrix) 
return fixMatrix

def print_df(fixMatrix):
values = []
for i in range(6):
    values.append([*fixMatrix[6][i], *fixMatrix[7][i]])
for i in range(6):
    values.append([*fixMatrix[4][i], *fixMatrix[5][i]])
for i in range(6):
    values.append([*fixMatrix[2][i], *fixMatrix[3][i]])
for i in range(6):
    values.append([*fixMatrix[0][i], *fixMatrix[1][i]])
df = pd.DataFrame(values)
return (df)

df = pd.read_csv('D:\me4.TXT', header=None)
id_set = df[df.index % 4 == 0].astype('int').values
Speed = df[df.index % 4 == 1].values
Acceleration = df[df.index % 4 == 2].values
Temperature = df[df.index % 4 == 3].values
data = {'Speed': Speed[:,0], 'Acceleration': Acceleration[:,0], 'Temperature': Temperature[:,0]}
main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index = id_set[:,0])

def plotheatmap(new_value):
Sections = mkdf(new_value)
df = print_df(Sections)
sns.heatmap(df, vmin=min_nor, vmax=max_nor, cmap ='coolwarm')
plt.title(i, fontsize=12, color='black', loc='left', style='italic')
plt.axis('off')
plt.savefig(f'{i}/{i}{count}.png')
plt.clf()
return

'''
Missing data plot
'''
Speed = np.array(Speed, dtype='float64')
Acceleration = np.array(Acceleration, dtype='float64')
Temperature = np.array(Temperature, dtype='float64')
df = pd.DataFrame({"Speed":[Speed],"Acceleration":[Acceleration],"Temperature":[Temperature]})
#pd.isnull(np.array([np.nan, 0], dtype=object))
pd.isnull(np.array([np.nan, 0], dtype=float))
df = df.replace(0,np.nan)
df = df.replace(1,np.inf)
plt.subplot2grid((3, 3), (2, 0), colspan=3)
plt.suptitle('Analysis of data in cycle Nr.{count}', fontsize=14, fontweight='bold')
plt.subplots_adjust(hspace=0.5, bottom=0.1)
sns.heatmap(df.isnull(),cbar=False)
plt.text(3, 4, r'nan={c}   inf={d}',
    verticalalignment='bottom', horizontalalignment='right', color='green', fontsize=15)

nan = np.array(df.isnull())
inf = np.array(df.isnull())
count_nan = len(df) - df.count()
print(count_nan)
c = 0
for i in nan:
    for j in i:
        if j:
            c += 1
print(f'Total nan: {c}')
d = 1
for i in inf:
    for j in i:
        if j:
            d += 1
print(f'Total inf: {d}')
plt.title(f'Total missing values: {c+d}', fontsize=12, color='black', 
loc='left', style='italic')
plt.axis('off')


cycles = int(len(main_data)/480)
print(f'Total cycles: {cycles}')
for i in main_data:
    for cycle in range(3):  
        count =  '{:04}'.format(cycle)
        j = cycle * 480
        ordered_data = mkdf(main_data.iloc[j:j+480][i])
        csv = print_df(ordered_data)
        csv.to_csv(f'{i}{count}.csv', header=None, index=None)    
        Sections = mkdf(ordered_data)
        df = print_df(Sections)
        plt.title(i, fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        plt.savefig(f'{i}{count}.png')
        plt.clf()
        '''
        Speed plot
        '''
        Speed_df = pd.read_csv("Speed.csv",header=None)
        plt.subplot2grid((3, 3), (0, 0), colspan=1)
        sns.heatmap(Speed_df, vmin=-1, vmax=1, cmap ="coolwarm")
        plt.title('Speed', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        '''
        Speed missing plot
        '''
        Speed_df = pd.read_csv("Speed.csv",header=None)
        Speed_df = Speed_df.replace(0,np.nan)
        plt.subplot2grid((3, 3), (1, 0), colspan=1)
        sns.heatmap(Speed_df.isnull(),cbar=False)
        plt.title(f'Speed MV: {c}', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        '''
        acceleration plot
        '''
        acc_df = pd.read_csv("acceleration.csv",header=None)
        plt.subplot2grid((3, 3), (0, 1), colspan=1)
        sns.heatmap(acc_df, vmin=-1, vmax=1, cmap ="coolwarm")
        plt.title('acceleration', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')

        '''
        acceleration missing plot
        '''
        acceleration_df = pd.read_csv("acceleration.csv",header=None)
        acceleratione_df = acceleration_df.replace(0,np.nan)
        plt.subplot2grid((3, 3), (1, 1), colspan=1)
        sns.heatmap(acceleration_df.isnull(),cbar=False)
        plt.title(f'accel MV: {c}', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        '''
        Temperature plot
        '''
        temp_df = pd.read_csv("temperature.csv",header=None)
        plt.subplot2grid((3, 3), (0, 2), colspan=1)
        sns.heatmap(temp_df, vmin=-40, vmax=150, cmap ="coolwarm")
        plt.title('Temperature', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')

        '''
        Temperature missing plot
        '''
        temperature_df = pd.read_csv("temperature.csv",header=None)
        temperature_df = temperature_df.replace(0,np.nan)
        temperature_df = temperature_df.replace(0,np.inf)
        plt.subplot2grid((3, 3), (1, 2), colspan=1)
        sns.heatmap(temperature_df.isnull(),cbar=False)
        plt.title(f'temp MV: {c}', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')    
plt.show()

我想要的结果是:

output result picture

我还提供了3个周期的数据集示例文本文件:

dataset

0 个答案:

没有答案