我编写了代码,以24x20矩阵形式通过HeatMap绘图演示了数据集(文本文件)中3个参数[速度,加速度,温度]的值。文本文件中的数据按以下顺序保存:
指针速度-加速温度
并重复了很多次,并在480次之后再次重复了第二个循环,依此类推。
所以我提取了这些主要参数并将它们放在列表中,并为每个循环中的每个参数打印并保存了csv文件,并尝试绘制每个值,还尝试绘制每个参数的缺失值(包括nan和inf),并尝试获取一个窗口中所有具有正确循环编号标题的图,并尝试计算缺失值(不仅是 nan ,还包括 inf )来提及下一个图在通过insnull
获取图并用0替换缺失值的过程中(我更喜欢用上一个周期中的最后一个可用值替换Nan和Inf,或者在最佳情况下,用上一个和下一个可用周期的值替换而不是0 ),但我遇到了以下错误:
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
我想知道是否存在一种优雅的方法来分别计算列表中的inf和nan,并在每个循环中通过HeatMap
和isnull
在绘图窗口中将它们打印出来,包括计数信息并将其保存在硬盘执行其他任务。
我的脚本如下:
import sys
import os
import numpy as np
import pandas as pd
#from sklearn.preprocessing import minmax_scale
import seaborn as sns
import matplotlib.pyplot as plt
def mkdf(ListOf480Numbers):
normalMatrix = np.array_split(ListOf480Numbers,8)
fixMatrix = []
for i in range(8):
lines = np.array_split(normalMatrix[i],6)
newMatrix = [0,0,0,0,0,0]
for j in (1,3,5):
newMatrix[j] = lines[j]
for j in (0,2,4):
newMatrix[j] = lines[j][::-1]
fixMatrix.append(newMatrix)
return fixMatrix
def print_df(fixMatrix):
values = []
for i in range(6):
values.append([*fixMatrix[6][i], *fixMatrix[7][i]])
for i in range(6):
values.append([*fixMatrix[4][i], *fixMatrix[5][i]])
for i in range(6):
values.append([*fixMatrix[2][i], *fixMatrix[3][i]])
for i in range(6):
values.append([*fixMatrix[0][i], *fixMatrix[1][i]])
df = pd.DataFrame(values)
return (df)
df = pd.read_csv('D:\me4.TXT', header=None)
id_set = df[df.index % 4 == 0].astype('int').values
Speed = df[df.index % 4 == 1].values
Acceleration = df[df.index % 4 == 2].values
Temperature = df[df.index % 4 == 3].values
data = {'Speed': Speed[:,0], 'Acceleration': Acceleration[:,0], 'Temperature': Temperature[:,0]}
main_data = pd.DataFrame(data, columns=['Speed','Acceleration','Temperature'], index = id_set[:,0])
def plotheatmap(new_value):
Sections = mkdf(new_value)
df = print_df(Sections)
sns.heatmap(df, vmin=min_nor, vmax=max_nor, cmap ='coolwarm')
plt.title(i, fontsize=12, color='black', loc='left', style='italic')
plt.axis('off')
plt.savefig(f'{i}/{i}{count}.png')
plt.clf()
return
'''
Missing data plot
'''
Speed = np.array(Speed, dtype='float64')
Acceleration = np.array(Acceleration, dtype='float64')
Temperature = np.array(Temperature, dtype='float64')
df = pd.DataFrame({"Speed":[Speed],"Acceleration":[Acceleration],"Temperature":[Temperature]})
#pd.isnull(np.array([np.nan, 0], dtype=object))
pd.isnull(np.array([np.nan, 0], dtype=float))
df = df.replace(0,np.nan)
df = df.replace(1,np.inf)
plt.subplot2grid((3, 3), (2, 0), colspan=3)
plt.suptitle('Analysis of data in cycle Nr.{count}', fontsize=14, fontweight='bold')
plt.subplots_adjust(hspace=0.5, bottom=0.1)
sns.heatmap(df.isnull(),cbar=False)
plt.text(3, 4, r'nan={c} inf={d}',
verticalalignment='bottom', horizontalalignment='right', color='green', fontsize=15)
nan = np.array(df.isnull())
inf = np.array(df.isnull())
count_nan = len(df) - df.count()
print(count_nan)
c = 0
for i in nan:
for j in i:
if j:
c += 1
print(f'Total nan: {c}')
d = 1
for i in inf:
for j in i:
if j:
d += 1
print(f'Total inf: {d}')
plt.title(f'Total missing values: {c+d}', fontsize=12, color='black',
loc='left', style='italic')
plt.axis('off')
cycles = int(len(main_data)/480)
print(f'Total cycles: {cycles}')
for i in main_data:
for cycle in range(3):
count = '{:04}'.format(cycle)
j = cycle * 480
ordered_data = mkdf(main_data.iloc[j:j+480][i])
csv = print_df(ordered_data)
csv.to_csv(f'{i}{count}.csv', header=None, index=None)
Sections = mkdf(ordered_data)
df = print_df(Sections)
plt.title(i, fontsize=12, color='black', loc='left', style='italic')
plt.axis('off')
plt.savefig(f'{i}{count}.png')
plt.clf()
'''
Speed plot
'''
Speed_df = pd.read_csv("Speed.csv",header=None)
plt.subplot2grid((3, 3), (0, 0), colspan=1)
sns.heatmap(Speed_df, vmin=-1, vmax=1, cmap ="coolwarm")
plt.title('Speed', fontsize=12, color='black', loc='left', style='italic')
plt.axis('off')
'''
Speed missing plot
'''
Speed_df = pd.read_csv("Speed.csv",header=None)
Speed_df = Speed_df.replace(0,np.nan)
plt.subplot2grid((3, 3), (1, 0), colspan=1)
sns.heatmap(Speed_df.isnull(),cbar=False)
plt.title(f'Speed MV: {c}', fontsize=12, color='black', loc='left', style='italic')
plt.axis('off')
'''
acceleration plot
'''
acc_df = pd.read_csv("acceleration.csv",header=None)
plt.subplot2grid((3, 3), (0, 1), colspan=1)
sns.heatmap(acc_df, vmin=-1, vmax=1, cmap ="coolwarm")
plt.title('acceleration', fontsize=12, color='black', loc='left', style='italic')
plt.axis('off')
'''
acceleration missing plot
'''
acceleration_df = pd.read_csv("acceleration.csv",header=None)
acceleratione_df = acceleration_df.replace(0,np.nan)
plt.subplot2grid((3, 3), (1, 1), colspan=1)
sns.heatmap(acceleration_df.isnull(),cbar=False)
plt.title(f'accel MV: {c}', fontsize=12, color='black', loc='left', style='italic')
plt.axis('off')
'''
Temperature plot
'''
temp_df = pd.read_csv("temperature.csv",header=None)
plt.subplot2grid((3, 3), (0, 2), colspan=1)
sns.heatmap(temp_df, vmin=-40, vmax=150, cmap ="coolwarm")
plt.title('Temperature', fontsize=12, color='black', loc='left', style='italic')
plt.axis('off')
'''
Temperature missing plot
'''
temperature_df = pd.read_csv("temperature.csv",header=None)
temperature_df = temperature_df.replace(0,np.nan)
temperature_df = temperature_df.replace(0,np.inf)
plt.subplot2grid((3, 3), (1, 2), colspan=1)
sns.heatmap(temperature_df.isnull(),cbar=False)
plt.title(f'temp MV: {c}', fontsize=12, color='black', loc='left', style='italic')
plt.axis('off')
plt.show()
我想要的结果是:
我还提供了3个周期的数据集示例文本文件: