我正在使用来自Kaggle的Housing train.csv数据进行预测。
https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data?select=train.csv
我试图生成一个关联,并且仅将与SalePrice关联的功能保持在0.5到0.9之间。我试图使用此函数来过滤其中的一些内容,但是我将删除仅高于.9的相关值。 我将如何更新此功能,以仅保留生成关联热图所需的那些特定功能?
module sram(addr,clk,din,dout,we); //sram.v
parameter addr_width = 12, word_depth = 110, word_width = 16;
input clk,we;
input [addr_width-1:0] addr;
input [word_width-1:0] din;
output [word_width-1:0] dout;
reg [word_width-1:0]mem[0:word_depth-1];
reg [word_width-1:0]dout;
always @ (posedge clk) begin
if(!we)
mem[addr] <= din[word_width-1:0];
end
always @ (posedge clk) begin
if(we)
dout[word_width-1:0] <= mem[addr];
end
endmodule
module cpu(clk,reset); //cpu.v
input clk, reset;
reg [15:0] dr, ac, ir;
reg [11:0] addr, pc;
reg [2:0] opcode;
reg [5:0]t;
reg we;
reg sc;
reg [15:0] din;
wire [15:0] dout;
sram sram(addr,clk,din,dout,we);
always @ (posedge clk or negedge reset) begin
if(!reset) begin
ir <= 16'd0; dr <= 16'd0; ac <= 16'd0; addr <= 12'd0; pc <= 12'd0; sc <= 0; t <= 0; we<=1;
end
else if(t==0) begin
addr <= pc; sc<=1;
end
else if(t==1) begin
ir[15:0] <= dout[addr]; pc <= pc+1;
end
else if(t==2) begin
opcode <= ir[14:12];
addr <= ir[11:0]; //no indirect mode, no i
sc<=0;
end
else if(t==3) begin
if(opcode==3'b111) begin
ac <= 0;
end
if(opcode==3'b000) begin
end
end
end
always @ (negedge clk) begin
if(!sc) begin
t<=0;
end
else t<=t+1;
end
endmodule
module tbcpu(); //tbcpu.v
reg clk,reset;
integer file_pointer;
cpu cpu(clk,reset);
always #5 clk = ~clk;
initial begin
$readmemb("memory.dat", tbcpu.cpu.sram.mem); //assembly
clk = 0; reset = 1;
#1 reset = 0;
#1 reset = 1;
#100 $finish;
end
endmodule
答案 0 :(得分:1)
import pandas as pd
data = pd.read_csv('train.csv')
col = data.columns
c = [i for i in col if data[i].dtypes=='int64' or data[i].dtypes=='float64'] # dropping columns as dtype == object
main_col = ['SalePrice'] # column with which we have to compare correlation
corr_saleprice = data.corr().filter(main_col).drop(main_col)
c1 =(corr_saleprice['SalePrice']>=0.5) & (corr_saleprice['SalePrice']<=0.9)
c2 =(corr_saleprice['SalePrice']>=-0.9) & (corr_saleprice['SalePrice']<=-0.5)
req_index= list(corr_saleprice[c1 | c2].index) # selecting column with given criteria
#req_index.append('SalePrice') #if you want SalePrice column in your final dataframe too , uncomment this line
data = data[req_index]
data
使用for循环也不是那么有效,直接实现是有利的。希望这就是您想要的!
要生成热图,可以使用以下代码:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
a =data.corr()
mask = np.triu(np.ones_like(a, dtype=np.bool))
plt.figure(figsize=(10,10))
_ = sns.heatmap(a,cmap=sns.diverging_palette(250, 20, n=250),square=True,mask=mask,annot=True,center=0.5)