根据相关值选择特定特征

时间:2020-06-06 16:24:47

标签: python

我正在使用来自Kaggle的Housing train.csv数据进行预测。
https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data?select=train.csv

我想计算相关性,并且只保留与 SalePrice 的相关系数在 0.5 到 0.9 之间的特征。我尝试用下面这个函数来做筛选,但它只会删除相关系数高于 0.9 的特征。我应该如何修改这个函数,才能只保留生成相关性热图所需的那些特征?

// sram.v
// Clocked memory with a shared address for reading and writing.
//   we == 0 : on the rising clock edge, din is stored at mem[addr].
//   we == 1 : on the rising clock edge, mem[addr] is registered onto dout.
// dout keeps its previous value on edges where we is not high.
module sram #(
    parameter addr_width = 12,
    parameter word_depth = 110,
    parameter word_width = 16
) (
    input  wire [addr_width-1:0] addr,
    input  wire                  clk,
    input  wire [word_width-1:0] din,
    output reg  [word_width-1:0] dout,
    input  wire                  we
);

    // Storage array: word_depth entries of word_width bits each.
    reg [word_width-1:0] mem [0:word_depth-1];

    // Write path (we low).
    always @(posedge clk) begin
        if (!we) begin
            mem[addr] <= din;
        end
    end

    // Registered read path (we high).
    always @(posedge clk) begin
        if (we) begin
            dout <= mem[addr];
        end
    end

endmodule



// cpu.v
// Minimal accumulator CPU skeleton driven by a timing counter t.
//   t == 0 : present the program counter to the memory (addr <= pc)
//   t == 1 : latch the memory output into ir, increment pc
//   t == 2 : decode - opcode from ir[14:12], operand address from ir[11:0]
//   t == 3 : execute, then clear sc so the counter restarts at 0
// sc enables the timing counter: while sc is 1, t increments on every
// falling clock edge; while sc is 0, t is forced back to 0.
// reset is active low and asynchronous.
module cpu(clk,reset);

input clk, reset;

reg [15:0] dr, ac, ir;
reg [11:0] addr, pc;

reg [2:0] opcode;
reg [5:0] t;
reg we;
reg sc;

reg [15:0] din;   // never assigned in this module; we is held at 1, so the memory is only read
wire [15:0] dout;

sram sram(addr,clk,din,dout,we);

// Datapath / control registers, updated on the rising clock edge.
always @ (posedge clk or negedge reset) begin
    if(!reset) begin
        // t is reset in the counter block below so that it has a single driver.
        ir <= 16'd0; dr <= 16'd0; ac <= 16'd0; addr <= 12'd0; pc <= 12'd0; sc <= 0; we <= 1;
    end

    else if(t==0) begin
        addr <= pc; sc <= 1;
    end

    else if(t==1) begin
        // Load the whole 16-bit word. The previous dout[addr] was a bit-select
        // that picked a single bit of dout and zero-extended it into ir.
        // NOTE(review): the sram registers its read data, so dout reflects the
        // address seen on the previous rising edge. After the first fetch this
        // looks one cycle too early (addr still held the operand address) -
        // a wait state between t==0 and this state may be needed; confirm in simulation.
        ir <= dout; pc <= pc+1;
    end

    else if(t==2) begin
        opcode <= ir[14:12];
        addr <= ir[11:0]; //no indirect mode, no i
    end

    else if(t==3) begin
        if(opcode==3'b111) begin
            ac <= 0;
        end
        if(opcode==3'b000) begin
        end
        // End of the instruction: stop the counter so the next falling edge
        // returns t to 0. Clearing sc in t==2 made this state unreachable.
        sc <= 0;
    end

end

// Timing counter, updated on the falling clock edge.
always @ (negedge clk or negedge reset) begin
    if(!reset) begin
        t <= 0;
    end
    else if(!sc) begin
        t <= 0;
    end
    else begin
        t <= t+1;
    end
end

endmodule


// tbcpu.v
// Testbench for cpu: preloads the memory image, generates a clock with a
// 10-time-unit period, pulses the active-low reset once and ends the
// simulation 100 time units after reset is released.
module tbcpu();

    reg clk;
    reg reset;

    integer file_pointer;   // declared but not referenced in this testbench

    cpu cpu(.clk(clk), .reset(reset));

    // Toggle the clock every 5 time units.
    always begin
        #5 clk = ~clk;
    end

    initial begin
        // Binary memory image loaded straight into the CPU's sram array.
        $readmemb("memory.dat", tbcpu.cpu.sram.mem); //assembly

        clk = 0;
        reset = 1;

        // Reset pulse: low at time 1, released at time 2.
        #1;
        reset = 0;
        #1;
        reset = 1;

        #100;
        $finish;
    end

endmodule

1 个答案:

答案 0 :(得分:1)

import pandas as pd

# Load the training data and keep only the features whose correlation with
# SalePrice lies in [0.5, 0.9] or [-0.9, -0.5].
data = pd.read_csv('train.csv')
col = data.columns

# Numeric columns only. Object (string) columns cannot be correlated, and
# DataFrame.corr() raises on them in pandas >= 2.0 unless they are excluded.
c = list(data.select_dtypes(include='number').columns)
main_col = ['SalePrice']        # column against which every feature is compared

# One-column frame: correlation of each numeric feature with SalePrice,
# with SalePrice's own row removed.
corr_saleprice = data[c].corr().filter(main_col).drop(main_col)

# Series.between is inclusive on both ends by default.
c1 = corr_saleprice['SalePrice'].between(0.5, 0.9)      # moderately/strongly positive
c2 = corr_saleprice['SalePrice'].between(-0.9, -0.5)    # moderately/strongly negative

req_index = list(corr_saleprice[c1 | c2].index)   # names of the columns meeting the criteria

#req_index.append('SalePrice')      # uncomment to keep the SalePrice column in the final dataframe too

data = data[req_index]

data

使用 for 循环的效率并不高,像上面这样直接实现会更好。希望这就是您想要的!

要生成热图,可以使用以下代码:

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Correlation matrix of the selected features.
a = data.corr()
# Mask the upper triangle (including the diagonal) so each pair is drawn once.
# The builtin bool is used because the np.bool alias was removed in NumPy 1.24.
mask = np.triu(np.ones_like(a, dtype=bool))
plt.figure(figsize=(10, 10))
_ = sns.heatmap(a, cmap=sns.diverging_palette(250, 20, n=250), square=True, mask=mask, annot=True, center=0.5)