我一直在使用一个包含 3,000 个观察值的数据集,其中有一列是州(State)。为了运行神经网络,我试图将州编码为若干区域列,包括太平洋(Pacific)、中部(Central)、东部(East)、AK、HI 和山地(Mountain)。
以下代码有效但我觉得必须有一种更简单的方法。
已安装的软件包:
{
"name": "ASPCoreWithAngular",
"private": true,
"version": "0.0.0",
"scripts": {
"test": "karma start ClientApp/test/karma.conf.js"
},
"devDependencies": {
"@angular/animations": "4.2.5",
"@angular/common": "4.2.5",
"@angular/compiler": "4.2.5",
"@angular/compiler-cli": "4.2.5",
"@angular/core": "4.2.5",
"@angular/forms": "4.2.5",
"@angular/http": "4.2.5",
"@angular/platform-browser": "4.2.5",
"@angular/platform-browser-dynamic": "4.2.5",
"@angular/platform-server": "4.2.5",
"@angular/router": "4.2.5",
"@ngtools/webpack": "1.5.0",
"@types/chai": "4.0.1",
"@types/jasmine": "2.5.53",
"@types/webpack-env": "1.13.0",
"angular2-router-loader": "0.3.5",
"angular2-template-loader": "0.6.2",
"aspnet-prerendering": "^3.0.1",
"aspnet-webpack": "^2.0.1",
"awesome-typescript-loader": "3.2.1",
"bootstrap": "3.3.7",
"chai": "4.0.2",
"css": "2.2.1",
"css-loader": "0.28.4",
"es6-shim": "0.35.3",
"event-source-polyfill": "0.0.9",
"expose-loader": "0.7.3",
"extract-text-webpack-plugin": "2.1.2",
"file-loader": "0.11.2",
"html-loader": "0.4.5",
"isomorphic-fetch": "2.2.1",
"jasmine-core": "2.6.4",
"jquery": "3.2.1",
"json-loader": "0.5.4",
"karma": "1.7.0",
"karma-chai": "0.1.0",
"karma-chrome-launcher": "2.2.0",
"karma-cli": "1.0.1",
"karma-jasmine": "1.1.0",
"karma-webpack": "2.0.3",
"preboot": "4.5.2",
"raw-loader": "0.5.1",
"reflect-metadata": "0.1.10",
"rxjs": "5.4.2",
"style-loader": "0.18.2",
"to-string-loader": "1.1.5",
"typescript": "2.4.1",
"url-loader": "0.5.9",
"webpack": "2.5.1",
"webpack-hot-middleware": "2.18.2",
"webpack-merge": "4.1.0",
"zone.js": "0.8.12"
}
}
我一直在使用的for循环。
library(tidyverse)
library(readr)
library(FNN)
library(rpart)
library(C50)
library(nnet)
library(FME)
这是我在这里发表的第一篇文章,希望我能得到我需要的一切。谢谢你的帮助!
答案 0 :(得分:2)
你可以使用 `ifelse` 和 `%in%` 运算符:
# First statement: 1/0 indicator for rows whose State is in the `east` set.
east <- c(
  "CT", "DE", "FL", "GA", "IN", "ME", "MD",
  "MA", "MI", "NH", "NJ", "NY", "NC", "OH",
  "PA", "RI", "SC", "VT", "VA", "DC", "WV"
)
churn$state.cat.east <- ifelse(churn$State %in% east, 1, 0)
对中部(central)各州重复同样的做法:
# Second statement: 1/0 indicator for rows whose State is in the `central` set.
central <- c(
  "AL", "AR", "IL", "IA", "KS", "KY",
  "LA", "MN", "MS", "MO", "NE", "ND",
  "OK", "SD", "TN", "TX", "WI"
)
churn$state.cat.central <- ifelse(churn$State %in% central, 1, 0)
希望这会有所帮助
Gottavianoni
答案 1 :(得分:1)
另一种选择可能是使用 R 内置的 `state` 数据集(`state.abb`、`state.division`):
# Sample data
churn <- data.frame(
  state = c("CA", "NY", "TX", "CA", "TX"),
  stringsAsFactors = FALSE
)
# Map each state to its division using R's built-in `state` data.
# match() is vectorised and yields NA for abbreviations that are absent from
# state.abb (e.g. "DC"), so the result is always a factor with one entry per
# row. The earlier sapply()/which() lookup returned a list instead of a factor
# whenever an abbreviation was not found.
data(state)
churn$state_division <- state.division[match(churn$state, state.abb)]
# Expand the mapped state_division column into dummy (indicator) columns using
# the dummies package. `sep` is passed as "-"; presumably it is the separator
# used in the generated column names (confirm against the package docs).
library(dummies)
churn <- dummy.data.frame(
  churn,
  names = "state_division",
  sep = "-"
)
答案 2 :(得分:0)
我们可以不用 `ifelse`,而是用 `as.integer` 将逻辑值强制转换为二进制(0/1):
# Illustrative only: the `...` inside c() stands for the remaining state
# abbreviations and has to be filled in before these lines will run.
# as.integer() converts the logical result of %in% to 1 (TRUE) / 0 (FALSE).
churn$state.cat.east <- with(churn,as.integer( State %in% c("CT", "DE", "FL", ...)))
churn$state.cat.central <- with(churn,as.integer( State %in% c("AL" , "AR", ...)))
注意:`...` 指的是其余的州。
如果我们需要为所有地区创建指示列:
library(purrr)
library(dplyr)
# Build one 0/1 indicator column per distinct value of state.region and bind
# those columns onto churn. For each region name, every churn$State is looked
# up in a state.abb -> state.region table and tested against that region.
state.region %>%
  unique() %>%
  as.character() %>%
  set_names() %>%
  map_df(function(region) {
    as.integer(setNames(state.region, state.abb)[churn$State] %in% region)
  }) %>%
  bind_cols(churn, .)
# Reproducible sample data: 100 state abbreviations drawn with replacement
# from state.abb, stored as a character column named State.
set.seed(24)
churn <- data.frame(
  State = sample(state.abb, 100, replace = TRUE),
  stringsAsFactors = FALSE
)