r - 从单个州(State)列创建美国各区域的虚拟变量列

时间:2018-02-23 06:22:46

标签: r for-loop gis

我一直在使用一个数据集,该数据集包含一个州(State)列,共有 3,000 个观察值。为了运行神经网络,我试图将各州编码为区域虚拟列,包括太平洋、中部、东部、AK、HI 和山地。

以下代码有效但我觉得必须有一种更简单的方法。

已安装的软件包:

  {
  "name": "ASPCoreWithAngular",
  "private": true,
  "version": "0.0.0",
  "scripts": {
    "test": "karma start ClientApp/test/karma.conf.js"
  },
  "devDependencies": {
    "@angular/animations": "4.2.5",
    "@angular/common": "4.2.5",
    "@angular/compiler": "4.2.5",
    "@angular/compiler-cli": "4.2.5",
    "@angular/core": "4.2.5",
    "@angular/forms": "4.2.5",
    "@angular/http": "4.2.5",
    "@angular/platform-browser": "4.2.5",
    "@angular/platform-browser-dynamic": "4.2.5",
    "@angular/platform-server": "4.2.5",
    "@angular/router": "4.2.5",
    "@ngtools/webpack": "1.5.0",
    "@types/chai": "4.0.1",
    "@types/jasmine": "2.5.53",
    "@types/webpack-env": "1.13.0",
    "angular2-router-loader": "0.3.5",
    "angular2-template-loader": "0.6.2",
    "aspnet-prerendering": "^3.0.1",
    "aspnet-webpack": "^2.0.1",
    "awesome-typescript-loader": "3.2.1",
    "bootstrap": "3.3.7",
    "chai": "4.0.2",
    "css": "2.2.1",
    "css-loader": "0.28.4",
    "es6-shim": "0.35.3",
    "event-source-polyfill": "0.0.9",
    "expose-loader": "0.7.3",
    "extract-text-webpack-plugin": "2.1.2",
    "file-loader": "0.11.2",
    "html-loader": "0.4.5",
    "isomorphic-fetch": "2.2.1",
    "jasmine-core": "2.6.4",
    "jquery": "3.2.1",
    "json-loader": "0.5.4",
    "karma": "1.7.0",
    "karma-chai": "0.1.0",
    "karma-chrome-launcher": "2.2.0",
    "karma-cli": "1.0.1",
    "karma-jasmine": "1.1.0",
    "karma-webpack": "2.0.3",
    "preboot": "4.5.2",
    "raw-loader": "0.5.1",
    "reflect-metadata": "0.1.10",
    "rxjs": "5.4.2",
    "style-loader": "0.18.2",
    "to-string-loader": "1.1.5",
    "typescript": "2.4.1",
    "url-loader": "0.5.9",
    "webpack": "2.5.1",
    "webpack-hot-middleware": "2.18.2",
    "webpack-merge": "4.1.0",
    "zone.js": "0.8.12"
  }
}

我一直在使用的for循环。

library(tidyverse)
library(readr)
library(FNN)
library(rpart)
library(C50)
library(nnet)
library(FME)

这是我在这里发表的第一篇文章,希望我能得到我需要的一切。谢谢你的帮助!

3 个答案:

答案 0 :(得分:2)

你可以使用 ifelse 和 %in% 运算符:

# First statement: flag rows whose State belongs to the eastern group.
east <- c(
  "CT", "DE", "FL", "GA", "IN", "ME", "MD", "MA", "MI", "NH", "NJ",
  "NY", "NC", "OH", "PA", "RI", "SC", "VT", "VA", "DC", "WV"
)
# `%in%` never returns NA, so coercing its logical result gives a clean
# 1/0 numeric column. Unlike ifelse(cond, 1, 0), as.numeric() stays numeric
# for zero-row input (ifelse() would return logical(0) there).
churn$state.cat.east <- as.numeric(churn$State %in% east)

对中部(central)各州重复同样的操作:

# Second statement: flag rows whose State belongs to the central group.
central <- c(
  "AL", "AR", "IL", "IA", "KS", "KY", "LA", "MN", "MS",
  "MO", "NE", "ND", "OK", "SD", "TN", "TX", "WI"
)
# `%in%` never returns NA, so coercing its logical result gives a clean
# 1/0 numeric column. Unlike ifelse(cond, 1, 0), as.numeric() stays numeric
# for zero-row input (ifelse() would return logical(0) there).
churn$state.cat.central <- as.numeric(churn$State %in% central)

希望这会有所帮助

Gottavianoni

答案 1 :(得分:1)

另一种选择是使用 R 中提供的内置数据:
# Sample data
churn <- data.frame(
  state = c("CA", "NY", "TX", "CA", "TX"),
  stringsAsFactors = FALSE
)

# Map each state to its division using R's built-in `state` datasets
# (`state.abb` and `state.division` are parallel vectors).
data(state)
# match() is vectorised and returns exactly one index per row. The previous
# per-row sapply(... which(...)) silently produced a list column whenever a
# code was missing from `state.abb`.
division_idx <- match(churn$state, state.abb)
if (anyNA(division_idx)) {
  stop(
    "Unknown state abbreviation(s): ",
    paste(unique(churn$state[is.na(division_idx)]), collapse = ", "),
    call. = FALSE
  )
}
churn$state_division <- state.division[division_idx]

# Dummy-code the new column created by the mapping above.
library(dummies)
churn <- dummy.data.frame(churn, names = "state_division", sep = "-")

答案 2 :(得分:0)

我们可以不使用 ifelse,而是用 as.integer 将逻辑值强制转换为二进制(0/1)来实现:
# Flag eastern states as 0/1: `%in%` yields a logical vector and as.integer()
# coerces it (TRUE -> 1L, FALSE -> 0L), so no ifelse() is needed.
# NOTE: the `...` inside c() is a placeholder for the remaining state codes
# and must be replaced with the full list before this can be run.
churn$state.cat.east <- with(churn,as.integer( State %in% c("CT", "DE", "FL", ...)))

# Same pattern for the central states (again with `...` as a placeholder).
churn$state.cat.central <- with(churn,as.integer( State %in% c("AL" , "AR",  ...)))

注意:... 代表其余各州

如果需要为所有区域创建虚拟列:

library(purrr)
library(dplyr)

# Region of every row in `churn`, looked up by state abbreviation through a
# name-indexed copy of the built-in `state.region` factor.
row_region <- setNames(state.region, state.abb)[churn$State]

# Build one 0/1 integer column per distinct region, each named after its
# region, then append those columns to `churn`.
region_labels <- as.character(unique(state.region))
region_dummies <- map_df(
  set_names(region_labels),
  function(region) as.integer(row_region %in% region)
)

bind_cols(churn, region_dummies)

数据

# Reproducible example data: 100 state abbreviations drawn with replacement
# from the built-in `state.abb` vector, stored as a character column.
set.seed(24)
churn <- data.frame(
  State = state.abb[sample.int(length(state.abb), 100, replace = TRUE)],
  stringsAsFactors = FALSE
)