使用.SD,lapply和函数转换data.table只返回一行

时间:2017-05-26 08:28:33

标签: r data.table transformation

我正在使用 flights14.csv 数据集来测试 data.table 中的一些代码。

具体来说,我想将所有整数(integer)类型的列转换为数值(numeric)类型。

> glimpse(flights)
Observations: 253,316
Variables: 17
$ year      <int> 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,...
$ month     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ day       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ dep_time  <int> 914, 1157, 1902, 722, 1347, 1824, 2133, 1542, 1509, 1848, 1655, 1752, 1253, 1907, 1720, 1733, 1640, 1714, 1611, 553, 62...
$ dep_delay <int> 14, -3, 2, -8, 2, 4, -2, -3, -1, -2, -5, 7, 3, 142, -5, 18, 25, -1, 191, -7, -7, -8, -2, -3, 44, -1, 3, -5, 26, 52, 55,...
$ arr_time  <int> 1238, 1523, 2224, 1014, 1706, 2145, 37, 1906, 1828, 2206, 2003, 2120, 1351, 2223, 1819, 2024, 2001, 2036, 1910, 739, 81...
$ arr_delay <int> 13, 13, 9, -26, 1, 0, -18, -14, -17, -14, -17, -5, 1, 133, -26, 69, 36, 1, 185, -6, 0, -17, 15, 1, 42, -2, 12, 24, 46, ...
$ cancelled <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ carrier   <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA", "AA",...
$ tailnum   <chr> "N338AA", "N335AA", "N327AA", "N3EHAA", "N319AA", "N3DEAA", "N323AA", "N328AA", "N5FJAA", "N3HYAA", "N5CFAA", "N332AA",...
$ flight    <int> 1, 3, 21, 29, 117, 119, 185, 133, 145, 235, 172, 177, 178, 181, 256, 199, 211, 291, 300, 301, 303, 305, 307, 313, 317, ...
$ origin    <chr> "JFK", "JFK", "JFK", "LGA", "JFK", "EWR", "JFK", "JFK", "JFK", "JFK", "EWR", "JFK", "JFK", "JFK", "JFK", "JFK", "JFK", ...
$ dest      <chr> "LAX", "LAX", "LAX", "PBI", "LAX", "LAX", "LAX", "LAX", "MIA", "SEA", "MIA", "SFO", "BOS", "LAX", "BOS", "ORD", "IAH", ...
$ air_time  <int> 359, 363, 351, 157, 350, 339, 338, 356, 161, 349, 161, 365, 39, 345, 35, 155, 234, 232, 214, 142, 143, 139, 145, 139, 1...
$ distance  <int> 2475, 2475, 2475, 1035, 2475, 2454, 2475, 2475, 1089, 2422, 1085, 2586, 187, 2475, 187, 740, 1417, 1521, 1372, 733, 733...
$ hour      <int> 9, 11, 19, 7, 13, 18, 21, 15, 15, 18, 16, 17, 12, 19, 17, 17, 16, 17, 16, 5, 6, 6, 7, 8, 10, 14, 10, 11, 13, 16, 17, 18...
$ min       <int> 14, 57, 2, 22, 47,

我的代码如下:

> data <- flights[, lapply(.SD, function(x){ifelse(is.integer(x), as.numeric(x), x)})]

转换成功执行,但只返回一条记录。

> glimpse(data)
Observations: 1
Variables: 18
$ year      <dbl> 2014
$ month     <dbl> 1
$ day       <dbl> 1
$ dep_time  <dbl> 914
$ dep_delay <dbl> 14
$ arr_time  <dbl> 1238
$ arr_delay <dbl> 13
$ cancelled <dbl> 0
$ carrier   <chr> "AA"
$ tailnum   <chr> "N338AA"
$ flight    <dbl> 1
$ origin    <chr> "JFK"
$ dest      <chr> "LAX"
$ air_time  <dbl> 359
$ distance  <dbl> 2475
$ hour      <dbl> 9
$ min       <dbl> 14
$ speed     <dbl> 6.89415

你能解释一下为什么会出现这个结果,并告诉我如何修复代码吗?

您的建议将不胜感激。

1 个答案:

答案 0 :(得分:1)

library(data.table)

# Why the attempt in the question collapsed to a single row: ifelse() returns
# a result shaped like its `test` argument, and is.integer(x) is one
# TRUE/FALSE for the whole column, so only the first element of each column
# was kept. The type check must be made once per column, outside ifelse().
flights <- fread(
  "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"
)

# Pick the columns whose class is exactly "integer". vapply() guarantees one
# logical per column, so columns carrying several classes cannot misalign the
# result (classed integer-backed columns are deliberately left alone).
is_plain_integer <- vapply(
  flights,
  function(col) identical(class(col), "integer"),
  logical(1)
)
needed_names <- names(flights)[is_plain_integer]

# Convert the selected columns to double by reference; skip the update when
# there is nothing to convert.
if (length(needed_names) > 0) {
  flights[, (needed_names) := lapply(.SD, as.numeric), .SDcols = needed_names]
}

str(flights)
Classes ‘data.table’ and 'data.frame':  253316 obs. of  11 variables:
 $ year     : num  2014 2014 2014 2014 2014 ...
 $ month    : num  1 1 1 1 1 1 1 1 1 1 ...
 $ day      : num  1 1 1 1 1 1 1 1 1 1 ...
 $ dep_delay: num  14 -3 2 -8 2 4 -2 -3 -1 -2 ...
 $ arr_delay: num  13 13 9 -26 1 0 -18 -14 -17 -14 ...
 $ carrier  : chr  "AA" "AA" "AA" "AA" ...
 $ origin   : chr  "JFK" "JFK" "JFK" "LGA" ...
 $ dest     : chr  "LAX" "LAX" "LAX" "PBI" ...
 $ air_time : num  359 363 351 157 350 339 338 356 161 349 ...
 $ distance : num  2475 2475 2475 1035 2475 ...
 $ hour     : num  9 11 19 7 13 18 21 15 15 18 ...
 - attr(*, ".internal.selfref")=<externalptr>