试图使用ddply和seq

时间:2017-08-08 15:47:27

标签: r plyr

我想用ddply函数代替expand.grid函数,以绕开32位Windows上R的内存限制。我参照了示例代码,但用在自己的数据集上时收到了错误消息。我想按两个分组变量(SAMS和Year),为数据集中的三个变量(Shell_Height、long、lat)分别用seq生成从最小值到最大值的序列。

示例数据集

 # Print `new3` as an R expression; the structure(...) call that follows is
 # that output and can be assigned to a name to rebuild the data frame.
 dput(new3)
structure(list(StationID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("201502003", "201502004", 
"201502005", "201502006", "201502011", "201502014", "201502016", 
"201502019", "201502020", "201502021", "201502022", "201502023", 
"201502024", "201502026", "201502027", "201502028", "201502029", 
"201502030", "201502032", "201502033", "201502034", "201502035", 
"201502184", "201502185", "201502186", "201502187", "201502188", 
"201502189", "201502190", "201502191", "201502192", "201502193", 
"201502194", "201502195", "201502196", "201502197", "201502198", 
"201502199", "201502200", "201502201", "201502203", "201503013", 
"201503014", "201503015", "201503017", "201503018", "201503019", 
"201503020", "201503021", "201503022", "201503023", "201503024", 
"201503025", "201503028", "201503029", "201503031", "201503032", 
"201503033", "201503034", "201503035", "201503036", "201503037", 
"201503038", "201503184", "201503186", "201503190", "201503191", 
"201503192", "201503195", "201503196", "201503197", "201503198", 
"201503199", "201503200", "201503201", "201503203", "201503204", 
"201503205", "201503206", "201503210", "201503211", "201503212", 
"201603048", "201603049", "201603050", "201603051", "201603052", 
"201603053", "201603054", "201603055", "201603056", "201603057", 
"201603058", "201603059", "201603061", "201603062", "201603064", 
"201603065", "201603066", "201603068", "201603070", "201603071", 
"201603072", "201603073", "201603074", "201603075", "201603076", 
"201603077", "201603078", "201603079", "201603080", "201603081", 
"201603082", "201603083", "201603090", "201603091", "201603092", 
"201603093", "201603094", "201603095", "201603096", "201603097", 
"201603101", "201603102", "201603103", "201603105", "201603106", 
"201603107", "201603108", "201603109", "201603110", "201603111", 
"201603112", "201603113", "201603114", "201603115", "201603116", 
"201603117", "201603118", "201603119", "201603120", "201603121", 
"201603122", "201603123", "201603124", "201603125", "201603126", 
"201603127", "201603128", "201603129", "201603130", "201603131", 
"201603132", "201603133", "201603134", "201603135", "201603136", 
"201603137", "201603139", "201603140", "201603141", "201603142", 
"201603143", "201603144", "201603145", "201603146", "201603147", 
"201603148", "201603149", "201603150", "201603151", "201603152", 
"201603153", "201603159", "201603160", "201603161", "201603162", 
"201603163", "201705064", "201705065", "201705066", "201705067", 
"201705068", "201705069", "201705070", "201705071", "201705072", 
"201705073", "201705074", "201705075", "201705076", "201705077", 
"201705078", "201705079", "201705080", "201705081", "201705082", 
"201705083", "201705084", "201705085", "201705086", "201705087", 
"201705088", "201705089", "201705090", "201705091", "201705092", 
"201705093", "201705094", "201705095", "201705096", "201705097", 
"201705098", "201705099", "201705100", "201705101", "201705102", 
"201705103", "201705104", "201705105", "201705106", "201705109", 
"201705110", "201705111", "201705112", "201705113", "201705114", 
"201705115", "201705116", "201705117", "201705118", "201705119", 
"201705120", "201705121", "201705122", "201705123", "201705124", 
"201705125", "201705126", "201705127", "201705138", "201705139", 
"201705141", "201705142", "201705145", "201705146", "201705147", 
"201705148", "201705149", "201705150", "201705151", "201705156", 
"201705157", "201705158", "201705159", "201705160", "201705161", 
"201705162", "201705163", "201705164", "201705165", "201705166", 
"201705167", "201705168", "201705169", "201705170", "201705171", 
"201705172", "201705173", "201705174", "201705175", "201705176", 
"201705177", "201705183", "201705184", "201705185", "201705186"
), class = "factor"), Shell_Height = c(114, 114, 125, 123, 109, 
72, 134, 144, 117, 110, 107, 112, 109, 122, 116, 114, 114, 125, 
123, 109, 72, 134, 144, 117, 110, 107, 112, 109, 122, 116), lat = c(40.7723, 
40.7723, 40.7723, 40.7723, 40.7723, 40.7723, 40.7723, 40.7723, 
40.7723, 40.7723, 40.76885, 40.76885, 40.76885, 40.76885, 40.76885, 
40.7723, 40.7723, 40.7723, 40.7723, 40.7723, 40.7723, 40.7723, 
40.7723, 40.7723, 40.7723, 40.76885, 40.76885, 40.76885, 40.76885, 
40.76885), long = c(-71.8960166666667, -71.8960166666667, -71.8960166666667, 
-71.8960166666667, -71.8960166666667, -71.8960166666667, -71.8960166666667, 
-71.8960166666667, -71.8960166666667, -71.8960166666667, -71.8393166666667, 
-71.8393166666667, -71.8393166666667, -71.8393166666667, -71.8393166666667, 
-71.8960166666667, -71.8960166666667, -71.8960166666667, -71.8960166666667, 
-71.8960166666667, -71.8960166666667, -71.8960166666667, -71.8960166666667, 
-71.8960166666667, -71.8960166666667, -71.8393166666667, -71.8393166666667, 
-71.8393166666667, -71.8393166666667, -71.8393166666667), Year = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("2015", 
"2016", "2017"), class = "factor"), SAMS = structure(c(1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "LI", class = "factor")), .Names = c("StationID", 
"Shell_Height", "lat", "long", "Year", "SAMS"), row.names = c("21", 
"22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", 
"33", "34", "35", "211", "221", "231", "241", "251", "261", "271", 
"281", "291", "301", "311", "321", "331", "341", "351"), class = "data.frame")

ddply code

# Attempt to build, for every (SAMS, Year) group of `new3`, a sequence running
# from the group's minimum to its maximum of Shell_Height (step 5),
# long (step 0.01) and lat (step 0.01).
# NOTE(review): this call fails as written. The reported error is raised by
# summarise_impl(), which looks like dplyr's summarise rather than plyr's --
# presumably `summarize` resolves to dplyr because both packages are attached;
# confirm against the search path.
# NOTE(review): each seq() can return more than one value, and the three
# sequences need not have the same length, so they do not form a single
# summary row per group.
gam_predict <- ddply(new3, 
               .(SAMS,Year), summarize,Shell_Height= seq(min(Shell_Height)
            ,max(Shell_Height),by=5),
            long= seq(min(long),max(long),by=0.01),
             lat= seq(min(lat),max(lat),by=0.01))

错误消息是:Error in summarise_impl(.data, dots) : Column `Shell_Height` must be length 1 (a summary value), not 4(即:列Shell_Height的长度必须为1(汇总值),而不是4)

如果我只是单独尝试Shell_Height,我会收到相同的错误消息。如果我单独尝试lat或long,我会得到一个值。

R会话信息(sessionInfo):

R版本 3.3.2 (2016-10-31)
平台:i386-w64-mingw32/i386(32位)
运行于:Windows 7 x64(内部版本7601)Service Pack 1

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] grid      datasets  utils     stats     graphics 
[6] grDevices methods   base     

other attached packages:
 [1] gamm4_0.2-4         mgcv_1.8-17        
 [3] fields_9.0          spam_2.1-1         
 [5] dotCall64_0.9-04    gstat_1.1-5        
 [7] raster_2.5-8        shapefiles_0.7     
 [9] foreign_0.8-67      classInt_0.1-24    
[11] mapplots_1.5        rgdal_1.2-5        
[13] GISTools_0.7-4      rgeos_0.3-23       
[15] maptools_0.9-1      sp_1.2-4           
[17] mapdata_2.2-6       maps_3.2.0         
[19] bindrcpp_0.2        GGally_1.3.2       
[21] ggplot2_2.2.1       AICcmodavg_2.1-1   
[23] car_2.1-5           lme4_1.1-13        
[25] Matrix_1.2-10       nlme_3.1-131       
[27] openxlsx_4.0.17     dplyr_0.7.1        
[29] plyr_1.8.4          MASS_7.3-45        
[31] RODBC_1.3-14        latticeExtra_0.6-28
[33] RColorBrewer_1.1-2  lattice_0.20-34    

loaded via a namespace (and not attached):
 [1] magrittr_1.5       nloptr_1.0.4       gtable_0.2.0      
 [4] class_7.3-14       minqa_1.2.4        splines_3.3.2     
 [7] FNN_1.1            survival_2.41-3    unmarked_0.12-2   
[10] SparseM_1.77       pbkrtest_0.4-7     zoo_1.8-0         
[13] pkgconfig_2.0.1    R6_2.2.2           stats4_3.3.2      
[16] colorspace_1.3-2   reshape_0.8.6      VGAM_1.0-3        
[19] lazyeval_0.2.0     tibble_1.3.3       intervals_0.15.1  
[22] bindr_0.1          xtable_1.8-2       munsell_0.4.3     
[25] spacetime_1.2-0    Rcpp_0.12.12       quantreg_5.33     
[28] parallel_3.3.2     MatrixModels_0.4-1 assertthat_0.2.0  
[31] tools_3.3.2        xts_0.10-0         e1071_1.6-8       
[34] nnet_7.3-12        scales_0.4.1       glue_1.1.1        
[37] rlang_0.1.1       

1 个答案:

答案 0 :(得分:1)

这并不完全是你所要求的写法(没有用ddply),但可以得到你想要的结果。

tidyverse解决方案

library(tidyverse)

# Build a prediction grid for every (SAMS, Year) group of `new3`.
#
# For each group, the grid is the full combination (expand.grid) of:
#   Shell_Height: min..max of the group in steps of 5
#   long:         min..max of the group in steps of 0.01
#   lat:          min..max of the group in steps of 0.01
# expand.grid is called once per group rather than over the whole data set.
#
# Result: a tibble with columns SAMS, Year, Shell_Height, long, lat.
gam_predict <- new3 %>%
  group_by(SAMS, Year) %>%
  # Collapse each group to one row; the three numeric columns move into the
  # list-column `data`. With tidyr >= 1.0 the non-deprecated spelling is
  # nest(data = c(Shell_Height, lat, long)).
  nest(Shell_Height, lat, long) %>%
  # Name the grid columns here instead of renaming by position afterwards,
  # so the result does not depend on column order.
  mutate(data = map(data, ~ expand.grid(
    Shell_Height = seq(min(.x$Shell_Height), max(.x$Shell_Height), by = 5),
    long = seq(min(.x$long), max(.x$long), by = 0.01),
    lat = seq(min(.x$lat), max(.x$lat), by = 0.01)
  ))) %>%
  unnest(data) %>%
  # Drop any grouping left over from group_by() so later verbs see the
  # whole table.
  ungroup()

输出

    SAMS   Year Shell_Height      long      lat
 1     LI   2015           72 -71.89602 40.76885
 2     LI   2015           77 -71.89602 40.76885
 3     LI   2015           82 -71.89602 40.76885
 4     LI   2015           87 -71.89602 40.76885
 5     LI   2015           92 -71.89602 40.76885
 # etc