R:在含有重复标识符的数据框上使用spread函数

时间:2015-07-03 14:40:48

标签: r aggregate dplyr tidyr spread

我有一个数据框需要转换为宽格式(pivot),但数据框中有重复的标识符,因此spread函数会报错:

Error: Duplicate identifiers for rows (5, 6)

然后我合并了行并粘贴了

Dimension = c("A","A","B","B","A","A")
Date = c("Mon","Tue","Mon","Wed","Fri","Fri")
Metric = c(23,25,7,9,7,8)
df = data.frame(Dimension,Date,Metric)
df
  Dimension Date Metric
1         A  Mon     23
2         A  Tue     25
3         B  Mon      7
4         B  Wed      9
5         A  Fri      7
6         A  Fri      8
library(tidyr)
df1 = spread(df, Date, Metric, fill = " ")
Error: Duplicate identifiers for rows (5, 6)

Metric

然后再进行spread,当然现在可以正常运行了:

# Collapse the rows that share the same grouping columns (every column except
# the third) into one row whose third column holds the distinct values joined
# by ", ", so that each identifier combination occurs only once.
dfa <- aggregate(
  df[3],
  by = df[-3],
  FUN = function(values) toString(unique(values))
)

  Dimension Date Metric
1         A  Fri   7, 8
2         A  Mon     23
3         B  Mon      7
4         A  Tue     25
5         B  Wed      9

问题:是否有一种“更容易”的方法,或者我的方法是否足够有效,所以我不需要失去睡眠?谢谢!

EDIT:所有代码(我的方案和akrun的2个方案)在这个小数据集上都可以运行。但是,akrun的df1 = spread(dfa, Date, Metric, fill = " ") df1 Dimension Fri Mon Tue Wed 1 A 7, 8 23 25 2 B 7 9 版本在我的真实数据集上会出错。这是dplyr

dput

2 个答案:

答案 0 :(得分:6)

您可以使用data.table开发版本(即v1.9.5)中的dcast。安装说明见here

library(data.table)  # v1.9.5+
# Cast df to wide format: one row per Dimension, one column per Date. When a
# Dimension/Date cell is fed by several rows, its distinct Metric values are
# joined into a single ", "-separated string.
dcast(
  setDT(df),
  Dimension ~ Date,
  value.var = "Metric",
  fun.aggregate = function(vals) paste(unique(vals), collapse = ", ")
)
#   Dimension  Fri Mon Tue Wed
#1:         A 7, 8  23  25    
#2:         B        7       9

或者

library(dplyr)
library(tidyr)
# Reduce every Dimension/Date group to a single row whose Metric is the
# ", "-joined list of its distinct values, then spread Date into columns,
# filling absent combinations with an empty string.
df %>%
  group_by(Dimension, Date) %>%
  summarise(Metric = paste(unique(Metric), collapse = ", ")) %>%
  spread(key = Date, value = Metric, fill = "")
#   Dimension  Fri Mon Tue Wed
#1         A 7, 8  23  25    
#2         B        7       9

更新

使用来自OP帖子的新数据集
 # Same collapse-then-spread pipeline on df2. setDF() converts df2 to a plain
 # data.frame by reference before it is handed to dplyr
 # (presumably df2 was a data.table at this point -- confirm).
 setDF(df2)
 df2 %>%
   group_by(Dimension, Date) %>%
   summarise(Metric = paste(unique(Metric), collapse = ", ")) %>%
   spread(key = Date, value = Metric, fill = "") %>%
   # Only show the first two rows and the first three columns of the result.
   head(2) %>%
   select(1:3)
 #    Dimension 16 analog tuner
 #1 10994030020  9             
 #2 12300245685            NTSC

答案 1 :(得分:0)

我解决了所有问题,现在3个解决方案都可以运行:#1是我的,#2和#3是@akrun的。代码完全可重现,如下所示。实际上,正如@akrun所预料的那样,使用dplyr和tidyr的版本#3在较大的数据集上运行得最快(这里的测试数据只有300行,以便不超过30000个字符的限制),差距在更大的数据集上更明显。我自己的版本#1在较小的数据集(100行左右)上最快,至少在我的机器上是这样。希望这对某些人有所帮助!

library(microbenchmark)
library(data.table)#v1.9.5+
library(reshape2)
library(tidyr)
library(dplyr)

df = structure(list(GTIN = c(10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12303485675, 12303485675, 12303485675, 12303485675, 12303485675, 
                            12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
                            12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
                            12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
                            12701670437, 13201067215, 13201067215, 13201067215, 13201067215, 
                            13201067215, 13964253832, 13964253832, 13964253832, 13964253832, 
                            13964253832, 14818899589, 14818899589, 14818899589, 14818899589, 
                            14818899589, 14818899589, 19748359455, 19748359455, 19748359455, 
                            19748359455, 19748359455, 19748359455, 19748359455, 19748359455, 
                            19748359455, 19748383566, 19748383566, 19748383566, 19748383566, 
                            19748383566, 19748383566, 19748383566, 19748383566, 19748383566, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012), 
                   Key = structure(c(1L, 10L, 12L, 14L, 
                                     16L, 20L, 21L, 24L, 26L, 29L, 35L, 43L, 44L, 45L, 49L, 56L, 57L, 
                                     59L, 61L, 62L, 64L, 67L, 68L, 69L, 77L, 78L, 79L, 84L, 85L, 87L, 
                                     92L, 93L, 96L, 99L, 100L, 110L, 111L, 112L, 113L, 114L, 115L, 
                                     118L, 119L, 122L, 123L, 125L, 129L, 131L, 134L, 6L, 9L, 12L, 
                                     14L, 15L, 20L, 21L, 30L, 41L, 45L, 47L, 57L, 58L, 62L, 64L, 65L, 
                                     69L, 71L, 79L, 80L, 83L, 85L, 86L, 96L, 99L, 108L, 118L, 121L, 
                                     122L, 123L, 125L, 126L, 127L, 128L, 14L, 57L, 64L, 75L, 118L, 
                                     11L, 13L, 14L, 57L, 62L, 63L, 64L, 70L, 74L, 76L, 81L, 82L, 89L, 
                                     109L, 116L, 118L, 14L, 57L, 62L, 64L, 118L, 14L, 57L, 62L, 64L, 
                                     118L, 14L, 17L, 52L, 57L, 64L, 118L, 14L, 17L, 53L, 57L, 62L, 
                                     62L, 64L, 75L, 118L, 14L, 17L, 52L, 53L, 57L, 62L, 64L, 75L, 
                                     118L, 5L, 7L, 14L, 23L, 25L, 27L, 28L, 31L, 33L, 32L, 34L, 38L, 
                                     42L, 46L, 48L, 50L, 57L, 62L, 64L, 72L, 73L, 79L, 82L, 88L, 91L, 
                                     95L, 94L, 98L, 97L, 106L, 117L, 118L, 120L, 124L, 130L, 133L, 
                                     5L, 7L, 14L, 18L, 23L, 25L, 27L, 28L, 31L, 33L, 32L, 34L, 36L, 
                                     39L, 42L, 46L, 48L, 50L, 57L, 60L, 62L, 64L, 72L, 73L, 79L, 82L, 
                                     88L, 91L, 95L, 94L, 98L, 97L, 106L, 117L, 118L, 120L, 124L, 130L, 
                                     133L, 5L, 7L, 8L, 14L, 18L, 23L, 25L, 27L, 28L, 31L, 33L, 32L, 
                                     34L, 38L, 39L, 42L, 46L, 48L, 55L, 57L, 62L, 64L, 66L, 72L, 73L, 
                                     79L, 88L, 91L, 95L, 98L, 97L, 101L, 102L, 117L, 118L, 120L, 124L, 
                                     132L, 133L, 2L, 3L, 4L, 5L, 7L, 14L, 18L, 19L, 22L, 23L, 25L, 
                                     27L, 28L, 31L, 33L, 32L, 34L, 36L, 37L, 39L, 40L, 42L, 46L, 48L, 
                                     50L, 51L, 54L, 55L, 57L, 60L, 62L, 64L, 72L, 73L, 79L, 82L, 88L, 
                                     90L, 91L, 98L, 97L, 102L, 103L, 104L, 105L, 107L, 117L, 118L), 
                                   .Label = c("16","16:9 Mode", "24p Technology", "3D", "Additional Features", "Analog Tuner", 
                                              "Analog TV Tuner", "Analog Video Input Signals", "aspect ratio", 
                                              "Aspect Ratio", "Assembled in Country of Origin", "Backlight Technology", 
                                              "Battery Type", "Brand", "Brightness", "Color Class", "Color Name", 
                                              "Color Temperature Control", "Compatible with Windows 7", "Component Video", 
                                              "Composite Video", "Connectivity", "Connector Type", "Country of Origin", 
                                              "Depth (Shipping)", "Depth with Stand", "Diagonal Size", "Diagonal Size (cm)", 
                                              "Digital Audio Output", "Digital Tuner", "Digital TV Tuner", 
                                              "Dimensions", "Dimensions & Weight Details", "Display Format", 
                                              "Display Technology", "DLNA", "Dynamic Contrast Ratio", "Enclosure Color", 
                                              "ENERGY STAR Qualified", "Expansion Slots", "Features", "Flat Panel Mount Interface", 
                                              "Green Compliance Certificate/Authority", "Green Compliant", 
                                              "HDMI", "HDMI Ports Qty", "headphone jack", "Height (Shipping)", 
                                              "Height with Stand", "Image Aspect Ratio", "Internet Streaming Services", 
                                              "Item Package Quantity", "Item Weight", "LAN Protocol", "LCD Backlight Technology", 
                                              "Limited Warranty", "Manufacturer", "maximum resolution", "Media Player", 
                                              "Motion Enhancement Technology", "Motion Interpolation Technology", 
                                              "MPN", "Multi Pack Indicator", "Name", "Native Contrast Ratio", 
                                              "Nominal Voltage", "Number of HDMI Ports", "Number of USB Ports", 
                                              "Operating Power Consumption", "Origin of Components", "package contents", 
                                              "PC Interface", "Power Device", "Primary Color", "Product Dimensions", 
                                              "Product in Inches (L x W x H)", "Product Model", "Product Series", 
                                              "Product Type", "remote control included", "Remote Included", 
                                              "Resolution", "response time", "RMS Output Power", "Scan Format", 
                                              "screen size", "Screen Size", "Series", "Shipping Weight (in pounds)", 
                                              "Sound Effects", "Sound Output Mode", "Sound System", "Speaker Output Power (W)", 
                                              "Speaker System", "Speaker(s)", "Speakers", "Stand", "Stand Design", 
                                              "Standard Refresh Rate", "Standby Power Consumption", "Stereo Reception System", 
                                              "Supported Audio Formats", "Supported Memory Cards", "Supported Pictures Formats", 
                                              "Supported Video Formats", "Surround Mode", "Timer Functions", 
                                              "Total Number of HDMI Ports", "Tv Definition", "TV Features", 
                                              "TV Refresh Rate (Hz)", "TV Resolution", "TV Screen Size (inches)", 
                                              "TV Screen Size Range", "TV Speakers", "Tv Technology", "TV Tuner", 
                                              "UNSPSC", "USB", "USB Port", "Vertical Viewing Angle", "VESA Mount Standard", 
                                              "VGA", "Video Interface", "Video Signal Standard", "viewing angle", 
                                              "warranty length", "wattage", "Weight (Approximate)", "Weight (Shipping)", 
                                              "Weight with Stand (Approximate)", "Widescreen Modes", "Width (Shipping)", 
                                              "Width with Stand"), class = "factor"), 
                   Value = structure(c(83L, 
                                       19L, 118L, 156L, 90L, 176L, 176L, 92L, 78L, 176L, 115L, 91L, 
                                       176L, 176L, 21L, 5L, 156L, 176L, 97L, 39L, 157L, 40L, 1L, 46L, 
                                       39L, 114L, 120L, 17L, 80L, 38L, 88L, 16L, 176L, 77L, 42L, 109L, 
                                       76L, 80L, 37L, 30L, 176L, 68L, 176L, 28L, 176L, 105L, 8L, 14L, 
                                       35L, 133L, 19L, 103L, 141L, 51L, 176L, 176L, 86L, 150L, 176L, 
                                       176L, 141L, 22L, 138L, 142L, 6L, 66L, 33L, 120L, 175L, 74L, 11L, 
                                       32L, 176L, 77L, 24L, 68L, 20L, 9L, 176L, 106L, 20L, 2L, 72L, 
                                       122L, 122L, 123L, 7L, 68L, 107L, 100L, 151L, 151L, 113L, 132L, 
                                       152L, 107L, 90L, 50L, 176L, 13L, 34L, 105L, 116L, 68L, 122L, 
                                       122L, 60L, 124L, 68L, 146L, 146L, 165L, 147L, 68L, 170L, 90L, 
                                       1L, 170L, 171L, 68L, 146L, 90L, 75L, 146L, 166L, 167L, 163L, 
                                       47L, 68L, 146L, 89L, 1L, 73L, 146L, 164L, 162L, 48L, 68L, 110L, 
                                       133L, 156L, 55L, 81L, 56L, 10L, 87L, 137L, 173L, 12L, 90L, 29L, 
                                       53L, 36L, 19L, 156L, 59L, 159L, 169L, 140L, 117L, 22L, 143L, 
                                       153L, 27L, 26L, 154L, 108L, 176L, 23L, 68L, 178L, 99L, 62L, 61L, 
                                       129L, 133L, 156L, 176L, 54L, 81L, 63L, 15L, 87L, 134L, 174L, 
                                       12L, 176L, 176L, 57L, 53L, 45L, 19L, 156L, 95L, 64L, 160L, 169L, 
                                       140L, 117L, 22L, 144L, 153L, 27L, 26L, 154L, 108L, 176L, 23L, 
                                       68L, 178L, 99L, 71L, 67L, 111L, 133L, 133L, 156L, 176L, 3L, 65L, 
                                       43L, 82L, 87L, 136L, 173L, 80L, 90L, 176L, 29L, 25L, 31L, 121L, 
                                       156L, 44L, 158L, 85L, 169L, 140L, 119L, 145L, 153L, 125L, 154L, 
                                       108L, 131L, 126L, 23L, 68L, 178L, 99L, 155L, 52L, 176L, 94L, 
                                       177L, 101L, 133L, 156L, 176L, 98L, 172L, 41L, 84L, 69L, 18L, 
                                       87L, 135L, 173L, 12L, 176L, 79L, 176L, 4L, 58L, 53L, 49L, 19L, 
                                       130L, 104L, 121L, 156L, 96L, 70L, 161L, 168L, 139L, 119L, 22L, 
                                       93L, 102L, 153L, 154L, 108L, 127L, 148L, 112L, 128L, 149L, 23L, 
                                       68L), .Label = c("1", "1-year limited", "1 x composite video/audio input ( RCA phono x 3 ) - rear 1 x USB ( 4 pin USB Type A ) - side 1 x component video input ( RCA phono x 3 ) - rear 2 x audio line-in ( RCA phono x 2 ) 1 x VGA input ( 15 pin HD D-Sub (HD-15) ) 2 x HDMI ( 19 pin HDMI Type A ) 1 x VGA input ( 15 pin HD D-Sub (HD-15) ) 1 x audio input ( mini-phone 3.5 mm ) 1 x digital audio input (optical) 1 x antenna", 
                                                        "1 x SD Memory Card", "1 Year", "1,000:1", "1,140 x 145 x 705 inches ; 65.6 pounds", 
                                                        "10.40 lb", "100 x 100", "102 cm", "1080p", "1080p (FullHD)", 
                                                        "1080p (HDTV)", "11.20 lb", "117 cm", "14", "14 W", "140 cm ( 138.7 cm viewable )", 
                                                        "16:09", "178°", "18.30\"", "1920 x 1080", "1x analog, 1x digital", 
                                                        "2", "2 port(s)", "2 speakers", "2 x main channel speaker - built-in", 
                                                        "200 x 100", "200 x 200 mm", "21", "21.8 in", "22\"", "22\" FHD LED TV; Remote Control", 
                                                        "25.4", "26.20\"", "28.9 in", "29", "29\"", "29L1350U", "3", 
                                                        "3 x HDMI input ( 19 pin HDMI Type A ) - rear 1 x HDMI input ( 19 pin HDMI Type A ) - side 1 x component video input - rear 2 x USB ( 4 pin USB Type A ) - side 1 x network ( RJ-45 ) - side 1 x VGA input ( 15 pin HD D-Sub (HD-15) ) - side 1 x antenna - rear 1 x digital audio output (optical) - rear 1 x composite video/audio input ( RCA phono x 3 ) - side 1 x audio input ( mini-phone stereo 3.5 mm ) - side 1 x audio input ( mini-phone stereo 3.5 mm ) - rear", 
                                                        "300 mW", "32\"", "32SL400", "33.1 in", "33.80 W", "34 x 7.8 x 7.6 inches", 
                                                        "34.5 x 7.4 x 7.2 inches", "34.6 in", "36.5 x 6.5 x 23.0", "365 Nit", 
                                                        "38 in", "4 port(s)", "4 x HDMI input ( 19 pin HDMI Type A ) 1 x component video input 1 x HD component video / RGB input 1 x composite video/audio input 1 x digital audio output (optical) 1 x Ethernet ( RJ-45 )", 
                                                        "4 x HDMI input ( 19 pin HDMI Type A ) 1 x component video input 1 x HD component video / RGB input 1 x composite video/audio input 1 x digital audio output (optical) 1 x USB ( 4 pin USB Type A )", 
                                                        "40\"", "400 x 200 mm", "400 x 400 mm", "40E200U", "42LN5400", 
                                                        "44.3 in", "45.2 lbs", "46\"", "46G300U", "5.4 in", "50 W", "50.9 in", 
                                                        "52161505", "55\" Class ( 54.6\" viewable )", "55WX800", "57.3 lbs", 
                                                        "6 W", "6.2 pounds", "6.50 ms", "6.7 pounds", "60", "60 Hz", 
                                                        "7.10\"", "7000000:1", "720p", "8.8 in", "82 cm", "9", "9 in", 
                                                        "AC 120/230 V ( 50/60 Hz )", "ATSC", "ATSC, QAM", "Audyssey EQ", 
                                                        "black", "Black", "CEC", "China", "Cinema", "Cinema Mode 24 fps", 
                                                        "ClearFrame 120Hz", "ClearFrame 240Hz", "ClearScan 120Hz", "Compatible with Windows 7 software and devices carry Microsoft’s assurance that these products have passed tests for compatibility and reliability with 32-bit and 64-bit Windows 7.", 
                                                        "Component, composite, HDMI", "Does Not Contain a Battery", "Dolby Volume, Invisible Speaker System, Audyssey EQ", 
                                                        "Dynamic Bass Boost (DBB)", "Edge LED", "Ethernet", "HDTV", "HDTV 1080p", 
                                                        "Imported", "Included", "Internet Apps", "Invisible Speaker System", 
                                                        "JPEG photo playback, sleep timer, CrystalCoat, DynaLight, ColorStream HD Component Video Inputs, Digital Noise Reduction (DNR)", 
                                                        "JPG", "KDL40W600B", "L1350U", "LCD", "LCD , Internet Connected , LED", 
                                                        "LCD TV", "LED", "LED-backlit LCD TV", "LED-LCD TV", "LED backlight", 
                                                        "LG", "LG 47LY340C - 47\" - commercial use LED-backlit LCD flat panel display - 1080p (FullHD) - direct-lit LED - dark titan", 
                                                        "Lg LG 42-Inch LED-Backlit LCD TV - 42LN5400 1080p 120Hz HDTV (42LN5400)", 
                                                        "Main channel speaker", "MP3", "MP3, AAC, LPCM", "MPEG-2, MPEG-4, AVCHD", 
                                                        "Mute button, Invisible Speaker System, Audyssey EQ", "Net TV", 
                                                        "NICAM", "No", "NTSC", "Panel with stand - 44.3 in x 13.7 in x 30.2 in x 46.3 lbs", 
                                                        "Panel with stand - 50.4 in x 14 in x 33.5 in x 71 lbs Panel without stand - 50.4 in x 1.1 in x 30.4 in x 59.3 lbs", 
                                                        "Panel without stand - 30.5 in x 1.4 in x 18.9 in", "Panel without stand - 39.1 in x 3.5 in x 25.3 in x 33.5 lbs", 
                                                        "PLED2243A", "Power adapter", "Power supply - internal", "ProScan", 
                                                        "PROSCAN PLED2243A 22\" 1080p 60Hz LED HDTV - PTR-PLED2243A", 
                                                        "REGZA E Series", "REGZA G Series", "REGZA SL Series", "Samsung", 
                                                        "Samsung UN46D7900 46-Inch 1080p 240HZ 3D LED HDTV Bundle with 3D Starter Kit and 3D Blu-Ray Player (Silver)", 
                                                        "SD Memory Card", "Sleep", "Sleep Timer; Auto Program", "Sony", 
                                                        "Sony KDL40W600B 40-Inch 1080p 60Hz Smart LED TV (2014 Model)", 
                                                        "Stereo", "Tabletop", "TheaterWide", "Toshiba", "Toshiba 29L1350U 29\" 720p LED-LCD TV - 16:9 - HDTV - Audyssey EQ, Audyssey ABX - 3 x HDMI - USB - Media PlayerShow More +", 
                                                        "Toshiba 32SL400U 32-Inch 720p Ultra Thin LED HDTV, Black", "Toshiba 40E200U 40-Inch 1080p LCD HDTV (Black Gloss)", 
                                                        "Toshiba 46G300U 46-Inch 1080p 120 Hz LCD HDTV (Black Gloss)", 
                                                        "Toshiba 55WX800U - 55\" LED TV - 1080p (FullHD)", "TV Wall Mount Kit for Samsung 40\" TV for UN40H6350, UN40H5500, UN40FH6030, UN40H5203, UN40H6400, UN40H4005, UN40H6203, UN40H5003, UN40EH5300, UN40EH5000, UN40HU6950, RM40D, UN40F6300, UN40H5203AF, UN40H5203AFXZA, UN40F5500, UN40EH6000, UN40F6400, UN40ES6100, DB40D, UN40ES6500, UN40B6000, LN40A550, H40B, HG40NA577LF, LN40E550, UN40C6300, LN40C530, UN40H5003AF, UA-40H5100, DM40D, LN40D630, MD40C, LN40D550, UN40EH6030, PE40C, LN40B530, LN40B650, LN40D503, ME40C, LN-S4052D, LN40A530, UN40EH5300FXZAB, LNT4065F, LNT4061F, LN40A750, LNT4071F, LNT4069FX Tvs. Includes 32\"-60\" Flat TV Wall Mount + 2 High Speed Gold Plated HDMI Cables + TV Cleaner Set + Microfiber Cleaning Cloth.", 
                                                        "Ultra Slim Flat Wall Mount for Samsung 55\" for (ME55C, UN55H6350, UN55H6400, UN55H7150, UN55FH6030, UN55F8000, UN55HU8550, UN55H8000, UN55F9000, UN55H6203, UN55HU9000, UN55F7500, UN55FH6200, UN55FH6003, UN55HU7250, UN55FH6003, UN55F7100, UN55HU8700, UN55HU6950, UN55F6300, UN55F6400, UN55D8000, UN55ES6100, UN55EH6000, UN55ES7500, UN55ES8000, UN55D7000, MD55C, 55UB8500, UN55ES6600, UN55ES7100, UN55B8000, UN55C6300, UN55C7000, UN55C8000, UN55ES6500, UN55D6300, UN55D6400, UN55ES6580, UN55EH6070, UN55B7000, UN55B6000, UN55HU6840, 55UB8200, UN55D6500, UN55ES6003, UN55D6000, UN55B8500, LN55C630, UN55D6050, UN55HU7200, UN55HU7200H, UN55D7050, UN55ES6150, UN55HU6950FXZA, UN55JS9000, UE55D, UN55HU8550FXZA, UN55HU7250FXZA) Tvs. Includes Tilt Wall Mount + 2 HDMI Cables + TV Cleaner Set + Microfiber Cleaning Cloth", 
                                                        "UN40H5003", "UN46D7900", "UN55ES6003", "UN55FH6003", "VGA (HD-15)", 
                                                        "VGA (HD-15), HDMI", "Vizio", "Vizio M220NV M221NV HDTV Vesa Mount Adapter 1712-0101-7920", 
                                                        "Wi-Fi, LAN", "With stand", "Without stand", "yes", "Yes", "Yes ( 3D glasses sold separately )", 
                                                        "Yes , 1 port(s)"), class = "factor")), 
              .Names = c("GTIN", "Key", 
                         "Value"), class = "data.frame", row.names = c(NA, -300L))

# Make sure df is a plain data.frame before the first solution runs.
df <- setDF(df)

############################
# My solution: collapse the duplicated GTIN/Key rows with aggregate() (distinct
# Value entries joined by ", "), then spread Key into columns.
dfa <- aggregate(df[3], df[-3], FUN = function(X) paste(unique(X), collapse = ", "))
test1 <- spread(dfa, Key, Value, fill = "")

print("My solution")
# `times` is an argument of microbenchmark(); a misplaced parenthesis used to
# pass it to print() instead. It now sits inside the microbenchmark() call,
# exactly like in the other two timing sections.
print(microbenchmark(
  aggregate(df[3], df[-3], FUN = function(X) paste(unique(X), collapse = ", ")),
  spread(dfa, Key, Value, fill = " "),
  times = 100
))
cat("\n")

############################
# akrun's solution #1
# dcast from data.table
# reshape2 is attached after data.table in this script, so a bare `dcast`
# would resolve to reshape2::dcast; the call is namespace-qualified to make
# sure the data.table implementation is the one being run.
# NOTE: setDT() converts df to a data.table by reference.
test2 <- data.table::dcast(
  setDT(df),
  GTIN ~ Key,
  value.var = "Value",
  fun.aggregate = function(x) toString(unique(x))
)

############################
# akrun's solution #2
# dplyr + tidyr
# The dcast step above calls setDT(df), which turns df into a data.table in
# place. Convert it back so this solution runs on the same plain data.frame
# as the first one.
setDF(df)
test3 <- df %>%
  group_by(GTIN, Key) %>%
  summarise(Value = toString(unique(Value))) %>%
  spread(Key, Value, fill = "")

# timing
print("dcast and aggregate")
# Namespace-qualified because reshape2, attached after data.table in this
# script, masks the bare name `dcast`.
print(microbenchmark(
  data.table::dcast(
    setDT(df), GTIN ~ Key,
    value.var = "Value",
    fun.aggregate = function(x) toString(unique(x))
  ),
  times = 100
))
cat("\n")

print("dplyr and tidyr")
# setDT(df) in the benchmark above leaves df as a data.table; restore the
# plain data.frame so the dplyr/tidyr timing uses the same input class as the
# first solution.
setDF(df)
print(microbenchmark(
  df %>%
    group_by(GTIN, Key) %>%
    summarise(Value = toString(unique(Value))) %>%
    spread(Key, Value, fill = ""),
  times = 100
))

结果:

[1] "My solution"
Unit: milliseconds
                                                                          expr       min        lq      mean    median
 aggregate(df[3], df[-3], FUN = function(X) paste(unique(X), collapse = ", ")) 40.423792 41.549188 44.496005 43.840846
                                           spread(dfa, Key, Value, fill = " ")  1.421713  1.533474  1.717194  1.626987
       uq       max neval cld
 44.84422 75.155029   100   b
  1.71860  4.422517   100  a 

[1] "dcast and aggregate"
Unit: milliseconds
                                                                                               expr      min       lq
 dcast(setDT(df), GTIN ~ Key, value.var = "Value", fun.aggregate = function(x) toString(unique(x))) 45.48038 47.35578
    mean  median       uq      max neval
 50.2578 48.9037 50.40563 67.42152   100

[1] "dplyr and tidyr"
Unit: milliseconds
                                                                                                             expr      min
 df %>% group_by(GTIN, Key) %>% summarise(Value = toString(unique(Value))) %>%      spread(Key, Value, fill = "") 40.70889
       lq     mean   median       uq      max neval
 41.41405 43.97719 43.06974 43.97275 93.33509   100

1000行的结果显示解决方案#3比#1快10%。行数越多,速度优势越明显。