假设我有以下数据:
# Sample input: 12 rows, four per quote_id. Every ingredient column is a 0/1
# flag; replace() marks the row positions where the flag is set.
quote_id <- rep(c(123, 789, 456), each = 4)
Cake <- rep("chocolate", times = 12)
Egg <- replace(numeric(12), c(1, 11), 1)
Regular_Milk <- replace(numeric(12), 3, 1)
Almond_Milk <- replace(numeric(12), 5, 1)
Coconut_Milk <- replace(numeric(12), 9, 1)
Regular_Sugar <- replace(numeric(12), 10, 1)
Cane_Sugar <- replace(numeric(12), c(2, 7), 1)
Regular_Flour <- replace(numeric(12), 4, 1)
Oat_Flour <- numeric(12)
Wheat_Flour <- replace(numeric(12), 12, 1)
Almond_Flour <- replace(numeric(12), 8, 1)
# Column names are taken from the variable names passed in.
Old_Cake_Data <- data.frame(
  quote_id, Cake, Egg,
  Regular_Milk, Almond_Milk, Coconut_Milk,
  Regular_Sugar, Cane_Sugar,
  Regular_Flour, Oat_Flour, Wheat_Flour, Almond_Flour
)
在SQL或R中是否可以获取以下输出:
# Desired result: one row per quote_id (in order of first appearance), with
# each ingredient flag collapsed across that id's rows.
quote_id <- c(123, 789, 456)
Cake <- rep("chocolate", times = 3)
Egg <- c(1, 0, 1)
Regular_Milk <- c(1, 0, 0)
Almond_Milk <- c(0, 1, 0)
Coconut_Milk <- c(0, 0, 1)
Regular_Sugar <- c(0, 0, 1)
Cane_Sugar <- c(1, 1, 0)
Regular_Flour <- c(1, 0, 0)
Oat_Flour <- numeric(3)
Wheat_Flour <- c(0, 0, 1)
Almond_Flour <- c(0, 1, 0)
# Column names are taken from the variable names passed in.
New_Cake_Data <- data.frame(
  quote_id, Cake, Egg,
  Regular_Milk, Almond_Milk, Coconut_Milk,
  Regular_Sugar, Cane_Sugar,
  Regular_Flour, Oat_Flour, Wheat_Flour, Almond_Flour
)
我考虑过对每一列求和,但是问题是我需要按quote_id对输出进行排序。
本质上,我希望输出中每个ID(即quote_id)只对应一条记录。
答案 0 :(得分:0)
在R中,您可以尝试以下操作:
# Sample input: 12 rows, four per quote_id; ingredient columns are 0/1 flags.
quote_id <- rep(c(123, 789, 456), each = 4)
Cake <- rep("chocolate", times = 12)
Egg <- c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0)
Regular_Milk <- c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
Almond_Milk <- c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0)
Coconut_Milk <- c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0)
Regular_Sugar <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0)
Cane_Sugar <- c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0)
Regular_Flour <- c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)
Oat_Flour <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
Wheat_Flour <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)
Almond_Flour <- c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0)
Old_Cake_Data <- data.frame(
  quote_id, Cake, Egg,
  Regular_Milk, Almond_Milk, Coconut_Milk,
  Regular_Sugar, Cane_Sugar,
  Regular_Flour, Oat_Flour, Wheat_Flour, Almond_Flour
)

# Distinct quote_ids in order of first appearance. unique() keeps both the
# original (numeric) type and the original order, whereas
# levels(as.factor(...)) would return sorted character strings.
lev <- unique(Old_Cake_Data$quote_id)

# Start from the first row of every group: this already carries the right
# quote_id and the group's first Cake value.
first_rows <- match(lev, Old_Cake_Data$quote_id)
New_Cake_Data <- Old_Cake_Data[first_rows, ]
rownames(New_Cake_Data) <- NULL

# Sum every ingredient flag within each quote_id. reorder = FALSE makes
# rowsum() emit groups in order of first appearance, i.e. aligned with `lev`.
flag_cols <- c(
  "Egg", "Regular_Milk", "Almond_Milk", "Coconut_Milk",
  "Regular_Sugar", "Cane_Sugar",
  "Regular_Flour", "Oat_Flour", "Wheat_Flour", "Almond_Flour"
)
New_Cake_Data[flag_cols] <- rowsum(
  Old_Cake_Data[flag_cols],
  group = Old_Cake_Data$quote_id,
  reorder = FALSE
)

### final data
print(New_Cake_Data)
如果您事先不知道列的数量和类型(数值型还是字符型),可以使用下面的通用写法:
# Generic version: collapse Old_Cake_Data to one row per quote_id without
# naming the value columns. Numeric columns are summed within each id; any
# other column keeps the value from the id's first row.

# Distinct ids in order of first appearance, keeping their original type.
lev <- unique(Old_Cake_Data$quote_id)

# Template: the first row of every id. It already holds the correct quote_id
# and the first-row value of every non-numeric column.
first_rows <- match(lev, Old_Cake_Data$quote_id)
New_Cake_Data <- Old_Cake_Data[first_rows, ]
rownames(New_Cake_Data) <- NULL

# Every column except the grouping key, selected by name so the key does not
# have to be the first column.
value_cols <- setdiff(names(Old_Cake_Data), "quote_id")

for (i in seq_along(lev)) {
  # Row indices of Old_Cake_Data belonging to the i-th id.
  d <- which(Old_Cake_Data$quote_id == lev[i])
  for (col in value_cols) {
    if (is.numeric(Old_Cake_Data[[col]])) {
      New_Cake_Data[[col]][i] <- sum(Old_Cake_Data[[col]][d])
    }
  }
}