R:如何对不同列的值求和,并且同一个月不重复计数?

时间:2018-06-06 18:18:55

标签: r date

这是我的数据集的示例:
head(visite_cliente_date)
  CodCliente                                  museo       data Month January February March April May June July
1      76330                BORGO E ROCCA MEDIEVALE 2012-12-01    12       0        0     0     0   0    0    0
2      86660                BORGO E ROCCA MEDIEVALE 2012-12-01    12       0        0     0     0   0    0    0
3      87629                BORGO E ROCCA MEDIEVALE 2012-12-01    12       0        0     0     0   0    0    0
4     255734                BORGO E ROCCA MEDIEVALE 2012-12-01    12       0        0     0     0   0    0    0
5       1496 GAM - GALLERIA CIVICA ARTE MODERNA E C 2012-12-01    12       0        0     0     0   0    0    0
6      16689 GAM - GALLERIA CIVICA ARTE MODERNA E C 2012-12-01    12       0        0     0     0   0    0    0
  August September October November Dicember
1      0         0       0        0        1
2      0         0       0        0        1
3      0         0       0        0        1
4      0         0       0        0        1
5      0         0       0        0        1
6      0         0       0        0        1

CodCliente:客户ID

museo:客户参观过的博物馆

data:访问日期

Month:访问月份

各月份列(January … Dicember):我根据 Month 的值为每个月创建了一个虚拟变量(dummy variable),该月有访问记为 1,否则为 0

我想创建一个新变量,表示每位客户去过博物馆的不同月份的数量。

示例:如果客户在3月,4月和6月至少去过博物馆一次,则该变量的值为“3”。

示例2:如果客户在六月份去博物馆三次,变量的值为“1”。

提前感谢您的回复!

1 个答案:

答案 0 :(得分:0)

我终于做到了,不过方法比较冗长。

首先,我为每个月创建了一个数据集,用来汇总每位客户在该月的访问次数。下面是我对 1 月份所做的处理:

# Build the January table: one row per customer with the number of
# January visits.

# Keep only the visit rows flagged as January; which() drops rows where the
# flag is NA, exactly as subset() would.
Gennaio <- visite_cliente_date[which(visite_cliente_date$January == 1), ]

# Sum the January flag per customer. aggregate() only receives the two
# vectors passed here, so the other columns need not be removed first.
Gennaio <- aggregate(
  Gennaio$January,
  by = list(ID = Gennaio$CodCliente),
  FUN = sum
)

# aggregate() names the summed column "x"; rename it after the month.
names(Gennaio)[names(Gennaio) == "x"] <- "January"

head(Gennaio)
   ID January
1  41       1
2  94       1
3 106       5
4 111       1
5 113       1
6 118       1

然后我合并了所有数据集,将NA值替换为0

# Full outer join of the twelve per-month tables on the customer ID, folding
# left to right so the month columns come out in calendar order.
Visite_mesi <- Reduce(
  function(acc, month_tbl) merge(acc, month_tbl, by = "ID", all = TRUE),
  list(
    Gennaio, Febbraio, Marzo, Aprile, Maggio, Giugno,
    Luglio, Agosto, Settembre, Ottobre, Novembre, Dicembre
  )
)

# A customer missing from a month's table gets NA from the outer join;
# record that as zero visits.
Visite_mesi[is.na(Visite_mesi)] <- 0

head(Visite_mesi)
   ID January February March April May June July August September
1  41       1        0     0     0   0    0    0      0         0
2  94       1        2     1     1   1    1    0      0         0
3 106       5        9     6     3   4    6    1      7         7
4 111       1        3     1     1   3    5    0      0         0
5 113       1        1     0     0   0    0    0      0         3
6 118       1        0     1     1   0    1    0      1         0
  October November Dicember
1       0        0        0
2       1        0        0
3       3        4        5
4       0        0        0
5       0        0        0
6       0        0        0

接着,我把所有大于等于 2 的值都转换为 1:

# Cap every monthly visit count at 1: any count of 2 or more becomes 1,
# counts below 2 are left unchanged.
# "Dicember" is spelled as in the data set's column name.
month_cols <- c(
  "January", "February", "March", "April", "May", "June",
  "July", "August", "September", "October", "November", "Dicember"
)
Visite_mesi[month_cols] <- lapply(
  Visite_mesi[month_cols],
  function(visits) replace(visits, visits >= 2, 1)
)

最后,我只需把各月份列的值相加,就得到了我想要的变量:

# Per customer, add up the twelve month columns. Assuming those columns hold
# 0/1 flags at this point, the total is the number of distinct months with
# at least one visit.
# Columns are selected by name rather than by position (2:13) so the sum
# stays correct if columns are added or reordered.
# "Dicember" is spelled as in the data set's column name.
month_cols <- c(
  "January", "February", "March", "April", "May", "June",
  "July", "August", "September", "October", "November", "Dicember"
)
Visite_mesi$sum_month <- rowSums(Visite_mesi[, month_cols])

head(Visite_mesi)
   ID January February March April May June July August September
1  41       1        0     0     0   0    0    0      0         0
2  94       1        1     1     1   1    1    0      0         0
3 106       1        1     1     1   1    1    1      1         1
4 111       1        1     1     1   1    1    0      0         0
5 113       1        1     0     0   0    0    0      0         1
6 118       1        0     1     1   0    1    0      1         0
  October November Dicember sum_month
1       0        0        0         1
2       1        0        0         7
3       1        1        1        12
4       0        0        0         6
5       0        0        0         3
6       0        0        0         5