我是 R 新手,很抱歉提出这么基础的问题。我有一个名为 data 的数据框,其中(作为示例)包含三列:data$engine、data$unit 和 data$AvailableLeft。data$AvailableLeft 是虚拟变量(取值为 0 或 1)。对于 data$engine 的每个唯一值,可以对应多个 data$unit 的值。我想针对 data$engine 和 data$unit 的每种组合,分别计算 data$AvailableLeft 中 “1” 所占的百分比。我有几十万行数据,这里只粘贴前 13 行。
data$engine data$unit data$AvailableLeft
10158 207 1
10158 207 0
10158 207 1
10158 207 0
10147 142 1
10147 142 1
10147 142 1
10147 142 0
10147 142 1
10147 142 0
10147 142 1
10161 244 0
10161 244 0
data$engine data$unit Percentage
10158 207 20%
10147 142 10%
10161 244 3%
. . .
. . .
. . .
# Goal: the percentage of 1s in AvailableLeft for each engine/unit
# combination, not for the whole data set.
# Assumes `data` is a data frame with columns engine, unit and AvailableLeft
# (0/1), as in the sample rows of the question.
# The columns are copied under plain names so that the formulas below really
# read from df11; terms written as data$... would bypass the `data =` argument.
df11 <- data.frame(
  engine = data$engine,
  unit = data$unit,
  AvailableLeft = data$AvailableLeft
)
# Number of 1s per unit per engine (summing a 0/1 column counts the 1s).
leftwarn1 <- aggregate(AvailableLeft ~ engine + unit, data = df11, FUN = sum)
# Number of rows (1s and 0s together) per unit per engine.
leftwarn10 <- aggregate(
  AvailableLeft ~ engine + unit,
  data = df11,
  FUN = length
)
# Join both summaries on the grouping keys, then divide the two columns.
leftwarn <- merge(
  leftwarn1, leftwarn10,
  by = c("engine", "unit"),
  suffixes = c("_ones", "_total")
)
leftwarn$Percentage <-
  100 * leftwarn$AvailableLeft_ones / leftwarn$AvailableLeft_total
答案 0 :(得分:0)
# Sample data from the question. With its default check.names = TRUE,
# read.table() turns a header such as "data$engine" into "data.engine", so the
# columns are given plain names right after reading.
dta <- read.table(text = "
data$engine data$unit data$AvailableLeft
10158 207 1
10158 207 0
10158 207 1
10158 207 0
10147 142 1
10147 142 1
10147 142 1
10147 142 0
10147 142 1
10147 142 0
10147 142 1
10161 244 0
10161 244 0",
header = TRUE)
names(dta) <- c("engine", "unit", "AvailableLeft")
# aggregate() (see ?aggregate) computes a summary statistic for each subset of
# the data. The mean of a 0/1 column is the share of 1s; scaling the column by
# 100 first turns that share into the percentage asked for.
pct <- aggregate(
  data.frame(percentage = 100 * dta$AvailableLeft),
  by = list(engine = dta$engine, unit = dta$unit),
  FUN = mean
)
pct
答案 1 :(得分:0)
"/dta.txt"), header = TRUE) # I transcribed your examble above to notepad and
# saved it as dta.txt, so I could read the table in R
# Computes, for every engine/unit combination, the percentage of rows whose
# AvailableLeft equals 1. Expects `data` to be a data frame with columns
# engine, unit and AvailableLeft.
enginevalues <- unique(data$engine) # Unique values of "engine" column
unitvalues <- unique(data$unit) # Unique values of "unit" column
digitsafterdot <- 2 # Number of digits after the decimal point
# Result matrix with one row per engine/unit combination, allocated up front
# so it is not regrown on every iteration.
output <- matrix(
  NA_real_,
  nrow = length(enginevalues) * length(unitvalues),
  ncol = 3,
  dimnames = list(NULL, c("engine", "unit", "percentage"))
)
# Two nested loops visit every combination of "engine" and "unit" values.
i <- 0L
for (eng in enginevalues) {
  dteng <- data[data$engine == eng, , drop = FALSE]
  for (un in unitvalues) {
    dtunit <- dteng[dteng$unit == un, , drop = FALSE]
    i <- i + 1L
    if (nrow(dtunit) == 0) {
      # Division by zero is not allowed: a combination without rows gets 0.
      percentage <- 0
    } else {
      # Percentage: number of 1s x 100 divided by the number of rows.
      percentage <- round(
        sum(dtunit[, "AvailableLeft"] == 1) * 100 / nrow(dtunit),
        digitsafterdot
      )
    }
    output[i, ] <- c(eng, un, percentage)
  }
}
output
#      engine unit percentage
# [1,]  10158  207      50.00
# [2,]  10158  142       0.00
# [3,]  10158  244       0.00
# [4,]  10147  207       0.00
# [5,]  10147  142      71.43
# [6,]  10147  244       0.00
# [7,]  10161  207       0.00
# [8,]  10161  142       0.00
# [9,]  10161  244       0.00
# Output without zero values. Note that this also drops combinations that do
# occur in the data but contain no 1s (engine 10161 / unit 244 here).
output[output[, "percentage"] != 0, , drop = FALSE]
#      engine unit percentage
# [1,]  10158  207      50.00
# [2,]  10147  142      71.43
答案 2 :(得分:0)
# Percentage of 1s in AvailableLeft for each engine/unit combination.
# Assumes `data` is a data frame with columns engine, unit and AvailableLeft.
# The columns are copied under plain names so that the formula below really
# reads from df11; terms written as data$... would bypass the `data =` argument.
df11 <- data.frame(
  engine = data$engine,
  unit = data$unit,
  AvailableLeft = data$AvailableLeft
)
# The mean of a 0/1 column is the share of 1s; times 100 gives the percentage.
warn <- aggregate(
  AvailableLeft ~ engine + unit,
  data = df11,
  FUN = function(x) 100 * mean(x)
)
names(warn)[3] <- "Percentage"
答案 3 :(得分:0)
# data.table solution. The two dt[...] statements are reconstructed so that
# they reproduce the console output shown below each of them.
library(data.table)
# Sample data from the question. With its default check.names = TRUE,
# read.table() turns the header "data$engine" into "data.engine", and likewise
# for the other two columns.
dta <- read.table(text = "
data$engine data$unit data$AvailableLeft
10158 207 1
10158 207 0
10158 207 1
10158 207 0
10147 142 1
10147 142 1
10147 142 1
10147 142 0
10147 142 1
10147 142 0
10147 142 1
10161 244 0
10161 244 0",
header = TRUE)
dt <- as.data.table(dta)
# Percentage of 1s per engine/unit group: the mean of a 0/1 column is the
# share of 1s. An unnamed result column is called V1 by data.table.
dt[, 100 * mean(data.AvailableLeft), by = .(data.engine, data.unit)]
#    data.engine data.unit       V1
# 1:       10158       207 50.00000
# 2:       10147       142 71.42857
# 3:       10161       244  0.00000
# Same figures, rounded to two decimals and formatted with a percent sign.
dt[, paste(round(100 * mean(data.AvailableLeft), 2), "%"),
   by = .(data.engine, data.unit)]
#    data.engine data.unit      V1
# 1:       10158       207    50 %
# 2:       10147       142 71.43 %
# 3:       10161       244     0 %
答案 4 :(得分:-1)
# Sample data from the question; the third column (AvailableLeft in the
# asker's data) is called `left` here. For the asker's own data frame, use
# AvailableLeft wherever `left` appears below.
df <- read.table(col.names=c("engine", "unit", "left"), text="
10158 207 1
10158 207 0
10158 207 1
10158 207 0
10147 142 1
10147 142 1
10147 142 1
10147 142 0
10147 142 1
10147 142 0
10147 142 1
10161 244 0
10161 244 0")
# Three-way contingency table of counts: engine x unit x left.
counts <- with(df, table(engine, unit, left))
# margin = c(1, 2) normalises within each engine/unit cell, so the values are
# percentages of that group's rows. Without `margin`, prop.table() divides by
# the grand total and reports shares of the whole data set instead.
pct <- prop.table(counts, margin = c(1, 2)) * 100
# Keep the share of 1s only. Engine/unit combinations that never occur come
# out as NaN (0 / 0) and are dropped.
res <- subset(
  as.data.frame(pct),
  left == 1 & !is.nan(Freq),
  select = -left
)
res
#    engine unit     Freq
# 10  10147  142 71.42857
# 14  10158  207 50.00000
# 18  10161  244  0.00000