
时间:2017-09-12 16:55:47

标签: r join


# Gini coefficient for five equally sized population groups.
incomes <- c(1175, 1520, 1865, 2210, 2555) # income of each group
population <- rep(1 / 5, 5) * 100          # population shares in percent (5 times 1/5)

# Share of total income held by each group: income * frequency / total income.
income <- incomes * population / sum(incomes * population)
data <- data.frame(
  incomes = incomes,
  income = income,
  population = population / 100 # shares expressed as fractions of 1
)

data <- data[order(as.numeric(data$incomes)), ] # sort groups by ascending income

# richer[i] is the population share of all groups richer than group i.
# Preallocate the column, then fill it row by row.
data$richer <- numeric(nrow(data))
for (i in seq_len(nrow(data))) {
  data$richer[i] <- 1 - sum(data$population[1:i])
}
data$score <- data$income * (data$population + 2 * data$richer)
gini <- round(1 - sum(data$score), 4) # gini


# Cumulative population share: the group itself plus every richer group.
data$population2 <- data$richer + data$population

# Lookup grid of cumulative shares, from 1 down to 0.05 in steps of 0.05.
x <- data.frame(population2 = rev(seq(from = 0.05, to = 1, by = 0.05)))

# Left join: keep every grid row and attach the matching row of `data`, if any.
data.graph <- join(x = x, y = data, by = "population2")

所以 `data$population2` 变量的值为 1、0.8、0.6、0.4、0.2,而 `x$population2` 的值为 1、0.95、0.9、0.85、0.8 等,直到 0.05。但是,join 函数只匹配上了值为 1、0.8、0.2 的行,而没有匹配上 0.6 和 0.4 的行!有人可以帮助我吗?

1 个答案:

答案 0 :(得分:0)

欢迎来到 R 地狱的第一层(first circle of R hell):浮点数陷阱。 :)


> x$population2
 [1] 1.00 0.95 0.90 0.85 0.80 0.75 0.70 0.65 0.60 0.55 0.50 0.45 0.40 0.35 0.30 0.25 0.20 0.15 0.10 0.05
> data$population2
[1] 1.0 0.8 0.6 0.4 0.2


> x$population2[9]
[1] 0.6
> data$population2[3]
[1] 0.6

> data$population2[3] == x$population2[9]
[1] FALSE
> all.equal(data$population2[3], x$population2[9]) 
[1] TRUE
# all.equal tolerates numerical differences smaller than 1.5e-8 by default

> print(x$population2[9], digits = 20)
[1] 0.60000000000000009
> print(data$population2[3], digits = 20)
[1] 0.59999999999999987


library(plyr)
library(dplyr)

# Round the key column on both sides to 3 decimals before joining, so that
# values that print the same but differ in their last bits compare as equal.
join(
  mutate(x, population2 = round(population2, 3)),
  mutate(data, population2 = round(population2, 3)),
  by = "population2"
)

   population2 incomes    income population richer      score
1         1.00    1175 0.1260054        0.2    0.8 0.22680965
2         0.95      NA        NA         NA     NA         NA
3         0.90      NA        NA         NA     NA         NA
4         0.85      NA        NA         NA     NA         NA
5         0.80    1520 0.1630027        0.2    0.6 0.22820375
6         0.75      NA        NA         NA     NA         NA
7         0.70      NA        NA         NA     NA         NA
8         0.65      NA        NA         NA     NA         NA
9         0.60    1865 0.2000000        0.2    0.4 0.20000000
10        0.55      NA        NA         NA     NA         NA
11        0.50      NA        NA         NA     NA         NA
12        0.45      NA        NA         NA     NA         NA
13        0.40    2210 0.2369973        0.2    0.2 0.14219839
14        0.35      NA        NA         NA     NA         NA
15        0.30      NA        NA         NA     NA         NA
16        0.25      NA        NA         NA     NA         NA
17        0.20    2555 0.2739946        0.2    0.0 0.05479893
18        0.15      NA        NA         NA     NA         NA
19        0.10      NA        NA         NA     NA         NA
20        0.05      NA        NA         NA     NA         NA



# use this: one vectorised cumulative sum over the whole column
data <- data %>% mutate(richer = 1 - cumsum(population))

# instead of this: an element-by-element loop that re-sums the prefix on every pass
for (i in 1:length(income)) {
  data$richer[i] <- 1 - sum(data$population[1:i])
}

for 循环在 R 中相对较慢(在较大的数据集上尤为明显)。R 针对向量化操作进行了优化。