我有一个名为 data_v 的数据框,其中一列是工资(salaries),取值范围在 0 到 140000 之间。我想把工资划分为若干区间(区间 1:0–10000,区间 2:10000–20000,……),计算每个区间内工资的中位数,并用该中位数替换落在该区间内的所有值。
使用下面的代码,我可以得到想要的结果:
# Bin 1: salaries in [0, 10000) are collapsed to that bin's median.
in_first <- data_v$salaries >= 0 & data_v$salaries < 10000
first <- data_v$salaries[in_first]
data_v$salaries[in_first] <- median(first)
# Bin 2: salaries in [10000, 20000), selected after bin 1 has been replaced.
in_second <- data_v$salaries >= 10000 & data_v$salaries < 20000
second <- data_v$salaries[in_second]
data_v$salaries[in_second] <- median(second)
.............
# Bin 10: salaries in [90000, 100000] (upper bound inclusive) are collapsed
# to that bin's median.
in_ten <- data_v$salaries >= 90000 & data_v$salaries <= 100000
ten <- data_v$salaries[in_ten]
data_v$salaries[in_ten] <- median(ten)
输出:
# Count how many rows hold each distinct salary value.
table(data_v$salaries)
median 7949 17523 25939 34302 42827 56840 65423 73292 81900 95479.75
# 130 2022 8481 9233 2661 1270 3864 2232 176 4
我尝试使用while循环实现相同的功能但没有成功:
# Replace every salary with the median of its 10000-wide bin
# [lower, lower + 10000), for lower bounds 0, 10000, ..., 140000.
bin_width <- 10000
salaries <- data_v$salaries
for (lower in seq(0, 140000, by = bin_width)) {
  # which() drops NA comparisons, so missing salaries neither enter the
  # median nor cause the whole bin to be overwritten with NA.
  in_bin <- which(salaries >= lower & salaries < lower + bin_width)
  # Skip empty bins: median() of an empty vector is NA.
  if (length(in_bin) > 0) {
    salaries[in_bin] <- median(salaries[in_bin])
  }
}
data_v$salaries <- salaries
欢迎任何帮助/建议。
答案 0 :(得分:2)
# Demonstrates median-per-bin replacement on the built-in mtcars data:
# mpg is cut into fixed-width bins and every value gets its bin's median.
data(mtcars) # data for test
step <- 10 # interval length, 10000 for your data
# Number of intervals; floor(...) + 1 keeps the maximum strictly below the
# last break, which the half-open bins below require.
n <- floor(max(mtcars$mpg) / step) + 1
# right = FALSE gives [0, step), [step, 2 * step), ... so that 0 and exact
# multiples of step land in the bin that starts at them.
mtcars$mpg_interval <- cut(mtcars$mpg, breaks = step * (0:n), right = FALSE)
mtcars$mpg_median <- ave(mtcars$mpg, mtcars$mpg_interval, FUN = median)