Question

假设我有一个文件，其中包含类似下面这样的两列：

     ID   Order
      2       1
     10       2
     70       3
     85       4
     70       5
    213       6
      2       7
    293       8
     10       9
    313      10
    ...     ...

其中IDs包含1到1000之间的数字，每个数字出现4次。在Order中，数字从1到4000。

在R中是否可以这样修改Order列：对于同一个ID之后的3次出现，将其Order替换为该ID第一次出现时对应的Order？以示例数据集为例，期望的输出如下：

     ID   Order
      2       1
     10       2
     70       3
     85       4
     70       3
    213       6
      2       1
    293       8
     10       2
    313      10
    ...     ...

Answer 1

您可以使用dplyr库轻松完成此操作。您可以按ID分组，然后使用mutate函数取每个ID对应的最小Order值；由于Order是按行递增的，这个最小值就是该ID第一次出现时的Order。

library(dplyr)
# Replace every Order with the smallest Order recorded for the same ID.
# In the sample data Order increases down the rows, so that minimum is the
# Order of the ID's first occurrence.
ungroup(
  mutate(
    group_by(df, ID),
    Order = min(Order)
  )
)

# A tibble: 10 x 2
      ID Order
   <int> <dbl>
 1     2     1
 2    10     2
 3    70     3
 4    85     4
 5    70     3
 6   213     6
 7     2     1
 8   293     8
 9    10     2
10   313    10

Answer 2

基本R（注意：紧随其后的缩进代码块是Java代码，与本问题无关；基本R的写法见下文 ave() 那一行）：

    /**
     * Subtracts every expense in {@code total} from {@code initialBalance}.
     * Each expense's name is printed to standard output as it is processed.
     *
     * @param initialBalance balance before any expense is deducted
     * @param total          nested lists of expenses (presumably one inner list
     *                       per department, judging by the original naming)
     * @return the initial balance minus the sum of {@code getCurrentExpenses()}
     *         over all entries
     */
    private static long getRemainingCompanyBalance ( long initialBalance, List<ArrayList<PersonalExpense>> total) {
        long balance = initialBalance;
        for (List<PersonalExpense> expenseGroup : total) {
            for (PersonalExpense expense : expenseGroup) {
                System.out.println(expense.getName());
                balance -= expense.getCurrentExpenses();
            }
        }
        return balance;
    }

    /**
     * Stream-based computation of the balance left after all expenses.
     *
     * <p>The previous version called {@code mapToInt} on the {@code List}
     * produced by {@code collect(Collectors.toList())}, which does not compile;
     * the mapping now happens directly on the flattened stream. The sum is
     * accumulated as a {@code long} so that it cannot overflow an
     * {@code int} before being subtracted from the {@code long} balance.
     *
     * @param initialBalance balance before any expense is deducted
     * @param total          nested lists of expenses
     * @return the initial balance minus the sum of {@code getCurrentExpenses()}
     *         over all non-null entries
     */
    public static long getRemainingCompanyBalanceLambda ( long initialBalance, List<ArrayList<PersonalExpense>> total) {
        long totalExpenses = total
                .stream()
                .flatMap(Collection::stream)
                // The old instanceof filter only had the effect of skipping nulls
                // on an already-typed stream; keep that behaviour explicitly.
                .filter(pe -> pe != null)
                // NOTE(review): getCurrentExpenses() is assumed to return an integral
                // type (the original used mapToInt) - confirm against PersonalExpense.
                .mapToLong(PersonalExpense::getCurrentExpenses)
                .sum();

        return initialBalance - totalExpenses;
    }

}

基本R（使用 ave()）：

# Give every row the Order of its ID's first occurrence.
# The extraction has to live inside FUN: ave() treats each unnamed argument
# after `x` as a grouping factor, so a trailing `1` is never passed on to `[`,
# and `[`(x) on its own hands each group back unchanged.
df$Order <- with(df, ave(Order, ID, FUN = function(x) x[1]))

Answer 3

另一种选择是先筛选出每个ID第一次出现的行，然后将其与原始的完整ID列表重新连接（join）

library(dplyr)
library(readr)
library(purrr)

df <- read_table2(" ID   Order
      2       1
     10       2
     70       3
     85       4
     70       5
    213       6
      2       7
    293       8
     10       9
    313      10")

# Look up each ID's first-occurrence Order, then join it back onto the full ID
# column. The complete ID list is the left-hand table so the result keeps the
# original row order instead of being regrouped by ID.
list(
  df %>% select(ID),
  df %>% filter(!duplicated(ID))
) %>%
  reduce(full_join, by = "ID")
#> # A tibble: 10 x 2
#>       ID Order
#>    <int> <int>
#>  1     2     1
#>  2    10     2
#>  3    70     3
#>  4    85     4
#>  5    70     3
#>  6   213     6
#>  7     2     1
#>  8   293     8
#>  9    10     2
#> 10   313    10

由reprex package（v0.2.0）于2018-09-25创建。

R-根据另一列的首次出现来修改列值

3 个答案: