我有一个包含公司财务信息的大型数据库。我想根据特定的日期范围来计算“LEASE_EXP”列的平均值。
例如,我想计算“Bal_Stmt_Date”列介于“1/11/2018”和“31/10/2019”之间的所有公司(合计)的 LEASE_EXP 平均值。
library(readxl)
library(zoo)
library(dplyr)
library(lubridate)
# Load the quarterly financial table from the local CSV export.
df <- read.csv("C:\\Users\\talgotra\\Desktop\\Tech Project\\OperatingLease\\finInfo_q.csv")

# Inclusive reporting window: 1 Nov 2018 through 31 Oct 2019.
# (The start was previously 1998-11-01, which pulled in 20 extra years.)
start <- as.Date("2018-11-01")
end <- as.Date("2019-10-31")

# Mean LEASE_EXP over every row, across all companies, whose statement date
# falls inside the window.
# NOTE(review): the '%m/%d/%Y' format assumes the CSV stores dates as
# month/day/year text -- confirm against the file.
# na.rm = TRUE so that a single missing LEASE_EXP does not turn the result
# into NA.
mean(
  subset(
    transform(df, Bal_Stmt_Date = as.Date(Bal_Stmt_Date, '%m/%d/%Y')),
    Bal_Stmt_Date >= start & Bal_Stmt_Date <= end,
    select = LEASE_EXP
  )[[1]],
  na.rm = TRUE
)
答案 0 :(得分:2)
library(tidyverse) # for data manipulation
library(lubridate) # for dates
# Small hand-made table standing in for the real data: one row per
# company statement, with the date stored as a yyyymmdd number.
df <- tribble(
  ~Coded_Name, ~Bal_Stmt_Date, ~LEASE_EXP,
  1, 20190304, 42,
  1, 20190305, 42,
  1, 20190307, 42,
  2, 20190304, 42,
  2, 20190305, 42,
  3, 20190306, 42,
  3, 20190304, 42
)

# Per-company total of LEASE_EXP inside the date window.
df %>%
  # Convert the numeric yyyymmdd values into Date objects.
  mutate(Bal_Stmt_Date = ymd(Bal_Stmt_Date)) %>%
  # Keep rows strictly between the two bounds (both ends are exclusive).
  filter(
    Bal_Stmt_Date > ymd(20190303),
    Bal_Stmt_Date < ymd(20190306)
  ) %>%
  # One output row per company ...
  group_by(Coded_Name) %>%
  # ... holding the sum of its remaining LEASE_EXP values.
  summarise(sum_LEASE = sum(LEASE_EXP))
答案 1 :(得分:1)
在基础 R(base R)中,您可以执行以下操作:
# Inclusive bounds of the reporting window.
start <- as.Date("2018-11-01")
end <- as.Date("2019-10-31")

# Mean LEASE_EXP over all rows whose statement date lies in [start, end].
# Wrapped in local() so the helper vectors do not leak into the workspace.
local({
  # Statement dates parsed from month/day/year text.
  stmt_dates <- as.Date(df[["Bal_Stmt_Date"]], '%m/%d/%Y')
  # which() drops rows whose date failed to parse (NA comparison).
  in_window <- which(stmt_dates >= start & stmt_dates <= end)
  mean(df[["LEASE_EXP"]][in_window], na.rm = TRUE)
})
或者使用 dplyr 和 lubridate:
library(dplyr)
library(lubridate)
# Overall mean of LEASE_EXP for statements dated within [start, end].
df %>%
  # Parse the month/day/year text into a Date column used only for filtering.
  mutate(stmt_date = mdy(Bal_Stmt_Date)) %>%
  # Both bounds are inclusive.
  filter(stmt_date >= start, stmt_date <= end) %>%
  # Collapse to a single row; missing LEASE_EXP values are ignored.
  summarise(mean = mean(LEASE_EXP, na.rm = TRUE))