# Download the per-country COVID-19 report; text columns stay character
# vectors instead of being converted to factors.
df <- read.csv(
  file = "https://raw.githubusercontent.com/ulklc/covid19-timeseries/master/countryReport/raw/rawReport.csv",
  stringsAsFactors = FALSE
)
我找到了下面这段代码,它可以按地区(region)找出报告死亡人数最多的国家和康复人数最多的国家。
# Return the calendar date one day before the current system date.
yesterday <- function() {
  Sys.Date() - 1L
}
yesterday()
# If the data has no rows for yesterday yet, use yesterday() - 1 instead.
library(tidyverse)

# Day to report on. The feed can lag by a day or two, so yesterday's date is
# used only when the data actually contains it; otherwise fall back to the
# most recent day that is present.
available_days <- as.Date(df$day)
target_day <- yesterday()
report_day <- if (any(available_days == target_day, na.rm = TRUE)) {
  target_day
} else {
  max(available_days, na.rm = TRUE)
}

# For each region, keep the country row(s) holding the regional maximum of
# `metric` on `on_day`. Ties within a region are all kept.
#   data   : data frame with columns day, countryName, region, death, recovered
#   metric : unquoted column name to maximise (death or recovered)
#   on_day : Date to restrict the data to
# Returns an ungrouped tibble with columns Date, countryName, region, death,
# recovered.
top_by_region <- function(data, metric, on_day) {
  data %>%
    filter(as.Date(day) == on_day) %>%
    group_by(region) %>%
    filter({{ metric }} == max({{ metric }})) %>%
    ungroup() %>%
    select(Date = day, countryName, region, death, recovered)
}

death_df <- top_by_region(df, death, report_day)
recovered_df <- top_by_region(df, recovered, report_day)

# Stack both result sets; a country that leads its region on both metrics
# appears once in each part.
full_df <- bind_rows(death_df, recovered_df)
但是,我需要的是在所有国家中(不按地区分组)找出报告死亡人数最多的国家和康复人数最多的国家。
这是我想要的输出:
date countryName death recovered
2020/05/06 united State **19580** 500
2020/05/06 İran 11500 **98567**
请注意,这些值不是真实的。
数据集每天更新,但有时可能会滞后 1–2 天才更新,请注意这一点。
答案 0 :(得分:0)
这是一种使用 dplyr 计算每天所需信息的方法。
library(dplyr)

# For every day keep the row(s) with the highest death count or the highest
# recovered count, then mark each day's maximum by wrapping it in "**".
# Both columns become character. The value is formatted element-wise, so days
# on which several countries tie for a maximum are handled (indexing the tied
# values inside case_when() yields a vector of the wrong length and errors).
# The result stays grouped by day.
result <- df %>%
  group_by(day) %>%
  filter(death == max(death) | recovered == max(recovered)) %>%
  mutate(
    death = case_when(
      death == max(death) ~ paste0("**", death, "**"),
      TRUE ~ as.character(death)
    ),
    recovered = case_when(
      recovered == max(recovered) ~ paste0("**", recovered, "**"),
      TRUE ~ as.character(recovered)
    )
  )

# Show the highlighted rows for a single day.
result %>%
  filter(day == "2020/04/06")
# A tibble: 2 x 9
# Groups: day [1]
day countryCode countryName region lat lon confirmed recovered death
<chr> <chr> <chr> <chr> <dbl> <dbl> <int> <chr> <chr>
1 2020/04/06 CN China Asia 35 105 81708 **77029** 3331
2 2020/04/06 IT Italy Europe 42.8 12.8 132547 22837 **16523**
答案 1 :(得分:0)
下面的代码会在数据中最新的日期(day 列的最大值)上,选出 death 最大的记录和 recovered 最大的记录。
## load dplyr for filter() and select()
library(dplyr)
## read the data into R
df <- read.csv("https://raw.githubusercontent.com/ulklc/covid19-timeseries/master/countryReport/raw/rawReport.csv", stringsAsFactors = FALSE)
## most recent day contained in the data, kept as the original "YYYY/MM/DD" string
max.date <- df[which.max(as.Date(df$day)), "day"]
## keep only the entries from that day, leaving df itself untouched;
## the date column is named `day` (a bare `date` would resolve to base::date
## and make as.Date() fail)
df1 <- filter(df, as.Date(day, "%Y/%m/%d") == as.Date(max.date, "%Y/%m/%d"))
## entry with the most deaths (which.max() returns the first one on ties)
max.deaths <- df1[which.max(df1$death), ]
## format the number of deaths as given in the example
max.deaths$death <- paste0("**", max.deaths$death, "**")
## entry with the most recovered
max.recovered <- df1[which.max(df1$recovered), ]
## format the number recovered to match the format of the example
max.recovered$recovered <- paste0("**", max.recovered$recovered, "**")
## stack the two entries; base rbind() is used on purpose because
## dplyr::bind_rows() refuses to combine integer and character columns
max.records <- rbind(max.deaths, max.recovered)
## organize the data as shown in the example; `day` already holds the max date
max.records <- select(max.records, day, countryName, death, recovered)
下面这段代码会把每个国家的 death 求和得到 totalDeaths,把 recovered 求和得到 totalRecovered,然后返回 totalDeaths 最大的记录和 totalRecovered 最大的记录,并附上数据中的最新日期。
## load dplyr for group_by(), summarise(), ungroup() and select()
library(dplyr)
## read the data into R
df <- read.csv("https://raw.githubusercontent.com/ulklc/covid19-timeseries/master/countryReport/raw/rawReport.csv", stringsAsFactors = FALSE)
## most recent day contained in the data, kept as the original "YYYY/MM/DD" string
max.date <- df[which.max(as.Date(df$day)), "day"]
## sum death and recovered per country, leaving df itself untouched
## NOTE(review): if death/recovered are cumulative running totals, summing
## them over days overcounts -- confirm against the data source
df1 <- df %>%
  group_by(countryName) %>%
  summarise(totalDeaths = sum(death), totalRecovered = sum(recovered)) %>%
  ungroup()
## country with the most total deaths reported
max.deaths <- df1[which.max(df1$totalDeaths), ]
## format death numbers to match example
max.deaths$totalDeaths <- paste0("**", max.deaths$totalDeaths, "**")
## country with the most total recovered reported
max.recovered <- df1[which.max(df1$totalRecovered), ]
## format recovered numbers to match example
max.recovered$totalRecovered <- paste0("**", max.recovered$totalRecovered, "**")
## stack the two entries; base rbind() is used on purpose because
## dplyr::bind_rows() refuses to combine numeric and character columns
max.records <- rbind(max.deaths, max.recovered)
## attach the most current date the data reports
max.records$date <- max.date
## organize the data as shown in the example; after summarise() the only
## columns are countryName, totalDeaths, totalRecovered plus the added date
max.records <- select(max.records, date, countryName, totalDeaths, totalRecovered)
注意:两种方法都依赖于 dplyr 这个 R 包。可以在 R 或 RStudio 中运行 install.packages("dplyr") 来安装它。
我希望这会有所帮助!