# Load the tidyverse (provides %>%, dplyr verbs and str_remove_all()),
# installing it only when it is not already available.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Download the two CSV inputs. Each URL is assembled with paste0() so that no
# line break ends up inside the string literal (a wrapped literal would embed
# a newline in the URL and the download would fail).
df <- read.csv(
  paste0(
    "https://raw.githubusercontent.com/ulklc/covid19-timeseries/",
    "master/countryReport/raw/rawReport.csv"
  ),
  stringsAsFactors = FALSE
)
df8 <- read.csv(
  paste0(
    "https://raw.githubusercontent.com/hirenvadher954/Worldometers-Scraping/",
    "master/countries.csv"
  ),
  stringsAsFactors = FALSE
)

# Join the two tables on country name and aggregate one row per country.
# NOTE(review): sum() adds up every joined row of a country; if the report
# rows are cumulative snapshots this overstates population and case counts --
# confirm whether max() is what is actually wanted.
df %>%
  left_join(df8, by = c("countryName" = "country_name")) %>%
  # Strip thousands separators so population can be parsed as a number.
  mutate(population = as.numeric(str_remove_all(population, ","))) %>%
  # Drop fully duplicated rows before aggregating.
  distinct() %>%
  group_by(countryName) %>%
  summarize(
    population = sum(population, na.rm = TRUE),
    confirmed = sum(confirmed, na.rm = TRUE),
    recovered = sum(recovered, na.rm = TRUE),
    death = sum(death, na.rm = TRUE),
    # Text of the form "<deaths>/<population>", built from the totals above.
    death_prop = paste0(as.character(death), "/", as.character(population))
  )
这段代码计算了每个国家/地区的死亡人数与人口之比(death_prop)。
我想找出该比值最高的 10 个国家/地区。
confirmed(确诊)和 recovered(康复)两列不可用。
10 x 6
countryName population confirmed recovered death death_prop
<chr> <dbl> <int> <int> <int> <chr>
1 Afghanistan 4749258212 141652 16505 3796 3796/4749258212
2 Albania 351091234 37233 22518 1501 1501/351091234
3 Algeria 5349827368 206413 88323 20812 20812/5349827368
4 Andorra 9411324 38518 18054 2015 2015/9411324
5 Angola 4009685184 1620 435 115 115/4009685184
6 Anguilla 1814018 161 92 0 0/1814018
7 Antigua and Barbuda 11947338 1230 514 128 128/11947338
8 Argentina 5513884428 232975 66155 10740 10740/5513884428
9 Armenia 361515646 121702 46955 1626 1626/361515646
10 Aruba 13025452 5194 3135 91 91/13025452
以上输出只是一个示例,
其中的数值并不正确。
答案 0 :(得分:0)
数据是累积格式的,也就是说每一天的数值已经包含了此前所有日期的数值。
因此,只需对每一列取最大值(`max`),然后再计算 `death_prop`。
library(dplyr)
library(stringr) # provides str_remove_all(), which dplyr does not export

# Rank countries by deaths as a percentage of population.
# Expects `df` and `df8` to have been loaded already.
df %>%
  left_join(df8, by = c("countryName" = "country_name")) %>%
  # Strip thousands separators so population can be parsed as a number.
  mutate(population = as.numeric(str_remove_all(population, ","))) %>%
  group_by(countryName) %>%
  # Keep the per-country maximum of every column from population through
  # death, ignoring missing values.
  summarise(
    across(population:death, function(x) max(x, na.rm = TRUE))
  ) %>%
  mutate(death_prop = death / population * 100) %>%
  arrange(desc(death_prop))
# A tibble: 215 x 5
# countryName population year death death_prop
# <chr> <dbl> <dbl> <int> <dbl>
# 1 San Marino 33860 2019 42 0.124
# 2 Belgium 11589623 2020 9312 0.0803
# 3 Andorra 77142 2019 51 0.0661
# 4 Spain 46754778 2020 28752 0.0615
# 5 Italy 60461826 2020 32877 0.0544
# 6 United Kingdom 67886011 2020 36914 0.0544
# 7 France 65273511 2020 28432 0.0436
# 8 Sweden 10099265 2020 4029 0.0399
# 9 Sint Maarten 42388 2019 15 0.0354
#10 Netherlands 17134872 2020 5830 0.0340
# … with 205 more rows