根据乔恩·斯普林(Jon Spring)的建议,我尝试重新编写以下大部分代码。
library(tidyverse)
# Reshape the wide table `ka` (one column per year) into long form with the
# columns Year and Export, then keep the ten largest Export values of each
# year, sorted by Year and descending Export.
ka2 <- ka %>%
  gather(key = Year, value = Export, -Economy, -Partner) %>%
  arrange(Year, desc(Export)) %>%
  group_by(Year) %>%
  top_n(n = 10, wt = Export) %>%
  ungroup()

# Remove every "X" from the year labels taken from the old column names.
ka2[["Year"]] <- gsub("X", "", ka2[["Year"]])
# Economy is not needed for the plots, so drop the column.
ka2[["Economy"]] <- NULL
# Discard factor levels that no longer occur after filtering.
ka2 <- droplevels(ka2)
# Show the class of each remaining column before Year is converted.
sapply(ka2, class)
ka2[["Year"]] <- as.integer(ka2[["Year"]])
library(ggplot2)
library(scales)
# Bar chart of Export (divided by 1,000,000) per Partner, one panel per Year.
# NOTE(review): reorder(Partner, -Export) computes a single ordering of
# Partner over the whole of ka2 (by default from the mean of -Export per
# Partner), not one ordering per Year, so the bars inside an individual panel
# are not guaranteed to appear in descending order.
ggplot(ka2, aes(x=reorder(Partner, -Export), y = Export/1000000, fill = Partner)) +
# stat = "identity" draws the Export values as given instead of counting rows.
geom_bar(stat = "identity") +
# Format the y axis tick labels with scales::comma.
scale_y_continuous(labels = comma) +
# Rotate the x axis labels by 90 degrees and hide the legend.
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") +
labs(title = "Kazhakhstan Exports to Largest Partners, 2000-2015",
y = "Bln USD", x = element_blank()) +
# One panel per Year; scales = 'free_x' gives every panel its own x axis.
facet_wrap(~ Year, scales = 'free_x')
经过上述处理后得到的data.frame(dput输出)如下:
# dput()-style literal of a tibble with 60 rows and the columns Partner
# (character), Year (integer) and Export (integer): ten rows for each of the
# years 2004 to 2009, listed in descending Export within every year.
structure(list(Partner = c("Switzerland", "Italy", "Russia",
"China", "France", "Iran", "Netherlands", "Israel", "Azerbaijan",
"Spain", "Switzerland", "Italy", "Russia", "France", "China",
"Iran", "Netherlands", "USA", "Israel", "Canada", "Italy", "Switzerland",
"Russia", "China", "France", "Iran", "Netherlands", "UK", "Spain",
"Romania", "Italy", "Switzerland", "China", "Russia", "France",
"Netherlands", "Iran", "UK", "Ukraine", "Israel", "Italy", "Switzerland",
"China", "Russia", "France", "Netherlands", "Israel", "Iran",
"Ukraine", "Turkey", "Italy", "China", "Russia", "France", "Switzerland",
"Netherlands", "Canada", "Ukraine", "Iran", "UK"), Year = c(2004L,
2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L, 2004L,
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L,
2005L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L,
2008L, 2008L, 2008L, 2008L, 2009L, 2009L, 2009L, 2009L, 2009L,
2009L, 2009L, 2009L, 2009L, 2009L), Export = c(3760396L, 3108975L,
2836286L, 1966911L, 1468224L, 712011L, 464600L, 322423L, 287125L,
281365L, 5509511L, 4190531L, 2926578L, 2665146L, 2422507L, 886118L,
877836L, 666028L, 661641L, 528132L, 6891644L, 6721180L, 3730037L,
3592514L, 3346969L, 2077598L, 1704555L, 1143876L, 968365L, 747116L,
7774224L, 7475877L, 5635914L, 4658919L, 3982705L, 2464262L, 2451368L,
1133234L, 1113097L, 1058817L, 11920317L, 11281326L, 7676609L,
6227049L, 5388682L, 4638669L, 2226504L, 2039530L, 2003343L, 1903764L,
6686756L, 5888593L, 3546967L, 3381509L, 2668219L, 2222452L, 1385352L,
1289161L, 1279004L, 1235083L)), .Names = c("Partner", "Year",
"Export"), row.names = c(NA, -60L), class = c("tbl_df", "tbl",
"data.frame"))
然后,我得到的图像就是这样。
但是,您可以看到,尽管每年显示的都是前十大出口目的地,它们却没有按降序排列。data.frame中的值是按降序保存的,但图中的显示顺序并非如此。希望有人能帮忙解决这个问题。
根据Uwe的链接和建议,我进一步完善了代码,如下所示。
# Attach a position number to every row: the sequence 10, 9, ..., 1 repeated
# up to a total length of 120 (`len` partially matches rep()'s `length.out`).
# NOTE(review): this presumes ka2 has exactly 120 rows, sorted by Year and by
# descending Export with exactly ten rows per Year -- confirm against the data.
ka2$ord <- rep(10:1,len=120)
# x = -ord places the row with ord == 10 (the first row of each block of ten)
# at the smallest x value of its panel.
ggplot(ka2, aes(x = -ord, y = Export/1000000, fill = Partner)) +
geom_bar(stat = "identity") +
scale_y_continuous(labels = comma) +
# NOTE(review): as.character("Partner") is the literal string "Partner", so
# setNames() yields c(ord = "Partner") and ka2[, ...] selects the Partner
# column of ka2 rather than building a vector of partner names keyed by ord.
# In addition, x = -ord is numeric while scale_x_discrete() is a discrete
# scale. This is presumably the source of the axis label problem described in
# the surrounding text -- confirm.
scale_x_discrete(labels = ka2[, setNames(as.character("Partner"), "ord")]) +
# Rotate the x axis labels by 90 degrees and hide the legend.
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") +
labs(title = "Kazhakhstan Exports to Largest Partners, 2004-2015",
y = "Bln USD", x = "Partner") +
# One panel per Year, each with its own x axis.
facet_wrap(~ Year, scales = "free_x")
它给我的result2如下。
问题出在axis.x标签上。
答案 0 :(得分:1)
使用基础R(base R),我们可以选择要查找其前10个条目的列(在本例中为`3:5`),然后对每一列使用`order`并设置`decreasing = TRUE`,以降序排列条目并仅选择前10个。然后,我们使用这些条目作为索引,从`Partner`列中取子集以获取国家/地区,并据此创建一个数据框;最后使用`do.call`和`cbind`将得到的数据框列表按列合并。
答案 1 :(得分:0)
这是使用public class Tester {
/**
 * Builds a list from a fixed set of integers, prints it, then prints its size.
 *
 * NOTE(review): LinkedList is presumably a project-defined class rather than
 * java.util.LinkedList, since printList() and getSize() are called on it --
 * confirm against its declaration.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    LinkedList ll = new LinkedList();
    // Values are pushed in exactly this order.
    int[] values = {35, 100, 14, 44, 10, 8};
    for (int value : values) {
        ll.push(value);
    }
    System.out.println("Created Linked list is:");
    ll.printList();
    System.out.println(ll.getSize());
}
}
和tidyverse
的一种方法。
第一部分很简单:将宽格式数据转换为长格式,并筛选出每年的前十名。
purrr
要使输出成为带有重复标题的宽格式,我首先创建一个函数,以年份作为输入,并输出该年份的国家和值。然后,我对每一年使用library(tidyverse)
# Long table holding, for every year column X2000..X2002, the ten rows with
# the largest value, sorted by year and descending value.
# NOTE(review): `df` is not defined in this snippet; presumably it is the wide
# export table with the columns Economy, Partner and one column per year.
df2_long <- df %>%
  select(-Economy) %>%
  gather(key = year, value = value, X2000:X2002) %>%
  arrange(year, desc(value)) %>%
  group_by(year) %>%
  top_n(n = 10, wt = value) %>%
  ungroup()
运行该函数,然后将结果绑定在一起。
purrr::map
输出
library(purrr)
# Distinct year labels found in the long table.
year_cols <- unique(df2_long[["year"]])

# Return the rows of df2_long belonging to one year as a two-column chunk:
# Partner plus one value column named after that year.
#   year_col: a single year label as stored in df2_long$year.
grab_chunk <- function(year_col) {
  one_year <- filter(df2_long, year == year_col)
  # Re-level Partner by descending value so the order is kept by spread().
  one_year <- mutate(one_year, Partner = fct_reorder(Partner, -value))
  spread(one_year, year, value)
}

# Build one chunk per year and place the chunks side by side.
df2_wide <- bind_cols(map(year_cols, grab_chunk))
答案 2 :(得分:0)
注意:以下答案是指OP完全更改问题之前的原始问题:
原始问题要求按每年的值对Partner
进行降序排名,并以宽格式显示每年排名前10位的合作伙伴及其相应的值。
为了完整起见,这里也给出一个使用melt()
和dcast()
的答案。
该解决方案可以灵活地与任意数量的年份列X2000
,X2001
等一起使用。
library(data.table)
# Number of leading rows to keep in the wide result.
top <- 10L
# Columns that get one copy per year in the wide result.
val_cols <- c("Partner", "value")

# Wide -> long: every year column of ka becomes rows of (year, value).
# setDT() converts ka to a data.table by reference.
long <- melt(
  setDT(ka),
  id.vars = c("Economy", "Partner"),
  variable.name = "year"
)

# Visit the rows from the largest to the smallest value and number them
# consecutively within each year; the number is stored in the new column rank.
long[order(-value), rank := rowid(year)]

# Long -> wide: one row per Economy and rank, one Partner/value pair per year.
wide <- dcast(long, Economy + rank ~ year, value.var = val_cols)
# rank was only needed to line the rows up, so remove it again.
wide[, rank := NULL]
# Keep the first `top` rows.
wide <- wide[seq_len(top)]

# Arrange the columns as Economy, then Partner_<year>, value_<year> per year.
setcolorder(
  wide,
  c(
    "Economy",
    paste(val_cols, rep(unique(long$year), each = length(val_cols)), sep = "_")
  )
)
wide
Economy Partner_X2000 value_X2000 Partner_X2001 value_X2001 Partner_X2002 value_X2002
1: Kazakhstan Russia 1710262 Russia 1733412 Russia 1497738
2: Kazakhstan Italy 917604 Italy 956196 China 1018680
3: Kazakhstan China 672549 China 646651 Italy 904222
4: Kazakhstan Germany 546887 Germany 495533 Switzerland 773596
5: Kazakhstan Switzerland 463171 Ukraine 490215 UAE 472277
6: Kazakhstan Ukraine 254241 Switzerland 378243 Poland 320482
7: Kazakhstan Netherlands 219459 UAE 330340 Iran 309882
8: Kazakhstan UK 219420 UK 271586 Ukraine 291469
9: Kazakhstan Iran 203270 Iran 208925 Germany 219732
10: Kazakhstan USA 176198 Poland 164157 UK 131824
编辑: rank
列已根据OP的请求从结果中删除。
OP发布了一个相关问题reshaping data frame with tidyverse,因为ggplot2
需要长格式的绘图数据。
此答案中的中间结果long
可用于绘制:
long[rank <= 10]
完整的答案:
library(ggplot2)
# Add a sort key `ord` to `long` by reference: the position of each row when
# ordered by year and then by descending value, written as a zero-padded
# three-digit string.
long[, ord := sprintf("%03i", frank(long, year, -value, ties.method = "first"))]

# Plot the rows with rank <= 10, one panel per year, with ord on the x axis.
ggplot(long[rank <= 10], aes(x = ord, y = value / 1000000, fill = Partner)) +
  geom_col() +
  # The tick labels show the Partner name looked up by its ord key.
  scale_x_discrete(labels = long[, setNames(as.character(Partner), ord)]) +
  scale_y_continuous(labels = scales::comma) +
  facet_wrap(~ year, scales = "free_x", drop = TRUE) +
  labs(
    title = "Kazhakhstan Exports to Largest Partners, 2000-2015",
    y = "Bln USD",
    x = element_blank()
  ) +
  # Rotate the x axis labels by 90 degrees and hide the legend.
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  )
以下是OP在原始问题中提供的数据:
# Sample input: a data.frame with 42 rows and the columns Economy (factor),
# Partner (factor) and X2000, X2001, X2002 (integer, NA where no value is
# given). Every row has Economy level 1 ("Kazakhstan"); the X columns
# presumably hold the export values per year -- units are not stated here.
ka <- structure(list(Economy = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Kazakhstan", "Kyrgyzstan", "Tajikistan",
"Turkmenistan", "Uzbekistan"), class = "factor"), Partner = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 16L,
17L, 18L, 19L, 20L, 31L, 21L, 22L, 23L, 25L, 26L, 27L, 28L, 24L,
29L, 30L, 32L, 33L, 35L, 36L, 37L, 34L, 40L, 38L, 39L, 41L, 42L,
15L), .Label = c("Austria", "Azerbaijan", "Bangladesh", "Belarus",
"Belgium", "Canada", "China", "Czechia", "France", "Georgia",
"Germany", "Greece", "Hungary", "India", "Indonesia", "Iran",
"Israel", "Italy", "Japan", "Kazakhstan", "Kyrgyzstan", "Malaysia",
"Mexico", "Moldova", "Mongolia", "Netherlands", "Pakistan", "Poland",
"Romania", "Russia", "SouthKorea", "Spain", "Switzerland", "Tajikistan",
"Thailand", "Turkey", "Turkmenistan", "UAE", "UK", "Ukraine",
"USA", "Uzbekistan"), class = "factor"), X2000 = c(556L, 46816L,
2839L, 16155L, 7584L, 4393L, 672549L, 7740L, 4319L, 7646L, 546887L,
473L, 2717L, 29497L, 203270L, 7033L, 917604L, 9733L, NA, 36430L,
57247L, 10005L, 6342L, 12240L, 219459L, 1455L, 56626L, 1076L,
523L, 1710262L, 6514L, 463171L, 15174L, 62280L, 7100L, 52602L,
254241L, 11008L, 219420L, 176198L, 133526L, NA), X2001 = c(3170L,
69309L, 3111L, 5120L, 3627L, 333L, 646651L, 4167L, 7217L, 2954L,
495533L, 4381L, 7904L, 9279L, 208925L, 3599L, 956196L, 15474L,
NA, 43400L, 83778L, 1374L, NA, 9768L, 141327L, 489L, 164157L,
2709L, 7837L, 1733412L, 6782L, 378243L, 3153L, 74231L, 14178L,
61198L, 490215L, 330340L, 271586L, 142412L, 150234L, NA), X2002 = c(3307L,
112657L, 1170L, 11815L, 7384L, 9492L, 1018680L, 15064L, 27434L,
5238L, 219732L, 30678L, 12626L, 4213L, 309882L, 5839L, 904222L,
22998L, NA, 48866L, 108462L, 2624L, NA, 8329L, 123553L, 438L,
320482L, 2734L, 121044L, 1497738L, 11549L, 773596L, 17849L, 97449L,
15254L, 45740L, 291469L, 472277L, 131824L, 116942L, 101022L,
NA)), row.names = c(NA, -42L), class = "data.frame")
ka
Economy Partner X2000 X2001 X2002
1 Kazakhstan Austria 556 3170 3307
2 Kazakhstan Azerbaijan 46816 69309 112657
3 Kazakhstan Bangladesh 2839 3111 1170
4 Kazakhstan Belarus 16155 5120 11815
5 Kazakhstan Belgium 7584 3627 7384
6 Kazakhstan Canada 4393 333 9492
7 Kazakhstan China 672549 646651 1018680
8 Kazakhstan Czechia 7740 4167 15064
9 Kazakhstan France 4319 7217 27434
10 Kazakhstan Georgia 7646 2954 5238
11 Kazakhstan Germany 546887 495533 219732
12 Kazakhstan Greece 473 4381 30678
13 Kazakhstan Hungary 2717 7904 12626
14 Kazakhstan India 29497 9279 4213
15 Kazakhstan Iran 203270 208925 309882
16 Kazakhstan Israel 7033 3599 5839
17 Kazakhstan Italy 917604 956196 904222
18 Kazakhstan Japan 9733 15474 22998
19 Kazakhstan Kazakhstan NA NA NA
20 Kazakhstan SouthKorea 36430 43400 48866
21 Kazakhstan Kyrgyzstan 57247 83778 108462
22 Kazakhstan Malaysia 10005 1374 2624
23 Kazakhstan Mexico 6342 NA NA
24 Kazakhstan Mongolia 12240 9768 8329
25 Kazakhstan Netherlands 219459 141327 123553
26 Kazakhstan Pakistan 1455 489 438
27 Kazakhstan Poland 56626 164157 320482
28 Kazakhstan Moldova 1076 2709 2734
29 Kazakhstan Romania 523 7837 121044
30 Kazakhstan Russia 1710262 1733412 1497738
31 Kazakhstan Spain 6514 6782 11549
32 Kazakhstan Switzerland 463171 378243 773596
33 Kazakhstan Thailand 15174 3153 17849
34 Kazakhstan Turkey 62280 74231 97449
35 Kazakhstan Turkmenistan 7100 14178 15254
36 Kazakhstan Tajikistan 52602 61198 45740
37 Kazakhstan Ukraine 254241 490215 291469
38 Kazakhstan UAE 11008 330340 472277
39 Kazakhstan UK 219420 271586 131824
40 Kazakhstan USA 176198 142412 116942
41 Kazakhstan Uzbekistan 133526 150234 101022
42 Kazakhstan Indonesia NA NA NA