Question

我有一个像这样的数据框：

> dput(df)
structure(list(OBBLIGATORIO = structure(c(2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("no", 
"yes"), class = "factor"), COUNTRY = structure(c(16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 16L), .Label = c("Austria", "Belgium", "Bulgaria", 
"Croatia", "Cyprus", "Czech Republic", "Denmark", "Estonia", 
"Finland", "France", "Germany", "Greece", "Hungary", "Iceland", 
"Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta", 
"Norway", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia", 
"Spain", "Sweden", "United Kingdom of Great Britain and Northern Ireland"
), class = "factor"), YEAR = c(2003L, 2006L, 2007L, 2008L, 2009L, 
2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 
2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 
1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 2007L, 2008L, 
2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 
2002L, 2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 
1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 2007L, 
2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 
2001L, 2002L, 2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 
1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 
2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 
2000L, 2001L, 2002L), AGE = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Total", class = "factor"), 
    `CAUSE OF DEATH` = c("Acute poliomyelitis", "Acute poliomyelitis", 
    "Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis", 
    "Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis", 
    "Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis", 
    "Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis", 
    "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", 
    "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", 
    "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Measles", 
    "Measles", "Measles", "Measles", "Measles", "Measles", "Measles", 
    "Measles", "Measles", "Measles", "Measles", "Measles", "Measles", 
    "Measles", "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus", 
    "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus", 
    "Tetanus", "Tetanus", "Tetanus", "Tuberculosis", "Tuberculosis", 
    "Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis", 
    "Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis", 
    "Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis", 
    "Viral hepatitis", "Viral hepatitis", "Viral hepatitis", 
    "Viral hepatitis", "Viral hepatitis", "Viral hepatitis", 
    "Viral hepatitis", "Viral hepatitis", "Viral hepatitis", 
    "Viral hepatitis", "Viral hepatitis", "Viral hepatitis", 
    "Viral hepatitis", "Viral hepatitis", "Whooping cough", "Whooping cough", 
    "Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough", 
    "Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough", 
    "Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough"
    ), VALUE = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 1L, 1L, 6L, 7L, 7L, 1L, 2L, 
    3L, 2L, 5L, 12L, 9L, 13L, 9L, 13L, 8L, 17L, 14L, 16L, 18L, 
    15L, 19L, 11L, 10L, 25L, 24L, 21L, 22L, 23L, 20L, 34L, 32L, 
    31L, 30L, 29L, 28L, 27L, 26L, 41L, 42L, 43L, 45L, 46L, 47L, 
    33L, 35L, 36L, 37L, 38L, 39L, 40L, 44L, 1L, 2L, 1L, 1L, 1L, 
    2L, 2L, 2L, 1L, 3L, 1L, 1L, 1L, 1L), .Label = c("0", "1", 
    "2", "3", "6", "7", "9", "17", "18", "19", "21", "22", "27", 
    "28", "30", "31", "37", "41", "42", "301", "329", "333", 
    "344", "350", "396", "413", "415", "460", "517", "558", "597", 
    "609", "622", "647", "681", "1087", "1349", "1413", "1448", 
    "1499", "1576", "1654", "1725", "1948", "2531", "2665", "2757"
    ), class = "factor"), ID = 1:98), .Names = c("OBBLIGATORIO", 
"COUNTRY", "YEAR", "AGE", "CAUSE OF DEATH", "VALUE", "ID"), row.names = c(NA, 
-98L), class = "data.frame")

我想获得一张图表：

在x轴上有YEAR列的值
在y轴上有
按CAUSE OF DEATH列分组的VALUE值（每种死因一条线）

类似于：

我尝试了：

# Open a new on-screen graphics device for the plot.
x11()
# Attempted plot: YEAR on the x axis, VALUE on the y axis, with fill and
# outline colour mapped to CAUSE OF DEATH; the x axis is limited to 1995-2010.
# NOTE(review): in the dput() output above VALUE is a factor, not a number,
# and geom_density() draws a smoothed density estimate rather than the raw
# values per year -- both likely explain why the result is not the
# expected line chart.
ggplot(df, aes(x = df$`YEAR`, y = df$`VALUE`, fill = df$`CAUSE OF DEATH`, colour = df$`CAUSE OF DEATH`)) +
  geom_density(alpha = 0.1) +
  xlim(1995, 2010)

但结果与我想要的结果完全不同。

谢谢

Answer 1

我不确定您的实际问题是什么，但您的数据框有一个问题：VALUE列目前被定义为因子（factor），而不是数值（numeric）。我认为纠正这一点对解决问题会有很大帮助。我在下面是事后进行转换的（即在数据框已经创建之后），但如果您是通过read.table()或类似命令将数据导入R的，则可以在创建数据框时就指定各列的类型（例如使用colClasses参数），这可能是一种更好的方法。

在下面的代码中，我使用dplyr包来操作数据框。它非常强大，但对于这个特定的例子，它并没有做任何base R（R的基础功能）做不到的事情。

# Load dependencies with library() so that a missing package stops the
# script immediately (require() only returns FALSE and carries on).
library(ggplot2)
library(dplyr)
library(magrittr)  # provides the %<>% compound-assignment pipe

# Replace the placeholder comment on the next line with the dput() output
# from the question before running the script.
df <- ### YOUR dput output goes here ###

# Fix the problem with the `VALUE` column: it is a factor. Convert through
# character first, because as.numeric() applied directly to a factor
# returns the internal level codes (1, 2, 3, ...) instead of the numbers
# shown in the labels.
df %<>% mutate(VALUE = as.numeric(as.character(VALUE)))

# equivalent in base R:
#   df$VALUE <- as.numeric(as.character(df$VALUE))

# Total VALUE per year and cause of death. ungroup() removes the grouping
# by YEAR that summarise() would otherwise leave on the result.
totals <- df %>%
  group_by(YEAR, `CAUSE OF DEATH`) %>%
  summarise(value = sum(VALUE)) %>%
  ungroup()

# Make the graph (is it the one you want?): one line plus points per cause
# of death, with YEAR on the x axis and the summed value on the y axis.
p <- ggplot(totals, aes(x = YEAR, y = value, color = `CAUSE OF DEATH`)) +
  geom_line() +
  geom_point() +
  theme_bw()

# Display the plot; an explicit print() also works when the script is
# source()d rather than run line by line.
print(p)

# Save the graph for uploading to SO. The plot is passed explicitly instead
# of relying on ggsave()'s default of the last plot displayed.
ggsave("SO37230266.png", plot = p)

结果如下图所示：

绘制ggplot geom_lines中的值

1 个答案: