Question

我试图创建一个表来理解这些数据。

# Load the bsa_2010 survey extract from its hosted CSV file.
bsa_2010 <- read.csv(
  file = "https://dl.dropboxusercontent.com/s/ubl9huokroj9jw8/bsa%202010.csv"
)
> dput(head(bsa_2010))
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("England", 
"Scotland", "Wales"), class = "factor"), RSex = structure(c(1L, 
1L, 2L, 2L, 2L, 1L), .Label = c("Female", "Male"), class = "factor"), 
    RAge = c(75L, 34L, 81L, 25L, 33L, 76L), MarStat = structure(c(4L, 
    4L, 2L, 3L, 3L, 5L), .Label = c("Living as married", "Married", 
    "Not married", "Separated or divorced after marrying", "Widowed"
    ), class = "factor"), ChildHh = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), WhPaper = structure(c(8L, 
    8L, 8L, 11L, 12L, 8L), .Label = c("(Scottish) Daily Express", 
    "(Scottish) Daily Mail", "Daily Mirror/ Scottish Mirror", 
    "Daily Record", "Daily Star", "Daily Telegraph", "Financial Times", 
    "Skip,not read paper normally", "The Guardian", "The Independent", 
    "The Sun/ Scottish Sun", "The Times"), class = "factor"), 
    PartyIDN = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front", 
    "Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat", 
    "None", "Other answer (WRITE IN)", "Other party (WRITE IN)", 
    "Plaid Cymru", "Refused to say", "Scottish National Party", 
    "UK Independence Party (UKIP)/Veritas"), class = "factor"), 
    Partyid1 = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front", 
    "Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat", 
    "None", "Other answer", "Other party", "Plaid Cymru", "Refusal", 
    "Scottish National Party", "UK Independence Party (UKIP)/Veritas"
    ), class = "factor"), PartyId2 = structure(c(1L, 5L, 1L, 
    4L, 1L, 4L), .Label = c("Conservative", "Green Party", "Labour", 
    "Liberal Democrat", "None", "Other party", "Other/DK/Ref"
    ), class = "factor"), Spend1 = structure(c(3L, 4L, 4L, 3L, 
    3L, 4L), .Label = c("(None of these)", "Defence", "Education", 
    "Health", "Help for industry", "Housing", "Overseas aid", 
    "Police and prisons", "Public transport", "Roads", "Social security benefits"
    ), class = "factor"), Spend2 = structure(c(6L, 3L, 2L, 4L, 
    9L, 10L), .Label = c("(None of these)", "Defence", "Education", 
    "Health", "Help for industry", "Housing", "Overseas aid", 
    "Police and prisons", "Public transport", "Roads", "Skip,no 1st priority", 
    "Social security benefits"), class = "factor"), RClassGp = structure(c(4L, 
    6L, 1L, 2L, 4L, 6L), .Label = c("Employers in small org; own account workers", 
    "Intermediate occupations", "Lower supervisory & technical occupations", 
    "Managerial & professional occups", "Not classifiable", "Semi-routine & routine occupations", 
    "Skip, never had a job+DK+NA last job"), class = "factor"), 
    RNSSECG = structure(c(4L, 8L, 9L, 3L, 4L, 8L), .Label = c("1.1", 
    "1.2", "Intermediate occupations", "Lower managerial and professional occupations", 
    "Lower supervisory & technical occupations", "Not classified", 
    "Routine occupations", "Semi-routine Occupations", "Small employers and own account workers"
    ), class = "factor"), CanLegal = structure(c(1L, 1L, 1L, 
    2L, 2L, 1L), .Label = c("Taking cannabis should remain illegal", 
    "should be legal, only licenced shops"), class = "factor"), 
    RaceOri3 = structure(c(10L, 10L, 10L, 10L, 10L, 10L), .Label = c("ASIAN: of Bangladeshi origin", 
    "ASIAN: of Chinese origin", "ASIAN: of Indian origin", "ASIAN: of Pakistani origin", 
    "ASIAN: of other origin (WRITE IN)", "BLACK: of African origin", 
    "BLACK: of Caribbean origin", "MIXED ORIGIN (WRITE IN)", 
    "OTHER (WRITE IN)", "WHITE: of any origin"), class = "factor"), 
    Agecat1 = structure(c(6L, 2L, 7L, 1L, 2L, 6L), .Label = c("(18,28]", 
    "(28,38]", "(38,48]", "(48,58]", "(58,68]", "(68,78]", "(78,88]", 
    "(88,98]"), class = "factor"), Agecat2 = structure(c(3L, 
    1L, 4L, 1L, 1L, 3L), .Label = c("(18,38]", "(38,58]", "(58,78]", 
    "(78,98]"), class = "factor")), .Names = c("Country", "RSex", 
"RAge", "MarStat", "ChildHh", "WhPaper", "PartyIDN", "Partyid1", 
"PartyId2", "Spend1", "Spend2", "RClassGp", "RNSSECG", "CanLegal", 
"RaceOri3", "Agecat1", "Agecat2"), row.names = c(NA, 6L), class = "data.frame")

基本上，我试图研究年龄、政党倾向和对大麻合法化的看法之间的关系。

使用的变量是：RAge（年龄）、PartyIDN（政党认同）、CanLegal（大麻合法化）。

为简化起见，我将年龄分类并保留了两个最大的政党。

   # Bin respondent age into ten-year bands: (18,28], (28,38], ..., (88,98].
   # NOTE: cut() builds left-open intervals by default, so a respondent aged
   # exactly 18 falls outside every band and gets NA.
   bsa_2010$Agecat1 <- cut(bsa_2010$RAge, breaks = seq(18, 98, by = 10))

   # Keep only Conservative and Labour identifiers, then rebuild the factor
   # so the levels of the excluded parties do not show up in later tables.
   Parties <- bsa_2010[bsa_2010$PartyIDN %in% c("Conservative", "Labour"), ]
   Parties$PartyIDN <- factor(Parties$PartyIDN)

如何生成一个表格来显示 RAge、PartyIDN 和 CanLegal 之间的关系，以便清楚地了解这三个变量？

Answer 1

table() 是一个很基础的制表函数，但仍然非常有用：

尝试

# Three-way contingency table: age band x party, one layer per CanLegal value.
with(Parties, table(Agecat1, PartyIDN, CanLegal))

你会得到：

, , CanLegal = should be legal, only licenced shops

         PartyIDN
Agecat1   Conservative Labour
  (18,28]            7      6
  (28,38]           12     17
  (38,48]           19     18
  (48,58]            7     11
  (58,68]           16     14
  (68,78]            9      4
  (78,88]            2      1
  (88,98]            0      1

, , CanLegal = Taking cannabis should remain illegal

         PartyIDN
Agecat1   Conservative Labour
  (18,28]           14     21
  (28,38]           25     29
  (38,48]           27     39
  (48,58]           23     17
  (58,68]           45     43
  (68,78]           36     28
  (78,88]           16     18
  (88,98]            4      2

更新：

要为类别添加标签，请使用 cut 的 labels 参数。既然你提到想进一步简化年龄类别（只有三个标签："千禧一代"、"工作年龄"、"退休人员"），你的代码看起来会像这样：

# Collapse age into three labelled groups. The upper break is the oldest
# observed age; na.rm = TRUE keeps a missing RAge from turning that break
# into NA, which would leave cut() with fewer intervals than labels and
# make it stop with an error.
# NOTE: cut() intervals are left-open by default, so RAge == 18 gets NA;
# add include.lowest = TRUE if 18-year-olds should count as "millennials".
bsa_2010$Agecat1 <- cut(
  bsa_2010$RAge,
  breaks = c(18, 35, 65, max(bsa_2010$RAge, na.rm = TRUE)),
  labels = c("millennials", "working age", "retirees")
)

# Keep only Conservative and Labour identifiers, then rebuild the factor so
# the unused party levels are dropped from the table below.
Parties <- subset(bsa_2010, PartyIDN %in% c("Conservative", "Labour"))
Parties$PartyIDN <- factor(Parties$PartyIDN)

# Three-way contingency table: CanLegal x Agecat1, one layer per party.
tabulation <- table(Parties[c("CanLegal", "Agecat1", "PartyIDN")])

tabulation

结果将如下所示：

, , PartyIDN = Conservative

                                       Agecat1
CanLegal                                millennials working age retirees
  should be legal, only licenced shops           18          37       17
  Taking cannabis should remain illegal          27          90       73

, , PartyIDN = Labour

                                       Agecat1
CanLegal                                millennials working age retirees
  should be legal, only licenced shops           21          42        9
  Taking cannabis should remain illegal          38         101       58

您还可以使用 mosaicplot 轻松地把这个表格绘制成图：

# Draw the three-way table as a mosaic plot (tile area reflects cell counts).
graphics::mosaicplot(tabulation)

使用3个变量将R中的数据制表？

1 个答案: