我试图创建一个表来理解这些数据。
# Load the survey extract used throughout this question from its CSV URL.
bsa_2010 <- read.csv(
  file = "https://dl.dropboxusercontent.com/s/ubl9huokroj9jw8/bsa%202010.csv"
)
> dput(head(bsa_2010))
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("England",
"Scotland", "Wales"), class = "factor"), RSex = structure(c(1L,
1L, 2L, 2L, 2L, 1L), .Label = c("Female", "Male"), class = "factor"),
RAge = c(75L, 34L, 81L, 25L, 33L, 76L), MarStat = structure(c(4L,
4L, 2L, 3L, 3L, 5L), .Label = c("Living as married", "Married",
"Not married", "Separated or divorced after marrying", "Widowed"
), class = "factor"), ChildHh = structure(c(1L, 1L, 1L, 1L,
1L, 1L), .Label = c("No", "Yes"), class = "factor"), WhPaper = structure(c(8L,
8L, 8L, 11L, 12L, 8L), .Label = c("(Scottish) Daily Express",
"(Scottish) Daily Mail", "Daily Mirror/ Scottish Mirror",
"Daily Record", "Daily Star", "Daily Telegraph", "Financial Times",
"Skip,not read paper normally", "The Guardian", "The Independent",
"The Sun/ Scottish Sun", "The Times"), class = "factor"),
PartyIDN = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front",
"Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat",
"None", "Other answer (WRITE IN)", "Other party (WRITE IN)",
"Plaid Cymru", "Refused to say", "Scottish National Party",
"UK Independence Party (UKIP)/Veritas"), class = "factor"),
Partyid1 = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front",
"Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat",
"None", "Other answer", "Other party", "Plaid Cymru", "Refusal",
"Scottish National Party", "UK Independence Party (UKIP)/Veritas"
), class = "factor"), PartyId2 = structure(c(1L, 5L, 1L,
4L, 1L, 4L), .Label = c("Conservative", "Green Party", "Labour",
"Liberal Democrat", "None", "Other party", "Other/DK/Ref"
), class = "factor"), Spend1 = structure(c(3L, 4L, 4L, 3L,
3L, 4L), .Label = c("(None of these)", "Defence", "Education",
"Health", "Help for industry", "Housing", "Overseas aid",
"Police and prisons", "Public transport", "Roads", "Social security benefits"
), class = "factor"), Spend2 = structure(c(6L, 3L, 2L, 4L,
9L, 10L), .Label = c("(None of these)", "Defence", "Education",
"Health", "Help for industry", "Housing", "Overseas aid",
"Police and prisons", "Public transport", "Roads", "Skip,no 1st priority",
"Social security benefits"), class = "factor"), RClassGp = structure(c(4L,
6L, 1L, 2L, 4L, 6L), .Label = c("Employers in small org; own account workers",
"Intermediate occupations", "Lower supervisory & technical occupations",
"Managerial & professional occups", "Not classifiable", "Semi-routine & routine occupations",
"Skip, never had a job+DK+NA last job"), class = "factor"),
RNSSECG = structure(c(4L, 8L, 9L, 3L, 4L, 8L), .Label = c("1.1",
"1.2", "Intermediate occupations", "Lower managerial and professional occupations",
"Lower supervisory & technical occupations", "Not classified",
"Routine occupations", "Semi-routine Occupations", "Small employers and own account workers"
), class = "factor"), CanLegal = structure(c(1L, 1L, 1L,
2L, 2L, 1L), .Label = c("Taking cannabis should remain illegal",
"should be legal, only licenced shops"), class = "factor"),
RaceOri3 = structure(c(10L, 10L, 10L, 10L, 10L, 10L), .Label = c("ASIAN: of Bangladeshi origin",
"ASIAN: of Chinese origin", "ASIAN: of Indian origin", "ASIAN: of Pakistani origin",
"ASIAN: of other origin (WRITE IN)", "BLACK: of African origin",
"BLACK: of Caribbean origin", "MIXED ORIGIN (WRITE IN)",
"OTHER (WRITE IN)", "WHITE: of any origin"), class = "factor"),
Agecat1 = structure(c(6L, 2L, 7L, 1L, 2L, 6L), .Label = c("(18,28]",
"(28,38]", "(38,48]", "(48,58]", "(58,68]", "(68,78]", "(78,88]",
"(88,98]"), class = "factor"), Agecat2 = structure(c(3L,
1L, 4L, 1L, 1L, 3L), .Label = c("(18,38]", "(38,58]", "(58,78]",
"(78,98]"), class = "factor")), .Names = c("Country", "RSex",
"RAge", "MarStat", "ChildHh", "WhPaper", "PartyIDN", "Partyid1",
"PartyId2", "Spend1", "Spend2", "RClassGp", "RNSSECG", "CanLegal",
"RaceOri3", "Agecat1", "Agecat2"), row.names = c(NA, 6L), class = "data.frame")
基本上,我想研究年龄、政党倾向与对大麻合法化的看法三者之间的关系。
使用的变量是:RAge(年龄)、PartyIDN(认同的政党)、CanLegal(对大麻合法化的看法)。
为简化起见,我将年龄分类并保留了两个最大的政党。
# Bin respondent age into ten-year, right-closed intervals from 18 to 98.
bsa_2010[["Agecat1"]] <- cut(
  x = bsa_2010[["RAge"]],
  breaks = seq(18, 98, by = 10)
)

# Keep only respondents identifying with one of the two largest parties.
Parties <- bsa_2010[bsa_2010$PartyIDN %in% c("Conservative", "Labour"), ]

# Rebuild the factor so that only the parties still present remain as levels.
Parties[["PartyIDN"]] <- factor(Parties[["PartyIDN"]])
如何生成一个表格来显示 RAge、PartyIDN 和 CanLegal 之间的关系,以便清楚地了解这三者?
答案 0 :(得分:0)
table() 是一个很基础的列联表函数,但仍然非常有用。
可以尝试:
# Three-way contingency table: age band x party, one slice per cannabis opinion.
with(Parties, table(Agecat1, PartyIDN, CanLegal))
你会得到:
, , CanLegal = should be legal, only licenced shops
PartyIDN
Agecat1 Conservative Labour
(18,28] 7 6
(28,38] 12 17
(38,48] 19 18
(48,58] 7 11
(58,68] 16 14
(68,78] 9 4
(78,88] 2 1
(88,98] 0 1
, , CanLegal = Taking cannabis should remain illegal
PartyIDN
Agecat1 Conservative Labour
(18,28] 14 21
(28,38] 25 29
(38,48] 27 39
(48,58] 23 17
(58,68] 45 43
(68,78] 36 28
(78,88] 16 18
(88,98] 4 2
更新:
要为类别添加标签,请使用 cut() 的 labels 参数。
既然你提到想进一步简化年龄类别(只保留三个标签:"千禧一代"、"工作年龄"、"退休人员"),你的代码看起来会像这样:
# Collapse respondent age into three labelled bands, keep only the two largest
# parties, and cross-tabulate cannabis opinion by age band and party.
#
# na.rm = TRUE keeps the upper break finite when RAge contains missing values;
# without it max() returns NA and cut() fails because the remaining breaks no
# longer match the three labels.
# NOTE(review): the intervals are right-closed -- (18,35], (35,65], (65,max] --
# so a respondent aged exactly 18 is assigned NA. Pass include.lowest = TRUE
# to cut() if they should be counted as "millennials"; confirm against the
# data before changing, since it alters the counts.
bsa_2010$Agecat1 <- cut(
  bsa_2010$RAge,
  breaks = c(18, 35, 65, max(bsa_2010$RAge, na.rm = TRUE)),
  labels = c("millennials", "working age", "retirees")
)

# Keep only Conservative and Labour identifiers; rows with a missing party
# are dropped as well.
Parties <- subset(bsa_2010, PartyIDN %in% c("Conservative", "Labour"))

# Rebuild the factor so the table has no empty columns for the other parties.
Parties$PartyIDN <- factor(Parties$PartyIDN)

# Opinion (rows) x age band (columns), one slice per party.
tabulation <- table(Parties[c("CanLegal", "Agecat1", "PartyIDN")])
tabulation
结果将如下所示:
, , PartyIDN = Conservative
Agecat1
CanLegal millennials working age retirees
should be legal, only licenced shops 18 37 17
Taking cannabis should remain illegal 27 90 73
, , PartyIDN = Labour
Agecat1
CanLegal millennials working age retirees
should be legal, only licenced shops 21 42 9
Taking cannabis should remain illegal 38 101 58
您还可以使用 mosaicplot() 将这个表可视化:
# Draw the three-way table as a mosaic plot.
graphics::mosaicplot(tabulation)