Stack Overflow 的各位好,我花了一段时间寻找解决这个问题的方法,但一无所获,所以决定发帖求助。
基本上我有按字母顺序列出的196个国家/地区的数据集。其中一个变量根据该国家所在的地区指定1-10的数字。例如,东欧= 1,西欧= 2,中东= 3,南美= 4,依此类推。
阿富汗------------ 3 ------------------------ 180
阿根廷--------------- 4 ------------------------ 65
法国------------------ 2 ------------------------ 12
德国--------------- 2 ------------------------ 10
波兰------------------ 1 ----------------------- 16
我想知道创建虚拟变量(例如 1 = 东欧,0 = 其他,依此类推)需要什么代码,以及之后如何在简单回归和多元回归中检验它们的效应。
structure(list(Country.Name = structure(c(1L, 2L, 3L, 4L, 5L,
6L, 11L, 7L, 9L, 10L, 12L, 13L, 14L, 8L, 15L, 17L, 20L, 21L,
22L, 23L, 24L, 18L, 156L, 25L, 26L, 120L, 28L, 16L, 29L, 30L,
31L, 32L, 33L, 160L, 34L, 35L, 36L, 170L, 37L, 38L, 39L, 40L,
41L, 43L, 44L, 45L, 46L, 19L, 47L, 49L, 50L, 51L, 53L, 54L, 57L,
55L, 56L, 58L, 59L, 60L, 48L, 61L, 63L, 62L, 64L, 65L, 88L, 66L,
67L, 68L, 69L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L,
81L, 82L, 42L, 83L, 84L, 86L, 85L, 87L, 89L, 90L, 91L, 92L, 93L,
95L, 96L, 94L, 97L, 98L, 99L, 100L, 101L, 103L, 104L, 105L, 106L,
107L, 108L, 110L, 111L, 112L, 115L, 116L, 114L, 117L, 118L, 119L,
130L, 121L, 122L, 123L, 124L, 189L, 125L, 126L, 127L, 128L, 129L,
113L, 109L, 132L, 131L, 133L, 134L, 135L, 136L, 137L, 138L, 139L,
70L, 174L, 140L, 141L, 142L, 143L, 161L, 162L, 163L, 145L, 146L,
147L, 148L, 149L, 151L, 152L, 153L, 154L, 191L, 155L, 157L, 158L,
194L, 159L, 164L, 165L, 166L, 167L, 168L, 169L, 171L, 173L, 175L,
176L, 177L, 184L, 178L, 179L, 180L, 181L, 182L, 183L, 102L, 52L,
185L, 172L, 186L, 27L, 187L, 188L, 190L, 144L, 192L, 150L, 193L
), .Label = c("Afghanistan", "Albania", "Algeria", "Andorra",
"Angola", "Antigua and Barbuda", "Argentina", "Armenia", "Australia",
"Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh",
"Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bhutan",
"Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei",
"Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon",
"Canada", "Cape Verde", "Central African Republic", "Chad", "Chile",
"China", "Colombia", "Comoros", "Congo", "Congo, Democratic Republic",
"Costa Rica", "Cote d'Ivoire", "Croatia", "Cuba", "Cyprus", "Czech Republic",
"Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador",
"Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia",
"Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gambia", "Georgia",
"Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea",
"Guinea-Bissau", "Guyana", "Haiti", "Honduras", "Hungary", "Iceland",
"India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy",
"Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Kiribati",
"Korea, North", "Korea, South", "Kuwait", "Kyrgyzstan", "Laos",
"Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein",
"Lithuania", "Luxembourg", "Macedonia", "Madagascar", "Malawi",
"Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands",
"Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova",
"Monaco", "Mongolia", "Montenegro", "Morocco", "Mozambique",
"Myanmar", "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand",
"Nicaragua", "Niger", "Nigeria", "Norway", "Oman", "Pakistan",
"Palau", "Panama", "Papua New Guinea", "Paraguay", "Peru", "Philippines",
"Poland", "Portugal", "Qatar", "Romania", "Russia", "Rwanda",
"Samoa", "San Marino", "Sao Tome and Principe", "Saudi Arabia",
"Senegal", "Serbia", "Serbia and Montenegro", "Seychelles", "Sierra Leone",
"Singapore", "Slovakia", "Slovenia", "Solomon Islands", "Somalia",
"South Africa", "Spain", "Sri Lanka", "St Kitts and Nevis", "St Lucia",
"St Vincent and the Grenadines", "Sudan", "Suriname", "Swaziland",
"Sweden", "Switzerland", "Syria", "Taiwan", "Tajikistan", "Tanzania",
"Thailand", "Timor-Leste", "Togo", "Tonga", "Trinidad and Tobago",
"Tunisia", "Turkey", "Turkmenistan", "Tuvalu", "Uganda", "Ukraine",
"United Arab Emirates", "United Kingdom", "United States", "Uruguay",
"Uzbekistan", "Vanuatu", "Venezuela", "Vietnam", "Yemen", "Zambia",
"Zimbabwe"), class = "factor"), Country.Region = c(8L, 1L, 3L,
5L, 4L, 10L, 1L, 2L, 5L, 5L, 10L, 3L, 8L, 1L, 10L, 5L, 8L, 2L,
1L, 4L, 2L, 10L, 9L, 7L, 1L, 7L, 4L, 1L, 7L, 4L, 5L, 4L, 4L,
8L, 4L, 2L, 6L, 6L, 2L, 4L, 4L, 4L, 2L, 1L, 2L, 3L, 1L, 4L, 5L,
10L, 2L, 2L, 2L, 4L, 4L, 4L, 1L, 9L, 5L, 5L, 4L, 4L, 1L, 4L,
5L, 4L, 9L, 5L, 10L, 2L, 4L, 10L, 2L, 2L, 1L, 5L, 8L, 7L, 3L,
3L, 5L, 3L, 5L, 4L, 10L, 6L, 1L, 3L, 4L, 6L, 6L, 3L, 1L, 7L,
3L, 4L, 1L, 4L, 3L, 5L, 1L, 5L, 4L, 4L, 7L, 8L, 4L, 5L, 4L, 4L,
2L, 5L, 6L, 1L, 1L, 3L, 4L, 3L, 4L, 9L, 8L, 5L, 9L, 5L, 2L, 4L,
4L, 5L, 9L, 9L, 9L, 8L, 2L, 9L, 2L, 2L, 7L, 1L, 5L, 4L, 7L, 3L,
1L, 1L, 4L, 10L, 10L, 10L, 5L, 4L, 3L, 4L, 1L, 4L, 4L, 7L, 1L,
7L, 1L, 4L, 4L, 4L, 5L, 4L, 10L, 4L, 5L, 5L, 3L, 1L, 7L, 4L,
9L, 10L, 3L, 3L, 3L, 1L, 9L, 4L, 1L, 1L, 3L, 5L, 4L, 5L, 4L,
2L, 1L, 2L, 9L, 3L, 1L, 4L), Under.5.Mortality.Rate = c(137.3500061,
20.40999985, 30.80999947, 6.579999924, 178.6000061, 22.02000046,
51.13999939, 20.05999947, 6.059999943, 5.46999979, 19.12000084,
11.18999958, 79.55999756, 28.54000092, 19.89999962, 5.639999866,
79.80999756, 56.77999878, 9.569999695, 58.18000031, 28.07999992,
29.54999924, 34.72999954, 9.199999809, 15.46000004, 72.59999847,
145.4600067, 14.72000027, 85.63999939, 132.8600006, 6.480000019,
42.68000031, 150.5, 15.02999973, 185.2100067, 10.13000011, 27.06999969,
7.619999886, 22.79000092, 78.52999878, 113.0199966, 165.1199951,
13.39999962, 7.949999809, 7.730000019, 5.590000153, 6.460000038,
128.3200073, 5.489999771, 20.05999947, 35.97000122, 31.18000031,
30.44000053, 180.1799927, 126.4899979, 95.69000244, 9.210000038,
30.03000069, 4.010000229, 4.949999809, 83.83000183, 80.19999695,
31.62000084, 110.2300034, 4.889999866, 93.91000366, 56.91999817,
6.400000095, 20.76000023, 45.81999969, 163.9900055, 44.61000061,
90.98000336, 33.29999924, 9.079999924, 3.730000019, 79.45999908,
46.09999847, 43.70999908, 39.90000153, 6.769999981, 6.690000057,
5.730000019, 123.8099976, 23.86000061, 4.079999924, 39.5, 21.85000038,
96.69000244, 44.25, 8.93999958, 11.47000027, 49.27000046, 91.37999725,
12.98999977, 105.8600006, 12.56000042, 151.6100006, 19.94000053,
NA, 9.520000458, 4.71999979, 94.59999847, 129.8999939, 8.5, 28.04000092,
199.6399994, 6.849999905, 97.87999725, 16.95999908, 23.54999924,
NA, 52.43000031, 19.60000038, 12.39999962, 46.31000137, 150.2299957,
14.13000011, 61.52000046, NA, 69.44999695, 6.139999866, 32.08000183,
7.300000191, 36.22999954, 205.1699982, 172.3699951, 4.639999866,
34.79000092, 45.15000153, NA, 90.91000366, 22.34000015, 90.08000183,
26.45000076, 36.41999817, 36.63000107, 8.770000458, 6.639999866,
183.9799957, 79.04000092, 12.32999992, 19.40999985, 19.03000069,
142.6300049, NA, 16.13999939, 23.86000061, NA, 67.91999817, 20.27000046,
115.0800018, 7.21999979, 17.17000008, 184.1300049, 3.680000067,
9.020000458, 17.10000038, 4.900000095, 137.2100067, 42.95999908,
74.22000122, 5.260000229, 101.0999985, 42.54000092, 106.4199982,
4.489999771, 5.639999866, 16.70999908, 73.68000031, 12.68999958,
109.0500031, 21.54000092, 32.61999893, 5.940000057, 24.13999939,
37.29999924, 61.74000168, NA, 134.8099976, 18.44000053, 17.59000015,
40.22000122, 6.190000057, 115.1299973, 8.050000191, 161.7100067,
15.60999966, 54.25, 21.29999924, 23.29999924, 85.33999634, NA,
133.4700012)), .Names = c("Country.Name", "Country.Region", "Under.5.Mortality.Rate"
), class = "data.frame", row.names = c(NA, -194L))
答案 0 :(得分:0)
由于您的目标是为每个区域分别拟合模型,我认为您需要的是 plyr 包中的函数(使用前需要先安装该包)。对于 R 新手来说,这听起来可能令人生畏,但绝对值得学习,因为它可以轻松地对数据的各个子集进行分析。
# Fit one linear model per region and collect the fitted models in a list.
# dlply() (data frame in, list out) is used instead of ddply(): lm() returns a
# model object, which ddply() cannot combine into a data frame.
# The grouping column is spelled `Country.Region`, as in the posted data.
# `y ~ x1 + x2` is a placeholder formula; substitute your own response and
# predictors. Each per-region subset is passed to lm() as its `data` argument.
library(plyr)
models <- dlply(df, .(Country.Region), lm, formula = y ~ x1 + x2)
希望这会有所帮助,也请务必了解一下 plyr!
答案 1 :(得分:0)
# Unordered factor with one level per region code (1 through 10).
x <- factor(seq_len(10L), ordered = FALSE)
# Attach treatment (dummy) coding: the first level is the baseline and each
# remaining level gets its own 0/1 indicator column.
contrasts(x) <- contr.treatment(nlevels(x))