如何使用许多数据框进行回归

时间:2015-11-02 17:37:00

标签: r regression

我有关于性别和请愿的数据。我想在“女性”(女性)变量和请愿书的不同问题之间做出回归。我已将这些问题重新分解为一般主题下的数据框架,这些主题是我想用“女性”(女性)变量回归的。

P.S。:一些请愿书有许多问题(例如:水+科学)。因此,一个请愿书可以同时计入两个数据框。

1)这是我为所有问题所做的,这是一个带有“原住民”问题的例子,向您展示我如何编码初始问题(您还可以在开头看到“Femme”变量,这是已经在“女性”下的原始数据集中编码为“0”和“1”:

DataPetitions$Femme <- DataPetitions$Female
DataPetitions$Aboriginal <- NA
DataPetitions$Aboriginal[grepl("Aboriginal", DataPetitions$Issue)] <-1 
DataPetitions$Aboriginal[!grepl("Aboriginal", DataPetitions$Issue)] <-0
# ... (same for all 24 specific issues)

2)为一般请愿主题创建7个数据框:

EnvironmentalIssues <- c(DataPetitions$AirQuality,DataPetitions$Biological, DataPetitions$Climate, DataPetitions$Environmental, DataPetitions$Toxic, DataPetitions$Waste, DataPetitions$Water)
EconomicIssues <- c(DataPetitions$Natural, DataPetitions$Transport) 
SocialIssues <- c(DataPetitions$Aboriginal, DataPetitions$Health)   
AgriculturalIssues <- c(DataPetitions$Agriculture,
DataPetitions$Fisheries, DataPetitions$Pesticides)    
PoliticalIssues <- c(DataPetitions$Compliance, DataPetitions$Federal,
DataPetitions$Governance, DataPetitions$International)    
ScientificIssues <- c(DataPetitions$Science)
OtherIssues <- c(DataPetitions$Other)

3)尝试做回归。这是我的glm代码:

model7 <- glm(DataPetitions$Femme ~ SocialIssues + PoliticalIssues +
ScientificIssues + EnvironmentalIssues + EconomicIssues +
AgriculturalIssues + OtherIssues, data = DataPetitions)

# When I try to run it, I get this error message: 

Error in model.frame.default(formula = DataPetitions$Femme ~
SocialIssues +  :    variable lengths differ (found for
'SocialIssues')

使用dput(head(DataPetitions,20)),我明白了:

 [...] class = "factor"), Femme = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 
    0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1), AuMoinsUneFemme = c(1, 
    1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1), 
    Homme = c(1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 
    1, 1, 1, 2), AuMoinsUnHomme = c(1, 0, 0, 0, 1, 1, 0, 1, 1, 
    1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1), Individual1 = c(0, 0, 0, 
    1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0), Group1 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1), 
    Organisation1 = c(1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 
    0, 0, 1, 0, 0, 0, 0), Aboriginal = c(1, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0), Agriculture = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), 
    AirQuality = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 
    0, 0, 1, 0, 0, 0), Biological = c(0, 1, 0, 0, 1, 0, 0, 0, 
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), Climate = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1), Compliance = c(0, 
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0), 
    Environmental = c(0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0), Federal = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Fisheries = c(0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0), Governance = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    Health = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), International = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), Natural = c(0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), Other = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    Pesticides = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0), Science = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Toxic = c(0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Transport = c(0, 
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    Waste = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), Water = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("Data.", "Title", "Number",  "Issue", "Petitioner", "Individual", "Group", "Organisation", 
"Female", "Male", "Unknown", "DateReceived", "Status", "Summary", 
"Hyperlink", "Femme", "AuMoinsUneFemme", "Homme", "AuMoinsUnHomme", 
"Individual1", "Group1", "Organisation1", "Aboriginal", "Agriculture",
"AirQuality", "Biological", "Climate", "Compliance", "Environmental", 
"Federal", "Fisheries", "Governance", "Health", "International", 
"Natural", "Other", "Pesticides", "Science", "Toxic", "Transport", 
"Waste", "Water"), row.names = c(NA, 20L), class = "data.frame")

0 个答案:

没有答案