我是 R 的新手,正在动手处理泰坦尼克号数据集(可在线获得),并尝试对数据进行分析。运行 ggplot 函数时出现以下错误。
library(ggplot2)
library(Amelia)
# Load the Titanic passenger data from the local CSV export.
# NOTE(review): later code refers to the first column as `ï..pclass`, which
# suggests the file starts with a UTF-8 byte-order mark -- confirm. Reading
# with fileEncoding = "UTF-8-BOM" would avoid the mangled name, but every
# later reference to that column would have to be updated at the same time.
titanic_csv <- "D:/Data Science/Datasets-20180505T065250Z- 001/titanic.csv"
titanic <- read.csv(titanic_csv, header = TRUE)

# First look at the data: per-column summary, structure and column names.
summary(titanic)
str(titanic)
names(titanic)

# Count, per column, the cells that are either NA or an empty string.
is_missing <- is.na(titanic) | titanic == ""
colSums(is_missing)

# Draw a map of where the missing values sit in the data set.
missmap(
  titanic,
  main = "Titanic Data - Missing map",
  col = c("yellow", "black"),
  legend = FALSE
)
# Next step: fill in missing data, starting with the fare column.

# Rows whose fare is missing (NA) or blank. Columns are referenced by bare
# name so that dplyr evaluates them inside the data frame being filtered
# rather than pulling a separate vector out of `titanic` with `$`.
fare_na <- dplyr::filter(titanic, is.na(fare) | fare == "")
# Base R alternative (NA only): subset(titanic, is.na(fare))
fare_na

# Third-class passengers who embarked at "S".
# NOTE(review): presumably the comparison group for the passenger(s) with a
# missing fare -- confirm against the rows printed above.
fare_na1 <- dplyr::filter(titanic, ï..pclass == 3 & embarked == "S")
fare_na1
# Density of fares for third-class passengers who embarked at "S", with dashed
# reference lines at the median (dark blue) and mean (red) fare of that group.
#
# The plot's data is the filtered subset, so the aesthetic must name a column
# of that subset (`fare`). Mapping `titanic$fare` instead hands ggplot2 the
# full, unfiltered column, whose length differs from the plotted data; ggplot2
# rejects that with "Aesthetics must be either length 1 or the same as the
# data". It also made the reference lines describe all passengers rather than
# the group shown.
third_class_s <- dplyr::filter(titanic, ï..pclass == 3 & embarked == "S")
median_fare <- median(third_class_s[["fare"]], na.rm = TRUE)
mean_fare <- mean(third_class_s[["fare"]], na.rm = TRUE)

# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; `size` is kept here because the installed version is unknown.
ggplot(third_class_s, aes(x = fare)) +
  # na.rm = TRUE drops rows with a missing fare without a warning.
  geom_density(fill = "blue", alpha = 0.5, na.rm = TRUE) +
  # The intercepts are single precomputed values, so they are passed as
  # parameters instead of being mapped through aes() once per row.
  geom_vline(
    xintercept = median_fare,
    colour = "darkblue", linetype = "dashed", size = 2
  ) +
  geom_vline(
    xintercept = mean_fare,
    colour = "red", linetype = "dashed", size = 2
  ) +
  ggtitle("Fare details of 3rd class passengers \n embarked from Southampton port") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))