Question

我正在尝试在 RStudio 中为下面的代码创建 R Markdown 文件。这段代码是为一个使用 R 语言完成的数据分析项目编写的。但是 RStudio 生成的 HTML 文档只包含我的名字和当天的日期，没有显示代码。

我使用 Ctrl + Shift + K 来生成 R Markdown 的 HTML 文档。我需要怎么做才能在 R Markdown 的 HTML 文档中显示所有代码及其输出？ Here is the R markdown HTML document output.

# Import the .csv data and store it as a data frame.
# The data directory is kept in a variable and joined with file.path() instead
# of calling setwd(): knitr restores the working directory after every chunk,
# so setwd() is unreliable inside an R Markdown document. The former
# remove(list = ls()) call is dropped as well; wiping the caller's workspace is
# a surprising side effect and nothing below depends on it.
data_dir <- "D:/Data Visualisation"
school_df <- read.csv(
  file.path(data_dir, "school_performance_updated.csv"),
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# View the data. View() needs an interactive data viewer, so it is only called
# in an interactive session; a non-interactive run (e.g. knitting) prints the
# first rows instead.
if (interactive()) {
  View(school_df)
} else {
  head(school_df)
}
# Verify that the imported data is stored as a data frame.
class(school_df)
# Check the dimensions (rows, columns) of the data frame.
dim(school_df)
# Print summary statistics for every column of the data frame.
summary(school_df)

# Step 2: data-quality checks
# List the class of every column so that columns imported with an unexpected
# data type stand out.
lapply(X = school_df, FUN = class)
# Report whether the data frame contains at least one missing value.
anyNA(school_df)
# Return the row(s) of `df` that have the highest PASS.PERCENT *within* one
# group.
#
# df          data frame with a PASS.PERCENT column and the grouping column
# group_col   name (string) of the column that identifies the group
# group_value value of `group_col` that selects the group
#
# Returns a data frame with the same columns as `df`; it has zero rows when the
# group is empty or has no non-missing PASS.PERCENT. Ties are all returned.
#
# The earlier version compared against max(PASS.PERCENT) of the whole data
# set, so it returned nothing unless the overall best school happened to lie in
# the requested group.
top_pass_rate <- function(df, group_col, group_value) {
  # which() drops NA comparisons, so rows with a missing group value are skipped.
  in_group <- df[which(df[[group_col]] == group_value), , drop = FALSE]
  pass <- in_group$PASS.PERCENT
  if (nrow(in_group) == 0L || all(is.na(pass))) {
    return(in_group[0L, , drop = FALSE])
  }
  # Exact comparison is safe here: the maximum is itself an element of `pass`.
  in_group[which(pass == max(pass, na.rm = TRUE)), , drop = FALSE]
}

# School(s) with the highest pass percentage in the Kathmandu district
dist_passrate <- top_pass_rate(school_df, "District", "Kathmandu")
dist_passrate
# School(s) with the highest pass percentage in the Bagmati zone
zone_passrate <- top_pass_rate(school_df, "Zone", "Bagmati")
zone_passrate
# School(s) with the highest pass percentage in the Mountain geographical region
gregion_passrate <- top_pass_rate(school_df, "Geographical.Region", "Mountain")
gregion_passrate

# School(s) with the highest pass percentage in the Central development region
dregion_passrate <- top_pass_rate(school_df, "Development.Region", "Central")
dregion_passrate



library(tidyverse)
# Keep only the numeric columns of school_df for clustering.
clustering_school <- school_df %>% select_if(is.numeric)
# Standardise the columns so that each one contributes on the same scale to
# the k-means distance. The first numeric column is left out.
# NOTE(review): presumably that column is an identifier/serial number - confirm.
school_kmean <- scale(clustering_school[-1])
# kmeans() chooses its starting centres at random. Fixing the seed makes the
# cluster labels and sizes reproducible from one run (or knit) to the next.
set.seed(123)
# Partition the observations into 3 clusters.
kmf <- kmeans(school_kmean, centers = 3)
# Inspect the fitted object: available components, cluster sizes and the
# cluster assigned to each row.
attributes(kmf)
kmf$size
kmf$cluster
# One-column matrix holding the cluster number of every row
c1 <- cbind(kmf$cluster)
c1
library(cluster)
# Two-dimensional plot of the clusters
clusplot(
  school_kmean,
  kmf$cluster,
  main = "2D representation of the cluster",
  shade = TRUE,
  labels = 2,
  lines = 0
)
# View() needs an interactive data viewer, so skip it when rendering a report.
if (interactive()) {
  View(school_kmean)
}
# Simple correlation between the ENGLISH and NEPALI columns
with(school_df, cor(ENGLISH, NEPALI))

# install.packages("psych")
library(psych)
# Data profiling
# Per numeric column: count of observations, mean, standard deviation, median,
# trimmed mean, median absolute deviation, min, max, range, skew, kurtosis and
# standard error.
describe(x = clustering_school)

# Data Visualization
# install.packages("ggplot2")
# Histogram of PASS.PERCENT: frequency of rows per pass-percentage bin, with
# the count printed above each bar (labels = TRUE).
hist(
  school_df$PASS.PERCENT,
  breaks = 10,
  labels = TRUE,
  main = "Pass percentage vs its frequency",
  xlab = "Pass_percent",
  xlim = c(0, 100),
  ylim = c(0, 1500),
  col = "blue",
  border = "green"
)
# Mean pass percentage of each geographical region, shown as a bar diagram.
# Columns are referenced by bare name inside filter() (no school_df$ prefix),
# and na.rm = TRUE keeps a single missing value from turning a whole region's
# mean into NA.

# Mountain region
mountain_pass <- filter(school_df, Geographical.Region == "Mountain")
mean_mountain_pass <- mean(mountain_pass$PASS.PERCENT, na.rm = TRUE)
# Hill region
hill_pass <- filter(school_df, Geographical.Region == "Hill")
mean_hill_pass <- mean(hill_pass$PASS.PERCENT, na.rm = TRUE)
# Terai region
terai_pass <- filter(school_df, Geographical.Region == "Terai")
mean_terai_pass <- mean(terai_pass$PASS.PERCENT, na.rm = TRUE)
# Combine the three means and convert them to a matrix (one column per region)
# so that barplot() can draw them.
data <- data.frame(mean_mountain_pass, mean_hill_pass, mean_terai_pass)
data <- data.matrix(data)
class(data)
# Bar diagram: geographical region vs mean pass percentage.
# barplot() returns the bar mid-points, which are used to position the labels.
xx <- barplot(
  data,
  xlab = "Mean Value of pass percentage of each Geographical Region",
  ylab = "mean Value ",
  ylim = c(0, 80)
)
# Print each mean above its bar, rounded to two decimals so the labels stay
# readable; `labels` is spelled out instead of relying on partial matching.
text(x = xx, y = data, labels = round(data, 2), pos = 3, cex = 0.8, col = "red")

# Mean pass percentage of each development region, shown as a bar diagram.
# Columns are referenced by bare name inside filter() (no school_df$ prefix),
# and na.rm = TRUE keeps a single missing value from turning a whole region's
# mean into NA.
eastern_pass <- filter(school_df, Development.Region == "Eastern")
mean_ER <- mean(eastern_pass$PASS.PERCENT, na.rm = TRUE)
central_pass <- filter(school_df, Development.Region == "Central")
mean_CR <- mean(central_pass$PASS.PERCENT, na.rm = TRUE)
far_western_pass <- filter(school_df, Development.Region == "Far-Western")
mean_FWR <- mean(far_western_pass$PASS.PERCENT, na.rm = TRUE)
mid_western_pass <- filter(school_df, Development.Region == "Mid-Western")
mean_MWR <- mean(mid_western_pass$PASS.PERCENT, na.rm = TRUE)
western_pass <- filter(school_df, Development.Region == "Western")
mean_WR <- mean(western_pass$PASS.PERCENT, na.rm = TRUE)

# Combine the five means and convert them to a matrix (one column per region)
# so that barplot() can draw them.
data1 <- data.frame(mean_CR, mean_ER, mean_WR, mean_FWR, mean_MWR)
data1 <- data.matrix(data1)
class(data1)
data1

# Bar diagram: development region vs mean pass percentage.
# barplot() returns the bar mid-points, which are used to position the labels.
xx <- barplot(
  data1,
  xlab = "Mean Value of pass percentage of each Development Region",
  ylab = "mean Pass Percentage ",
  ylim = c(0, 100)
)
# Print each mean above its bar, rounded to two decimals so the labels stay
# readable; `labels` is spelled out instead of relying on partial matching.
text(x = xx, y = data1, labels = round(data1, 2), pos = 3, cex = 0.8, col = "red")

# Relationship between ENGLISH and MATHS: scatter plot, correlation and a
# simple linear regression of MATHS on ENGLISH.
plot(school_df$ENGLISH, school_df$MATHS)
# Correlation between the two columns
cor(school_df$ENGLISH, school_df$MATHS)
# Fit the simple linear regression model MATHS = intercept + slope * ENGLISH
r <- lm(MATHS ~ ENGLISH, data = school_df)
# Add the regression line to the scatter plot drawn above
abline(r)
summary(r)
# Names of the components stored in the regression object
names(r)
# Fitted MATHS values, accessed as a component ...
r$fitted.values
# ... and through the accessor function (same values)
fitted(r)
# Plot the fitted values against ENGLISH. The x values are taken from the
# model frame, which holds exactly the rows used in the fit, so x and y keep
# the same length even if lm() dropped rows with missing values. fitted(r)
# replaces r$fitted, which only worked through partial matching of `$`.
plot(r$model$ENGLISH, fitted(r), xlab = "ENGLISH", ylab = "Fitted MATHS")
# Predicting MATHS for a given ENGLISH mark, e.g. 40
# Manually: intercept + slope * ENGLISH mark.
# NOTE(review): the two literals below look copied from an earlier
# summary(r)/coef(r) output; they go stale when the data changes - confirm
# against coef(r) printed below.
5.847249 + 1.164421 * 40
# Using the stored coefficients: [1] is the intercept, [2] the ENGLISH slope
coef(r)
coef(r)[1] + coef(r)[2] * 40
# Using predict()
predict(r, newdata = data.frame(ENGLISH = 40))
# Several predictions at once, here for ENGLISH marks of 50 and 80
predict(r, newdata = data.frame(ENGLISH = c(50, 80)))

# Linear model of ENGLISH on NEPALI.
# The formula uses bare column names that are looked up in `data`. Writing
# school_df$ENGLISH ~ school_df$NEPALI made lm() ignore `data`, labelled the
# coefficient "school_df$NEPALI" and made predict(..., newdata = ) unusable.
pass_model <- lm(ENGLISH ~ NEPALI, data = school_df)
summary(pass_model)
# R square is the square of the correlation (R) in a simple linear regression.
# R square is the share of the variation in the output variable that is
# captured by the input variable.
# 0 means none of the variation in the output is captured by the input;
# 1 means all of the variation in the output is captured by the input.

### Residuals / errors test
### Residuals should look random and be close to a normal distribution

pass_model$residuals
# Residuals in row order: look for the absence of any pattern
plot(pass_model$residuals)
# Histogram and density estimate of the residuals: compare with a bell shape
hist(pass_model$residuals)
plot(density(pass_model$residuals))
```

R markdown仅显示用户名和创建日期，不显示代码

0 个答案: