#Evaluation Data – Statistical Analysis
#—-Section_1—-
# Import the data set.
# NOTE: the path must be quoted with plain ASCII double quotes; typographic
# ("curly") quotes are not valid string delimiters in R.
# The object keeps the name `data` because the rest of the script refers to it.
data <- read.csv("C:\\csv\\NBA_improve3.csv", header = TRUE)
# Question: What factors are most important to win MVP?
# Question: Who is the all-time most efficient MVP?
# Summarise the whole data set and the variables of interest.
summary(data)
summary(data$PTS)
summary(data$Assists)
summary(data$Minutes.Played)
summary(data$Total.Rebounds)
# Open the data set in the viewer; only attempted in an interactive session,
# because there is nothing to display it in when the script is run in batch.
if (interactive()) {
  View(data)
}
# Print the rows that have a (non-missing) value for Minutes.Played.
data[!is.na(data$Minutes.Played), ]
# Print the rows where Steals is missing, to check for NA values.
data[is.na(data$Steals), ]
# Author's note: no `na.strings` argument is needed in read.csv(), since no NA
# was detected in the analysed variables, only in the covariates.
#—-Section_2—-
# Package setup.
# The plots and models below need these add-on packages. Only the ones that
# are not installed yet are installed, so re-running the script does not
# re-download everything each time.
# `splines` is deliberately absent from this list: it is distributed with R
# itself and therefore never needs install.packages().
required_packages <- c("lme4", "ggplot2", "quantreg", "insight")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# Attach the packages (lme4 provides lmer()).
library("lme4")
library("splines")
library("ggplot2")
library("quantreg")
library("insight")
#—-Section_3—-
# Using ggplot2 to graph the data.
# Graph 1: coloured bar graph of PTS per Season (axes flipped), with bars
# filled and labelled by Player.
# The `stat` value must use plain ASCII quotes; curly quotes do not parse.
ggplot(data = data, aes(x = Season, y = PTS, fill = Player)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  geom_text(aes(label = Player), nudge_y = -8) +
  theme(axis.text.x = element_text(angle = 90))
# Graph 2: scatter plot of Minutes.Played per Player, coloured by PTS.
ggplot(data = data) +
  geom_point(
    mapping = aes(x = Player, y = Minutes.Played, color = PTS),
    size = 4
  ) +
  theme(axis.text.x = element_text(angle = 90))
# Graph 3: scatter plot of Minutes.Played per Player, coloured by Assists.
ggplot(data = data) +
  geom_point(
    mapping = aes(x = Player, y = Minutes.Played, color = Assists),
    size = 4
  ) +
  theme(axis.text.x = element_text(angle = 90))
# Graph 4: Total.Rebounds against Player with a quantile-regression layer.
# NOTE(review): y is mapped to Player, which looks like a label rather than a
# numeric response, and quantiles = 1 requests the extreme upper quantile --
# confirm this is the intended plot (a numeric y and e.g. quantiles = 0.5
# may have been meant).
ggplot(data = data, aes(Total.Rebounds, Player)) +
  geom_point(size = 4) +
  geom_quantile(quantiles = 1)
#—-Section_4—-
# Checking the hypotheses with mixed-effects models.
# Every model has PTS as the response and a random intercept per Player.
# They are fitted by maximum likelihood (REML = FALSE) so that models with
# different fixed effects can be compared with anova().
# Null hypothesis: no fixed-effect predictors.
data.null <- lmer(PTS ~ (1 | Player), data = data, REML = FALSE)
# Alternative hypothesis M1: Minutes.Played as fixed effect.
data.M1 <- lmer(PTS ~ (1 | Player) + Minutes.Played, data = data, REML = FALSE)
# Alternative hypothesis M2: Assists as fixed effect.
data.M2 <- lmer(PTS ~ (1 | Player) + Assists, data = data, REML = FALSE)
# Alternative hypothesis M3: Minutes.Played and Assists as fixed effects.
data.M3 <- lmer(
  PTS ~ (1 | Player) + Minutes.Played + Assists,
  data = data,
  REML = FALSE
)
# Summary of each fitted model.
summary(data.null)
summary(data.M1)
summary(data.M2)
summary(data.M3)
# Model comparisons with anova().
anova(data.null, data.M1)
# M1 and M2 are not nested (each has one predictor the other lacks), so the
# chi-square test in this table is not meaningful; read only AIC/BIC/logLik.
anova(data.M1, data.M2)
anova(data.null, data.M3)
# Nested comparisons showing what each predictor adds on top of the other.
anova(data.M1, data.M3)
anova(data.M2, data.M3)
# Residuals-versus-fitted plots, one per fitted model.
# Titles and axis labels must be quoted with plain ASCII double quotes;
# typographic quotes are not valid string delimiters in R.
plot(
  data.null,
  main = "Null hypothesis",
  xlab = "Fitted Values for points per player",
  ylab = "Residuals"
)
plot(
  data.M1,
  main = "Hypothesis M1",
  xlab = "Fitted Values for points per player with minutes played",
  ylab = "Residuals"
)
plot(
  data.M2,
  main = "Hypothesis M2",
  xlab = "Fitted Values for points per player with assists",
  ylab = "Residuals"
)
plot(
  data.M3,
  main = "Hypothesis M3",
  xlab = "Fitted Values for points per player with minutes played and assists",
  ylab = "Residuals"
)
# Conditional R^2 for each model: (fixed + random effects variance) divided by
# the total variance (fixed + random + residual). It indicates how much of the
# variance is explained by the complete model.
# Components read from insight::get_variance():
#   var.fixed    - variance of the fixed effects
#   var.random   - variance of the random effects
#   var.residual - residual variance

# Conditional R^2 from a list of variance components.
conditional_r2 <- function(components) {
  explained <- components$var.fixed + components$var.random
  explained / (explained + components$var.residual)
}

# Null model.
varsn <- insight::get_variance(data.null)
r2_conditionaln <- conditional_r2(varsn)
print(r2_conditionaln)

# Model M1.
varsn_1 <- insight::get_variance(data.M1)
r2_conditionaln_1 <- conditional_r2(varsn_1)
print(r2_conditionaln_1)

# Model M2.
varsn_2 <- insight::get_variance(data.M2)
r2_conditionaln_2 <- conditional_r2(varsn_2)
print(r2_conditionaln_2)

# Model M3.
varsn_3 <- insight::get_variance(data.M3)
r2_conditionaln_3 <- conditional_r2(varsn_3)
print(r2_conditionaln_3)
