# R Project


#Evaluation Data – Statistical Analysis

# ---- Section_1 ----

# Import the data set.
# String literals must use plain ASCII double quotes; typographic (curly)
# quotes copied from a web page or word processor are a parse error in R.
data <- read.csv("C:\\csv\\NBA_improve3.csv", header = TRUE)

# Question: What factors are most important to win MVP?

# Question: Who is the all-time most efficient MVP?

# Summarize the whole data set, then the individual variables of interest
summary(data)

summary(data$PTS)

summary(data$Assists)

summary(data$Minutes.Played)

summary(data$Total.Rebounds)

# Open the data set in the spreadsheet viewer. View() needs an interactive
# session, so it is skipped when the script runs non-interactively.
if (interactive()) {
  View(data)
}

# Show the rows where Minutes.Played is present (not NA)
data[!is.na(data$Minutes.Played), ]

# Show the rows where Steals is missing (NA); an empty result means no NA
data[is.na(data$Steals), ]

# No na.strings argument is needed in read.csv(): no NA was detected in the
# variables used, only in the co-variables

# ---- Section_2 ----

# Libraries

# Packages needed for the models and plots below (lme4 provides lmer()).
# "splines" is deliberately not in this list: it ships with R itself, so it
# cannot be installed from CRAN and only needs to be attached.
required_packages <- c("lme4", "ggplot2", "quantreg", "insight")

# Install only the packages that are missing, so re-running the script does
# not download and reinstall everything each time.
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach the packages; library() stops with an error if one is unavailable
library("lme4")

library("splines")

library("ggplot2")

library("quantreg")

library("insight")

# ---- Section_3 ----

# Graphs built with ggplot2

# Graph 1: bar graph of PTS per Season, flipped to horizontal bars, filled
# by Player and labelled with the player name.
# (The original stat argument used typographic quotes, which do not parse.)
ggplot(data = data, aes(x = Season, y = PTS, fill = Player)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  geom_text(aes(label = Player), nudge_y = -8) +
  theme(axis.text.x = element_text(angle = 90))

# Graph 2: scatter plot of Minutes.Played per Player, colored by PTS
ggplot(data = data) +
  geom_point(
    mapping = aes(x = Player, y = Minutes.Played, color = PTS),
    size = 4
  ) +
  theme(axis.text.x = element_text(angle = 90))

# Graph 3: scatter plot of Minutes.Played per Player, colored by Assists
ggplot(data = data) +
  geom_point(
    mapping = aes(x = Player, y = Minutes.Played, color = Assists),
    size = 4
  ) +
  theme(axis.text.x = element_text(angle = 90))

# Graph 4: quantile graph of Player against Total.Rebounds
# NOTE(review): the y aesthetic is Player, which looks like a name column
# rather than a number; the quantile regression layer expects a numeric
# response, so confirm this layer actually draws for this data.
ggplot(data = data, aes(Total.Rebounds, Player)) +
  geom_point(size = 4) +
  geom_quantile(quantiles = 1)

# ---- Section_4 ----

# Checking the hypotheses with mixed-effects models.
# Every model has a random intercept per Player and is fitted by maximum
# likelihood (REML = FALSE).

# Null hypothesis: no fixed-effect predictors
data.null <- lmer(PTS ~ (1 | Player), data = data, REML = FALSE)

# Alternative hypothesis M1: Minutes.Played as a fixed effect
data.M1 <- lmer(PTS ~ (1 | Player) + Minutes.Played, data = data, REML = FALSE)

# Alternative hypothesis M2: Assists as a fixed effect
data.M2 <- lmer(PTS ~ (1 | Player) + Assists, data = data, REML = FALSE)

# Alternative hypothesis M3: Minutes.Played and Assists as fixed effects
data.M3 <- lmer(
  PTS ~ (1 | Player) + Minutes.Played + Assists,
  data = data,
  REML = FALSE
)

# Summaries of the fitted models
summary(data.null)

summary(data.M1)

summary(data.M2)

summary(data.M3)

# Model comparisons: anova() on two fitted models compares them against each
# other (this is not a classical one-way ANOVA on group means)
anova(data.null, data.M1)

# NOTE(review): M1 and M2 are not nested (each has one different predictor),
# so this comparison is not a valid nested-model test; comparing each of
# them against data.M3 may be what was intended - confirm.
anova(data.M1, data.M2)

anova(data.null, data.M3)

# Residuals-versus-fitted plots for each hypothesis.
# (The original title and axis strings used typographic quotes, which do not
# parse.)
plot(
  data.null,
  main = "Null hypothesis",
  xlab = "Fitted Values for points per player",
  ylab = "Residuals"
)

plot(
  data.M1,
  main = "Hypothesis M1",
  xlab = "Fitted Values for points per player with minutes played",
  ylab = "Residuals"
)

plot(
  data.M2,
  main = "Hypothesis M2",
  xlab = "Fitted Values for points per player with assists",
  ylab = "Residuals"
)

plot(
  data.M3,
  main = "Hypothesis M3",
  xlab = "Fitted Values for points per player with minutes played and assists",
  ylab = "Residuals"
)

# Conditional R^2 for each model: the fixed + random effects variance divided
# by the total variance. It indicates how much of the model variance is
# explained by the complete model.
#
# Components read from insight::get_variance():
#   var.fixed    - variance of the fixed effects
#   var.random   - variance of the random effects
#   var.residual - residual variance (sum of dispersion)

# Turn one set of variance components into the conditional R^2.
conditional_r2 <- function(vars) {
  explained <- vars$var.fixed + vars$var.random
  explained / (explained + vars$var.residual)
}

# Null model
varsn <- insight::get_variance(data.null)
r2_conditionaln <- conditional_r2(varsn)
print(r2_conditionaln)

# Model M1
varsn_1 <- insight::get_variance(data.M1)
r2_conditionaln_1 <- conditional_r2(varsn_1)
print(r2_conditionaln_1)

# Model M2
varsn_2 <- insight::get_variance(data.M2)
r2_conditionaln_2 <- conditional_r2(varsn_2)
print(r2_conditionaln_2)

# Model M3
varsn_3 <- insight::get_variance(data.M3)
r2_conditionaln_3 <- conditional_r2(varsn_3)
print(r2_conditionaln_3)

# Leave a comment