# R code: train an XGBoost regression model (via caret) to predict NPVpl.
setwd("C:/Users/243886/OneDrive - Universitetet i Stavanger/ML-5spot-SGS/Rcode")
# The objects 'N' (NPV results) and 'input' (simulation inputs) are assumed
# to already be in the workspace (e.g. imported via RStudio).
NPVpl <- data.frame(N)
NPVpl <- NPVpl[, 1]
input1 <- data.frame(input)
geoin <- read.csv('INPUTPCA489.csv')
Final_Input <- cbind(input1, NPVpl)
install.packages(c("e1071", "caret", "doSNOW", "ipred", "xgboost"))
install.packages(c('lattice','ggplot2'))
library(caret)
library(doSNOW)
# 'INPUTMEAN' and 'INPUT' are likewise assumed to be in the workspace;
# 'INPUT' holds the modeling table used below.
INPUTMEAN <- data.frame(INPUTMEAN)
train <- INPUT
#=================================================================
# Data Wrangling
#=================================================================
# Subset the data to the features we wish to keep/use.
features <- c('MC1', 'MC2', 'MC3', 'MC4', 'MPV', 'NPVpl')
# Alternative, fuller feature set (per-ensemble columns); defined for
# reference but not used below:
features.full <- c('En1C1','En2C1','En3C1','En4C1','En5C1','En6C1','En7C1',
                   'En8C1','En9C1','En10C1',
                   'En1C2','En2C2','En3C2','En4C2','En5C2','En6C2','En7C2',
                   'En8C2','En9C2','En10C2',
                   'En1C3','En2C3','En3C3','En4C3','En5C3','En6C3','En7C3',
                   'En8C3','En9C3','En10C3',
                   'En1C4','En2C4','En3C4','En4C4','En5C4','En6C4','En7C4',
                   'En8C4','En9C4','En10C4',
                   'En1PV','En2PV','En3PV','En4PV','En5PV','En6PV','En7PV',
                   'En8PV','En9PV','En10PV',
                   'NPVpl')
train <- train[, features]
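# Sanity check (optional): the subset should now contain exactly the
# requested feature columns.
stopifnot(identical(names(train), features))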
#=================================================================
# Split Data
#=================================================================
# Make sure the response column (the last one) is named 'NPVpl'.
names(train)[ncol(train)] <- "NPVpl"
# Use caret to create a 70%/30% split of the data, keeping the
# distribution of the NPVpl response similar across the splits
# (createDataPartition stratifies a numeric outcome by percentiles).
set.seed(54321)
indexes <- createDataPartition(train$NPVpl,
                               times = 1,
                               p = 0.7,
                               list = FALSE)
profs.train <- train[indexes,]
profs.test <- train[-indexes,]
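# Quick check (optional): the row counts should reflect the 70/30 split.
nrow(profs.train)
nrow(profs.test)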
# Examine the distribution of the NPVpl response across the datasets
# (NPVpl is continuous, so summaries are more informative than class
# proportions).
summary(train$NPVpl)
summary(profs.train$NPVpl)
summary(profs.test$NPVpl)
#=================================================================
# Train Model
#=================================================================
# bestTune values from earlier runs, for reference:
#    nrounds max_depth   eta gamma colsample_bytree min_child_weight subsample
# 4     4000         6 0.010     0              0.4             2.00         1
# 10    4000         6 0.025     0              0.4             2.25         1
# Set up caret to perform 10-fold cross-validation repeated 3
# times and to use a grid search for optimal model hyperparameter
# values.
train.control <- trainControl(method = "repeatedcv",
                              number = 10,
                              repeats = 3,
                              search = "grid")
# Leverage a grid search of hyperparameters for xgboost. See
# the following presentation for more information:
# https://www.slideshare.net/odsc/owen-zhangopen-sourcetoolsanddscompetitions1
tune.grid <- expand.grid(eta = c(0.0025),
                         nrounds = c(4000),
                         max_depth = 6,
                         min_child_weight = c(2.25),
                         colsample_bytree = c(0.4),
                         gamma = 0,
                         subsample = 1)
View(tune.grid)
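# If compute allows, a wider grid can be searched the same way; the
# ranges below are illustrative only (swap this in for 'tune.grid'
# above to use it).
tune.grid.wide <- expand.grid(eta = c(0.0025, 0.01, 0.025),
                              nrounds = c(2000, 4000),
                              max_depth = c(4, 6, 8),
                              min_child_weight = c(2, 2.25),
                              colsample_bytree = c(0.4, 0.6),
                              gamma = 0,
                              subsample = 1)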
# Use the doSNOW package to enable caret to train in parallel.
# While there are many package options in this space, doSNOW
# has the advantage of working on both Windows and Mac OS X.
#
# Create a socket cluster using 10 processes.
#
# NOTE - Tune this number based on the number of cores/threads
# available on your machine!!!
#
cl <- makeCluster(10, type = "SOCK")
# Register cluster so that caret will know to train in parallel.
registerDoSNOW(cl)
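# A portable way to size the cluster (a sketch): derive a worker count
# from the machine, leaving one core free (detectCores() comes with the
# base 'parallel' package).
n.workers <- max(1, parallel::detectCores() - 1)
# cl <- makeCluster(n.workers, type = "SOCK")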
# (doParallel is an equally valid backend, but only one registered
# backend is needed; the doSNOW cluster above is used here.)
# Train the xgboost model using 10-fold CV repeated 3 times
# and a hyperparameter grid search to train the optimal model.
library(xgboost)
caret.cv <- train(NPVpl ~ .,
                  data = profs.train,
                  method = "xgbTree",
                  tuneGrid = tune.grid,
                  trControl = train.control)
stopCluster(cl)
# Examine caret's processing results.
caret.cv$bestTune
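# Optionally inspect which features drive the fitted model; caret's
# varImp() supports xgbTree models.
plot(varImp(caret.cv))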
# Make predictions on the test set using a xgboost model
# trained on all 625 rows of the training set using the
# found optimal hyperparameter values.
preds <- predict(caret.cv, profs.test)
plot(preds, profs.test$NPVpl, col = 'red', type = 'p', pch = 1,
     xlab = 'NPV predicted by ML ($MM)',
     ylab = 'NPV of the real Test Data ($MM)',
     main = 'Test Data vs. ML Prediction')
abline(a = 0, b = 1, col = 4, lwd = 3)
# Compute R^2 on the test set, then annotate the plot with it.
r2 <- caret::R2(preds, profs.test$NPVpl)
mylabel <- bquote(italic(R)^2 == .(format(r2, digits = 2)))
text(x = 40, y = 15, labels = mylabel)
r2
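# A further diagnostic (a sketch): residuals against observed NPV should
# scatter around zero with no visible trend.
plot(profs.test$NPVpl, profs.test$NPVpl - preds,
     xlab = 'NPV of the real Test Data ($MM)',
     ylab = 'Residual ($MM)',
     main = 'Residuals vs. Test Data NPV')
abline(h = 0, col = 4, lwd = 2)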
# NPVpl is continuous, so classification tools such as caret's
# confusionMatrix() do not apply here; postResample() reports the
# regression metrics (RMSE, R^2, MAE) on the held-out test data.
postResample(preds, profs.test$NPVpl)
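# Persist the tuned model for later reuse (the file name here is
# illustrative).
saveRDS(caret.cv, 'caret_xgb_NPVpl.rds')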