Answers for "R code"

R
1

r

R is a programming language and free software 
environment for statistical computing and graphics
supported by the R Foundation for Statistical
Computing. The R language is widely used among 
statisticians and data miners for developing 
statistical software and data analysis.
Posted by: Guest on February-03-2021
0

R code

# NOTE(review): a hard-coded, machine-specific working directory makes this
# script non-portable. It is kept because the relative CSV path below is
# resolved against it.
setwd("C:/Users/243886/OneDrive - Universitetet i Stavanger/ML-5spot-SGS/Rcode")

# `N`, `input`, `INPUTMEAN` and `INPUT` are not created anywhere in this
# script; they must already exist in the session before it is run.
NPVpl <- data.frame(N)
# Keep only the first column of the response frame.
NPVpl <- NPVpl[, 1]
input1 <- data.frame(input)
# NOTE(review): `nrows` expects a row count but is given a logical here;
# confirm the intended value. `FALSE` replaces the reassignable alias `F`.
geoin <- read.csv('INPUTPCA489.csv', nrows = FALSE)
Final_Input <- cbind(input1, NPVpl)

# Install only the packages that are not yet available, instead of
# re-downloading every dependency on each run of the script.
required.pkgs <- c("e1071", "caret", "doSNOW", "ipred", "xgboost",
                   "lattice", "ggplot2")
missing.pkgs <- setdiff(required.pkgs, rownames(installed.packages()))
if (length(missing.pkgs) > 0) {
  install.packages(missing.pkgs)
}
library(caret)
library(doSNOW)
INPUTMEAN <- data.frame(INPUTMEAN)
train <- INPUT
#=================================================================
# Data Wrangling
#=================================================================
# Set up factors.
# Subset data to features we wish to keep/use.

# Columns used for modelling: five predictors plus the response `NPVpl`.
features <- c("MC1", "MC2", "MC3", "MC4", "MPV", "NPVpl")

# Alternative, larger column set. The original script evaluated this vector
# and discarded the result; it is kept under a name so that it can be
# swapped in for `features` deliberately. It is not used below.
features.alt <- c(
  "En1C1", "En2C1", "En3C1", "En4C1", "En5C1",
  "En6C1", "En7C1", "En8C1", "En9C1", "En10C1",
  "En1C2", "En2C2", "En3C2", "En4C2", "En5C2",
  "En6C2", "En7C2", "En8C2", "En9C2", "En10C2",
  "En1C3", "En2C3", "En3C3", "En4C3", "En5C3",
  "En6C3", "En7C3", "En8C3", "En9C3", "En10C3",
  "En1C4", "En2C4", "En3C4", "En4C4", "En5C4",
  "En6C4", "En7C4", "En8C4", "En9C4", "En10C4",
  "En1PV", "En2PV", "En3PV", "En4PV", "En5PV",
  "En6PV", "En7PV", "En8PV", "En9PV", "En10PV",
  "NPVpl"
)

# Fail with a readable message when a requested column is absent.
missing.cols <- setdiff(features, names(train))
if (length(missing.cols) > 0) {
  stop("Columns missing from `train`: ",
       paste(missing.cols, collapse = ", "), call. = FALSE)
}
# `drop = FALSE` keeps a data frame even if `features` has a single entry.
train <- train[, features, drop = FALSE]

#=================================================================
# Split Data
#=================================================================

# The response column is selected by name, so check that it is present
# rather than renaming a fixed position: assigning to position 26 fails on
# a frame that has fewer columns than that.
if (!"NPVpl" %in% names(train)) {
  stop("`train` has no `NPVpl` column", call. = FALSE)
}

# Use caret to create a 70/30% split of the data on the `NPVpl` response.
# The seed makes the partition reproducible.
set.seed(54321)
indexes <- createDataPartition(train$NPVpl,
                               times = 1,
                               p = 0.7,
                               list = FALSE)
profs.train <- train[indexes, ]
profs.test <- train[-indexes, ]


# Compare the distribution of `NPVpl` in the full data and in each split.
# NOTE(review): `table()` counts distinct values; for a continuous response
# `summary()` or a histogram may be more informative.
prop.table(table(train$NPVpl))
prop.table(table(profs.train$NPVpl))
prop.table(table(profs.test$NPVpl))

#=================================================================
# Train Model
#=================================================================
# Hyperparameter rows recorded from earlier runs:
#    nrounds max_depth   eta gamma colsample_bytree min_child_weight subsample
# 4     4000         6 0.01      0              0.4                2         1
# 10    4000         6 0.025     0              0.4             2.25         1

# Resampling scheme: 10-fold cross-validation, repeated 3 times, with the
# hyperparameter candidates taken from an explicit grid.
train.control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  search = "grid"
)


# Hyperparameter grid for xgboost; every parameter has a single value, so
# the grid holds exactly one candidate. Background reading:
# https://www.slideshare.net/odsc/owen-zhangopen-sourcetoolsanddscompetitions1
tune.grid <- expand.grid(
  eta = 0.0025,
  nrounds = 4000,
  max_depth = 6,
  min_child_weight = 2.25,
  colsample_bytree = 0.4,
  gamma = 0,
  subsample = 1
)
# Open the grid in the data viewer for a visual check.
View(tune.grid)


# Register a parallel backend so that caret can train resamples in parallel.
#
# The original code created a 10-worker cluster, registered it, and then
# overwrote `cl` with a second 30-worker cluster, leaving the first set of
# workers running with no handle to stop them. Only one cluster is created
# here, and it is the one the later `stopCluster(cl)` call shuts down.

# Install doParallel only when it is missing, not on every run.
if (!requireNamespace("doParallel", quietly = TRUE)) {
  install.packages("doParallel")
}
library(foreach)
library(doParallel)

# Size the cluster from the machine: leave one core free, never exceed the
# 30 workers the script previously requested, and fall back to a single
# worker when the core count cannot be determined.
n.cores <- parallel::detectCores()
n.workers <- if (is.na(n.cores)) 1L else max(1L, min(30L, n.cores - 1L))

cl <- parallel::makeCluster(n.workers)

# Register cluster so that caret will know to train in parallel.
registerDoParallel(cl)


# Train the xgboost model with the resampling scheme in `train.control` and
# the candidate hyperparameters in `tune.grid`.
library('xgboost')

# `finally` guarantees the worker processes are shut down even when
# training fails; previously an error in `train()` skipped `stopCluster()`
# and left the workers running.
caret.cv <- tryCatch(
  train(NPVpl ~ .,
        data = profs.train,
        method = "xgbTree",
        tuneGrid = tune.grid,
        trControl = train.control),
  finally = stopCluster(cl)
)

# Hyperparameter combination selected by caret.
caret.cv$bestTune
# Predict the held-out test set with the fitted model.
preds <- predict(caret.cv, profs.test)

# Compute R^2 between predictions and observed test values up front: the
# plot annotation below needs it, and the original code referenced `r2`
# without ever assigning it.
r2 <- caret::R2(preds, profs.test$NPVpl)

# Scatter plot of predicted vs. observed values with the 1:1 reference line.
plot(preds, profs.test$NPVpl, col = 'red', type = 'p', pch = 1,
     xlab = 'NPV predicted by ML ($MM)',
     ylab = 'NPV of the real Test Data ($MM)',
     main = 'Test Data vs. ML Prediction')
abline(a = 0, b = 1, col = 4, lwd = 3)

# Annotate the plot with R^2.
# NOTE(review): the label position (40, 15) is in data coordinates and may
# fall outside the plotted range for other data sets.
mylabel <- bquote(italic(R)^2 == .(format(r2, digits = 2)))
text(x = 40, y = 15, labels = mylabel)

# Print the test-set R^2 and the selected hyperparameters.
r2
caret.cv$bestTune
Posted by: Guest on August-25-2021
0

R

R - Functional programming language and environment for 
statistical computing and graphics.
R's Awesome List
https://github.com/qinwf/awesome-R#readme
Posted by: Guest on January-02-2021
0

r language

R is a programming language and free software environment for statistical computing and graphics supported by the R Foundation for Statistical Computing.
Posted by: Guest on August-28-2020

Browse Popular Code Answers by Language