fit a high dimensional linear regression model in r
create_example_data <- function(n, p, s = 5, sdX = 1, sdU = 0.5,
sdEpsilon = 0.1, family = "gaussian") {
# Independent true covariates with mean zero and standard deviation sdX
X <- matrix(rnorm(n * p, sd = sdX), nrow = n, ncol = p)
# if Gaussian, response has standard deviation sdEpsilon, and zero intercept
# if binomial, response is binomial with mean (1 + exp(-X %*% beta))^(-1)
beta <- c(-2, -1, 0.5, 1, 2, rep(0, p - s))
if(family == "gaussian") {
# True coefficient vector has s non-zero elements and p-s zero elements
y <- X %*% beta + rnorm(n, sd = sdEpsilon)
} else if (family == "binomial") {
# Need an amplification in the binomial case
beta <- beta * 3
y <- rbinom(n, size = 1, prob = (1 + exp(-X %*% beta))**(-1))
}
# The measurements W have mean X and standard deviation sdU.
# We assume uncorrelated measurement errors
W <- X + matrix(rnorm(n * p, sd = sdU), nrow = n, ncol = p)
return(list(X = X, W = W, y = y, beta = beta, sigmaUU = diag(p) * sdU))
}