library(mcreplicate)
library(tidyverse)
library(tictoc)
library(CVXR)
Using Simulation to Calculate Power and Minimum Detectable Effects
This posting will walk through the steps to perform power and minimum detectable effect calculations via simulation.
Define the Data Generation Process
<-
params list(
n = 100,
p = 1,
beta = 0
)
Now we define the data generation process formula:
<- function(params) {
dgp with(params,{
<- matrix(rnorm(n * p), nrow=n)
X colnames(X) <- paste0("beta_", beta)
<- X %*% beta + rnorm(n)
Y data.frame(Y,X)
}) }
Define the Estimator
<- function(df) {
est <- df[,"Y"]
Y <- as.matrix(df[,setdiff(colnames(df),"Y")])
X <- lm(Y ~ 0 + X) # There is no intercept in our model above
ls.model <- data.frame(ls.est = coef(ls.model))
m rownames(m) <- gsub("Xbeta","beta",rownames(m))
<- cbind(m,confint(ls.model))
m
m }
Define the discrimination function
- Reject if 0 is within the 95% confidence interval.
- Accept is 1 minus the above.
<- function(fit) {
disc <- fit["X",]
beta.hat <- 1-as.integer(dplyr::between(0,beta.hat[,2],beta.hat[,3]))
accept return(accept)
}
<- function(params) {
est_power cat(params$beta)
cat("\n")
%>%
params dgp() %>%
est() %>%
disc()
}
Results
Power
= 1000
B =
power seq(100,1000,100) %>%
map(~(mc_replicate(B,est_power(modifyList(params,list(beta = .1,n=.x))),mc.cores=10))) %>%
map_dbl(~(mean(.x))) %>%
data.frame(row.names = seq(100,1000,100)) %>%
rownames_to_column(var = "N") %>%
set_names(c("N","power")) %>%
as_tibble() %>%
mutate(N = as.numeric(paste0(N)))
%>% knitr::kable() power
N | power |
---|---|
100 | 0.151 |
200 | 0.298 |
300 | 0.407 |
400 | 0.510 |
500 | 0.612 |
600 | 0.695 |
700 | 0.748 |
800 | 0.805 |
900 | 0.865 |
1000 | 0.885 |
%>%
power ggplot(aes(x = N , y = power)) + geom_line() +
::theme_few() ggthemes
Minimum Detectable Effects
<- function(.x, B=10000) {
calc_mde <- mc_replicate(B,est_power(modifyList(params,list(beta = .x,n=1000))),mc.cores=10) %>% mean()
power abs(power-.8)
}
<- seq(0,.1,0.01)
search_grid =
mde %>%
search_grid map(~(mc_replicate(B,est_power(modifyList(params,list(beta = .x,n=1000))),mc.cores=10))) %>%
map_dbl(~(mean(.x))) %>%
data.frame(row.names = search_grid) %>%
rownames_to_column(var = "N") %>%
set_names(c("beta","power")) %>%
as_tibble() %>%
mutate(beta = as.numeric(paste0(beta)))
%>%
mde ggplot(aes(x = beta, y = power)) + geom_line() +
::theme_few() +
ggthemesylim(c(0,1)) + geom_hline(aes(yintercept =0.8),lty=2)
<- mde$beta[which(abs(mde$power-.8)==min(abs(mde$power-.8)))]
closest closest
[1] 0.09