Chapter 11: Monte Carlo Integration
Exercise 11.2: Drawing frmm standard distributions
# Sample size
n1 <- 10L
n2 <- 100L
n3 <- 100000L
# We use integers as indicated by the "L" at the end of the number.
# This only makes the results look nicer
# Reset random number generator for reproducibilty
set.seed(123456)
Uniform
The standard specification of R’s random number generator (RNG) runif
is min = 0
and max = 1
, which is exactly what we need. So we only have to specify the number of draws.
unif1 <- runif(n1)
unif2 <- runif(n2)
unif3 <- runif(n3)
result <- data.frame(size = c(n1, n2, n3),
mean = c(mean(unif1), mean(unif2), mean(unif3)),
sd = c(sd(unif1), sd(unif2), sd(unif3)))
result
## size mean sd
## 1 10 0.4630601 0.2979562
## 2 100 0.4929105 0.3095004
## 3 100000 0.4990746 0.2891135
Standard normal
The standard specification of R’s RNG for the normal distribution rnorm
is mean = 0
and sd = 1
, which is by definition a standard normal distribution. So we only have to specify the number of draws.
norm1 <- rnorm(n1)
norm2 <- rnorm(n2)
norm3 <- rnorm(n3)
result <- data.frame(size = c(n1, n2, n3),
mean = c(mean(norm1), mean(norm2), mean(norm3)),
sd = c(sd(norm1), sd(norm2), sd(norm3)))
result
## size mean sd
## 1 10 -0.149059541 1.149290
## 2 100 -0.076840784 1.090117
## 3 100000 0.001551597 1.001076
Student t(3)
t1 <- rt(n1, df = 3)
t2 <- rt(n2, df = 3)
t3 <- rt(n3, df = 3)
result <- data.frame(size = c(n1, n2, n3),
mean = c(mean(t1), mean(t2), mean(t3)),
sd = c(sd(t1), sd(t2), sd(t3)))
result
## size mean sd
## 1 10 0.523968595 1.427517
## 2 100 -0.100952189 1.361346
## 3 100000 -0.008228298 1.720145
Beta(3,2)
beta1 <- rbeta(n1, shape1 = 3, shape2 = 2)
beta2 <- rbeta(n2, shape1 = 3, shape2 = 2)
beta3 <- rbeta(n3, shape1 = 3, shape2 = 2)
result <- data.frame(size = c(n1, n2, n3),
mean = c(mean(beta1), mean(beta2), mean(beta3)),
sd = c(sd(beta1), sd(beta2), sd(beta3)))
result
## size mean sd
## 1 10 0.6460647 0.1927849
## 2 100 0.5902536 0.1927114
## 3 100000 0.5995364 0.1999195
Exponential(5)
To show the equivalence of the exponential(5) distribution and the G(1, 5) distribution, we reset the RNG before we draw from the respective distribution.
Also note that \(rexp\) requires a slightly different input than the mean as described in the textbook. Rather, we have to specify the rate of the distribution, which is defined as 1 / mean
.
set.seed(987654) # Reset RNG
exp1 <- rexp(n1, rate = 1 / 5)
exp2 <- rexp(n2, rate = 1 / 5)
exp3 <- rexp(n3, rate = 1 / 5)
result <- data.frame(size = c(n1, n2, n3),
mean = c(mean(exp1), mean(exp2), mean(exp3)),
sd = c(sd(exp1), sd(exp2), sd(exp3)))
result
## size mean sd
## 1 10 6.066529 4.766100
## 2 100 5.567845 5.514212
## 3 100000 4.991803 4.994150
Gamma(1, 5)
set.seed(987654) # Reset RNG
gamma_exp1 <- rgamma(n1, shape = 1, scale = 5)
gamma_exp2 <- rgamma(n2, shape = 1, scale = 5)
gamma_exp3 <- rgamma(n3, shape = 1, scale = 5)
result <- data.frame(size = c(n1, n2, n3),
mean = c(mean(gamma_exp1), mean(gamma_exp2), mean(gamma_exp3)),
sd = c(sd(gamma_exp1), sd(gamma_exp2), sd(gamma_exp3)))
result
## size mean sd
## 1 10 4.439433 2.772297
## 2 100 4.856169 4.603918
## 3 100000 4.995174 5.002952
Although numerically not the same, it can be shown that the two approaches are equivalent by using one million draws and plotting their histograms:
library(ggplot2)
# Exponential distribution
set.seed(987654) # Reset RNG
exp4 <- data.frame(var = "Exponential(5)", value = rexp(10^6, rate = 1 / 5))
# Gamma distribution
set.seed(987654) # Reset RNG
gamma_exp4 <- data.frame(var = "Gamma(1, 5)", value = rgamma(10^6, shape = 1, scale = 5))
temp <- rbind(exp4, gamma_exp4)
ggplot(temp, aes(x = value, fill = var)) +
geom_histogram(position = "dodge")
Note that you could also specify the \(rate\) argument in \(rgamma\) to obtain the same result. In this case rate
is defined as 1 / scale
.
set.seed(987654) # Reset RNG
gamma_exp1 <- rgamma(n1, shape = 1, rate = 1 / 5)
gamma_exp2 <- rgamma(n2, shape = 1, rate = 1 / 5)
gamma_exp3 <- rgamma(n3, shape = 1, rate = 1 / 5)
result <- data.frame(size = c(n1, n2, n3),
mean = c(mean(gamma_exp1), mean(gamma_exp2), mean(gamma_exp3)),
sd = c(sd(gamma_exp1), sd(gamma_exp2), sd(gamma_exp3)))
result
## size mean sd
## 1 10 4.439433 2.772297
## 2 100 4.856169 4.603918
## 3 100000 4.995174 5.002952
The rate
argument can be very handy if you want to draw from a inverse gamma distribution, which is often the case in Gibbs sampling.
Chi-squared(2)
Again, to show the equivalence of the \(\chi^{2}(5)\) distribution and the \(G(1, 5)\) distribution, we reset the RNG before we draw from the respective distribution.
set.seed(987654) # Reset RNG
chisq1 <- rchisq(n1, 3)
chisq2 <- rchisq(n2, 3)
chisq3 <- rchisq(n3, 3)
result <- data.frame(size = c(n1, n2, n3),
mean = c(mean(chisq1), mean(chisq2), mean(chisq3)),
sd = c(sd(chisq1), sd(chisq2), sd(chisq3)))
result
## size mean sd
## 1 10 2.563593 1.633190
## 2 100 3.050965 2.245561
## 3 100000 3.006940 2.452734
Gamma(3/2, 2)
set.seed(987654) # Reset RNG
gamma_chi1 <- rgamma(n1, shape = 3 / 2, scale = 2)
gamma_chi2 <- rgamma(n2, shape = 3 / 2, scale = 2)
gamma_chi3 <- rgamma(n3, shape = 3 / 2, scale = 2)
result <- data.frame(size = c(n1, n2, n3),
mean = c(mean(gamma_chi1), mean(gamma_chi2), mean(gamma_chi3)),
sd = c(sd(gamma_chi1), sd(gamma_chi2), sd(gamma_chi3)))
result
## size mean sd
## 1 10 2.563593 1.633190
## 2 100 3.050965 2.245561
## 3 100000 3.006940 2.452734
In this case, the output of the two approaches is exaclty the same, which results from reseting the RNG seed.
Exercise 11.3: Monte Carlo integration in a regression problem
Generate artificial data set
set.seed(987654) # Reset RNG
n <- 100 # Number of observations
# Specify coefficients
beta1 <- 0
beta2 <- 1
h <- 1
# Generate random draws of explanatory variables
x <- runif(n)
# Generate random draws of errors
epsilon <- rnorm(n, 0, 1 / h)
# Calculate depended variable
y <- beta1 + beta2 * x + epsilon
y <- matrix(y) # Transform to matrix format for later
# Generate data matrix of regressors
x <- cbind(1, x)
# Number of regressors
k <- ncol(x)
Calculate posterior mean and standard deviation
Set priors
beta_mu_prior <- matrix(c(0, 1)) # Prior mean
beta_v_i_prior <- diag(1, k) # Prior covariance
v_prior <- 1 # Prior degrees of freedom
s2_prior <- 1 # Prior variance
Obtain posteriors
# OLS results
b_hat <- solve(crossprod(x)) %*% crossprod(x, y) # OLS estimate
v <- n - k # Degrees of freedom
s2 <- crossprod(y - x %*% b_hat) / v # sigma2
sse <- v * s2 # SSE
# beta posterior
beta_v_i_post <- solve(beta_v_i_prior + crossprod(x))
beta_mu_post <- beta_v_i_post %*% (beta_v_i_prior %*% beta_mu_prior + crossprod(x) %*% b_hat)
v_post <- v_prior + n
beta_diff <- b_hat - beta_mu_prior
vs_post <- as.numeric(v_prior * s2_prior + sse + crossprod(beta_diff, crossprod(x) %*% beta_v_i_post %*% beta_v_i_prior) %*% beta_diff)
s2_post <- vs_post / v_post
Posterior mean and standard deviation
beta_cov <- beta_v_i_post * vs_post / (v_post - k)
beta_sd <- sqrt(diag(beta_cov))
result <- data.frame(beta_mu = beta_mu_post,
beta_sd = beta_sd)
row.names(result) <- c("beta1", "beta2")
result
## beta_mu beta_sd
## beta1 0.03296222 0.1864333
## beta2 0.63769948 0.3337710
Work in progress