ST303 Stochastic Simumlation

Class 4


Philipp Sterzinger

p.sterzinger@lse.ac.uk


Office hours: Thursday, COL 8.05



30 October 2025

Recap

Inverse sampling method for discrete random variables

Generalised inverse

\(F_X^{-1}(p) = \left \{\inf \, x : F(x) \geq p \right\}\)

Code
library(ggplot2) 
library(tidyverse)
library(cowplot)
library(animation)

ns <- seq(from = 10, to = 1000, by = 10)

set.seed(1) 
support <- 0:5
pdfs <- runif(6) 
pdfs <- pdfs / sum(pdfs) 

set.seed(123)
inv_cdf <- function(p, support, cdf){
    support[which(cdf - p >= 0)[1]] 
}

new_steps <- tibble(x = support,
    cdf = cumsum(pdfs))


df <- new_steps %>%
  mutate(type = "cdf") %>%
  bind_rows(new_steps %>%
    mutate(type = "prior",
    cdf = lag(cdf))) %>%
  drop_na() %>%
  arrange(x, desc(type))

central_plot <- ggplot(df) + 
    geom_segment(aes(x = lag(x), y = lag(cdf),
            xend = x, yend = cdf,
            lty = type), 
            linewidth = 1.2) +
     geom_line(data = data.frame(x = c(df$x[length(df$x)], 61), y = rep(1, 2)), 
        aes(x = x, y = y), 
        linewidth = 1.2) + 
    geom_point(aes(x, cdf, fill = type),
        shape = 21, 
        size = 3) +
    geom_line(data = data.frame(x = c(-1, df$x[1]), y = rep(0, 2)), 
        aes(x = x, y = y), 
        linewidth = 1.2) + 
    geom_segment(aes(x = df$x[1], y = 0, xend = df$x[1], yend = df$cdf[1]), linetype = 2, size = 1.2) + 
    geom_segment(aes(x = df$x[1], y = 0, xend = df$x[1], yend = df$cdf[1]), linetype = 2, size = 1.2) + 
    geom_point(data = data.frame(x = df$x[1], y = 0), 
        aes(x = x, y = y), 
        size = 3, 
        shape = 21, 
        fill = "white") + 
    geom_point(data = data.frame(x = df$x[1], y = df$cdf[1]), 
        aes(x = x, y = y), 
        size = 3, 
        shape = 21, 
        fill = "grey") + 
    scale_fill_manual(values = c("grey", "white")) +
    scale_linetype_manual(values = c("dashed", "solid")) + 
    theme_minimal() + 
    labs(title = "(2) Discrete CDF", ylab = "") + 
    theme(plot.margin = margin(l = 0, t = 0), 
        legend.position = "none",
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(), 
        axis.title.y = element_blank()) + 
    coord_cartesian(xlim = c(-1, 6))

invisible(saveGIF({
  for(n in ns){
    set.seed(123) 
    us <- runif(n) 

    u <- us[sample.int(n, 1)]
    x_u <- inv_cdf(u, support, cumsum(pdfs))

    xs <- seq(from = x_u, to = 6, length.out = 2) 
    ys <- rep(u, 2)



nc_plot <- central_plot + 
  geom_line(data = data.frame(x = xs, y = ys), 
    aes(x = xs, y = ys), 
    color = "grey", 
    linewidth = 1, 
    linetype = 2) + 
    geom_point(data = data.frame(x = 6, y = u), 
      aes(x = x, y = y), 
      color = "steelblue", 
      size = 3) + 
    geom_segment(data = data.frame(x = x_u, y = c(0, cumsum(pdfs))[x_u + 1]), 
      aes(x = x, y = y, yend = 0, xend = x), 
      color = "grey", 
      linewidth = 1, 
      linetype = 2) + 
    geom_point(data = data.frame(x = x_u, y = 0), 
      aes(x = x, y = y), 
      color = "tomato", 
      size = 3) +
      geom_point(data = data.frame(x = x_u, y = c(0, cumsum(pdfs))[x_u + 1]), 
      aes(x = x, y = y), 
      fill = "white", 
      size = 3, 
      shape = 21) 

    right_hist <- ggplot(data.frame(us = us), aes(x = us)) +
        geom_histogram(aes(y = after_stat(density)), 
            bins = 30, 
            fill = "steelblue", 
            color = "black") +
        scale_x_continuous(limits = c(0, 1)) +  
        scale_y_continuous(limits = c(0, 1.2), 
            breaks = seq(0, 1.2, length.out = 4),
            minor_breaks = NULL) +  
        theme_minimal() +
        labs(title = "(1) Uniform sample") +
        coord_flip() +
        theme(axis.title = element_blank(),   
        axis.text = element_blank(),    
        axis.ticks = element_blank(),   
        plot.margin = margin(r = 0, t = 0)) 

    discrete_vals <- vapply(us, inv_cdf, 0.0, support = support, cdf = cumsum(pdfs)) 
    emp_pdf <- vapply(support, function(x){sum(discrete_vals == x) / length(discrete_vals)}, 0.0)

    bottom_hist <- ggplot(data.frame(x = support, y = emp_pdf), aes(x = x, y = y)) + 
        geom_col(fill = "tomato", width = 0.5) + 
        geom_point(data = data.frame(x = support, y = pdfs), aes(x = x, y = y), 
        size = 3, 
        col = "black") + 
      theme_minimal() +
      scale_x_continuous(limits = c(-1, 6)) +  
      scale_y_reverse(limits = c(.35, 0), 
        breaks = seq(0.35, 0, length.out = 4),
        minor_breaks = NULL) +
      labs(y = "(3) Discrete Sample") +
      theme(plot.margin = margin(t = 0, b = 0), 
        axis.title.y = element_text(size = 14,
          color = "black",
          angle = 90,                            
          hjust = 1,
          vjust = 0.5),    
        axis.text.y = element_blank(),  
        axis.title.x = element_blank(),  
        axis.ticks.y = element_blank()) 

    title_plot <- ggplot() + 
            labs(title = paste("Sample Size N =", n)) + 
            theme_void() + 
            theme(
                plot.title = element_text(
                size = 20,          
                face = "bold",      
                hjust = 0.5,        
                vjust = 0.),
                plot.margin = margin(r = 0, t = 0, b = 0, l = 0)
            )

    final_plot2 <- ggdraw() +
      draw_plot(title_plot, x = 0, y = 0.9, width = 1, height = 0.1) +
      draw_plot(nc_plot, x = 0.2, y = 0.2, width = 0.6, height = 0.6) + 
      draw_plot(right_hist, x = 0.8, y = 0.2, width = 0.2, height = 0.6) +     
      draw_plot(bottom_hist, x = 0.2, y = 0, width = 0.6, height = 0.2)   

    print(final_plot2)
  }
}, movie.name = "discrete_sampling.gif", interval = .5, ani.width = 1000, ani.height = 1000))
Discrete Sampling

Inverse sampling method for discrete random variables

Why it works

\(X\) discrete random variable taking values \(x_1, \ldots, x_n\) and CDF \(F_X\).

\[ \begin{aligned} \Pr\left(F_X^{-1} (U) = x_i \right) &= \Pr \left( \inf \{x : F_X(x) \geq U \} = x_i \right) \\ &= \Pr \left( F_X(x_{i-1}) < U \leq F_X(x_i) \right) \\ &= F_X(x_i) - F_X(x_{i - 1}) \end{aligned} \]

Inverse sampling method for discrete random variables

\(X\) discrete random variable taking values \(x_1, \ldots, x_n\) and CDF \(F_X\).

Algorithm

  1. \(U \sim \mathcal{U}(0,1)\)

  2. Find \(x_i\) such that \(F_X(x_{i-1}) < U \leq F_X(x_i)\)

  3. Set \(X(U) = x_i\)

Acceptance rejection sampling

Have:

  • Random variable \(X\) taking values \(x_1, x_2, \ldots, x_n\) with probabilities \(p_1, p_2, \ldots, p_n\)
  • Random variable \(Y\) whose support is that of \(X\) with pdf \(q_1, q_2, \ldots, q_n\) such that \(p_i / q_i \leq c\)
  • A sampling method for \(Y\)

Want:

  • A sampling method for \(X\)

Algorithm:

  1. Draw \(Y\)
  2. Draw \(U \sim \mathcal{U}(0,1)\), independent of \(Y\)
  3. If \(U \leq p_Y / (cq_Y)\), \(X = Y\), else go back to 1.

Acceptance rejection sampling

Code
library(tidyverse)
library(ggplot2)

a <- 2; b <- 5
ap <- 2; bp <- 4

ratio <- function(x) dbeta(x, a, b) / dbeta(x, ap, bp)
c_opt <- max(
  optimize(ratio, c(1e-9, 1-1e-9), maximum = TRUE)$objective,
  ratio(1e-9), ratio(1-1e-9)
)

df <- tibble(
  x = seq(0, 1, length.out = 1000)
) |>
  mutate(
    target = dbeta(x, a, b),
    proposal = dbeta(x, ap, bp),
    scaled_proposal = c_opt * proposal,
    ymin = pmin(target, scaled_proposal),
    ymax = pmax(target, scaled_proposal)
  )

ggplot(df, aes(x)) +
  geom_area(aes(y = target), fill = "steelblue", alpha = 0.2) +
  geom_ribbon(aes(ymin = ymin, ymax = ymax), fill = "tomato", alpha = 0.25) +
  geom_line(aes(y = target), linewidth = 1, color = "steelblue") +
  geom_line(aes(y = scaled_proposal), linewidth = 1, linetype = "dashed", color = "tomato") +
  labs(
    x = "X", y = "",
    title = "Target vs Scaled Proposal"
  ) +
  theme_minimal(base_size = 12)

Acceptance rejection sampling

Code
library(ggplot2)
library(tidyverse)
library(cowplot)
library(animation)

ns <- seq(10, 1000, by = 10)

a <- 2; b <- 5
ap <- 2; bp <- 4

ratio <- function(x) dbeta(x, a, b) / dbeta(x, ap, bp)
c_opt <- max(
  optimize(ratio, c(1e-9, 1-1e-9), maximum = TRUE)$objective,
  ratio(1e-9), ratio(1-1e-9)
)

grid <- tibble(x = seq(0, 1, length.out = 1200)) |>
  mutate(target = dbeta(x, a, b),
         prop = dbeta(x, ap, bp),
         scaled_prop = c_opt * prop,
         ymin = pmin(target, scaled_prop),
         ymax = pmax(target, scaled_prop))

curve_prop <- tibble(x = seq(0, 1, length.out = 400),
                     y = dbeta(x, ap, bp))
curve_target <- tibble(x = seq(0, 1, length.out = 400),
                       y = dbeta(x, a, b))

base_plot <- ggplot(grid, aes(x)) +
  geom_area(aes(y = target), fill = "steelblue", alpha = 0.18) +
  geom_ribbon(aes(ymin = ymin, ymax = ymax), fill = "tomato", alpha = 0.22) +
  geom_line(aes(y = target), linewidth = 1, color = "steelblue") +
  geom_line(aes(y = scaled_prop), linewidth = 1, linetype = "dashed", color = "steelblue") +
  labs(x = "x", y = "", title = "") +
  theme_minimal(base_size = 13) +
  coord_cartesian(xlim = c(0, 1))

invisible(saveGIF({
  set.seed(123)
  acc_store <- numeric(0)
  x_store <- numeric(0)
  for(n in ns){
    xs <- rbeta(n, ap, bp)
    us <- runif(n)
    hg <- c_opt * dbeta(xs, ap, bp)
    dots_y <- us * hg
    fx <- dbeta(xs, a, b)
    accept <- dots_y <= fx
    x_store <- c(x_store, xs)
    i <- sample.int(n, 1)
    xi <- xs[i]
    yi <- dots_y[i]
    ai <- accept[i]
    
    central1 <- base_plot +
      labs(title = "(2a) Candidate from proposal") +
      geom_segment(data = data.frame(x = xi, y0 = 0, y1 = c_opt * dbeta(xi, ap, bp)),
                   aes(x = x, xend = x, y = y0, yend = y1),
                   inherit.aes = FALSE,
                   linetype = "dashed",
                   linewidth = 1,
                   color = "grey")
    
    central2 <- central1 +
      labs(title = "(2b) Uniform draw on the line") +
      geom_point(data = data.frame(x = xi, y = yi),
                 aes(x = x, y = y),
                 inherit.aes = FALSE,
                 size = 3,
                 color = "black")
    
    central3 <- central1 +
      labs(title = "(2c) Accept/Reject") +
      geom_point(data = data.frame(x = xi, y = yi),
                 aes(x = x, y = y),
                 inherit.aes = FALSE,
                 size = 3,
                 color = if (ai) "steelblue" else "tomato")
    
    right_hist <- ggplot(data.frame(x = x_store), aes(x = x)) +
      geom_histogram(aes(y = after_stat(density)),
                     bins = 25,
                     fill = "tomato",
                     color = "black") +
      geom_line(data = curve_prop, aes(x = x, y = y)) +
      coord_flip(xlim = c(0, 1)) +
      scale_y_continuous(limits = c(0, 6)) +
      theme_minimal() +
      labs(title = "(1) Proposal draws", x = "", y = "") +
      theme(axis.text = element_blank(),
            axis.ticks = element_blank(),
            plot.margin = margin(r = 0, t = 0))
    
    bottom_hist_prev <- ggplot(data.frame(x = acc_store), aes(x = x)) +
      geom_histogram(aes(y = after_stat(density)),
                     bins = 25,
                     fill = "steelblue",
                     color = "black") +
      geom_line(data = curve_target, aes(x = x, y = y), color = "steelblue") +
      coord_cartesian(xlim = c(0, 1)) +
      theme_minimal() +
      labs(y = "(3) Accepted sample", x = "") +
      theme(plot.margin = margin(t = 0, b = 0),
            axis.title.y = element_text(size = 12))
    
    acc_store_next <- c(acc_store, xs[accept])
    
    bottom_hist_next <- ggplot(data.frame(x = acc_store_next), aes(x = x)) +
      geom_histogram(aes(y = after_stat(density)),
                     bins = 25,
                     fill = "steelblue",
                     color = "black") +
      geom_line(data = curve_target, aes(x = x, y = y), color = "steelblue") +
      coord_cartesian(xlim = c(0, 1)) +
      theme_minimal() +
      labs(y = "(3) Accepted sample", x = "") +
      theme(plot.margin = margin(t = 0, b = 0),
            axis.title.y = element_text(size = 12))
    
    title_plot <- ggplot() +
      labs(title = paste0("Proposals = ", length(x_store),
                          "   Accepts = ", length(acc_store_next))) +
      theme_void() +
      theme(plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
            plot.margin = margin(r = 0, t = 0, b = 0, l = 0))
    
    final1 <- ggdraw() +
      draw_plot(title_plot, x = 0, y = 0.9, width = 1, height = 0.1) +
      draw_plot(central1, x = 0.2, y = 0.2, width = 0.6, height = 0.6) +
      draw_plot(right_hist, x = 0.8, y = 0.2, width = 0.2, height = 0.6) +
      draw_plot(bottom_hist_prev, x = 0.2, y = 0, width = 0.6, height = 0.2)
    
    final2 <- ggdraw() +
      draw_plot(title_plot, x = 0, y = 0.9, width = 1, height = 0.1) +
      draw_plot(central2, x = 0.2, y = 0.2, width = 0.6, height = 0.6) +
      draw_plot(right_hist, x = 0.8, y = 0.2, width = 0.2, height = 0.6) +
      draw_plot(bottom_hist_prev, x = 0.2, y = 0, width = 0.6, height = 0.2)
    
    final3 <- ggdraw() +
      draw_plot(title_plot, x = 0, y = 0.9, width = 1, height = 0.1) +
      draw_plot(central3, x = 0.2, y = 0.2, width = 0.6, height = 0.6) +
      draw_plot(right_hist, x = 0.8, y = 0.2, width = 0.2, height = 0.6) +
      draw_plot(bottom_hist_next, x = 0.2, y = 0, width = 0.6, height = 0.2)
    
    print(final1)
    print(final2)
    print(final3)
    
    acc_store <- acc_store_next
  }
}, movie.name = "accept_reject.gif", fps = 2, ani.width = 1000, ani.height = 1000))
Acceptance Sampling

Acceptance rejection sampling

Why it works

\[ \begin{aligned} \Pr \left( X = j \right) &= \Pr \left( Y = j \mid \textrm{accept} \right) \\ &= \Pr \left( Y = j \mid U \leq \frac{p_Y}{cq_Y} \right) \\ &= \frac{\Pr\left( U \leq \frac{p_Y}{cq_Y} \mid Y = j \right) \Pr\left(Y = j \right)}{\Pr \left(U \leq \frac{p_Y}{cq_Y} \right)} && {\Huge|} \textrm{ Bayes' rule}\\ &= \frac{\frac{p_j}{cq_j} q_j}{\Pr \left(U \leq \frac{p_Y}{cq_Y} \right)} \\ &= \frac{p_j}{c \Pr \left(U \leq \frac{p_Y}{cq_Y} \right)} \\ % &= p_j \end{aligned} \]

Acceptance rejection sampling

Why it works

Now \[ \begin{aligned} \Pr \left(U \leq \frac{p_Y}{cq_Y} \right) &= \Pr \left(\bigcup_{j = 1}^n \left\{U \leq \frac{p_j}{cq_j} \cap Y = j\right\} \right) \\ &= \sum_{j = 1}^n \Pr \left(U \leq \frac{p_j}{cq_j} \cap Y = j \right) && {\huge|} \textrm{Mutually exclusive}\\ &= \sum_{j = 1}^n \Pr \left(U \leq \frac{p_j}{cq_j} \right) \Pr\left(Y = j \right) && {\huge|} (U,Y) \textrm{ independent}\\ &= \sum_{j = 1}^n \frac{p_j}{cq_j} q_j && {\huge|} p_j \textrm{ sum to one}\\ &= \frac{1}{c} \end{aligned} \]

and thus \(\Pr \left(X = j \right) = p_j\)

Questions

Question 1

  1. Use the function sample to create a random vector of \(5\) numbers between \(1\) and \(10\) without replacement.

  2. Use the function sample to create a random vector of \(15\) numbers between \(1\) and \(10\) with replacement

Code
n <- 5 
set.seed(123)

sample_without <-  sample(1:10, n, replace = FALSE) 
cat("Without replacement: ", sample_without) 
Without replacement:  3 10 2 8 6
Code
n <- 15
sample_with <-  sample(1:10, n, replace = TRUE) 
cat("With replacement: ", sample_with) 
With replacement:  5 4 6 9 10 5 3 9 9 9 3 8 10 7 10

Question 2

Consider \(8\) runners \(i = 1, 2, \ldots, 8\). The probability that the ith player wins a race is proportional to \(i^{1/2}\).

The probability to finish second is the same but with the winner excluded and the probability that someone finishes third is again the same excluding the runners finishing first and second.

Find the probability that \(1\) will win a medal (i.e. finish first, second, and third), the probability that \(2\) wins a medal and the probability that any of \(1\) or \(2\) will win a medal.

Question 2

Win probabilities for each runner before the race:

\[ \text{Win probabilities} = \frac{1}{\sum_{i = 1}^8 i^{1/2}} \left(1, 2^{1/2}, \ldots, 8^{1/2} \right) \]

\[ \Pr \left( 1 \textrm{ wins} \right) = \frac{1}{\sum_{i = 1}^8 i^{1/2}} \]

\[ \Pr \left( 1 \textrm{ second} \mid j \textrm{ won} \right) = \frac{1}{\sum_{i \neq j} i^{1/2}} \]

\[ \Pr \left( 1 \textrm{ third} \mid j \textrm{ won, } k \textrm{ second} \right) = \frac{1}{\sum_{i \neq \{j, k\}} i^{1/2}} \]

Question 2

Win probabilities for each runner before the race:

\[ \text{Win probabilities} = \frac{1}{\sum_{i = 1}^8 i^{1/2}} \left(1, 2^{1/2}, \ldots, 8^{1/2} \right) \]

\[ \Pr \left( 1 \textrm{ wins} \right) = \frac{1}{\sum_{i = 1}^8 i^{1/2}} \]

\[ \Pr \left( 1 \textrm{ second} \mid j \textrm{ won} \right) = \frac{1}{\sum_{i \neq j} i^{1/2}} \]

\[ \Pr \left( 1 \textrm{ third} \mid j \textrm{ won, } k \textrm{ second} \right) = \frac{1}{\sum_{i \neq \{j, k\}} i^{1/2}} \]

This is sampling without replacement:

  • Draw a winner according to win probabilities \(\propto i^{1 / 2}\)
  • Given winner \(j\) draw second from \(\{1, 2, \ldots, 8 \} \setminus \{j \}\) with win probabilities \(\propto i^{1 / 2}\)
  • Given second place \(k\), draw third from \(\{1, 2, \ldots, 8 \} \setminus \{j, k \}\) with win probabilities \(\propto i^{1 / 2}\)

Question 2

The event \(\mathcal{A} = \{1 \textrm{ medals}\}\) is equivalent to \(\mathcal{B} = \{1 \textrm{ is in a sample of } 3 \textrm{ without replacement}\}\)

We can easily simulate event \(\mathcal{B}\)!

Code
prob_first <- function(runner){
    probs[runner] / sum(probs) 
}

prob_second <- function(runner){
    sum_all <- sum(probs)
    sum(sapply((1:length(probs))[-runner], function(j){
        sum_not_j <- sum(probs[-j]) 
        probs[j] / sum_not_j
    })) * probs[runner] / sum_all 
}

prob_third <- function(runner){
    other_runners <- (1:length(probs))[-runner]
    sum_all <- sum(probs)
    sum(sapply(other_runners, function(j) { 
            sum_not_j <- sum(probs[-j]) 
            sum(sapply(other_runners[-j], function(k) { 
                sum_not_jk <- sum(probs[-c(j, k)])
                probs[k] / sum_not_jk 
        })) * probs[j] / sum_not_j
    })) * probs[runner] / sum_all
}

prob_medal <- function(runner){
    prob_first(runner) + prob_second(runner) + prob_third(runner)
}

set.seed(123) 
N <- 10^5 

probs <- (1:8)^{1/2} 

pr_runners_medal <- function(N, runners = 1, seed = 123){
    set.seed(seed) 
    podiums <- vapply(1:N, function(x){
        sample(1:length(probs), 3, replace = FALSE, prob = probs)
        }, rep(0.,3))

    runners_on_podium <- vapply(1:N, function(x){
        any(runners %in% podiums[, x])}, 1.)
    mean(runners_on_podium) 
}

pr_one_medals <- pr_runners_medal(N, runners = 1, seed = 1)
pr_one_actual <- prob_medal(1)

cat("Probability 1 medals: Estimate = ", 
    pr_one_medals, ", Actual = ", 
    pr_one_actual, ", Error = ", 
    pr_one_actual - pr_one_medals)
Probability 1 medals: Estimate =  0.20204 , Actual =  0.2025598 , Error =  0.0005197904
Code
pr_two_medals <- pr_runners_medal(N, runners = 2, seed = 2)
pr_two_actual <- prob_medal(2)

cat("Probability 2 medals: Estimate = ", 
    pr_two_medals, ", Actual = ", 
    pr_two_actual, ", Error = ", 
    pr_two_actual - pr_one_medals)
Probability 2 medals: Estimate =  0.27954 , Actual =  0.2780469 , Error =  0.0760069
Code
pr_one_or_two_medals <- pr_runners_medal(N, runners = c(1,2), seed = 12) 

cat("Probability 1 or 2 medal: ", pr_one_or_two_medals)
Probability 1 or 2 medal:  0.44394

Question 3

  1. Use the inverse transform method to generate a sample of the discrete random variable \(X\), whose distribution is given by \[ \Pr(X = i) = \frac{1}{i (i + 1)}, \quad (i = 1,2, \ldots) \,. \]

Question 3

CDF

\[ \begin{aligned} F_X(x) &= \Pr(X \leq \lfloor x \rfloor) \\ &= \sum_{i = 1}^{\lfloor x \rfloor} \frac{1}{i (i + 1)} \\ &= \sum_{i = 1}^{\lfloor x \rfloor} \frac{1}{i} - \frac{1}{i + 1} \\ &= \left(1 - \frac{1}{2} \right) + \left( \frac{1}{2} - \frac{1}{3} \right) + \ldots \left(\frac{1}{\lfloor x \rfloor} - \frac{1}{\lfloor x \rfloor + 1} \right) \\ &= 1 - \frac{1}{\lfloor x \rfloor + 1} \end{aligned} \]

Question 3

Generalised inverse

\[ F_X^{-1}(p) = \inf \{x \in \mathbb{N}: F_X(x) \geq p \} = \inf \left\{x \in \mathbb{N}: 1 - \frac{1}{x + 1} \geq p \right\} \]

Given \(u \in (0,1)\), set \(X(u) = i\) if

\[ \begin{aligned} 1 - \frac{1}{i}< &u\leq 1 - \frac{1}{i + 1} \\ \iff \frac{1}{i + 1} \leq &1 - u < \frac{1}{i} \\ \iff \frac{1}{1 - u} > &i \geq \frac{1}{1 - u} - 1 \\ \end{aligned} \]

Hence, given \(U \sim \mathcal{U}(0,1)\) can generate \(X(U)\) as \(\lceil 1 / (1 - U) \rceil - 1\)

Question 3

Code
library(ggplot2)

set.seed(1)
N <- 10^5 

U <- runif(N) 
X <- ceiling(1 / (1 - U)) - 1 

emp_support <- sort(unique(X))[1:50]
emp_pdf <- vapply(emp_support, function(x){sum(X == x) / N}, 0.0)
actual_pdf <- vapply(emp_support, function(x){1 / (x * (x + 1))}, 0.0) 

hist <- ggplot(data.frame(x = emp_support, y = emp_pdf, z = actual_pdf), aes(x = x)) + 
  geom_col(aes(y = y, fill = "Empirical PDF"), width = 0.5) + 
  geom_point(aes(y = z, color = "True PDF"), size = 2) + 
  scale_fill_manual(name = "", values = c("Empirical PDF" = "tomato")) +
  scale_color_manual(name = "", values = c("True PDF" = "black")) +
  theme_minimal() +
  labs(
    x = "Support",
    y = expression("X(u) = ceiling" ~ bgroup("(", frac( "1","1 - u"), ")") - 1)
  ) +
  theme(legend.position = "top")
print(hist) 

Question 3

  1. By approximating \(\textrm{E} \left[\frac{X + 1}{X}\right]\), try to verify the Euler’s identity \[ \sum_{i = 1}^\infty \frac{1}{i^2} = \frac{\pi^2}{6} \,. \]

\[ \begin{aligned} \sum_{i = 1}^\infty \frac{1}{i^2} &= \sum_{i = 1}^\infty\frac{i + 1}{i} \frac{1}{i(i + 1)} \\ &= \textrm{E}_X \left[\frac{X + 1}{X}\right] \\ &= \textrm{E}_{\mathcal{U}(0,1)} \left[\frac{X(U) + 1}{X(U)}\right] \\ & \approx \frac{1}{n} \sum_{j = 1}^n \frac{X(U_j) + 1}{X(U_j)} \end{aligned} \]

Question 3

Code
set.seed(123) 
N <- 10^6 

U <- runif(N) 
X <- ceiling(1 / (1 - U)) - 1 

mc_approx <- mean((X + 1) / X) 

cat("Approximation error: ", mc_approx - pi^2 / 6) 
Approximation error:  0.0005678814

Question 3

  1. Use the acceptance-rejection method and the sample from a. to generate a sample of the discrete random variable \(Y\), whose distribution is given by \[ \Pr(Y = i) = \frac{6}{\pi^2} \frac{1}{i^2}, \quad (i = 1,2, \ldots) \]
  • \(q_i = \frac{1}{i (i + 1)}\)
  • \(p_i = \frac{6}{\pi^2} \frac{1}{i^2}\)
  • \(\sup_i p_i / q_i = \sup_i \frac{6}{\pi^2} \frac{i + 1}{i} \leq \frac{12}{\pi^2} = c\)

Algorithm

  1. Draw \(U_1 \sim \mathcal{U}(0,1)\), \(X = \lceil \frac{1}{1 - U_1} \rceil - 1\)
  2. Draw \(U_2 \sim \mathcal{U}(0,1)\)
      1. If \(U_2 \leq p_X / (c q_X) = \frac{X + 1}{2 X}\), \(Y = X\)
      1. Else, go to 1.

Question 3

Code
set.seed(123) 
N <- 10^5 
U <- runif(N) 
U_2 <- runif(N)
X <- ceiling(1 / (1 - U)) - 1
threshold <- (X + 1) / (2 * X) 

Y <- X[U_2 <= threshold] 

emp_support <- sort(unique(Y))[1:25]
emp_pdf <- vapply(emp_support, function(x){sum(Y == x) / length(Y)}, 0.0)
actual_pdf <- vapply(emp_support, function(x){(6 / pi^2) * (1 / x^2)}, 0.0) 
X_pdf <- vapply(emp_support, function(x){12 / (pi^2 * (x + 1) * x)}, 0.0) 

hist <- ggplot(data.frame(x = emp_support, y = emp_pdf, z = actual_pdf, t = X_pdf), aes(x = x)) +
  geom_col(aes(y = t, fill = "PDF Expression"), width = 0.5) +
  geom_col(aes(y = y, fill = "Empirical PDF Y"), width = 0.5) +
  geom_point(aes(y = z, color = "True PDF"), size = 2) +
  scale_fill_manual(
    name = "",
    values = c(
      "Empirical PDF Y" = "tomato",
      "PDF Expression" = "steelblue"
    ),
    labels = c(
      "Empirical PDF Y",
      expression(frac(12, pi^2) * frac(1, i * (i + 1)))
    )
  ) +
  scale_color_manual(
    name = "",
    values = c("True PDF" = "black")
  ) +
  theme_minimal() +
  labs(x = "Support", y = "") +
  theme(legend.position = "top")

print(hist)