Given random variable \(X\) with CDF \(F_X\), we want to find a transformation \(T\) of a uniform random variable \(U\) such that \(T(U) \overset{d}{=} X\).
\[
\begin{aligned}
F_X(t) &= \Pr \left( X \leq t \right) \\
&= \Pr \left( T(U) \leq t \right) \\
&= \Pr \left(U \leq T^{-1}(t) \right) \\
&= T^{-1}(t)
\end{aligned}
\] So \(F_X\) is the inverse function of \(T\), or, equivalently, \(T(t) = F_X^{-1}(t)\).
Inverse transform sampling
Code
library(ggplot2) library(cowplot)logistf <-function(x){1/ (1+exp(-x))} inv_logistf <-function(y){log(y / (1- y))} xs <-seq(from =-10, to =10, length.out =1000)ys <-logistf(xs)set.seed(123)N <-10^5us <-runif(N) ls <-inv_logistf(us) ind <-2u1 <- us[ind] l1 <- ls[ind]min_u_ind <-which(abs(ys - u1) ==min(abs(ys - u1)))uliney <-rep(u1, length(xs))ulinex <-seq(from = xs[min_u_ind], to =max(xs), length.out =length(xs))llinex <-rep(l1, length(xs)) lliney <-seq(from =0, to = u1, length.out =length(xs))central_plot <-ggplot(data.frame(xs = xs, ys = ys, llinex = llinex, lliney = lliney, ulinex = ulinex, uliney = uliney), aes(x = xs, y = ys, group =1)) +geom_line(size =1.2) +geom_line(aes(x = ulinex, y = uliney), linetype ="dashed", color ="grey", size =1) +geom_line(aes(x = llinex, y = lliney), linetype ="dashed", color ="grey", size =1) +geom_point(aes(x =10, y = u1), shape =1, color ="steelblue", size =2) +geom_point(aes(x = l1, y =0.0), shape =1, color ="tomato", size =2) +geom_point(aes(x = l1, y = u1), shape =1, color ="black", size =2) +theme_minimal() +labs(title ="(2) Logistic CDF", ylab ="") +theme(plot.margin =margin(l =0, t =0), axis.title.x =element_blank(),axis.text.x =element_blank(),axis.ticks.x =element_blank(), axis.title.y =element_blank())right_hist <-ggplot(data.frame(us = us), aes(x = us)) +geom_histogram(bins =50, fill ="steelblue", color ="black") +theme_minimal() +labs(title ="(1) Uniform sample") +coord_flip() +theme(axis.title =element_blank(), axis.text =element_blank(), axis.ticks =element_blank(), plot.margin =margin(r =0, t =0))bottom_hist <-ggplot(data.frame(ls = ls), aes(x = ls)) +geom_histogram(bins =50, fill ="tomato", color ="black") +theme_minimal() +scale_y_reverse() +labs(y ="(3) Logistic Sample") +theme(plot.margin =margin(t =0, b =0), axis.title.y =element_text(size =14,color ="black",angle =90, hjust =1,vjust =0.5), axis.text.y =element_blank(), axis.title.x =element_blank(), axis.ticks.y =element_blank())final_plot <-ggdraw() +draw_plot(central_plot, x =0.2, y =0.2, width =0.6, height =0.6) +draw_plot(right_hist, x =0.8, y =0.2, width =0.2, height =0.6) +draw_plot(bottom_hist, x =0.2, y =0, width =0.6, height =0.2) final_plot
Algorithm
Given \(F_X\) compute the inverse \(F_X^{-1}\)
Draw \(U \sim \mathcal{U}(0,1)\)
Compute \(X = F_X^{-1}(U)\)
Inverse transform sampling
Code
library(dplyr)library(ggplot2) library(cowplot)library(animation)logistf <-function(x){1/ (1+exp(-x))} logistf_pdf <-function(x){exp(-x) / (1+exp(-x))^2}inv_logistf <-function(y){log(y / (1- y))}xs <-seq(from =-10, to =10, length.out =1000)ys <-logistf(xs)l_dens <-logistf_pdf(xs)sample_sizes <-seq(50, 5000, by =50)animation_data <-lapply(sample_sizes, function(N) {set.seed(123) us <-runif(N) ls <-inv_logistf(us) ind <-sample(1:N,1) u1 <- us[ind] l1 <- ls[ind] min_u_ind <-which(abs(ys - u1) ==min(abs(ys - u1))) uliney <-rep(u1, length(xs)) ulinex <-seq(from = xs[min_u_ind], to =max(xs), length.out =length(xs)) llinex <-rep(l1, length(xs)) lliney <-seq(from =0, to = u1, length.out =length(xs))data.frame(xs = xs,ys = ys,ulinex = ulinex,uliney = uliney,llinex = llinex,lliney = lliney,us_point = u1,ls_point = l1,N = N )}) %>%bind_rows()animation_data2 <-lapply(sample_sizes, function(N) {set.seed(123) us <-runif(N) ls <-inv_logistf(us)data.frame(us = us,ls = ls,N = N )}) %>%bind_rows()invisible(saveGIF({for(n inunique(animation_data$N)){ data <-filter(animation_data, N == n) data2 <-filter(animation_data2, N == n) center_plot <-ggplot(data, aes(x = xs, y = ys)) +geom_line(linewidth =1.2) +geom_line(aes(x = ulinex, y = uliney), linetype ="dashed", color ="grey", linewidth =1) +geom_line(aes(x = llinex, y = lliney), linetype ="dashed", color ="grey", linewidth =1) +geom_point(aes(x =max(xs), y = us_point), shape =1, color ="steelblue", size =2) +geom_point(aes(x = ls_point, y =0), shape =1, color ="tomato", size =2) +geom_point(aes(x = ls_point, y = us_point), shape =1, color ="black", size =2) +labs(title ="(2) Logistic CDF", y ="") +theme_minimal() +theme(plot.margin =margin(r =0, t =0, b =0, l =0),axis.text.x =element_blank(), axis.title.x =element_blank(), axis.ticks.x =element_blank()) right_hist <-ggplot(data2, aes(x = us)) +geom_histogram(aes(y =after_stat(density)), bins =30, fill ="steelblue", color ="black") +geom_line(data =data.frame(x = xs, y =rep(1, length(xs))), aes(x = x, y = y), linetype ="dashed", color ="grey", linewidth =1) +theme_minimal() +scale_x_continuous(limits =c(0, 1)) +scale_y_continuous(limits =c(0, 1.2)) +labs(title ="(1) Uniform sample") +coord_flip() +theme(plot.margin =margin(r =0, t =0, b =0, l =0),axis.title =element_blank(), axis.text =element_blank(), axis.ticks =element_blank()) bottom_hist <-ggplot(data2, aes(x = ls)) +geom_histogram(aes(y =after_stat(density)),bins =30, fill ="tomato", color ="black") +geom_line(data =data.frame(x = xs, y = l_dens), aes(x = x, y = y), linetype ="dashed", color ="grey", linewidth =1) +theme_minimal() +scale_x_continuous(limits =c(-10, 10)) +scale_y_reverse(limits =c(.3, 0)) +labs(y ="(3) Logistic Sample") +theme(plot.margin =margin(r =0, t =0, b =0, l =0), axis.title.y =element_text(size =14,color ="black",angle =90, hjust =1,vjust =0.5), axis.text.y =element_blank(), axis.title.x =element_blank(), axis.ticks.y =element_blank()) title_plot <-ggplot() +labs(title =paste("Sample Size N =", n)) +theme_void() +theme(plot.title =element_text(size =20, face ="bold", hjust =0.5, vjust =0.),plot.margin =margin(r =0, t =0, b =0, l =0) ) final_plot <-ggdraw() +draw_plot(title_plot, x =0, y =0.9, width =1, height =0.1) +draw_plot(center_plot, x =0.2, y =0.2, width =0.6, height =0.6) +draw_plot(right_hist, x =0.8, y =0.2, width =0.2, height =0.6) +draw_plot(bottom_hist, x =0.2, y =0, width =0.6, height =0.2) print(final_plot) }}, movie.name ="logist_sampling.gif", interval = .2, ani.width =1000, ani.height =1000))
Questions
Question 1
Generate two samples of size \(1000\) from the \(\Gamma(2, 1)\) distribution as follows:
Sample use rgamma(1000, 2, 1)
Sampling using the ’manual’ method, i.e. by calculating: -log(runif(1000)) − log(runif(1000))
Use qqplot to compare them. Then change the first sample to one generated by rgamma(1000, 2.2, 1.1) and repeat without changing the second sample to see what happens.
Code
# sample size N <-10^5# seed set.seed(123)# gamma sample gamma_sample <-rgamma(N, 2, 1)# manual gamma sample u1 <-runif(N) u2 <-runif(N) manual_gamma_sample <--log(u1 * u2) qqplot(gamma_sample, manual_gamma_sample, main =expression(Gamma(2,1) ~"Q-Q Plot"),col ="steelblue", alpha =0.5) gamma_quantiles <-function(p, shape =2, rate =1){qgamma(p, shape, rate = rate) }qqline(gamma_sample, distribution = gamma_quantiles, col ="magenta")
Code
wrong_gamma_sample <-rgamma(N, 2.2, 1.1)qqplot(wrong_gamma_sample, gamma_sample, main =expression(Gamma(2.1,1.1) ~"Q-Q Plot"), col ="steelblue", alpha =0.5) qqline(gamma_sample, distribution = gamma_quantiles, col ="magenta")
Question 2
Use the inverse transform method to generate samples of random variables with the following densities: