\documentclass[twoside]{article}
\newcommand{\assgnnum}{3}
\newcommand{\duedate}{Friday, 8 May, 2009}

\usepackage{amsmath}
%\usepackage{fullpage}
\usepackage{amssymb}
\usepackage{bbm}
\usepackage{fancyhdr,lastpage}
\usepackage{paralist}

\oddsidemargin 0in \evensidemargin 0in
\topmargin -0.5in \headheight 0.25in \headsep 0.25in
\textwidth 6.5in \textheight 9in
\parskip 6pt \parindent 0in \footskip 20pt

% set the header up
\fancyhead{} 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% UPDATE THIS SECTION FOR YOUR HOMEWORK SUBMISSION %
\fancyhead[LE,RO]{CME308: Assignment \assgnnum} % Put your name here
\fancyhead[RE,LO]{Due: \duedate} %Put the date here, or whatever else you want...
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\renewcommand\headrulewidth{0.4pt}
\setlength\headheight{15pt}

\newcommand{\p}{\ensuremath{\mathbf{P}}}
\renewcommand{\Pr}[1]{\ensuremath{\p \left \{ #1 \right \}}}
\newcommand{\nti}{\ensuremath{n \to \infty}}
\newcommand{\I}{\ensuremath{\operatorname{I}}}
\newcommand{\One}[1]{\ensuremath{\mathbbm{1}_{\left \{ #1 \right \}}}}
\newcommand{\E}{\ensuremath{\mathbf{E}}}
\newcommand{\Ex}[2][]{\ensuremath{\E_{#1} \left[ #2 \right]}}
\newcommand{\var}[1]{\ensuremath{\operatorname{Var}\left( #1\right)}}
\newcommand{\cov}[2]{\ensuremath{\operatorname{Cov}\left( #1, #2\right)}}
\newcommand{\F}{\ensuremath{\mathcal{F}}}
\newcommand{\R}{\ensuremath{\mathbb{R}}}
\newcommand{\NormRV}[2]{\ensuremath{\operatorname{N}\left(#1, #2\right)}}
\newcommand{\BetaRV}[2]{\ensuremath{\operatorname{Beta}\left(#1, #2\right)}}

\newcommand{\eqD}{\ensuremath{\overset{\mathcal{D}}{=}}}

\setlength{\parindent}{0in}

\newcounter{pbctr}
\setcounter{pbctr}{0}
\newenvironment{problem}[1]{\refstepcounter{pbctr}\begin{trivlist}
\item {\bf Problem \arabic{pbctr}: {\sc #1}\:}}
{\end{trivlist}}

\begin{document}

\pagestyle{fancy}
%\vspace*{15pt} 

{\bf Due Date: } This assignment is due on \duedate(with the natural 1 day extension for SCPD students), by 5pm in the box outside Durand 112.  The \LaTeX{} incentive policy is in effect.

% Problem 1
\begin{problem}{} 
Consider Monte Carlo integration with $g\geq 0$ and assume that $g\leq ch$ for some $c>1$ and some density $h$.  The \emph{hit-or-miss} estimator of $z  \stackrel{\text{def}}{=}\int_0^1 g(u)du$ is $c\One{Uch(Y)\leq g(Y)}$, where $U,Y$ are independent with $U$ as uniform$(0,1)$ and $Y$ with density $h$ (note that $h$ could be defined over the whole real line, but we restrict this to $h$ only defined on $[0,1]$).  Show that its expectation is $z=\int_0^1 g(u)du$ as desired, but that the variance is always at least the variance of the importance sampling estimator that uses sampling from $h$.
\end{problem}

%Problem 2
\begin{problem}{}
With $\phi(y)$ as in Problem 7 from Homework 1, show the following.
\begin{enumerate}
\item Prove that, for all functions $g$ satisfying $\E[ g(Y)^2]<\infty$, $\E[(X-\phi(Y))g(Y)] = 0$.
\item Prove that $\phi(Y)$ is the best mean square predictor of $X$ across all predictors $g(Y)$ such that $\E[g(Y)^2]<\infty$.
\end{enumerate}
\end{problem}

%Problem 3
\begin{problem}{Variance Reduction with Control Variates}\\
Suppose that we wish to compute $\alpha = \E X$ via Monte Carlo.  Assume that there exist r.v.'s $Y_1,\ldots,Y_d$ such that $\E Y_i = \mu_i$ is known for $1\leq i\leq d$.  The r.v. $C_i = Y_i-\mu_i$ is called a \emph{control variate}.
\begin{enumerate}
	\item If $C = (C_1,\ldots,C_d)^T$, prove that
	\[\E X({\lambda}) = \alpha,\]
	where $X({\lambda}) = X - {\lambda}^T{C}$, $\lambda\in\mathbb{R}^d$.
	\item Find the vector $\lambda^*$ which minimizes $\var{X(\lambda)}$ over $\lambda$.
	\item How would you estimate $\lambda^*$ in an implementation of this approach?
	\item How general is this method? (i.e., are there typically r.v.'s for which the means $\mu_1,\ldots,\mu_d$ can be computed?)  Explain your answer.
\end{enumerate}
\end{problem}

%Problem 4
\begin{problem}{}
The expected shortfall of a rv $Z$ at the $p$'th quantile is defined
as
\[ \Ex{Z | Z > q}\]
where $q$ is the $p$'th quantile value, that is $\Pr{Z \leq q} = p$.  

This problem is concerned with estimating the expected shortfall of a so-called
\emph{Asian Option}.  The option is based on an underlying asset $V$
which evolves in discrete time according to
\[ V_{n+1} = V_n R_{n+1} \]
where $R_{n+1}$ is a $\log$-Normal random variable, i.e.
\[ \ln R_n \sim \NormRV{r}{\sigma^2 \Delta t}, \]
$r$ is the risk free rate of return over one period, and $\sigma^2$
is the volatility of $V$.  The price of a (simplified) Asian option
with \emph{expiration} $N$ time units in the future is
\[ X_N = \E_{V_0}\left({ N^{-1} \sum_{i=1}^N V_{i} - K}\right)_+ \]
where $K$ is the \emph{strike price}.  In general, an analytic
expression is not known for $X_N$.  (Note: $(x)_+ = x$ if $x\geq0$ and $(x)_+ = 0$ if $x<0$.)

We will treat the $V_n$'s as the weekly values of the asset.  The annual
risk-free rate of return is 5\%, the volatility $\sigma^2 = 0.1$, $N =
50$ weeks\footnote{There are 50 working weeks in the financial year.}
, $V_0 = \$100$ and $K = \$115$.  (Note: Since the risk-free rate and $\sigma^2$ were given as annual values, the generation of $R_n$ must reflect that.  The volatility in $R_n$ was given as $\sigma^2\Delta t$, where $\Delta t=\frac{1}{50}$, so it already adequately reflects this.  For the weekly return, the effective rate is $r = \ln(1.05)/50 = 9.7580 \times 10^{-4}$.)

Using a bootstrap technique, compute the 95\% confidence interval for the  expected shortfall of the rv
\[\left({ N^{-1} \sum_{i=1}^N V_{i} - K}\right)_+,\]
with $p=90\%$.  Please include a description of the your method as well as the code.  
\end{problem}

%Problem 5
\begin{problem}{}
Suppose that $x(\cdot)$ is the solution to a deterministic differential equation
\[ \frac{d}{dt} x(t) = \phi(\theta, x(t)) \]
such that
\[ x(0) = x_0 \]
where $\phi$ is deterministic and $\theta$ represents a vector of parameters.  (For example, $\phi(\theta, x) = {\theta x}$ in Example 13 on page 52.)  Assume that $x_0$ and $\theta$ are measured with error, and that $(X_0, \hat{\theta})^T$ are multivariate normally distributed with mean $(x_0, \theta)^T$.  
\begin{enumerate}
\item  Compute the small noise approximations for the solution $X(t)$ to 
\[ \frac{d}{dt} X(t) = \phi(\hat{\theta}, X(t)) \]
such that
\[ X(0) = X_0 \]
\item  Discuss the computational issues that arise in computing the variance of your small noise approximation
\end{enumerate}
\end{problem}

%Problem 6
\begin{problem}{} 
Develop a corresponding approximation confidence interval for $q(p)$, where $q(p)$ is the ``$p^{th}$ quantile'' of the random variable $Y$ defined as the smaller root of the equation
\[ P\{ Y \leq q(p) \} = p \]
Such quantile computations are of interest on ``value at risk'' calculations in the finance setting.
\end{problem}

%Problem 7
\begin{problem}{}
Let $X$ be a Cauchy r.v. so that its density is given by
\[f(x) = \frac{1}{\pi (1+(x-b)^2)}\]
\begin{enumerate}
	\item Compute the distribution of $X_1+X_2$ where $X_1$, and $X_2$ are independent copies of $X$.
	\item If $X_1,X_2,\ldots,X_n$ is an i.i.d. sample from $X$, what does $n^{-1}(X_1+\cdots X_n)$ converge to?\label{conv}
	\item How might you estimate the parameter $b$ (in lieu of part \ref{conv})?
	\item Explain how to generate the r.v. $X$ using inversion.
	\item Consider the following algorithm:
	\begin{enumerate}
	\item[i.] Generate independent r.v.'s $V_1$ and $V_2$ that are uniform on $[-1,1]$.
	\item[ii.] If $V_1^2+V_2^2 \leq 1$, return $X=V_2/V_1$; else, return to step i.
	\end{enumerate}
	Show that $X$ is Cauchy distributed.  (This can be faster than inversion when computing the arc tangent is expensive.)
\end{enumerate}
\end{problem}

%Problem 8
\begin{problem}{}
Consider a linear model in which 
\[ Y_i = a^* x_i + b^* + \epsilon_i,\quad 1\leq i\leq n,\]
where the $\epsilon_i$'s are iid rvs with density
\[ f(x) = \frac{\lambda^*}{2}e^{-\lambda^* \vert x \vert}.\]

{\bf Note:} From a terminology standpoint, it is important to recognize that this is different from a linear regression problem.  A linear regression problem aims to find a least squares solution for the model.  In this problem, we are looking for maximum likelihood estimates for the model.  These are distinct problems, except when the error is normally distributed.  In that case, they are equivalent.
\begin{enumerate}
\item The MLE for $a^*$, $b^*$ and $\lambda^*$ solves an optimization
problem.  What is it?
\item Show that the MLE can be computed as the solution to a linear
program.
\end{enumerate}
\end{problem}

%Problem 9
\begin{problem}{Least Squares Estimation}
\begin{table}[ht]
\centering
\begin{tabular}{|c|r|r|r|r|} \hline
Name & Height (in) & Weight (lbs) & Age \\ \hline
Alfred &69.0& 112.5 &14 \\ \hline
Alice &56.5 &84.0 &13 \\ \hline
Barbara &65.3 &98.0 &13 \\ \hline
Carol &62.8 &102.5 &14 \\ \hline
Henry &63.5 &102.5 &14 \\ \hline
James &57.3 &83.0 &12 \\ \hline
Jane &59.8 &84.5 &12\\ \hline
Janet &62.5 &112.5 &15 \\ \hline
Jeffrey &62.5 &84.0 &13 \\ \hline
John &59.0 &99.5 &12 \\ \hline
Joyce &51.3 &50.5 &11 \\ \hline
Judy &64.3 &90.0 &14 \\ \hline
Louise &56.3 &77.0& 12 \\ \hline
Mary &66.5 &112.0 &15\\ \hline
Philip &72.0 &150.0 &16 \\ \hline
Robert &64.8 &128.0 &12 \\ \hline
Ronald &67.0 &133.0 &15 \\ \hline
Thomas &57.5 &85.0 &11 \\ \hline
William &66.5 &112.0 &15 \\ \hline
\end{tabular}
\caption{Data For Problem 9}
\label{tbl:data}
\end{table}
\begin{enumerate}
\item Use the first two columns of data (Height and Weight) in Table \ref{tbl:data} to build various regression models
that attempt to explain weight as a function of height. Which of your
models does the best job of predicting the weights of individuals in
the third column? (There is no need to use the age here.  If you wanted to, there is a way to factor in the age.)

\item Another possibility is to build separate regression models by
gender.  Does this improve the predictions?

\item Now use the full data set to recompute the coefficients for the
type of regression model that worked best in part a. Construct a
confidence interval for the slope based on bootstrapping. (Take into
account the fact that both weight and height should be viewed as
random variables in this setting. In other words, this is not a
setting where the levels of the explanatory variable (in this case,
height) is carefully set by the experimenter at various predetermined
levels; the height values that are observed are determined by the
particular random sample that is selected.)

\item Suppose that the height of a student is 71 inches. Use the
bootstrap to construct a 95\% prediction interval for that student's
weight.
\end{enumerate}
\end{problem}

%Problem 10
\begin{problem}{}
When the arrival process to a single-server queue follows a so-called Poisson process having
rate $\lambda$ (i.e. the inter-arrival times $\chi_1, \chi_2,\ldots$ are i.i.d. exponential random variables having parameter $\lambda > 0$) with $\rho = \lambda \E V_0 < 1$, the steady-state random variable $W_\infty$ corresponding to the time spent waiting in the queue can be represented as
\[W_\infty = \sum_{i=1}^N Z_i\]
where $N$ is a geometric random variable having probability mass function $\p(N = k) = (1 -\rho)\rho^{k-1}$
and $(Z_i\,:\,i\geq1)$ is an i.i.d. sequence of random variables independent of $N$ satisfying
\[\p(Z_1 > x) = \frac{1}{\E V_0}\int_x^\infty \p(V_0>y)dy\]
\begin{enumerate}
	\item  Suppose that $V_0$ is exponential with parameter $\mu > 0$. Compute the distribution of $W_\infty$.
	\item Abandoning the assumption in part (a), write $W_\infty$ as $W_\infty(\lambda)$ (reflecting its dependence on $\lambda$).
Prove that if $\E V_0^2<\infty$, $(1-\rho)W_\infty(\lambda)\Rightarrow \Gamma$ as $\lambda\rightarrow \frac{1}{\E V_0}$ and compute the distribution of $\Gamma$ ($\Gamma$ is not necessarily a $\Gamma$ distributed random variable.  We just needed a letter and it it seemed as good as any other).

{\bf Hint:} Convergence in distribution is equivalent to point-wise convergence of characteristic functions.  Use that fact to show the desired convergence as well as to derive the limiting characteristic function, and thus the distribution of $\Gamma$.

(This is known in the performance engineering literature as the ``heavy traffic'' theorem for
queues.)
\item Suppose that $V_0$ is gamma distributed with shape parameter $\alpha = 2$ and scale parameter 1.
What is an approximation to $P(W_\infty > 2)$ if $\lambda = 0.45$?
\end{enumerate}
\end{problem}




\begin{center}
{\bf Extra Credit Problems}
\end{center}

These extra credit problems do not serve as a replacement for the regular assigned problems.  Rather, they are an ``all or nothing'' shot meaning you must get it entirely correct in order to get credit.  At the end of the quarter, we will total the number you of ECP you have done and award an appropriate bonus.

\setcounter{pbctr}{0} 
%EC Problem 1
\begin{problem}{} 
Assume that $Z$ is generated from r.v.'s $X,Y$ (i.e., $Z= \varphi(X,Y)$ for some function $\varphi$).  Suppose further that the times to generate $X$ and $Y$ are $a$ and $b$, $a\gg b$, respectively.  The simulator judges that only the $Y$ taking values in a certain set $E$ contribute significantly to $z= \E \varphi(X,Y)$ and wants to construct an approach that tries to cut down on unnecessary computational costs.  He therefore does not necessarily generate $Z$ if $Y\notin E$ but rather only if a coin toss comes out with heads; the probability of heads is $p$.

Write up an unbiased estimator for $z$ according to the simulator's rules.
\end{problem}

%EC Problem 2
\begin{problem}{}
\begin{enumerate}
\item Prove that if $Z = \tau_1 + \cdots + \tau_n$,
where the $\tau_i$'s are iid $\mathrm{Exp}(\lambda)$ rvs, then $Z$ has
a $\mathrm{Gamma}(1/\lambda, n)$ distribution.
\label{i:firstPart}
\item Suppose the $1 \leq n < \alpha < n+1$.  Use (a) to
produce and acceptance-rejection algorithm for generating
$\mathrm{Gamma}(\lambda, \alpha)$ rvs.
\end{enumerate}
\end{problem}


%EC Problem 3
\begin{problem}{}
Propose a kernel-based estimator for the density of an
$\mathbb{R}^d-$ valued random variable $X$ based on observing $X_1,
X_2, \hdots, X_n$.  Compute the optimal rate of convergence as a
function of $d$.
\end{problem}

\end{document}