%% LyX 1.5.5 created this file.  For more info, see http://www.lyx.org/.
%% Do not edit unless you really know what you are doing.
\documentclass[english,twoside]{article}
\usepackage[T1]{fontenc}
\usepackage[latin9]{inputenc}

\makeatletter
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.

\usepackage{mathrsfs}


\oddsidemargin  0in \evensidemargin 0in \topmargin -0.5in
\headheight 0.2in \headsep 0.2in
\textwidth   6.5in \textheight 9in
\parskip 1.5ex  \parindent 0ex \footskip 40pt

%\newcommand{\Nystrom}{Nystr$\ddot{o}$m  }

\newtheorem{theorem}{Theorem}
\newtheorem{definition}{Definition}
\newtheorem{claim}{Claim}
\newtheorem{conjecture}{Conjecture}
\newtheorem{observation}{Observation}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{example}{Example}
\newtheorem{remark}{Remark}
\newtheorem{proof}{Proof}

\makeatother

\usepackage{babel}

\begin{document}
\framebox[6.4in]{ %
\begin{minipage}[c][1\totalheight]{6.4in}%
 \vspace{1mm}
 \center \makebox[6.2in]{\textbf{CS369M: Algorithms for Modern
Massive Data Set Analysis \hfill{}Lecture 18 -- 12/02/2009}} \vspace{2mm}
 \\
 \center \makebox[6.2in]{{\Large Data-motivated Matrix Factorizations
(2 of 2)}} \vspace{1mm}
 \\
 \center \makebox[6.2in]{\textit{Lecturer: Michael Mahoney \hfill{}Scribe:
Bahman Bahmani}} \vspace{1mm}
 %
\end{minipage}} \vspace{2mm}
 \\
 \mbox{{ \textit{{*}Unedited Notes}}}


\section{Rank Minimization }

The general rank minimization problem, which arises in a wide range
of applications, is as follows:

\begin{eqnarray}
\textrm{min.~~}\mathrm{rank}(X)\\
\textrm{s.t.~~}X\in\mathcal{C}\end{eqnarray}


where $\mathcal{C}$ is a convex subset of $R^{m\times n}$. Since
this problem is generally hard to solve, we replace it with the following
intuitively sound optimization:

\begin{eqnarray}
\textrm{min.~~}||X||_{\star}\\
\textrm{s.t.~~}X\in\mathcal{C}\end{eqnarray}


where $||X||_{\star}=\sum_{i}\sigma_{i}(X)$ is the sum of the singular
values of $X$, known as the nuclear norm (or trace norm) of $X$.

Even though the original rank-minimization problem is non-convex, the
above heuristic optimization is indeed convex. Also, we have the following
theorem, which shows this is actually a good convex formulation:

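\begin{theorem}$||X||_{\star}$ is the convex envelope of $\mathrm{rank}(X)$
on $\{X\in R^{m\times n}|\;||X||\leq1\}$, where $||X||=\sigma_{1}(X)$
denotes the spectral norm (the largest singular value) of $X$.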

\end{theorem}

The proof of this theorem can be found in \cite{key-1}.

As mentioned, the heuristic formulation is a convex problem, and hence
can be solved in general. We now show that in the special case where
$\mathcal{C}$ is described by linear constraints, we can turn this problem
into an SDP. The problem is equivalent to:

\begin{eqnarray}
\textrm{min.~~}t\\
\textrm{s.t.~~}||X||_{\star}\leq t\\
X\in\mathcal{C}\end{eqnarray}


But, we have the following lemma:

\begin{lemma}\label{lem:norm_lin}For $X\in R^{m\times n}$ and $t\in R$,
$||X||_{\star}\leq t$ iff there exist symmetric matrices $Y\in R^{m\times m}$
and $Z\in R^{n\times n}$ such that the following block matrix is positive
semidefinite (denoted $\succeq0$) and the trace bound holds:

\[
\left[\begin{array}{cc}
Y & X\\
X^{T} & Z\end{array}\right]\succeq0,\qquad \mathrm{tr}(Y)+\mathrm{tr}(Z)\leq2t.\]


\end{lemma}

Hence, the last optimization is equivalent to:

\begin{eqnarray}
\textrm{min.~~}\frac{1}{2}(\mathrm{tr}(Y)+\mathrm{tr}(Z))\\
\textrm{s.t.~~}\left[\begin{array}{cc}
Y & X\\
X^{T} & Z\end{array}\right]\succeq0\\
X\in\mathcal{C}\end{eqnarray}


which is an SDP (if $\mathcal{C}$ is described by linear constraints)
and hence can be solved efficiently using any SDP solver.


\section{Maximum Margin Matrix Factorization}

Assume we have a matrix $Y\in\{\pm1\}^{n\times m}$, of which only a subset $S$
of the entries has been observed (we denote these by $Y_{S}$). We would
like to approximate the rest of the entries. To do so, we can find
an approximation $X$ of $Y$ using an optimization over the observed
entries. One way to do so is to find a low-rank approximation $X$.
Notice that $\mathrm{rank}(X)\leq k$ iff $X$ can be written as $UV^{T}$
where $U\in R^{n\times k}$ and $V\in R^{m\times k}$. Hence, looking
for a low-rank $X$ corresponds to seeking a low-dimensional factorization.

Another approach is to look for a small-norm factorization (through
a penalty term), where the norm of the factorization is measured by $||U||_{Fro}^{2}+||V||_{Fro}^{2}$.
We have the following lemma~\cite{key-2}:

\begin{lemma}For any matrix $X$,\[
\min_{X=UV^{T}}\frac{1}{2}(||U||_{Fro}^{2}+||V||_{Fro}^{2})=\min_{X=UV^{T}}||U||_{Fro}||V||_{Fro}=||X||_{\star}.\]


\end{lemma}

Hence, using the above approach and the above lemma, we can formulate
two optimization variants:

\begin{enumerate}
\item Hard-margin matrix factorization \begin{eqnarray}
\textrm{min.~~}||X||_{\star}\\
\textrm{s.t.~~}Y_{ia}X_{ia}\geq1 & \forall\: ia\in S\end{eqnarray}

\item Soft-margin matrix factorization\[
\textrm{min.~~}||X||_{\star}+c\sum_{ia\in S}\max(0,1-Y_{ia}X_{ia})\]

\end{enumerate}
Now, using Lemma~\ref{lem:norm_lin}, we can write the soft-margin
optimization as follows:

\begin{eqnarray}
\textrm{min.~~}\frac{1}{2}(\mathrm{tr}(A)+\mathrm{tr}(B))+c\sum_{ia\in S}\xi_{ia}\\
\textrm{s.t.~~}\left[\begin{array}{cc}
A & X\\
X^{T} & B\end{array}\right]\succeq0\\
Y_{ia}X_{ia}\geq1-\xi_{ia} & \forall\; ia\in S\\
\xi_{ia}\geq0 & \forall\; ia\in S\end{eqnarray}


The hard-margin optimization can also be written similarly (with slack
variables equal to zero). This is an SDP and hence can be solved efficiently.

\begin{thebibliography}{1}
\bibitem{key-1}M. Fazel, H. Hindi, and S. Boyd, ``A Rank Minimization
Heuristic with Application to Minimum Order System Approximation,''
Proceedings of the American Control Conference, 2001.

\bibitem{key-2}N. Srebro, J. Rennie, and T. Jaakkola, ``Maximum-Margin
Matrix Factorization,'' Advances in Neural Information Processing Systems (NIPS), 2004.
\end{thebibliography}

\end{document}
