\documentclass[twoside]{article}

\usepackage{amssymb, amsmath,amsthm}
\usepackage{enumerate}
\usepackage{mathrsfs}

\newtheorem{thm}{Theorem}

\oddsidemargin  0in \evensidemargin 0in \topmargin -0.5in
\headheight 0.2in \headsep 0.2in
\textwidth   6.5in \textheight 9in 
\parskip 1.5ex  \parindent 0ex \footskip 40pt


\begin{document}

\framebox[6.4in]{
\begin{minipage}{6.4in}
  \vspace{1mm}
  \center \makebox[6.2in]{{\bf CS369M: Algorithms for Modern Massive Data Set Analysis \hfill Lecture 14 - 11/09/2009}} 
  \vspace{2mm} \\
  \center \makebox[6.2in]{{\Large Flow-based Methods for Clustering and Partitioning Graphs and Data }} 
  \vspace{1mm} \\
  \center \makebox[6.2in]{{\it Lecturer: Michael Mahoney \hfill Scribes: Jacob Bien and Ya Xu}}
  \vspace{1mm}
\end{minipage}
} \vspace{2mm} \\
\mbox{{ \it *Unedited notes}}

\section{Spectral Methods}
\begin{enumerate}
\item Find an approximation to best cut in $G$
\item Time it takes to compute the Fiedler vector ``exactly'' or ``approximately''.
\end{enumerate}
\begin{itemize}
\item If the graph is really large, can we find an approximation to the best
cut nearby or for a given size? We would like to inherit some of the
provably good properties (theorems) or some of the robustness
properties of the global methods:
\begin{enumerate}[(1)]
\item do what we did with Cheeger's inequality
\item with a vector that's good locally.
\end{enumerate}
\item Two senses in which you might be local:
\begin{enumerate}[(1)]
\item find a good cluster near you
\item do all computations locally, i.e. depend on size of set/cut returned
\end{enumerate}
\item How to get a vector that is good locally:
\begin{enumerate}[(1)]
\item Truncate: random walks from localized start node
\item Approximate: PageRank computation with local seed vector
\item heat kernels
\end{enumerate}
\end{itemize}

Recall Cheeger's inequality:
\begin{thm}
  \begin{align*}
    2h_G\geq \lambda_G\geq \frac{\alpha_G^2}{2}\geq \frac{h_G^2}{2}
  \end{align*}
where $\alpha_G$ is the conductance of the best set along the sweep cut.
\end{thm}

\begin{paragraph}{Fact:}
  There is a strong relationship between $h_G$ (or $\phi_G$) and the rate of
  convergence of a random walk.
\end{paragraph}

Two directions:
\begin{enumerate}[(1)]
\item Let $S$ be the best cut. $S$ is the set of nodes such that
  $\phi_S=\min_{S'\subset G}\phi_{S'}$
\item The probability that the random walk will go to a vertex in
  $\bar{S}$ is $\phi_S$. It needs to run $\sim\frac{1}{4\phi_S}$ steps
  to get $1/4$ mass out of $S$
\end{enumerate}

Partial Converse: (proof can be found in Chung's ``Four proofs\dots'' paper)
\begin{enumerate}[(1)]
\item If $\phi_S$ is big then every random walk converges ``fast''.
\item If the random walk does not converge fast, then by looking at
  probability distribution, you can get a good cut.
\end{enumerate}

\begin{thm}
  Let $W$ be the lazy random walk matrix, then
  \begin{align*}
    |W^{t}(u,S)-\pi(S)|\leq \sqrt{\frac{vol(S)}{d_u}}\left(1-\beta_t^2/8\right)^t
  \end{align*}
where $\beta_t$ is the conductance value found in the best sweep cut
found in first $t$ steps. 
\end{thm}

\begin{thm}[``Cheeger-like'']
  \begin{align*}
    2h_G\geq \lambda_G\geq \frac{\beta_G^2}{8}\geq \frac{h_G^2}{8}
  \end{align*}
where $\beta_G$ is the minimum Cheeger ratio.
\end{thm}

Notes: this is algorithmic time - time to compute $p_0$, $p_1$,
$\dots$, $p_t=W^tp_0$. Truncated random walk: if $(p_t)_i\leq\xi$, set
$(p_t)_i=0$.

\begin{paragraph}{PageRank}
  PageRank is a way to order the vertices of a large graph. Recall the $W$
matrix. Then with probability $\alpha$, the random walk jumps to a new
node on $G$, and with probability $1-\alpha$ it follows $W$:
\[p=\alpha\left(\frac{1}{n},\dots,\frac{1}{n}\right)+(1-\alpha)Wp\]
\end{paragraph}
\begin{paragraph}{Personalized PageRank}
Say we are at a starting node $s$. Let $v=\chi_s$ be the teleporting
vector. Then $p=\alpha\chi_s+(1-\alpha)Wp$, which gives $p=\alpha\sum_{t=0}^\infty(1-\alpha)^tW^t\chi_s$.
\end{paragraph}

Recall, $\partial(S)=\{(u,v), u\in S, v\notin S\}$ is the edge boundary
and $\delta(S)=\{v, v\notin S, (u,v)\in E, u\in S\}$ is the vertex
boundary and $f:V\to\mathbb{R}$ satisfies the Dirichlet boundary
conditions if $f(v)=0~\forall v\in\delta(S)$. 
\begin{paragraph}{Point:}
  The Laplacian on $G$ also acts on functions on $G$ satisfying the Dirichlet
  boundary condition, where it is the same as the Laplacian restricted to $S$.
\end{paragraph}
\begin{paragraph}{Definition}
$$h_S=\min_{T\subseteq S}h(T)$$
is the local expansion coefficient.
\end{paragraph}
\begin{thm}
  Using the personalized PageRank vector,
  $$h_S\geq\lambda_S\geq\frac{\gamma_S}{8\log(\cdots)}$$
where $\gamma_S$ is the best sweep cut value.
\end{thm}
\begin{paragraph}{Point}
  Much of the machinery underlying global spectral methods can be made
  local
  \begin{itemize}
  \item global computation, local cut
  \item algorithm running time local
  \end{itemize}
\end{paragraph}

\section{Flow based graph partitioning}
\begin{itemize}
\item using network flow ideas to reveal bottlenecks in graph.
\item $G=(V,E)$; $s$ is the source, $t$ is the sink.
\item \textbf{Goal:} route as much flow as possible.
\item max flow = min cut (duality) 
\end{itemize}
\begin{paragraph}{Def}
  \emph{Multicommodity flow problem:} Given $k\ge1$,
  $(s_i,t_i,D_i)$, goal is to simultaneously route $D_i$ units of flow
  from $s_i$ to $t_i$ for all $i$, while respecting capacity constraints.
\end{paragraph}
\begin{itemize}
\item Max throughput flow: max amount of flow summed over all
  commodities.
\item Max concurrent flow: max fraction of demand $D_i$ that can be routed by
  flow\dots
  \begin{equation*}
    \max f \text{~s.t.~}fD_i \text{~units of flow go from $s_i$ to $t_i$.}  
  \end{equation*}
\item 
  \begin{equation*}
    \text{min cut} = \rho = \min_{U\subseteq V}\frac{C(U,\bar U)}{D(U,\bar U)}
  \end{equation*}
  where
  \begin{align*}
    C(U,\bar U)&=\sum_{e\in (U,\bar U)}C(e)\\
    D(U,\bar U)&=\sum_{\substack{i:s_i\in U\\t_i\in \bar U \text{ or v.v.}}}D_i
  \end{align*}
\item UMFP: all demands are uniform $\to$ expansion
\item PMFP: $\pi:V\to \mathbb{R}^+$.  Demands are $\pi(v_i)\pi(v_j)$. E.g.\ if
  $\pi(v)=\deg(v)\to$ conductance.
\end{itemize}
\begin{paragraph}{Fact 1}
  max-flow/min-cut gap $\le O(\log k)$ (comes from metric embedding)
\end{paragraph}
\begin{paragraph}{Fact 2}
If certain conditions are satisfied, then gap=0.  Look at dual
polytope.\\
Optimal solution -- integral or not.
\end{paragraph}
\begin{paragraph}{Fact 3}
  Worst case (over input graph) gap $\Omega(\log k)$.
  \begin{proof}
    on expanders.  Structure of proof like that seen earlier.
  \end{proof}
\end{paragraph}

\subsection{Algorithmic Applications}
\label{sec:algor-appl}
UMFP:  $D(U,\bar U) = |U||\bar U|$.
  \begin{align*}
    \text{min cut:}\quad\rho &= \min_{U\subseteq V}\frac{C(U,\bar
      U)}{|U||\bar U|}\\
    &=\min_{U\subseteq V}\frac{E(U,\bar U)}{|U||\bar U|} \quad\text{if
    all capacities $=1$.}
  \end{align*}
  So sparsest cut $\sim$ best expansion.
  \begin{itemize}
  \item ``poly-time'' -- can solve ``balanced'' cut problem and use it
    for divide and conquer.
  \item best running time $O(n^2)$
  \end{itemize}
  \begin{paragraph}{Aside}
    A local improvement algorithm:
    \begin{itemize}
    \item Goal: Given a partition, find a strictly better partition.
    \item METIS -- post process with a flow based improvement
      heuristic.
    \item Vanilla spectral: post process with improvement method.
    \item Local improvement at one step online iterative algorithm.
    \end{itemize}
  \end{paragraph}
  \textbf{Theorem:~}$A\subseteq V$ s.t. $\pi(A)\le \pi(\bar
  A)$. $S=Improve(A)$ [partition flow algorithm].
  \begin{enumerate}
  \item if $C\subseteq A$, then $Q(S)\le Q(C)$ [where $Q(S) = |\partial S|/vol(S)$]
  \item if $C$ is such that 
    \begin{align*}
      \frac{\pi(A\cap
        C)}{\pi(C)}\ge\frac{\pi(A)}{\pi(V)}+\epsilon\frac{\pi(\bar A)}{\pi(V)},
    \end{align*}
    i.e. $C$ is $\epsilon$ more correlated with $A$ than random,\\
    then $Q(S)\le Q(C)/\epsilon$ i.e. bound on nearby cuts.
  \end{enumerate}
  \begin{itemize}
  \item Spectral: relaxation to vector space $O(\log n)$, graph
    partition.
  \item Flow: relaxation to $l_1$ (that's an LP) $O(\log n)$, graph
    partitioning algorithm.
  \end{itemize}

\end{document}
