src_c/l1_logreg.c File Reference

Main source file for l_1-regularized logistic regression problem solver. More...

#include <time.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include "def.h"
#include "pcg.h"
#include "util.h"
#include "dmatrix.h"
#include "l1_logreg.h"

Data Structures

struct  problem_data_t
struct  variables_t
struct  adata_t
struct  mdata_t

Defines

#define INTERNAL_PLOT   0
#define MU   2
#define MAX_NT_ITER   400
#define ABSTOL   1e-8
#define ZABSTOL   1e-12
#define MAX_NT_ITERZ   10
#define MAX_LS_ITERZ   10
#define ALPHA   0.01
#define BETA   0.5
#define MAX_LS_ITER   100
#define MAX_PCG_ITER   5000
#define PRINT_BUF_SIZE   256
#define LOG_INF   700

Typedefs

typedef void(* p2func_progress )(char *, int, double, double, double, double, double, int, double, int)

Functions

void set_problem_data (problem_data_t *pdat, dmatrix *matX1, dmatrix *matX2, double *ac, double *ar, double *b, double lambda, double *avg_x, double *std_x)
void get_problem_data (problem_data_t *pdat, dmatrix **matX1, dmatrix **matX2, double **ac, double **ar, double **b, double *lambda)
void free_problem_data (problem_data_t *pdat)
void create_variables (variables_t *vars, int m, int n)
void free_variables (variables_t *vars)
void get_variables (variables_t *vars, double **x, double **v, double **w, double **u, double **dx, double **dv, double **dw, double **du, double **gx, double **gv, double **gw, double **gu, double **g, double **h, double **z, double **expz, double **expmz, double **d1, double **d2, double **Aw)
void allocate_temporaries (int m, int n, int is_pcg, double **tm1, double **tn1, double **tn2, double **tn3, double **tn4, double **tx1, double **precond, dmatrix **B, dmatrix **BB)
void free_temporaries (double *tm1, double *tn1, double *tn2, double *tn3, double *tn4, double *tx1, double *precond, dmatrix *B, dmatrix *BB)
void set_adata (adata_t *ad, dmatrix *dmat, double *ac, double *ar, double *b, double *h, double *d1, double *d2)
void set_mdata (mdata_t *md, int m, int n, double *precond)
double logistic_loss (const int n, const double *z)
 Computes the logistic loss of vector z, i.e., $ \sum_{i=1}^n \log(1 + \exp(-z_i)). $.
double logistic_loss2 (const int n, const double *z, const double *expz, const double *expmz)
 Computes the logistic loss using precomputed exp(z), exp(-z), i.e., $ \sum_{i=1}^n \log(1 + \exp(-z_i)). $.
double nentropy (const int n, const double *z)
 Computes the sum of negative entropy of an n-vector z times n, i.e., $ -\sum_{i=1}^n nz_i\log(nz_i)+(1-nz_i)\log(1-nz_i) $.
void fprimes (const int n, const double *expz, const double *expmz, double *f1prime, double *f2prime)
 Computes the first and second derivatives of the logistic loss $ f(z) = \log(1+\exp(-z)) $: $ f' = -1/(1+\exp(+z)) $, $ f'' = 1/(2+\exp(-z)+\exp(+z)) $.
void gradient_hessian_over_v (const int n, const double *expz, const double *expmz, const double *b, double *gradient, double *hessian)
 Computes the derivatives of the logistic loss over v.
double eval_phi (const int m, const int n, const double *z, const double *expz, const double *expmz, const double *w, const double *u, const double lambda, const double t)
 Evaluates the phi function.
void afun (double *y, const double *x, const void *adata)
void mfun (double *y, const double *x, const void *mdata)
int is_indomain (double *w, double *u, int n)
void optimize_intercept (double *v, double *z, double *expz, double *expmz, double *z2, const double *b, const double *Aw, const int m)
 Optimize intercept v.
void compute_searchdir_pcg (problem_data_t *pdat, variables_t *vars, double t, double s, double gap, pcg_status_t *pcgstat, adata_t *adata, mdata_t *mdata, double *precond, double *tmp_m1, double *A2h, double *tmp_x1)
 Compute search direction using pcg method.
void compute_searchdir_chol_fat (problem_data_t *pdat, variables_t *vars, double t, dmatrix *B, dmatrix *BB, double *tm1, double *bDA, double *d3inv, double *tmp31, double *tmp32)
 Compute search direction using Cholesky method (m < n).
void compute_searchdir_chol_thin (problem_data_t *pdat, variables_t *vars, double t, dmatrix *B, dmatrix *BB, double *tm1, double *bDA, double *d3)
 Compute search direction using Cholesky method (m > n).
double backtracking_linesearch (problem_data_t *pdat, variables_t *vars, double t, double *Adw, double *xnew)
 Perform backtracking linesearch.
double backtracking_linesearch_deprecated (problem_data_t *pdat, variables_t *vars, double t, double *Adw, double *xnew)
void progress_pcg_v3 (char *form, int in0, double in1, double in2, double in3, double in4, double in5, int in6, double in7, int in8)
void progress_pcg_v2 (char *form, int in0, double in1, double in2, double in3, double in4, double in5, int in6, double in7, int in8)
void progress_pcg_v0 (char *form, int in0, double in1, double in2, double in3, double in4, double in5, int in6, double in7, int in8)
void progress_dir_v3 (char *form, int in0, double in1, double in2, double in3, double in4, double in5, int in6, double in7, int in8)
void progress_dir_v2 (char *form, int in0, double in1, double in2, double in3, double in4, double in5, int in6, double in7, int in8)
void progress_dir_v0 (char *form, int in0, double in1, double in2, double in3, double in4, double in5, int in6, double in7, int in8)
void init_progress (const int is_pcg, const int verbose_level, char *format_buf, p2func_progress *print_progress)
int l1_logreg_train (dmatrix *X, double *b, double lambda, train_opts to, double *initial_x, double *initial_t, double *sol, int *total_ntiter, int *total_pcgiter)
 Train (learn a model).
int l1_logreg_classify (dmatrix *X, double *b, double *sol, int pflag, double *result, int *error_count)
 Classify data.

Variables

char * prt []
char * prn []


Detailed Description

Main source file for l_1-regularized logistic regression problem solver.


Function Documentation

double backtracking_linesearch ( problem_data_t *  pdat,
variables_t *  vars,
double  t,
double *  Adw,
double *  xnew 
)

Perform backtracking linesearch.

References dmat_dot(), dmat_vcopy(), dmat_waxpby(), dmat_yAmpqx(), dmat_yexpx(), dmat_yinvx(), eval_phi(), dmatrix::m, and dmatrix::n.

Referenced by l1_logreg_train().

void compute_searchdir_chol_fat ( problem_data_t *  pdat,
variables_t *  vars,
double  t,
dmatrix *  B,
dmatrix *  BB,
double *  tm1,
double *  bDA,
double *  d3inv,
double *  tmp31,
double *  tmp32 
)

void compute_searchdir_chol_thin ( problem_data_t *  pdat,
variables_t *  vars,
double  t,
dmatrix *  B,
dmatrix *  BB,
double *  tm1,
double *  bDA,
double *  d3 
)

Compute search direction using Cholesky method (m > n).

References dmat_A_axxTpA(), dmat_B_ATA(), dmat_copy(), dmat_diagadd(), dmat_diagscale(), dmat_dot(), dmat_posv(), dmat_yATx(), dmat_ysqrtx(), dmatrix::m, and dmatrix::n.

Referenced by l1_logreg_train().

void compute_searchdir_pcg ( problem_data_t *  pdat,
variables_t *  vars,
double  t,
double  s,
double  gap,
pcg_status_t *  pcgstat,
adata_t *  adata,
mdata_t *  mdata,
double *  precond,
double *  tmp_m1,
double *  A2h,
double *  tmp_x1 
)

Compute search direction using pcg method.

References dmat_elemprod(), dmat_norm2(), dmat_vset(), dmat_waxpby(), dmat_yAmpqTx(), dmat_yATx(), dmatrix::m, dmatrix::n, dmatrix::nz, and pcg().

Referenced by l1_logreg_train().

int l1_logreg_classify ( dmatrix *  X,
double *  b,
double *  sol,
int  pflag,
double *  result,
int *  error_count 
)

Classify data.

$ \mbox{sign}\left( (X - 1\mu^T)\mbox{diag}(\sigma)^{-1}w +1v \right) = \mbox{sign}\left( X\tilde{w} + 1(v-\mu^T\tilde{w}) \right) $, where $ \tilde{w} = w./\sigma $.

Parameters:
X test data (feature matrix)
b test data (class vector)
sol model data (coefficients and intercept)
  • sol[0] : intercept
  • sol[1..(n-1)] : coefficients -- $ w./\sigma $ when standardized -- $ w $ when not standardized
result test result vector
error_count number of test errors

References dmat_yAx(), dmatrix::m, and dmatrix::n.

Referenced by main().

int l1_logreg_train ( dmatrix *  X,
double *  b,
double  lambda,
train_opts  to,
double *  initial_x,
double *  initial_t,
double *  sol,
int *  total_ntiter,
int *  total_pcgiter 
)

Train (learn a model).

Parameters:
X train data (feature matrix)
b train data (class vector)
lambda regularization parameter
sflag standardization flag
cflag coefficients flag
verbose_level verbose level
tolerance duality gap (absolute) tolerance
initial_x initial primal point (v0, w0, u0)
initial_t initial barrier parameter
sol model data (coefficients and intercept)
  • sol[0] : intercept
  • sol[1..(n-1)] : coefficients -- $ w./\sigma $ when standardized -- $ w $ when not standardized
total_ntiter number of newton iterations
total_pcgiter number of pcg iterations

References backtracking_linesearch(), compute_searchdir_chol_fat(), compute_searchdir_chol_thin(), compute_searchdir_pcg(), dmat_copy(), dmat_diagscale(), dmat_dot(), dmat_duplicate(), dmat_elemAA(), dmat_elemdivi(), dmat_new_dense(), dmat_norm1(), dmat_norminf(), dmat_vcopy(), dmat_vset(), dmat_waxpby(), dmat_yAmpqTx(), dmat_yAmpqx(), pcg_status_t::flag, fprimes(), init_pcg_status(), pcg_status_t::iter, logistic_loss2(), dmatrix::m, dmatrix::n, nentropy(), dmatrix::nz, optimize_intercept(), pcg_status_t::relres, standardize_data(), and dmatrix::val.

void optimize_intercept ( double *  v,
double *  z,
double *  expz,
double *  expmz,
double *  z2,
const double *  b,
const double *  Aw,
const int  m 
)


Variable Documentation

char* prn[]

Initial value:

 {
    "%10d    ",
    "%14.4e",
    "%14.4e",
    "%14.4e",
    "%14.4e",
    "%14.4e",
    "%10d    ",
    "%14.4e",
    "%10d    ",
}

char* prt[]

Initial value:

 {
    "    NT iter   ",
    "       gap    ",
    "    primal obj",
    "     dual obj ",
    "     step size",
    "     t value  ",
    "   pcg flag   ",
    "    pcg relres",
    "   pcg iter   ",
}


Generated on Mon May 25 19:15:19 2009 for l1_logreg by Doxygen 1.5.5