SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
gpdt.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *** GPDT - Gradient Projection Decomposition Technique ***
3  ******************************************************************************
4  *** ***
5  *** GPDT is a C++ software designed to train large-scale Support Vector ***
6  *** Machines for binary classification in both scalar and distributed ***
7  *** memory parallel environments. It uses the Joachims' problem ***
8  *** decomposition technique to split the whole quadratic programming (QP) ***
9  *** problem into a sequence of smaller QP subproblems, each one being ***
10  *** solved by a suitable gradient projection method (GPM). The presently ***
11  *** implemented GPMs are the Generalized Variable Projection Method ***
12  *** GVPM (T. Serafini, G. Zanghirati, L. Zanni, "Gradient Projection ***
13  *** Methods for Quadratic Programs and Applications in Training Support ***
14  *** Vector Machines"; Optim. Meth. Soft. 20, 2005, 353-378) and the ***
15  *** Dai-Fletcher Method DFGPM (Y. Dai and R. Fletcher,"New Algorithms for ***
16  *** Singly Linear Constrained Quadratic Programs Subject to Lower and ***
17  *** Upper Bounds"; Math. Prog. to appear). ***
18  *** ***
19  *** Authors: ***
20  *** Thomas Serafini, Luca Zanni ***
21  *** Dept. of Mathematics, University of Modena and Reggio Emilia - ITALY ***
22  *** serafini.thomas@unimo.it, zanni.luca@unimo.it ***
23  *** Gaetano Zanghirati ***
24  *** Dept. of Mathematics, University of Ferrara - ITALY ***
25  *** g.zanghirati@unife.it ***
26  *** ***
27  *** Software homepage: http://dm.unife.it/gpdt ***
28  *** ***
29  *** This work is supported by the Italian FIRB Projects ***
30  *** 'Statistical Learning: Theory, Algorithms and Applications' ***
31  *** (grant RBAU01877P), http://slipguru.disi.unige.it/ASTA ***
32  *** and ***
33  *** 'Parallel Algorithms and Numerical Nonlinear Optimization' ***
34  *** (grant RBAU01JYPN), http://dm.unife.it/pn2o ***
35  *** ***
36  *** Copyright (C) 2004-2008 by T. Serafini, G. Zanghirati, L. Zanni. ***
37  *** ***
38  *** COPYRIGHT NOTIFICATION ***
39  *** ***
40  *** Permission to copy and modify this software and its documentation ***
41  *** for internal research use is granted, provided that this notice is ***
42  *** retained thereon and on all copies or modifications. The authors and ***
43  *** their respective Universities makes no representations as to the ***
44  *** suitability and operability of this software for any purpose. It is ***
45  *** provided "as is" without express or implied warranty. ***
46  *** Use of this software for commercial purposes is expressly prohibited ***
47  *** without contacting the authors. ***
48  *** ***
49  *** This program is free software; you can redistribute it and/or modify ***
50  *** it under the terms of the GNU General Public License as published by ***
51  *** the Free Software Foundation; either version 3 of the License, or ***
52  *** (at your option) any later version. ***
53  *** ***
54  *** This program is distributed in the hope that it will be useful, ***
55  *** but WITHOUT ANY WARRANTY; without even the implied warranty of ***
56  *** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ***
57  *** GNU General Public License for more details. ***
58  *** ***
59  *** You should have received a copy of the GNU General Public License ***
60  *** along with this program; if not, write to the Free Software ***
61  *** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ***
62  *** ***
63  *** File: gpdt.cpp ***
64  *** Type: scalar ***
65  *** Version: 1.0 ***
66  *** Date: October, 2005 ***
67  *** Revision: 1 ***
68  *** ***
69  *** SHOGUN adaptions Written (W) 2006-2008 Soeren Sonnenburg ***
70  ******************************************************************************/
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <string.h>
74 #include <ctype.h>
75 #include <math.h>
78 
79 using namespace shogun;
80 
81 #ifndef DOXYGEN_SHOULD_SKIP_THIS
82 void fatalError(const char *msg1, const char *msg2);
83 
84 /******************************************************************************/
85 /*** Class constructor ***/
86 /******************************************************************************/
87 QPproblem::QPproblem()
88 : CSGObject()
89 {
90  /*** set problem defaults ***/
91  maxmw = 40;
92  c_const = 10.0;
93  projection_solver = SOLVER_FLETCHER;
94  projection_projector = 1;
95  PreprocessMode = 0;
96  delta = 1.0e-3;
97  DELTAsv = EPS_SV;
98  ker_type = 2;
99  chunk_size = 400;
100  q = -1;
101  y = NULL;
102  tau_proximal = 0.0;
103  dim = 1;
104 }
105 
106 /******************************************************************************/
107 /*** Class destructor ***/
108 /******************************************************************************/
109 QPproblem::~QPproblem()
110 {
111  //if (y != NULL) free(y);
112 }
113 
114 /******************************************************************************/
115 /*** Setter method for the subproblem features ***/
116 /******************************************************************************/
117 void QPproblem::Subproblem(QPproblem &p, int32_t len, int32_t *perm)
118 {
119  int32_t k;
120 
121  memcpy(this, &p, sizeof(QPproblem));
122  ell = len;
123 
124  KER->SetSubproblem(p.KER, len, perm);
125  y = SG_MALLOC(int32_t, len);
126  for (k = 0; k < ell; k++)
127  y[k] = p.y[perm[k]];
128 }
129 
130 namespace shogun
131 {
132 /******************************************************************************/
133 /*** Extract the samples information from an SVMlight-compliant data file ***/
134 /******************************************************************************/
135 int32_t prescan_document(char *file, int32_t *lines, int32_t *vlen, int32_t *ll)
136 {
137  FILE *fl;
138  int32_t ic;
139  char c;
140  int64_t current_length, current_vlen;
141 
142  if ((fl = fopen (file, "r")) == NULL)
143  return(-1);
144  current_length = 0;
145  current_vlen = 0;
146 
147  *ll = 0; /* length of the longest input line (the read buffer should
148  be allocated with this size) */
149  *lines = 1; /* number of lines in the file */
150  *vlen = 0; /* max number of nonzero components in a vector */
151 
152  while ((ic = getc(fl)) != EOF)
153  {
154  c = (char)ic;
155  current_length++;
156 
157  if (c == ' ')
158  current_vlen++;
159 
160  if (c == '\n')
161  {
162  (*lines)++;
163  if (current_length > (*ll))
164  *ll = current_length;
165  if (current_vlen > (*vlen))
166  *vlen = current_vlen;
167  current_length = 0;
168  current_vlen = 0;
169  }
170  }
171  fclose(fl);
172  return(0);
173 }
174 }
175 /******************************************************************************/
176 /*** return 1 if problem is single class, 0 if two-class ***/
177 /******************************************************************************/
178 int32_t QPproblem::Check2Class()
179 {
180  int32_t i;
181 
182  for (i = 1; i < ell; i++)
183  if (y[i] != y[0])
184  return 0;
185  return 1;
186 }
187 
188 namespace shogun
189 {
190 /******************************************************************************/
191 /*** Compute the size of data splitting for preprocessing ***/
192 /******************************************************************************/
193 void SplitParts(
194  int32_t n, int32_t part, int32_t parts, int32_t *dim, int32_t *off)
195 {
196  int32_t r;
197 
198  r = n % parts;
199  *dim = n / parts;
200 
201  if (part < r)
202  {
203  (*dim)++;
204  *off = *dim * part;
205  }
206  else
207  *off = *dim * part + r;
208 }
209 }
210 /******************************************************************************/
211 /*** Kernel class constructor ***/
212 /******************************************************************************/
213 sKernel::sKernel (CKernel* k, int32_t l)
214 {
215  kernel=k;
216  ell=l;
217  nor = NULL;
218  vaux = NULL;
219  lx = NULL;
220  ix = NULL;
221  x = NULL;
222  IsSubproblem = 0;
223  KernelEvaluations = 0.0;
224 }
225 
226 /******************************************************************************/
227 /*** Set the problem data for kernel evaluation ***/
228 /******************************************************************************/
229 void sKernel::SetData(
230  float32_t **x_, int32_t **ix_, int32_t *lx_, int32_t _ell, int32_t _dim)
231 {
232  int32_t i, j, k;
233 
234  dim = _dim;
235  ell = _ell;
236  nor = SG_MALLOC(float64_t, ell);
237  vaux = SG_CALLOC(float32_t, dim);
238 
239  IsSubproblem = 0;
240  x = x_;
241  ix = ix_;
242  lx = lx_;
243 
244  // unroll one (sparse) vector
245  vauxRow = 0;
246  i = vauxRow;
247  for (k = 0; k < lx[i]; k++)
248  vaux[ix[i][k]] = x[i][k];
249 
250  // compute the squared Euclidean norm of each vector
251  for (i = 0; i < ell; i++)
252  {
253  nor[i] = 0.0;
254  for (j = 0; j < lx[i]; j++)
255  nor[i] += (float64_t)(x[i][j]*x[i][j]);
256  }
257 }
258 
259 /******************************************************************************/
260 /*** Set the subproblem data ***/
261 /******************************************************************************/
262 void sKernel::SetSubproblem(sKernel* ker, int32_t len, int32_t *perm)
263 {
264  int32_t k;
265 
266  /* arrays allocations */
267  nor = SG_MALLOC(float64_t, len);
268  vaux = SG_CALLOC(float32_t, ker->dim);
269 
270  lx = SG_MALLOC(int32_t, len);
271  ix = SG_MALLOC(int32_t*, len);
272  x = SG_MALLOC(float32_t*, len);
273  IsSubproblem = 1;
274 
275  for (k = 0; k < len; k++)
276  {
277  x[k] = ker->x[perm[k]];
278  ix[k] = ker->ix[perm[k]];
279  lx[k] = ker->lx[perm[k]];
280  nor[k] = ker->nor[perm[k]];
281  }
282 
283  // unroll one (sparse) vector
284  vauxRow = 0;
285  for (k = 0; k < lx[vauxRow]; k++)
286  vaux[ix[vauxRow][k]] = x[vauxRow][k];
287 }
288 
289 /******************************************************************************/
290 /*** Kernel class destructor ***/
291 /******************************************************************************/
292 sKernel::~sKernel()
293 {
294  int32_t i;
295 
296  SG_FREE(nor);
297  SG_FREE(vaux);
298 
299  SG_FREE(lx);
300  if (ix != NULL)
301  {
302  if (!IsSubproblem)
303  for (i = 0; i < ell; i++)
304  SG_FREE(ix[i]);
305  SG_FREE(ix);
306  }
307  if (x != NULL)
308  {
309  if (!IsSubproblem)
310  for (i = 0; i < ell; i++)
311  SG_FREE(x[i]);
312  SG_FREE(x);
313  }
314 }
315 
316 #endif // DOXYGEN_SHOULD_SKIP_THIS
317 
318 /******************************************************************************/
319 /*** End of gpdt.cpp file ***/
320 /******************************************************************************/

SHOGUN Machine Learning Toolbox - Documentation