SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
LPBoost.cpp
浏览该文件的文档.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2009 Soeren Sonnenburg
8  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/config.h>
12 
13 #ifdef USE_CPLEX
14 
16 #include <shogun/labels/Labels.h>
20 #include <shogun/lib/Signal.h>
21 #include <shogun/lib/Time.h>
22 
23 using namespace shogun;
24 
// Constructor initializer list and body. The signature line (listing line 25)
// is not present in this listing.
// Starting values: C1 = 1, C2 = 1, use_bias = true, epsilon = 1e-3.
26 : CLinearMachine(), C1(1), C2(1), use_bias(true), epsilon(1e-3)
27 {
 // Nothing is allocated yet: null every pointer member and zero the counters.
28  u=NULL;
29  dim=NULL;
30  num_sfeat=0;
31  num_svec=0;
32  sfeat=NULL;
33 }
34 
35 
// Destructor body. The signature line (listing line 36) is not present here;
// the cross-reference section of this listing gives it as `virtual ~CLPBoost()`.
// All teardown is delegated to cleanup().
37 {
38  cleanup();
39 }
40 
// Sets up the per-run working state for num_vec training examples.
//
// - u is allocated with num_vec entries, each set to 1.0/num_vec.
// - dim is created as a CDynamicArray<int32_t> constructed with 100000.
//
// Returns true when sfeat is non-NULL, false otherwise.
//
// NOTE(review): listing line 49 is absent from this listing and no assignment
// to sfeat is visible in this function - presumably the missing line fills
// sfeat (and num_sfeat/num_svec); confirm against the original file.
41 bool CLPBoost::init(int32_t num_vec)
42 {
43  u=SG_MALLOC(float64_t, num_vec);
 // Every entry of u starts at the same value, 1/num_vec.
44  for (int32_t i=0; i<num_vec; i++)
45  u[i]=1.0/num_vec;
46 
 // dim receives one entry per find_max_violator() call (see append_element there).
47  dim=new CDynamicArray<int32_t>(100000);
48 
50 
 // Success is reported purely by whether sfeat is set.
51  if (sfeat)
52  return true;
53  else
54  return false;
55 }
56 
// Body of cleanup(). The signature line (listing line 57) is not present here;
// the cross-reference section of this listing gives it as `void cleanup()`.
// Releases what init() set up and resets each pointer to NULL afterwards.
58 {
59  SG_FREE(u);
60  u=NULL;
61 
 // sfeat is released through clean_tsparse on features, cast to
 // CSparseFeatures<float64_t>, together with the count num_svec.
 // NOTE(review): the cast is unchecked and the call is made even when sfeat
 // is NULL (e.g. when reached from the destructor without a prior init()) -
 // confirm clean_tsparse and `features` tolerate that.
62  ((CSparseFeatures<float64_t>*) features)->clean_tsparse(sfeat, num_svec);
63  sfeat=NULL;
64 
65  delete dim;
66  dim=NULL;
67 }
68 
// Body of find_max_violator(). The signature line (listing line 69) is not
// present here; the cross-reference section of this listing gives it as
// `float64_t find_max_violator(int32_t &max_dim)`.
//
// Scans all num_svec sparse vectors in sfeat. For each vector i it computes
//   s_i = sum_j u[idx_j] * confidence(idx_j) * entry_j
// and considers both +s_i (stored as index i) and -s_i (stored as index
// num_svec+i). The index with the largest value is written to max_dim,
// appended to dim, and the value itself is returned.
70 {
71  float64_t max_val=0;
 // -1 marks "nothing chosen yet".
72  max_dim=-1;
73 
74  for (int32_t i=0; i<num_svec; i++)
75  {
76  float64_t valplus=0;
77  float64_t valminus=0;
78 
79  for (int32_t j=0; j<sfeat[i].num_feat_entries; j++)
80  {
 // feat_index of an entry is used to index both u and the labels.
81  int32_t idx=sfeat[i].features[j].feat_index;
82  float64_t v=u[idx]*((CBinaryLabels*)m_labels)->get_confidence(idx)*sfeat[i].features[j].entry;
 // valminus is always the exact negation of valplus.
83  valplus+=v;
84  valminus-=v;
85  }
86 
 // The max_dim==-1 test makes the first vector the initial candidate even
 // when its valplus does not exceed the starting max_val of 0.
87  if (valplus>max_val || max_dim==-1)
88  {
89  max_dim=i;
90  max_val=valplus;
91  }
92 
 // The negated sum is encoded by offsetting the index by num_svec.
93  if (valminus>max_val)
94  {
95  max_dim=num_svec+i;
96  max_val=valminus;
97  }
98  }
99 
 // Record the chosen (possibly offset) index. When num_svec is 0 the loop
 // never runs and -1 is appended.
100  dim->append_element(max_dim);
101  return max_val;
102 }
103 
// Body of train_machine(). The signature line (listing line 104) is not
// present here; the cross-reference section of this listing gives it as
// `virtual bool train_machine(CFeatures *data=NULL)`.
//
// Visible flow: size and zero w, set up a CCplex LPBoost problem, then loop:
// pick the index returned by find_max_violator(), add it as a constraint and
// re-optimize u. After the loop, a final optimize() fills lambda, which is
// folded into w using the indices recorded in dim. Returns true.
//
// NOTE(review): listing lines 106-107, 127, 147 and 154 are absent from this
// listing, so some statements of the original function are not shown here.
105 {
108  int32_t num_train_labels=m_labels->get_num_labels();
109  int32_t num_feat=features->get_dim_feature_space();
110  int32_t num_vec=features->get_num_vectors();
111 
 // One label per training vector is required.
112  ASSERT(num_vec==num_train_labels)
 // w gets one zero-initialized entry per feature-space dimension.
113  w = SGVector<float64_t>(num_feat);
114  memset(w.vector,0,sizeof(float64_t)*num_feat);
115 
116  CCplex solver;
117  solver.init(E_LINEAR);
118  SG_PRINT("setting up lpboost\n")
119  solver.setup_lpboost(C1, num_vec);
120  SG_PRINT("finished setting up lpboost\n")
121 
 // NOTE(review): init() is declared to return bool (see its definition in
 // this listing) but the result is stored in a float64_t.
122  float64_t result=init(num_vec);
123  ASSERT(result)
124 
 // Counts the constraints added so far; also sizes lambda after the loop.
125  int32_t num_hypothesis=0;
 // NOTE(review): `time` is not used in any line visible in this listing.
126  CTime time;
128 
 // Iterate until convergence or until cancellation is signalled.
129  while (!(CSignal::cancel_computations()))
130  {
131  int32_t max_dim=0;
132  float64_t violator=find_max_violator(max_dim);
133  SG_PRINT("iteration:%06d violator: %10.17f (>1.0) chosen: %d\n", num_hypothesis, violator, max_dim)
 // Converged when the best value is within epsilon of 1.0; the
 // num_hypothesis>1 test keeps the loop going until at least two
 // constraints have been added.
134  if (violator <= 1.0+epsilon && num_hypothesis>1) //no constraint violated
135  {
136  SG_PRINT("converged after %d iterations!\n", num_hypothesis)
137  break;
138  }
139 
 // Indices >= num_svec encode the negated sum (see find_max_violator):
 // map back to the real vector index and flip the sign factor.
140  float64_t factor=+1.0;
141  if (max_dim>=num_svec)
142  {
143  factor=-1.0;
144  max_dim-=num_svec;
145  }
146 
 // NOTE(review): `h` has no declaration visible in this listing (listing
 // line 147 is absent) - presumably it refers to sfeat[max_dim].features;
 // confirm against the original file.
148  int32_t len=sfeat[max_dim].num_feat_entries;
149  solver.add_lpboost_constraint(factor, h, len, num_vec, m_labels);
 // u is passed as the solution argument of optimize().
150  solver.optimize(u);
151  //CMath::display_vector(u, num_vec, "u");
152  num_hypothesis++;
153 
 // NOTE(review): as shown, this break is unconditional and would end the
 // loop after one iteration. Listing line 154 is absent - presumably a guard
 // (the cross-reference section lists get_max_train_time and cur_time_diff);
 // confirm against the original file.
155  break;
156  }
 // NOTE(review): no SG_FREE(lambda) is visible before the return below -
 // looks like a leak; confirm.
157  float64_t* lambda=SG_MALLOC(float64_t, num_hypothesis);
158  solver.optimize(u, lambda);
159 
160  //CMath::display_vector(lambda, num_hypothesis, "lambda");
 // Fold lambda into w: indices >= num_svec are added at d-num_svec,
 // all others are subtracted at d.
161  for (int32_t i=0; i<num_hypothesis; i++)
162  {
163  int32_t d=dim->get_element(i);
164  if (d>=num_svec)
165  w[d-num_svec]+=lambda[i];
166  else
167  w[d]-=lambda[i];
168 
169  }
170  //solver.write_problem("problem.lp");
171  solver.cleanup();
172 
 // Releases u, sfeat and dim (CLPBoost::cleanup, not the solver's).
173  cleanup();
174 
175  return true;
176 }
177 #endif
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:47
Class CCplex to encapsulate access to the commercial cplex general purpose optimizer.
Definition: Cplex.h:42
bool init(E_PROB_TYPE t, int32_t timeout=60)
init cplex with problem type t and retry timeout 60 seconds
Definition: Cplex.cpp:33
virtual int32_t get_num_labels() const =0
bool optimize(float64_t *sol, float64_t *lambda=NULL)
Definition: Cplex.cpp:601
virtual int32_t get_num_vectors() const =0
CLabels * m_labels
Definition: Machine.h:361
int32_t num_sfeat
Definition: LPBoost.h:118
virtual int32_t get_dim_feature_space() const =0
bool setup_lpboost(float64_t C, int32_t num_cols)
Definition: Cplex.cpp:261
static const float64_t epsilon
Definition: libbmrm.cpp:25
float64_t cur_time_diff(bool verbose=false)
Definition: Time.cpp:68
virtual ~CLPBoost()
Definition: LPBoost.cpp:36
#define SG_PRINT(...)
Definition: SGIO.h:137
#define ASSERT(x)
Definition: SGIO.h:201
int32_t num_svec
Definition: LPBoost.h:119
bool add_lpboost_constraint(float64_t factor, SGSparseVectorEntry< float64_t > *h, int32_t len, int32_t ulen, CBinaryLabels *label)
Definition: Cplex.cpp:292
static void clear_cancel()
Definition: Signal.cpp:129
SGSparseVector< float64_t > * sfeat
Definition: LPBoost.h:120
double float64_t
Definition: common.h:50
bool cleanup()
Definition: Cplex.cpp:469
float64_t get_max_train_time()
Definition: Machine.cpp:87
SGVector< float64_t > w
Class LinearMachine is a generic interface for all kinds of linear machines like classifiers.
Definition: LinearMachine.h:63
static bool cancel_computations()
Definition: Signal.h:86
SGSparseVectorEntry< T > * features
CDynamicArray< int32_t > * dim
Definition: LPBoost.h:116
bool init(int32_t num_vec)
Definition: LPBoost.cpp:41
float64_t find_max_violator(int32_t &max_dim)
Definition: LPBoost.cpp:69
CDotFeatures * features
all classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
void cleanup()
Definition: LPBoost.cpp:57
const T & get_element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const
Definition: DynamicArray.h:212
float64_t * u
Definition: LPBoost.h:115
virtual bool train_machine(CFeatures *data=NULL)
Definition: LPBoost.cpp:104
float64_t C1
Definition: LPBoost.h:110

SHOGUN 机器学习工具包 - 项目文档