SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LDA.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
12 
13 #ifdef HAVE_LAPACK
14 #include <shogun/machine/Machine.h>
16 #include <shogun/classifier/LDA.h>
17 #include <shogun/labels/Labels.h>
21 
22 using namespace shogun;
23 
25 : CLinearMachine(), m_gamma(gamma)
26 {
27 }
28 
30 : CLinearMachine(), m_gamma(gamma)
31 {
32  set_features(traindat);
33  set_labels(trainlab);
34 }
35 
36 
38 {
39 }
40 
42 {
44  if (data)
45  {
46  if (!data->has_property(FP_DOT))
47  SG_ERROR("Specified features are not of type CDotFeatures\n");
48  set_features((CDotFeatures*) data);
49  }
51  SGVector<int32_t> train_labels=((CBinaryLabels*) m_labels)->get_int_labels();
52  ASSERT(train_labels.vector);
53 
54  int32_t num_feat=features->get_dim_feature_space();
55  int32_t num_vec=features->get_num_vectors();
56  ASSERT(num_vec==train_labels.vlen);
57 
58  int32_t* classidx_neg=SG_MALLOC(int32_t, num_vec);
59  int32_t* classidx_pos=SG_MALLOC(int32_t, num_vec);
60 
61  int32_t i=0;
62  int32_t j=0;
63  int32_t num_neg=0;
64  int32_t num_pos=0;
65  for (i=0; i<train_labels.vlen; i++)
66  {
67  if (train_labels.vector[i]==-1)
68  classidx_neg[num_neg++]=i;
69  else if (train_labels.vector[i]==+1)
70  classidx_pos[num_pos++]=i;
71  else
72  {
73  SG_ERROR( "found label != +/- 1 bailing...");
74  return false;
75  }
76  }
77 
78  if (num_neg<=0 || num_pos<=0)
79  {
80  SG_ERROR( "whooooo ? only a single class found\n");
81  return false;
82  }
83 
84  w=SGVector<float64_t>(num_feat);
85 
86  float64_t* mean_neg=SG_MALLOC(float64_t, num_feat);
87  memset(mean_neg,0,num_feat*sizeof(float64_t));
88 
89  float64_t* mean_pos=SG_MALLOC(float64_t, num_feat);
90  memset(mean_pos,0,num_feat*sizeof(float64_t));
91 
92  /* calling external lib */
93  double* scatter=SG_MALLOC(double, num_feat*num_feat);
94  double* buffer=SG_MALLOC(double, num_feat*CMath::max(num_neg, num_pos));
95  int nf = (int) num_feat;
96 
98  //mean neg
99  for (i=0; i<num_neg; i++)
100  {
101  int32_t vlen;
102  bool vfree;
103  float64_t* vec=
104  rf->get_feature_vector(classidx_neg[i], vlen, vfree);
105  ASSERT(vec);
106 
107  for (j=0; j<vlen; j++)
108  {
109  mean_neg[j]+=vec[j];
110  buffer[num_feat*i+j]=vec[j];
111  }
112 
113  rf->free_feature_vector(vec, classidx_neg[i], vfree);
114  }
115 
116  for (j=0; j<num_feat; j++)
117  mean_neg[j]/=num_neg;
118 
119  for (i=0; i<num_neg; i++)
120  {
121  for (j=0; j<num_feat; j++)
122  buffer[num_feat*i+j]-=mean_neg[j];
123  }
124  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, nf, nf,
125  (int) num_neg, 1.0, buffer, nf, buffer, nf, 0, scatter, nf);
126 
127  //mean pos
128  for (i=0; i<num_pos; i++)
129  {
130  int32_t vlen;
131  bool vfree;
132  float64_t* vec=
133  rf->get_feature_vector(classidx_pos[i], vlen, vfree);
134  ASSERT(vec);
135 
136  for (j=0; j<vlen; j++)
137  {
138  mean_pos[j]+=vec[j];
139  buffer[num_feat*i+j]=vec[j];
140  }
141 
142  rf->free_feature_vector(vec, classidx_pos[i], vfree);
143  }
144 
145  for (j=0; j<num_feat; j++)
146  mean_pos[j]/=num_pos;
147 
148  for (i=0; i<num_pos; i++)
149  {
150  for (j=0; j<num_feat; j++)
151  buffer[num_feat*i+j]-=mean_pos[j];
152  }
153  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, nf, nf, (int) num_pos,
154  1.0/(train_labels.vlen-1), buffer, nf, buffer, nf,
155  1.0/(train_labels.vlen-1), scatter, nf);
156 
157  float64_t trace=SGMatrix<float64_t>::trace((float64_t*) scatter, num_feat, num_feat);
158 
159  double s=1.0-m_gamma; /* calling external lib; indirectly */
160  for (i=0; i<num_feat*num_feat; i++)
161  scatter[i]*=s;
162 
163  for (i=0; i<num_feat; i++)
164  scatter[i*num_feat+i]+= trace*m_gamma/num_feat;
165 
166  double* inv_scatter= (double*) SGMatrix<float64_t>::pinv(
167  scatter, num_feat, num_feat, NULL);
168 
169  float64_t* w_pos=buffer;
170  float64_t* w_neg=&buffer[num_feat];
171 
172  cblas_dsymv(CblasColMajor, CblasUpper, nf, 1.0, inv_scatter, nf,
173  (double*) mean_pos, 1, 0., (double*) w_pos, 1);
174  cblas_dsymv(CblasColMajor, CblasUpper, nf, 1.0, inv_scatter, nf,
175  (double*) mean_neg, 1, 0, (double*) w_neg, 1);
176 
177  bias=0.5*(SGVector<float64_t>::dot(w_neg, mean_neg, num_feat)-SGVector<float64_t>::dot(w_pos, mean_pos, num_feat));
178  for (i=0; i<num_feat; i++)
179  w.vector[i]=w_pos[i]-w_neg[i];
180 
181 #ifdef DEBUG_LDA
182  SG_PRINT("bias: %f\n", bias);
184  SGVector<float64_t>::display_vector(w_pos, num_feat, "w_pos");
185  SGVector<float64_t>::display_vector(w_neg, num_feat, "w_neg");
186  SGVector<float64_t>::display_vector(mean_pos, num_feat, "mean_pos");
187  SGVector<float64_t>::display_vector(mean_neg, num_feat, "mean_neg");
188 #endif
189 
190  SG_FREE(mean_neg);
191  SG_FREE(mean_pos);
192  SG_FREE(scatter);
193  SG_FREE(inv_scatter);
194  SG_FREE(classidx_neg);
195  SG_FREE(classidx_pos);
196  SG_FREE(buffer);
197  return true;
198 }
199 #endif

SHOGUN Machine Learning Toolbox - Documentation