21 #include <shogun/lib/external/libocas.h>
29 #ifndef DOXYGEN_SHOULD_SKIP_THIS
30 struct wdocas_thread_params_output
40 struct wdocas_thread_params_add
49 #endif // DOXYGEN_SHOULD_SKIP_THIS
51 CWDSVMOcas::CWDSVMOcas()
52 :
CMachine(), use_bias(false), bufsize(3000), C1(1), C2(1),
53 epsilon(1e-3), method(SVM_OCAS)
67 CWDSVMOcas::CWDSVMOcas(E_SVM_TYPE type)
68 :
CMachine(), use_bias(false), bufsize(3000), C1(1), C2(1),
69 epsilon(1e-3), method(type)
81 CWDSVMOcas::CWDSVMOcas(
84 :
CMachine(), use_bias(false), bufsize(3000), C1(C), C2(C), epsilon(1e-3),
98 CWDSVMOcas::~CWDSVMOcas()
121 SG_ERROR(
"Features not of class string type byte\n")
129 set_normalization_const();
134 int32_t num=features->get_num_vectors();
139 for (int32_t i=0; i<num; i++)
140 outputs[i] = apply_one(i);
146 int32_t CWDSVMOcas::set_wd_weights()
148 ASSERT(degree>0 && degree<=8)
152 w_offsets=SG_MALLOC(int32_t, degree);
153 int32_t w_dim_single_c=0;
155 for (int32_t i=0; i<degree; i++)
158 wd_weights[i]=sqrt(2.0*(from_degree-i)/(from_degree*(from_degree+1)));
159 w_dim_single_c+=w_offsets[i];
161 return w_dim_single_c;
164 bool CWDSVMOcas::train_machine(
CFeatures* data)
166 SG_INFO(
"C=%f, epsilon=%f, bufsize=%d\n", get_C1(), get_epsilon(), bufsize)
172 if (data->get_feature_class() !=
C_STRING ||
173 data->get_feature_type() !=
F_BYTE)
175 SG_ERROR(
"Features not of class string type byte\n")
181 CAlphabet* alphabet=get_features()->get_alphabet();
184 alphabet_size=alphabet->get_num_symbols();
185 string_length=features->get_num_vectors();
189 w_dim_single_char=set_wd_weights();
191 SG_DEBUG("w_dim_single_char=%d\n", w_dim_single_char)
192 w_dim=string_length*w_dim_single_char;
193 SG_DEBUG("cutting plane has %d dims\n", w_dim)
194 num_vec=get_features()->get_max_vector_length();
196 set_normalization_const();
197 SG_INFO("num_vec: %d num_lab: %d\n", num_vec, labvec.vlen)
198 ASSERT(num_vec==labvec.vlen)
202 w=SG_MALLOC(float32_t, w_dim);
203 memset(w, 0, w_dim*sizeof(float32_t));
206 old_w=SG_MALLOC(float32_t, w_dim);
207 memset(old_w, 0, w_dim*sizeof(float32_t));
211 cuts=SG_MALLOC(float32_t*, bufsize);
212 memset(cuts, 0, sizeof(*cuts)*bufsize);
214 memset(cp_bias, 0, sizeof(float64_t)*bufsize);
229 if (method == SVM_OCAS)
231 ocas_return_value_T result = svm_ocas_solver( get_C1(), num_vec, get_epsilon(),
232 TolAbs, QPBound, get_max_train_time(), bufsize, Method,
233 &CWDSVMOcas::compute_W,
234 &CWDSVMOcas::update_W,
235 &CWDSVMOcas::add_new_cut,
236 &CWDSVMOcas::compute_output,
241 SG_INFO("Ocas Converged after %d iterations\n"
242 "==================================\n"
243 "timing statistics:\n"
244 "output_time: %f s\n"
249 "ocas_time %f s\n\n", result.nIter, result.output_time, result.sort_time,
250 result.add_time, result.w_time, result.qp_solver_time, result.ocas_time);
252 for (int32_t i=bufsize-1; i>=0; i--)
269 float64_t CWDSVMOcas::update_W( float64_t t,
void* ptr )
271 float64_t sq_norm_W = 0;
272 CWDSVMOcas* o = (CWDSVMOcas*) ptr;
273 uint32_t nDim = (uint32_t) o->w_dim;
275 float32_t* oldW=o->old_w;
276 float64_t bias=o->bias;
277 float64_t old_bias=bias;
279 for(uint32_t j=0; j <nDim; j++)
281 W[j] = oldW[j]*(1-t) + t*W[j];
282 sq_norm_W += W[j]*W[j];
285 bias=old_bias*(1-t) + t*bias;
289 o->old_bias=old_bias;
302 void* CWDSVMOcas::add_new_cut_helper(
void* ptr)
304 wdocas_thread_params_add* p = (wdocas_thread_params_add*) ptr;
305 CWDSVMOcas* o = p->wdocas;
306 int32_t start = p->start;
307 int32_t end = p->end;
308 int32_t string_length = o->string_length;
310 uint32_t cut_length=p->cut_length;
311 uint32_t* new_cut=p->new_cut;
312 int32_t* w_offsets = o->w_offsets;
313 float64_t* y = o->lab;
314 int32_t alphabet_size = o->alphabet_size;
315 float32_t* wd_weights = o->wd_weights;
316 int32_t degree = o->degree;
318 float64_t normalization_const = o->normalization_const;
321 float32_t* new_a = p->new_a;
325 int32_t* val=SG_MALLOC(int32_t, cut_length);
326 for (int32_t j=start; j<end; j++)
328 int32_t offs=o->w_dim_single_char*j;
329 memset(val,0,
sizeof(int32_t)*cut_length);
330 int32_t lim=
CMath::min(degree, string_length-j);
333 for (int32_t k=0; k<lim; k++)
337 float32_t wd = wd_weights[k]/normalization_const;
339 for(uint32_t i=0; i < cut_length; i++)
341 val[i]=val[i]*alphabet_size + vec[new_cut[i]];
342 new_a[offs+val[i]]+=wd * y[new_cut[i]];
354 int CWDSVMOcas::add_new_cut(
355 float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
356 uint32_t nSel,
void* ptr)
358 CWDSVMOcas* o = (CWDSVMOcas*) ptr;
360 float64_t* c_bias = o->cp_bias;
361 uint32_t nDim=(uint32_t) o->w_dim;
362 float32_t** cuts=o->cuts;
363 float32_t* new_a=SG_MALLOC(float32_t, nDim);
364 memset(new_a, 0,
sizeof(float32_t)*nDim);
367 wdocas_thread_params_add* params_add=SG_MALLOC(wdocas_thread_params_add, o->parallel->get_num_threads());
368 pthread_t* threads=SG_MALLOC(pthread_t, o->parallel->get_num_threads());
370 int32_t string_length = o->string_length;
372 int32_t nthreads=o->parallel->get_num_threads()-1;
373 int32_t step= string_length/o->parallel->get_num_threads();
377 nthreads=string_length-1;
381 for (t=0; t<nthreads; t++)
383 params_add[t].wdocas=o;
385 params_add[t].new_a=new_a;
386 params_add[t].new_cut=new_cut;
387 params_add[t].start = step*t;
388 params_add[t].end = step*(t+1);
389 params_add[t].cut_length = cut_length;
391 if (pthread_create(&threads[t], NULL, &CWDSVMOcas::add_new_cut_helper, (
void*)&params_add[t]) != 0)
399 params_add[t].wdocas=o;
401 params_add[t].new_a=new_a;
402 params_add[t].new_cut=new_cut;
403 params_add[t].start = step*t;
404 params_add[t].end = string_length;
405 params_add[t].cut_length = cut_length;
406 add_new_cut_helper(&params_add[t]);
409 for (t=0; t<nthreads; t++)
411 if (pthread_join(threads[t], NULL) != 0)
422 for(i=0; i < cut_length; i++)
425 c_bias[nSel]+=o->lab[new_cut[i]];
429 for(i=0; i < nSel; i++)
430 new_col_H[i] =
CMath::dot(new_a, cuts[i], nDim) + c_bias[nSel]*c_bias[i];
441 int CWDSVMOcas::sort( float64_t* vals, float64_t* data, uint32_t size)
452 void* CWDSVMOcas::compute_output_helper(
void* ptr)
454 wdocas_thread_params_output* p = (wdocas_thread_params_output*) ptr;
455 CWDSVMOcas* o = p->wdocas;
456 int32_t start = p->start;
457 int32_t end = p->end;
458 float32_t* out = p->out;
459 float64_t* output = p->output;
460 int32_t* val = p->val;
464 int32_t degree = o->degree;
465 int32_t string_length = o->string_length;
466 int32_t alphabet_size = o->alphabet_size;
467 int32_t* w_offsets = o->w_offsets;
468 float32_t* wd_weights = o->wd_weights;
471 float64_t* y = o->lab;
472 float64_t normalization_const = o->normalization_const;
475 for (int32_t j=0; j<string_length; j++)
477 int32_t offs=o->w_dim_single_char*j;
478 for (int32_t i=start ; i<end; i++)
481 int32_t lim=
CMath::min(degree, string_length-j);
484 for (int32_t k=0; k<lim; k++)
488 float32_t wd = wd_weights[k];
490 for (int32_t i=start; i<end; i++)
492 val[i]=val[i]*alphabet_size + vec[i];
493 out[i]+=wd*w[offs+val[i]];
536 for (int32_t i=start; i<end; i++)
537 output[i]=y[i]*o->bias + out[i]*y[i]/normalization_const;
544 int CWDSVMOcas::compute_output( float64_t *output,
void* ptr )
547 CWDSVMOcas* o = (CWDSVMOcas*) ptr;
548 int32_t nData=o->num_vec;
549 wdocas_thread_params_output* params_output=SG_MALLOC(wdocas_thread_params_output, o->parallel->get_num_threads());
550 pthread_t* threads = SG_MALLOC(pthread_t, o->parallel->get_num_threads());
552 float32_t* out=SG_MALLOC(float32_t, nData);
553 int32_t* val=SG_MALLOC(int32_t, nData);
554 memset(out, 0,
sizeof(float32_t)*nData);
557 int32_t nthreads=o->parallel->get_num_threads()-1;
558 int32_t step= nData/o->parallel->get_num_threads();
566 for (t=0; t<nthreads; t++)
568 params_output[t].wdocas=o;
569 params_output[t].output=output;
570 params_output[t].out=out;
571 params_output[t].val=val;
572 params_output[t].start = step*t;
573 params_output[t].end = step*(t+1);
576 if (pthread_create(&threads[t], NULL, &CWDSVMOcas::compute_output_helper, (
void*)&params_output[t]) != 0)
584 params_output[t].wdocas=o;
585 params_output[t].output=output;
586 params_output[t].out=out;
587 params_output[t].val=val;
588 params_output[t].start = step*t;
589 params_output[t].end = nData;
590 compute_output_helper(&params_output[t]);
593 for (t=0; t<nthreads; t++)
595 if (pthread_join(threads[t], NULL) != 0)
599 SG_FREE(params_output);
614 void CWDSVMOcas::compute_W(
615 float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha, uint32_t nSel,
618 CWDSVMOcas* o = (CWDSVMOcas*) ptr;
619 uint32_t nDim= (uint32_t) o->w_dim;
622 float32_t* oldW=o->old_w;
623 float32_t** cuts=o->cuts;
624 memset(W, 0,
sizeof(float32_t)*nDim);
625 float64_t* c_bias = o->cp_bias;
626 float64_t old_bias=o->bias;
629 for (uint32_t i=0; i<nSel; i++)
634 bias += c_bias[i]*alpha[i];
638 *dp_WoldW =
CMath::dot(W,oldW, nDim) + bias*old_bias;;
642 o->old_bias = old_bias;
645 #endif //USE_GPL_SHOGUN
SGVector< ST > get_feature_vector(int32_t num)
Real Labels are real-valued labels.
RAWDNA - letters 0,1,2,3.
The class Labels models labels, i.e. class assignments of objects.
static void qsort_index(T1 *output, T2 *index, uint32_t size)
SGString< ST > * features
The class Alphabet implements an alphabet and alphabet utility functions.
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
A generic learning machine interface.
void print(CJLCoverTreePoint &p)
virtual EFeatureClass get_feature_class() const =0
float64_t normalization_const
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
all classes and functions are contained in the shogun namespace
SGStringList< ST > get_features()
The class Features is the base class of all feature objects.
Binary Labels for binary classification.
static void swap(T &a, T &b)
#define SG_UNSTABLE(func,...)
static int32_t pow(bool x, int32_t n)
virtual EFeatureType get_feature_type() const =0