SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
AUCKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 2009 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
15 #include <shogun/io/SGIO.h>
17 
18 using namespace shogun;
19 
20 void
21 CAUCKernel::init()
22 {
23  SG_ADD((CSGObject**) &subkernel, "subkernel", "The subkernel.",
24  MS_AVAILABLE);
25 }
26 
28 : CDotKernel(0), subkernel(NULL)
29 {
30  init();
31 }
32 
34 : CDotKernel(size), subkernel(s)
35 {
36  init();
38 }
39 
41 {
43  cleanup();
44 }
45 
47 {
48  SG_INFO("setting up AUC maximization\n")
49  ASSERT(labels)
50  ASSERT(labels->get_label_type() == LT_BINARY)
51  labels->ensure_valid();
52 
53  // get the original labels
54  SGVector<int32_t> int_labels=((CBinaryLabels*) labels)->get_int_labels();
55  ASSERT(subkernel->get_num_vec_rhs()==int_labels.vlen)
56 
57  // count positive and negative
58  int32_t num_pos=0;
59  int32_t num_neg=0;
60 
61  for (int32_t i=0; i<int_labels.vlen; i++)
62  {
63  if (int_labels.vector[i]==1)
64  num_pos++;
65  else
66  num_neg++;
67  }
68 
69  // create AUC features and labels (alternate labels)
70  int32_t num_auc = num_pos*num_neg;
71  SG_INFO("num_pos: %i num_neg: %i num_auc: %i\n", num_pos, num_neg, num_auc)
72 
73  SGMatrix<uint16_t> features_auc(2,num_auc);
74  int32_t* labels_auc = SG_MALLOC(int32_t, num_auc);
75  int32_t n=0 ;
76 
77  for (int32_t i=0; i<int_labels.vlen; i++)
78  {
79  if (int_labels.vector[i]!=1)
80  continue;
81 
82  for (int32_t j=0; j<int_labels.vlen; j++)
83  {
84  if (int_labels.vector[j]!=-1)
85  continue;
86 
87  // create about as many positively as negatively labeled examples
88  if (n%2==0)
89  {
90  features_auc.matrix[n*2]=i;
91  features_auc.matrix[n*2+1]=j;
92  labels_auc[n]=1;
93  }
94  else
95  {
96  features_auc.matrix[n*2]=j;
97  features_auc.matrix[n*2+1]=i;
98  labels_auc[n]=-1;
99  }
100 
101  n++;
102  ASSERT(n<=num_auc)
103  }
104  }
105 
106  // create label object and attach it to svm
107  CBinaryLabels* lab_auc = new CBinaryLabels(num_auc);
108  lab_auc->set_int_labels(SGVector<int32_t>(labels_auc, num_auc, false));
109  SG_REF(lab_auc);
110 
111  // create feature object
113  f->set_feature_matrix(features_auc);
114 
115  // create AUC kernel and attach the features
116  init(f,f);
117 
118  SG_FREE(labels_auc);
119 
120  return lab_auc;
121 }
122 
123 
124 bool CAUCKernel::init(CFeatures* l, CFeatures* r)
125 {
126  CDotKernel::init(l, r);
127  init_normalizer();
128  return true;
129 }
130 
131 float64_t CAUCKernel::compute(int32_t idx_a, int32_t idx_b)
132 {
133  int32_t alen, blen;
134  bool afree, bfree;
135 
136  uint16_t* avec=((CDenseFeatures<uint16_t>*) lhs)->get_feature_vector(idx_a, alen, afree);
137  uint16_t* bvec=((CDenseFeatures<uint16_t>*) rhs)->get_feature_vector(idx_b, blen, bfree);
138 
139  ASSERT(alen==2)
140  ASSERT(blen==2)
141 
143 
144  float64_t k11,k12,k21,k22;
145  int32_t idx_a1=avec[0], idx_a2=avec[1], idx_b1=bvec[0], idx_b2=bvec[1];
146 
147  k11 = subkernel->kernel(idx_a1,idx_b1);
148  k12 = subkernel->kernel(idx_a1,idx_b2);
149  k21 = subkernel->kernel(idx_a2,idx_b1);
150  k22 = subkernel->kernel(idx_a2,idx_b2);
151 
152  float64_t result = k11+k22-k21-k12;
153 
154  ((CDenseFeatures<uint16_t>*) lhs)->free_feature_vector(avec, idx_a, afree);
155  ((CDenseFeatures<uint16_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree);
156 
157  return result;
158 }
#define SG_INFO(...)
Definition: SGIO.h:118
virtual void cleanup()
Definition: Kernel.cpp:173
virtual ELabelType get_label_type() const =0
binary labels +1/-1
Definition: LabelTypes.h:18
void set_int_labels(SGVector< int32_t > labels)
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
void set_feature_matrix(SGMatrix< ST > matrix)
CLabels * setup_auc_maximization(CLabels *labels)
Definition: AUCKernel.cpp:46
float64_t kernel(int32_t idx_a, int32_t idx_b)
Definition: Kernel.h:207
#define SG_REF(x)
Definition: SGObject.h:54
Template class DotKernel is the base class for kernels working on DotFeatures.
Definition: DotKernel.h:31
index_t vlen
Definition: SGVector.h:494
virtual ~CAUCKernel()
Definition: AUCKernel.cpp:40
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
double float64_t
Definition: common.h:50
virtual int32_t get_num_vec_rhs()
Definition: Kernel.h:526
virtual bool init_normalizer()
Definition: Kernel.cpp:168
CFeatures * rhs
feature vectors to occur on right hand side
Definition: Kernel.h:1062
#define SG_UNREF(x)
Definition: SGObject.h:55
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
CFeatures * lhs
feature vectors to occur on left hand side
Definition: Kernel.h:1060
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual float64_t compute(int32_t idx_a, int32_t idx_b)
Definition: AUCKernel.cpp:131
The Kernel base class.
Definition: Kernel.h:159
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
#define SG_ADD(...)
Definition: SGObject.h:84
virtual bool has_features()
Definition: Kernel.h:535
virtual void ensure_valid(const char *context=NULL)=0
CKernel * subkernel
Definition: AUCKernel.h:106

SHOGUN Machine Learning Toolbox - Documentation