AUCKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Written (W) 2009 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/lib/common.h>
00013 #include <shogun/mathematics/Math.h>
00014 #include <shogun/kernel/AUCKernel.h>
00015 #include <shogun/features/SimpleFeatures.h>
00016 #include <shogun/io/SGIO.h>
00017 
00018 using namespace shogun;
00019 
00020 void
00021 CAUCKernel::init()
00022 {
00023     m_parameters->add((CSGObject**) &subkernel, "subkernel",
00024                       "The subkernel.");
00025 }
00026 
00027 CAUCKernel::CAUCKernel()
00028 : CDotKernel(0), subkernel(NULL)
00029 {
00030     init();
00031 }
00032 
00033 CAUCKernel::CAUCKernel(int32_t size, CKernel* s)
00034 : CDotKernel(size), subkernel(s)
00035 {
00036     init();
00037     SG_REF(subkernel);
00038 }
00039 
00040 CAUCKernel::~CAUCKernel()
00041 {
00042     SG_UNREF(subkernel);
00043     cleanup();
00044 }
00045 
00046 CLabels* CAUCKernel::setup_auc_maximization(CLabels* labels)
00047 {
00048     SG_INFO( "setting up AUC maximization\n") ;
00049     ASSERT(labels);
00050     ASSERT(labels->is_two_class_labeling());
00051 
00052     // get the original labels
00053     ASSERT(labels);
00054     SGVector<int32_t> int_labels=labels->get_int_labels();
00055     ASSERT(subkernel->get_num_vec_rhs()==int_labels.vlen);
00056 
00057     // count positive and negative
00058     int32_t num_pos=0;
00059     int32_t num_neg=0;
00060 
00061     for (int32_t i=0; i<int_labels.vlen; i++)
00062     {
00063         if (int_labels.vector[i]==1)
00064             num_pos++;
00065         else 
00066             num_neg++;
00067     }
00068 
00069     // create AUC features and labels (alternate labels)
00070     int32_t num_auc = num_pos*num_neg;
00071     SG_INFO("num_pos: %i  num_neg: %i  num_auc: %i\n", num_pos, num_neg, num_auc);
00072 
00073     uint16_t* features_auc = SG_MALLOC(uint16_t, num_auc*2);
00074     int32_t* labels_auc = SG_MALLOC(int32_t, num_auc);
00075     int32_t n=0 ;
00076 
00077     for (int32_t i=0; i<int_labels.vlen; i++)
00078     {
00079         if (int_labels.vector[i]!=1)
00080             continue;
00081 
00082         for (int32_t j=0; j<int_labels.vlen; j++)
00083         {
00084             if (int_labels.vector[j]!=-1)
00085                 continue;
00086 
00087             // create about as many positively as negatively labeled examples
00088             if (n%2==0)
00089             {
00090                 features_auc[n*2]=i;
00091                 features_auc[n*2+1]=j;
00092                 labels_auc[n]=1;
00093             }
00094             else
00095             {
00096                 features_auc[n*2]=j;
00097                 features_auc[n*2+1]=i;
00098                 labels_auc[n]=-1;
00099             }
00100 
00101             n++;
00102             ASSERT(n<=num_auc);
00103         }
00104     }
00105 
00106     // create label object and attach it to svm
00107     CLabels* lab_auc = new CLabels(num_auc);
00108     lab_auc->set_int_labels(SGVector<int32_t>(labels_auc, num_auc));
00109     SG_REF(lab_auc);
00110 
00111     // create feature object
00112     CSimpleFeatures<uint16_t>* f = new CSimpleFeatures<uint16_t>(0);
00113     f->set_feature_matrix(features_auc, 2, num_auc);
00114 
00115     // create AUC kernel and attach the features
00116     init(f,f);
00117 
00118     int_labels.free_vector();
00119     SG_FREE(labels_auc);
00120 
00121     return lab_auc;
00122 }
00123 
00124 
00125 bool CAUCKernel::init(CFeatures* l, CFeatures* r)
00126 {
00127     CDotKernel::init(l, r);
00128     init_normalizer();
00129     return true;
00130 }
00131 
00132 float64_t CAUCKernel::compute(int32_t idx_a, int32_t idx_b)
00133 {
00134   int32_t alen, blen;
00135   bool afree, bfree;
00136 
00137   uint16_t* avec=((CSimpleFeatures<uint16_t>*) lhs)->get_feature_vector(idx_a, alen, afree);
00138   uint16_t* bvec=((CSimpleFeatures<uint16_t>*) rhs)->get_feature_vector(idx_b, blen, bfree);
00139 
00140   ASSERT(alen==2);
00141   ASSERT(blen==2);
00142 
00143   ASSERT(subkernel && subkernel->has_features());
00144 
00145   float64_t k11,k12,k21,k22;
00146   int32_t idx_a1=avec[0], idx_a2=avec[1], idx_b1=bvec[0], idx_b2=bvec[1];
00147 
00148   k11 = subkernel->kernel(idx_a1,idx_b1);
00149   k12 = subkernel->kernel(idx_a1,idx_b2);
00150   k21 = subkernel->kernel(idx_a2,idx_b1);
00151   k22 = subkernel->kernel(idx_a2,idx_b2);
00152 
00153   float64_t result = k11+k22-k21-k12;
00154 
00155   ((CSimpleFeatures<uint16_t>*) lhs)->free_feature_vector(avec, idx_a, afree);
00156   ((CSimpleFeatures<uint16_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree);
00157 
00158   return result;
00159 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation