AUCKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Written (W) 2009 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "lib/Mathematics.h"
00014 #include "kernel/AUCKernel.h"
00015 #include "features/SimpleFeatures.h"
00016 #include "lib/io.h"
00017 
00018 using namespace shogun;
00019 
00020 void
00021 CAUCKernel::init(void)
00022 {
00023     m_parameters->add((CSGObject**) &subkernel, "subkernel",
00024                       "The subkernel.");
00025 }
00026 
00027 CAUCKernel::CAUCKernel(void)
00028 : CDotKernel(0), subkernel(NULL)
00029 {
00030     init();
00031 }
00032 
00033 CAUCKernel::CAUCKernel(int32_t size, CKernel* s)
00034 : CDotKernel(size), subkernel(s)
00035 {
00036     init();
00037     SG_REF(subkernel);
00038 }
00039 
00040 CAUCKernel::~CAUCKernel()
00041 {
00042     SG_UNREF(subkernel);
00043     cleanup();
00044 }
00045 
00046 CLabels* CAUCKernel::setup_auc_maximization(CLabels* labels)
00047 {
00048     SG_INFO( "setting up AUC maximization\n") ;
00049     ASSERT(labels);
00050     ASSERT(labels->is_two_class_labeling());
00051 
00052     // get the original labels
00053     int32_t num=0;
00054     ASSERT(labels);
00055     int32_t* int_labels=labels->get_int_labels(num);
00056     ASSERT(subkernel->get_num_vec_rhs()==num);
00057 
00058     // count positive and negative
00059     int32_t num_pos=0;
00060     int32_t num_neg=0;
00061 
00062     for (int32_t i=0; i<num; i++)
00063     {
00064         if (int_labels[i]==1)
00065             num_pos++;
00066         else 
00067             num_neg++;
00068     }
00069 
00070     // create AUC features and labels (alternate labels)
00071     int32_t num_auc = num_pos*num_neg;
00072     SG_INFO("num_pos: %i  num_neg: %i  num_auc: %i\n", num_pos, num_neg, num_auc);
00073 
00074     uint16_t* features_auc = new uint16_t[num_auc*2];
00075     int32_t* labels_auc = new int32_t[num_auc];
00076     int32_t n=0 ;
00077 
00078     for (int32_t i=0; i<num; i++)
00079     {
00080         if (int_labels[i]!=1)
00081             continue;
00082 
00083         for (int32_t j=0; j<num; j++)
00084         {
00085             if (int_labels[j]!=-1)
00086                 continue;
00087 
00088             // create about as many positively as negatively labeled examples
00089             if (n%2==0)
00090             {
00091                 features_auc[n*2]=i;
00092                 features_auc[n*2+1]=j;
00093                 labels_auc[n]=1;
00094             }
00095             else
00096             {
00097                 features_auc[n*2]=j;
00098                 features_auc[n*2+1]=i;
00099                 labels_auc[n]=-1;
00100             }
00101 
00102             n++;
00103             ASSERT(n<=num_auc);
00104         }
00105     }
00106 
00107     // create label object and attach it to svm
00108     CLabels* lab_auc = new CLabels(num_auc);
00109     lab_auc->set_int_labels(labels_auc, num_auc);
00110     SG_REF(lab_auc);
00111 
00112     // create feature object
00113     CSimpleFeatures<uint16_t>* f = new CSimpleFeatures<uint16_t>(0);
00114     f->set_feature_matrix(features_auc, 2, num_auc);
00115 
00116     // create AUC kernel and attach the features
00117     init(f,f);
00118 
00119     delete[] int_labels;
00120     delete[] labels_auc;
00121 
00122     return lab_auc;
00123 }
00124 
00125 
00126 bool CAUCKernel::init(CFeatures* l, CFeatures* r)
00127 {
00128     CDotKernel::init(l, r);
00129     init_normalizer();
00130     return true;
00131 }
00132 
00133 float64_t CAUCKernel::compute(int32_t idx_a, int32_t idx_b)
00134 {
00135   int32_t alen, blen;
00136   bool afree, bfree;
00137 
00138   uint16_t* avec=((CSimpleFeatures<uint16_t>*) lhs)->get_feature_vector(idx_a, alen, afree);
00139   uint16_t* bvec=((CSimpleFeatures<uint16_t>*) rhs)->get_feature_vector(idx_b, blen, bfree);
00140 
00141   ASSERT(alen==2);
00142   ASSERT(blen==2);
00143 
00144   ASSERT(subkernel && subkernel->has_features());
00145 
00146   float64_t k11,k12,k21,k22;
00147   int32_t idx_a1=avec[0], idx_a2=avec[1], idx_b1=bvec[0], idx_b2=bvec[1];
00148 
00149   k11 = subkernel->kernel(idx_a1,idx_b1);
00150   k12 = subkernel->kernel(idx_a1,idx_b2);
00151   k21 = subkernel->kernel(idx_a2,idx_b1);
00152   k22 = subkernel->kernel(idx_a2,idx_b2);
00153 
00154   float64_t result = k11+k22-k21-k12;
00155 
00156   ((CSimpleFeatures<uint16_t>*) lhs)->free_feature_vector(avec, idx_a, afree);
00157   ((CSimpleFeatures<uint16_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree);
00158 
00159   return result;
00160 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation