SHOGUN: MultitaskKernelNormalizer.h Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Christian Widmer
00008  * Copyright (C) 2009 Max-Planck-Society
00009  */
00010 
00011 #ifndef _MULTITASKKERNELNORMALIZER_H___
00012 #define _MULTITASKKERNELNORMALIZER_H___
00013 
00014 #include "kernel/KernelNormalizer.h"
00015 #include "kernel/Kernel.h"
00016 #include <algorithm>
00017 
00018 
00019 
00020 namespace shogun
00021 {
00031 class CMultitaskKernelNormalizer: public CKernelNormalizer
00032 {
00033 
00034 public:
00035 
00038     CMultitaskKernelNormalizer() : CKernelNormalizer(), scale(1.0)
00039     {
00040     }
00041 
00046     CMultitaskKernelNormalizer(std::vector<int32_t> task_vector)
00047         : CKernelNormalizer(), scale(1.0)
00048     {
00049 
00050         num_tasks = get_num_unique_tasks(task_vector);
00051 
00052         // set both sides equally
00053         set_task_vector(task_vector);
00054 
00055         // init similarity matrix
00056         similarity_matrix = std::vector<float64_t>(num_tasks * num_tasks);
00057 
00058     }
00059 
00061     virtual ~CMultitaskKernelNormalizer()
00062     {
00063     }
00064 
00067     virtual bool init(CKernel* k)
00068     {
00069 
00070         //same as first-element normalizer
00071         CFeatures* old_lhs=k->lhs;
00072         CFeatures* old_rhs=k->rhs;
00073         k->lhs=old_lhs;
00074         k->rhs=old_lhs;
00075 
00076         if (strcmp(k->get_name(), "WeightedDegree") == 0) {
00077             SG_INFO("using first-element normalization\n");
00078             scale=k->compute(0, 0);
00079         } else {
00080             SG_INFO("no inner normalization for non-WDK kernel\n");
00081             scale=1.0;
00082         }
00083 
00084         k->lhs=old_lhs;
00085         k->rhs=old_rhs;
00086 
00087         ASSERT(k);
00088         int32_t num_lhs = k->get_num_vec_lhs();
00089         int32_t num_rhs = k->get_num_vec_rhs();
00090         ASSERT(num_lhs>0);
00091         ASSERT(num_rhs>0);
00092 
00093         //std::cout << "scale: " << scale << std::endl;
00094 
00095         return true;
00096     }
00097 
00103     int32_t get_num_unique_tasks(std::vector<int32_t> vec) {
00104 
00105         //sort
00106         std::sort(vec.begin(), vec.end());
00107 
00108         //reorder tasks with unique prefix
00109         std::vector<int32_t>::iterator endLocation = std::unique(vec.begin(), vec.end());
00110 
00111         //count unique tasks
00112         int32_t num_vec = std::distance(vec.begin(), endLocation);
00113 
00114         return num_vec;
00115 
00116     }
00117 
00123     inline virtual float64_t normalize(float64_t value, int32_t idx_lhs,
00124             int32_t idx_rhs)
00125     {
00126 
00127         //lookup tasks
00128         int32_t task_idx_lhs = task_vector_lhs[idx_lhs];
00129         int32_t task_idx_rhs = task_vector_rhs[idx_rhs];
00130 
00131         //lookup similarity
00132         float64_t task_similarity = get_task_similarity(task_idx_lhs,
00133                 task_idx_rhs);
00134 
00135         //take task similarity into account
00136         float64_t similarity = (value/scale) * task_similarity;
00137 
00138 
00139         return similarity;
00140 
00141     }
00142 
00147     inline virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs)
00148     {
00149         SG_ERROR("normalize_lhs not implemented");
00150         return 0;
00151     }
00152 
00157     inline virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs)
00158     {
00159         SG_ERROR("normalize_rhs not implemented");
00160         return 0;
00161     }
00162 
00163 public:
00164 
00166     std::vector<int32_t> get_task_vector_lhs() const
00167     {
00168         return task_vector_lhs;
00169     }
00170 
00172     void set_task_vector_lhs(std::vector<int32_t> vec)
00173     {
00174         task_vector_lhs = vec;
00175     }
00176 
00178     std::vector<int32_t> get_task_vector_rhs() const
00179     {
00180         return task_vector_rhs;
00181     }
00182 
00184     void set_task_vector_rhs(std::vector<int32_t> vec)
00185     {
00186         task_vector_rhs = vec;
00187     }
00188 
00190     void set_task_vector(std::vector<int32_t> vec)
00191     {
00192         task_vector_lhs = vec;
00193         task_vector_rhs = vec;
00194     }
00195 
00201     float64_t get_task_similarity(int32_t task_lhs, int32_t task_rhs)
00202     {
00203 
00204         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00205         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00206 
00207         return similarity_matrix[task_lhs * num_tasks + task_rhs];
00208 
00209     }
00210 
00216     void set_task_similarity(int32_t task_lhs, int32_t task_rhs,
00217             float64_t similarity)
00218     {
00219 
00220         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00221         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00222 
00223         similarity_matrix[task_lhs * num_tasks + task_rhs] = similarity;
00224 
00225     }
00226 
00228     inline virtual const char* get_name() const
00229     {
00230         return "MultitaskKernelNormalizer";
00231     }
00232 
00233 protected:
00234 
00236     std::vector<float64_t> similarity_matrix;
00237 
00239     int32_t num_tasks;
00240 
00242     std::vector<int32_t> task_vector_lhs;
00243 
00245     std::vector<int32_t> task_vector_rhs;
00246 
00248     float64_t scale;
00249 
00250 };
00251 }
00252 #endif