MultitaskKernelPlifNormalizer.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Christian Widmer
00008  * Copyright (C) 2010 Max-Planck-Society
00009  */
00010 
00011 #ifndef _MULTITASKKERNELPLIFNORMALIZER_H___
00012 #define _MULTITASKKERNELPLIFNORMALIZER_H___
00013 
00014 #include <shogun/kernel/KernelNormalizer.h>
00015 #include <shogun/kernel/MultitaskKernelMklNormalizer.h>
00016 #include <shogun/kernel/Kernel.h>
00017 #include <algorithm>
00018 
00019 
00020 
00021 namespace shogun
00022 {
00026 class CMultitaskKernelPlifNormalizer: public CMultitaskKernelMklNormalizer
00027 {
00028 
00029 public:
00031     CMultitaskKernelPlifNormalizer() : CMultitaskKernelMklNormalizer()
00032     {
00033         num_tasks = 0;
00034         num_tasksqr = 0;
00035         num_betas = 0;
00036     }
00037 
00040     CMultitaskKernelPlifNormalizer(std::vector<float64_t> support_, std::vector<int32_t> task_vector)
00041         : CMultitaskKernelMklNormalizer()
00042     {
00043 
00044         num_betas = static_cast<int>(support_.size());
00045 
00046         support = support_;
00047 
00048         // init support points values with constant function
00049         betas = std::vector<float64_t>(num_betas);
00050         for (int i=0; i!=num_betas; i++)
00051         {
00052             betas[i] = 1;
00053         }
00054 
00055         num_tasks = get_num_unique_tasks(task_vector);
00056         num_tasksqr = num_tasks * num_tasks;
00057 
00058         // set both sides equally
00059         set_task_vector(task_vector);
00060 
00061         // init distance matrix
00062         distance_matrix = std::vector<float64_t>(num_tasksqr);
00063 
00064         // init similarity matrix
00065         similarity_matrix = std::vector<float64_t>(num_tasksqr);
00066 
00067     }
00068 
00069 
00075     inline virtual float64_t normalize(float64_t value, int32_t idx_lhs,
00076             int32_t idx_rhs)
00077     {
00078 
00079         //lookup tasks
00080         int32_t task_idx_lhs = task_vector_lhs[idx_lhs];
00081         int32_t task_idx_rhs = task_vector_rhs[idx_rhs];
00082 
00083         //lookup similarity
00084         float64_t task_similarity = get_task_similarity(task_idx_lhs,
00085                 task_idx_rhs);
00086 
00087         //take task similarity into account
00088         float64_t similarity = (value/scale) * task_similarity;
00089 
00090 
00091         return similarity;
00092 
00093     }
00094 
00100     int32_t get_num_unique_tasks(std::vector<int32_t> vec) {
00101 
00102         //sort
00103         std::sort(vec.begin(), vec.end());
00104 
00105         //reorder tasks with unique prefix
00106         std::vector<int32_t>::iterator endLocation = std::unique(vec.begin(), vec.end());
00107 
00108         //count unique tasks
00109         int32_t num_vec = std::distance(vec.begin(), endLocation);
00110 
00111         return num_vec;
00112 
00113     }
00114 
00115 
00117     virtual ~CMultitaskKernelPlifNormalizer()
00118     {
00119     }
00120 
00121 
00123     void update_cache()
00124     {
00125 
00126 
00127         for (int32_t i=0; i!=num_tasks; i++)
00128         {
00129             for (int32_t j=0; j!=num_tasks; j++)
00130             {
00131 
00132                 float64_t similarity = compute_task_similarity(i, j);
00133                 set_task_similarity(i,j,similarity);
00134 
00135             }
00136 
00137         }
00138     }
00139 
00140 
00142     float64_t compute_task_similarity(int32_t task_a, int32_t task_b)
00143     {
00144 
00145         float64_t distance = get_task_distance(task_a, task_b);
00146         float64_t similarity = -1;
00147 
00148         int32_t upper_bound_idx = -1;
00149 
00150 
00151         // determine interval
00152         for (int i=1; i!=num_betas; i++)
00153         {
00154             if (distance <= support[i])
00155             {
00156                 upper_bound_idx = i;
00157                 break;
00158             }
00159         }
00160 
00161         // perform interpolation (constant for beyond upper bound)
00162         if (upper_bound_idx == -1)
00163         {
00164 
00165             similarity = betas[num_betas-1];
00166 
00167         } else {
00168 
00169             int32_t lower_bound_idx = upper_bound_idx - 1;
00170             float64_t interval_size = support[upper_bound_idx] - support[lower_bound_idx];
00171 
00172             float64_t factor_lower = 1 - (distance - support[lower_bound_idx]) / interval_size;
00173             float64_t factor_upper = 1 - factor_lower;
00174 
00175             similarity = factor_lower*betas[lower_bound_idx] + factor_upper*betas[upper_bound_idx];
00176 
00177         }
00178 
00179         return similarity;
00180 
00181     }
00182 
00183 
00184 public:
00185 
00187     virtual std::vector<int32_t> get_task_vector_lhs() const
00188     {
00189         return task_vector_lhs;
00190     }
00191 
00193     virtual void set_task_vector_lhs(std::vector<int32_t> vec)
00194     {
00195         task_vector_lhs = vec;
00196     }
00197 
00199     virtual std::vector<int32_t> get_task_vector_rhs() const
00200     {
00201         return task_vector_rhs;
00202     }
00203 
00205     virtual void set_task_vector_rhs(std::vector<int32_t> vec)
00206     {
00207         task_vector_rhs = vec;
00208     }
00209 
00211     virtual void set_task_vector(std::vector<int32_t> vec)
00212     {
00213         task_vector_lhs = vec;
00214         task_vector_rhs = vec;
00215     }
00216 
00222     float64_t get_task_distance(int32_t task_lhs, int32_t task_rhs)
00223     {
00224 
00225         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00226         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00227 
00228         return distance_matrix[task_lhs * num_tasks + task_rhs];
00229 
00230     }
00231 
00237     void set_task_distance(int32_t task_lhs, int32_t task_rhs,
00238             float64_t distance)
00239     {
00240 
00241         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00242         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00243 
00244         distance_matrix[task_lhs * num_tasks + task_rhs] = distance;
00245 
00246     }
00247 
00253     float64_t get_task_similarity(int32_t task_lhs, int32_t task_rhs)
00254     {
00255 
00256         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00257         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00258 
00259         return similarity_matrix[task_lhs * num_tasks + task_rhs];
00260 
00261     }
00262 
00268     void set_task_similarity(int32_t task_lhs, int32_t task_rhs,
00269             float64_t similarity)
00270     {
00271 
00272         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00273         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00274 
00275         similarity_matrix[task_lhs * num_tasks + task_rhs] = similarity;
00276 
00277     }
00278 
00282     float64_t get_beta(int32_t idx)
00283     {
00284 
00285         return betas[idx];
00286 
00287     }
00288 
00293     void set_beta(int32_t idx, float64_t weight)
00294     {
00295 
00296         betas[idx] = weight;
00297 
00298         update_cache();
00299 
00300     }
00301 
00305     int32_t get_num_betas()
00306     {
00307 
00308         return num_betas;
00309 
00310     }
00311 
00312 
00314     inline virtual const char* get_name() const
00315     {
00316         return "MultitaskKernelPlifNormalizer";
00317     }
00318 
00319 protected:
00322     virtual void register_params()
00323     {
00324         m_parameters->add(&num_tasks, "num_tasks", "the number of tasks");
00325         m_parameters->add(&num_betas, "num_betas", "the number of weights");
00326         m_parameters->add_vector((SGString<float64_t>**)&distance_matrix, &num_tasksqr, "distance_matrix", "distance between tasks");
00327         m_parameters->add_vector((SGString<float64_t>**)&similarity_matrix, &num_tasksqr, "similarity_matrix", "similarity between tasks");
00328         m_parameters->add_vector((SGString<float64_t>**)&betas, &num_betas, "num_betas", "weights");
00329         m_parameters->add_vector((SGString<float64_t>**)&support, &num_betas, "support", "support points");
00330     }
00331 
00333     int32_t num_tasks;
00335     int32_t num_tasksqr;
00336 
00338     std::vector<int32_t> task_vector_lhs;
00339 
00341     std::vector<int32_t> task_vector_rhs;
00342 
00344     std::vector<float64_t> distance_matrix;
00345 
00347     std::vector<float64_t> similarity_matrix;
00348 
00350     int32_t num_betas;
00351 
00353     std::vector<float64_t> betas;
00354 
00356     std::vector<float64_t> support;
00357 
00358 };
00359 }
00360 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation