MultitaskKernelPlifNormalizer.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Christian Widmer
00008  * Copyright (C) 2010 Max-Planck-Society
00009  */
00010 
00011 #ifndef _MULTITASKKERNELPLIFNORMALIZER_H___
00012 #define _MULTITASKKERNELPLIFNORMALIZER_H___
00013 
00014 #include <shogun/transfer/multitask/MultitaskKernelMklNormalizer.h>
00015 #include <shogun/kernel/Kernel.h>
00016 #include <algorithm>
00017 #include <vector>
00018 
00019 
00020 namespace shogun
00021 {
00025 class CMultitaskKernelPlifNormalizer: public CMultitaskKernelMklNormalizer
00026 {
00027 
00028 public:
00030     CMultitaskKernelPlifNormalizer() : CMultitaskKernelMklNormalizer()
00031     {
00032         num_tasks = 0;
00033         num_tasksqr = 0;
00034         num_betas = 0;
00035     }
00036 
00039     CMultitaskKernelPlifNormalizer(std::vector<float64_t> support_, std::vector<int32_t> task_vector)
00040         : CMultitaskKernelMklNormalizer()
00041     {
00042 
00043         num_betas = static_cast<int>(support_.size());
00044 
00045         support = support_;
00046 
00047         // init support points values with constant function
00048         betas = std::vector<float64_t>(num_betas);
00049         for (int i=0; i!=num_betas; i++)
00050         {
00051             betas[i] = 1;
00052         }
00053 
00054         num_tasks = get_num_unique_tasks(task_vector);
00055         num_tasksqr = num_tasks * num_tasks;
00056 
00057         // set both sides equally
00058         set_task_vector(task_vector);
00059 
00060         // init distance matrix
00061         distance_matrix = std::vector<float64_t>(num_tasksqr);
00062 
00063         // init similarity matrix
00064         similarity_matrix = std::vector<float64_t>(num_tasksqr);
00065 
00066     }
00067 
00068 
00074     virtual float64_t normalize(float64_t value, int32_t idx_lhs,
00075             int32_t idx_rhs)
00076     {
00077 
00078         //lookup tasks
00079         int32_t task_idx_lhs = task_vector_lhs[idx_lhs];
00080         int32_t task_idx_rhs = task_vector_rhs[idx_rhs];
00081 
00082         //lookup similarity
00083         float64_t task_similarity = get_task_similarity(task_idx_lhs,
00084                 task_idx_rhs);
00085 
00086         //take task similarity into account
00087         float64_t similarity = (value/scale) * task_similarity;
00088 
00089 
00090         return similarity;
00091 
00092     }
00093 
00099     int32_t get_num_unique_tasks(std::vector<int32_t> vec) {
00100 
00101         //sort
00102         std::sort(vec.begin(), vec.end());
00103 
00104         //reorder tasks with unique prefix
00105         std::vector<int32_t>::iterator endLocation = std::unique(vec.begin(), vec.end());
00106 
00107         //count unique tasks
00108         int32_t num_vec = std::distance(vec.begin(), endLocation);
00109 
00110         return num_vec;
00111 
00112     }
00113 
00114 
00116     virtual ~CMultitaskKernelPlifNormalizer()
00117     {
00118     }
00119 
00120 
00122     void update_cache()
00123     {
00124 
00125 
00126         for (int32_t i=0; i!=num_tasks; i++)
00127         {
00128             for (int32_t j=0; j!=num_tasks; j++)
00129             {
00130 
00131                 float64_t similarity = compute_task_similarity(i, j);
00132                 set_task_similarity(i,j,similarity);
00133 
00134             }
00135 
00136         }
00137     }
00138 
00139 
00141     float64_t compute_task_similarity(int32_t task_a, int32_t task_b)
00142     {
00143 
00144         float64_t distance = get_task_distance(task_a, task_b);
00145         float64_t similarity = -1;
00146 
00147         int32_t upper_bound_idx = -1;
00148 
00149 
00150         // determine interval
00151         for (int i=1; i!=num_betas; i++)
00152         {
00153             if (distance <= support[i])
00154             {
00155                 upper_bound_idx = i;
00156                 break;
00157             }
00158         }
00159 
00160         // perform interpolation (constant for beyond upper bound)
00161         if (upper_bound_idx == -1)
00162         {
00163 
00164             similarity = betas[num_betas-1];
00165 
00166         } else {
00167 
00168             int32_t lower_bound_idx = upper_bound_idx - 1;
00169             float64_t interval_size = support[upper_bound_idx] - support[lower_bound_idx];
00170 
00171             float64_t factor_lower = 1 - (distance - support[lower_bound_idx]) / interval_size;
00172             float64_t factor_upper = 1 - factor_lower;
00173 
00174             similarity = factor_lower*betas[lower_bound_idx] + factor_upper*betas[upper_bound_idx];
00175 
00176         }
00177 
00178         return similarity;
00179 
00180     }
00181 
00182 
00183 public:
00184 
00186     virtual std::vector<int32_t> get_task_vector_lhs() const
00187     {
00188         return task_vector_lhs;
00189     }
00190 
00192     virtual void set_task_vector_lhs(std::vector<int32_t> vec)
00193     {
00194         task_vector_lhs = vec;
00195     }
00196 
00198     virtual std::vector<int32_t> get_task_vector_rhs() const
00199     {
00200         return task_vector_rhs;
00201     }
00202 
00204     virtual void set_task_vector_rhs(std::vector<int32_t> vec)
00205     {
00206         task_vector_rhs = vec;
00207     }
00208 
00210     virtual void set_task_vector(std::vector<int32_t> vec)
00211     {
00212         task_vector_lhs = vec;
00213         task_vector_rhs = vec;
00214     }
00215 
00221     float64_t get_task_distance(int32_t task_lhs, int32_t task_rhs)
00222     {
00223 
00224         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00225         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00226 
00227         return distance_matrix[task_lhs * num_tasks + task_rhs];
00228 
00229     }
00230 
00236     void set_task_distance(int32_t task_lhs, int32_t task_rhs,
00237             float64_t distance)
00238     {
00239 
00240         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00241         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00242 
00243         distance_matrix[task_lhs * num_tasks + task_rhs] = distance;
00244 
00245     }
00246 
00252     float64_t get_task_similarity(int32_t task_lhs, int32_t task_rhs)
00253     {
00254 
00255         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00256         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00257 
00258         return similarity_matrix[task_lhs * num_tasks + task_rhs];
00259 
00260     }
00261 
00267     void set_task_similarity(int32_t task_lhs, int32_t task_rhs,
00268             float64_t similarity)
00269     {
00270 
00271         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00272         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00273 
00274         similarity_matrix[task_lhs * num_tasks + task_rhs] = similarity;
00275 
00276     }
00277 
00281     float64_t get_beta(int32_t idx)
00282     {
00283 
00284         return betas[idx];
00285 
00286     }
00287 
00292     void set_beta(int32_t idx, float64_t weight)
00293     {
00294 
00295         betas[idx] = weight;
00296 
00297         update_cache();
00298 
00299     }
00300 
00304     int32_t get_num_betas()
00305     {
00306 
00307         return num_betas;
00308 
00309     }
00310 
00311 
00313     virtual const char* get_name() const
00314     {
00315         return "MultitaskKernelPlifNormalizer";
00316     }
00317 
00321     CMultitaskKernelPlifNormalizer* KernelNormalizerToMultitaskKernelPlifNormalizer(CKernelNormalizer* n)
00322     {
00323            return dynamic_cast<shogun::CMultitaskKernelPlifNormalizer*>(n);
00324     }
00325 
00326 protected:
00329     virtual void register_params()
00330     {
00331         SG_ADD(&num_tasks, "num_tasks", "the number of tasks", MS_NOT_AVAILABLE);
00332         SG_ADD(&num_betas, "num_betas", "the number of weights", MS_NOT_AVAILABLE);
00333         m_parameters->add_vector((SGString<float64_t>**)&distance_matrix, &num_tasksqr, "distance_matrix", "distance between tasks");
00334         m_parameters->add_vector((SGString<float64_t>**)&similarity_matrix, &num_tasksqr, "similarity_matrix", "similarity between tasks");
00335         m_parameters->add_vector((SGString<float64_t>**)&betas, &num_betas, "num_betas", "weights");
00336         m_parameters->add_vector((SGString<float64_t>**)&support, &num_betas, "support", "support points");
00337     }
00338 
00340     int32_t num_tasks;
00342     int32_t num_tasksqr;
00343 
00345     std::vector<int32_t> task_vector_lhs;
00346 
00348     std::vector<int32_t> task_vector_rhs;
00349 
00351     std::vector<float64_t> distance_matrix;
00352 
00354     std::vector<float64_t> similarity_matrix;
00355 
00357     int32_t num_betas;
00358 
00360     std::vector<float64_t> betas;
00361 
00363     std::vector<float64_t> support;
00364 
00365 };
00366 }
00367 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation