MultitaskKernelTreeNormalizer.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Christian Widmer
00008  * Copyright (C) 2010 Max-Planck-Society
00009  */
00010 
00011 #ifndef _MULTITASKKERNELTREENORMALIZER_H___
00012 #define _MULTITASKKERNELTREENORMALIZER_H___
00013 
00014 #include "kernel/KernelNormalizer.h"
00015 #include "kernel/MultitaskKernelMklNormalizer.h"
00016 #include "kernel/Kernel.h"
00017 #include <algorithm>
00018 #include <map>
00019 #include <set>
00020 #include <deque>
00021 
00022 namespace shogun
00023 {
00024 
00029 class CNode: public CSGObject
00030 {
00031 
00032 public:
00033 
00034 
00037     CNode()
00038     {
00039         parent = NULL;
00040         beta = 1.0;
00041         node_id = 0;
00042     }
00043 
00047     std::set<CNode*> get_path_root()
00048     {
00049         std::set<CNode*> nodes_on_path = std::set<CNode*>();
00050         CNode *node = this;
00051         while (node != NULL) {
00052             nodes_on_path.insert(node);
00053             node = node->parent;
00054         }
00055         return nodes_on_path;
00056     }
00057 
00061     std::vector<int32_t> get_task_ids_below()
00062     {
00063 
00064         std::vector<int32_t> task_ids;
00065         std::deque<CNode*> grey_nodes;
00066         grey_nodes.push_back(this);
00067 
00068         while(grey_nodes.size() > 0)
00069         {
00070 
00071             CNode *current_node = grey_nodes.front();
00072             grey_nodes.pop_front();
00073 
00074             for(int32_t i = 0; i!=int32_t(current_node->children.size()); i++){
00075                 grey_nodes.push_back(current_node->children[i]);
00076             }
00077 
00078             if(current_node->is_leaf()){
00079                 task_ids.push_back(current_node->getNode_id());
00080             }
00081         }
00082 
00083         return task_ids;
00084     }
00085 
00089     void add_child(CNode *node)
00090     {
00091         node->parent = this;
00092         this->children.push_back(node);
00093     }
00094 
00096     inline virtual const char *get_name() const
00097     {
00098         return "CNode";
00099     }
00100 
00102     bool is_leaf()
00103     {
00104         return children.empty();
00105 
00106     }
00107 
00109     int32_t getNode_id() const
00110     {
00111         return node_id;
00112     }
00113 
00115     void setNode_id(int32_t node_idx)
00116     {
00117         this->node_id = node_idx;
00118     }
00119 
00121     float64_t beta;
00122 
00123 protected:
00124 
00126     CNode* parent;
00127 
00129     std::vector<CNode*> children;
00130 
00132     int32_t node_id;
00133 
00134 };
00135 
00136 
00141 class CTaxonomy : public CSGObject
00142 {
00143 
00144 public:
00145 
00148     CTaxonomy() : CSGObject()
00149     {
00150         root = new CNode();
00151         nodes.push_back(root);
00152 
00153         name2id = std::map<std::string, int32_t>();
00154         name2id["root"] = 0;
00155     }
00156 
00161     CNode* get_node(int32_t task_id) {
00162         return nodes[task_id];
00163     }
00164 
00168     void set_root_beta(float64_t beta)
00169     {
00170         nodes[0]->beta = beta;
00171     }
00172 
00178     CNode* add_node(std::string parent_name, std::string child_name, float64_t beta) {
00179 
00180 
00181         if (child_name=="") SG_ERROR("child_name empty");
00182         if (parent_name=="") SG_ERROR("parent_name empty");
00183 
00184 
00185         CNode* child_node = new CNode();
00186 
00187         child_node->beta = beta;
00188 
00189         nodes.push_back(child_node);
00190         int32_t id = nodes.size()-1;
00191 
00192         name2id[child_name] = id;
00193 
00194         child_node->setNode_id(id);
00195 
00196 
00197         //create edge
00198         CNode* parent = nodes[name2id[parent_name]];
00199 
00200         parent->add_child(child_node);
00201 
00202         return child_node;
00203 
00204     }
00205 
00210     int32_t get_id(std::string name) {
00211         return name2id[name];
00212     }
00213 
00219     std::set<CNode*> intersect_root_path(CNode* node_lhs, CNode* node_rhs) {
00220 
00221         std::set<CNode*> root_path_lhs = node_lhs->get_path_root();
00222         std::set<CNode*> root_path_rhs = node_rhs->get_path_root();
00223 
00224         std::set<CNode*> intersection;
00225 
00226         std::set_intersection(root_path_lhs.begin(), root_path_lhs.end(),
00227                               root_path_rhs.begin(), root_path_rhs.end(),
00228                               std::inserter(intersection, intersection.end()));
00229 
00230         return intersection;
00231 
00232     }
00233 
00239     float64_t compute_node_similarity(int32_t task_lhs, int32_t task_rhs)
00240     {
00241 
00242         CNode* node_lhs = get_node(task_lhs);
00243         CNode* node_rhs = get_node(task_rhs);
00244 
00245         // compute intersection of paths to root
00246         std::set<CNode*> intersection = intersect_root_path(node_lhs, node_rhs);
00247 
00248         // sum up weights
00249         float64_t gamma = 0;
00250         for (std::set<CNode*>::const_iterator p = intersection.begin(); p != intersection.end(); ++p) {
00251 
00252             gamma += (*p)->beta;
00253         }
00254 
00255         return gamma;
00256 
00257     }
00258 
00262     void update_task_histogram(std::vector<int32_t> task_vector_lhs) {
00263 
00264         //empty map
00265         task_histogram.clear();
00266 
00267 
00268         //fill map with zeros
00269         for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
00270         {
00271             task_histogram[*it] = 0.0;
00272         }
00273 
00274         //fill map
00275         for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
00276         {
00277             task_histogram[*it] += 1.0;
00278         }
00279 
00280         //compute fractions
00281         for (std::map<int32_t, float64_t>::const_iterator it=task_histogram.begin(); it!=task_histogram.end(); it++)
00282         {
00283             task_histogram[it->first] = task_histogram[it->first] / float64_t(task_vector_lhs.size());
00284         }
00285 
00286     }
00287 
00289     int32_t get_num_nodes()
00290     {
00291         return (int32_t)(nodes.size());
00292     }
00293 
00295     int32_t get_num_leaves()
00296     {
00297         int32_t num_leaves = 0;
00298 
00299         for (int32_t i=0; i!=get_num_nodes(); i++)
00300         {
00301             if (get_node(i)->is_leaf()==true)
00302             {
00303                 num_leaves++;
00304             }
00305         }
00306 
00307         return num_leaves;
00308     }
00309 
00311     float64_t get_node_weight(int32_t idx)
00312     {
00313         CNode* node = get_node(idx);
00314         return node->beta;
00315     }
00316 
00321     void set_node_weight(int32_t idx, float64_t weight)
00322     {
00323         CNode* node = get_node(idx);
00324         node->beta = weight;
00325     }
00326 
00328     inline virtual const char* get_name() const
00329     {
00330         return "CTaxonomy";
00331     }
00332 
00334     std::map<std::string, int32_t> get_name2id() {
00335         return name2id;
00336     }
00337 
00343     int32_t get_id_by_name(std::string name)
00344     {
00345         return name2id[name];
00346     }
00347 
00348 
00349 protected:
00350 
00351     CNode* root;
00352     std::map<std::string, int32_t> name2id;
00353     std::vector<CNode*> nodes;
00354     std::map<int32_t, float64_t> task_histogram;
00355 
00356 };
00357 
00358 
00359 
00360 
00361 class CMultitaskKernelMklNormalizer;
00362 
00366 class CMultitaskKernelTreeNormalizer: public CMultitaskKernelMklNormalizer
00367 {
00368 
00369 
00370 
00371 public:
00372 
00375     CMultitaskKernelTreeNormalizer() : CMultitaskKernelMklNormalizer()
00376     {
00377     }
00378 
00385     CMultitaskKernelTreeNormalizer(std::vector<std::string> task_lhs,
00386                                    std::vector<std::string> task_rhs,
00387                                    CTaxonomy tax) : CMultitaskKernelMklNormalizer()
00388     {
00389 
00390         taxonomy = tax;
00391         set_task_vector_lhs(task_lhs);
00392         set_task_vector_rhs(task_rhs);
00393 
00394         num_nodes = taxonomy.get_num_nodes();
00395 
00396         dependency_matrix = std::vector<float64_t>(num_nodes * num_nodes);
00397 
00398         update_cache();
00399     }
00400 
00401 
00403     virtual ~CMultitaskKernelTreeNormalizer()
00404     {
00405     }
00406 
00407 
00409     void update_cache()
00410     {
00411 
00412 
00413         for (int32_t i=0; i!=num_nodes; i++)
00414         {
00415             for (int32_t j=0; j!=num_nodes; j++)
00416             {
00417 
00418                 float64_t similarity = taxonomy.compute_node_similarity(i, j);
00419                 set_node_similarity(i,j,similarity);
00420 
00421             }
00422 
00423         }
00424     }
00425 
00426 
00427 
00433     inline virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs)
00434     {
00435 
00436         //lookup tasks
00437         int32_t task_idx_lhs = task_vector_lhs[idx_lhs];
00438         int32_t task_idx_rhs = task_vector_rhs[idx_rhs];
00439 
00440         //lookup similarity
00441         float64_t task_similarity = get_node_similarity(task_idx_lhs, task_idx_rhs);
00442         //float64_t task_similarity = taxonomy.compute_node_similarity(task_idx_lhs, task_idx_rhs);
00443 
00444         //take task similarity into account
00445         float64_t similarity = (value/scale) * task_similarity;
00446 
00447 
00448         return similarity;
00449 
00450     }
00451 
00456     inline virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs)
00457     {
00458         SG_ERROR("normalize_lhs not implemented");
00459         return 0;
00460     }
00461 
00466     inline virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs)
00467     {
00468         SG_ERROR("normalize_rhs not implemented");
00469         return 0;
00470     }
00471 
00472 
00474     void set_task_vector_lhs(std::vector<std::string> vec)
00475     {
00476 
00477         task_vector_lhs.clear();
00478 
00479         for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
00480         {
00481             task_vector_lhs.push_back(taxonomy.get_id(vec[i]));
00482         }
00483 
00484         //update task histogram
00485         taxonomy.update_task_histogram(task_vector_lhs);
00486 
00487     }
00488 
00490     void set_task_vector_rhs(std::vector<std::string> vec)
00491     {
00492 
00493         task_vector_rhs.clear();
00494 
00495         for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
00496         {
00497             task_vector_rhs.push_back(taxonomy.get_id(vec[i]));
00498         }
00499 
00500     }
00501 
00503     void set_task_vector(std::vector<std::string> vec)
00504     {
00505         set_task_vector_lhs(vec);
00506         set_task_vector_rhs(vec);
00507     }
00508 
00510     int32_t get_num_betas()
00511     {
00512 
00513         return taxonomy.get_num_nodes();
00514 
00515     }
00516 
00520     float64_t get_beta(int32_t idx)
00521     {
00522 
00523         return taxonomy.get_node_weight(idx);
00524 
00525     }
00526 
00530     void set_beta(int32_t idx, float64_t weight)
00531     {
00532 
00533         taxonomy.set_node_weight(idx, weight);
00534 
00535         update_cache();
00536 
00537     }
00538 
00539 
00545     float64_t get_node_similarity(int32_t node_lhs, int32_t node_rhs)
00546     {
00547 
00548         ASSERT(node_lhs < num_nodes && node_lhs >= 0);
00549         ASSERT(node_rhs < num_nodes && node_rhs >= 0);
00550 
00551         return dependency_matrix[node_lhs * num_nodes + node_rhs];
00552 
00553     }
00554 
00560     void set_node_similarity(int32_t node_lhs, int32_t node_rhs,
00561             float64_t similarity)
00562     {
00563 
00564         ASSERT(node_lhs < num_nodes && node_lhs >= 0);
00565         ASSERT(node_rhs < num_nodes && node_rhs >= 0);
00566 
00567         dependency_matrix[node_lhs * num_nodes + node_rhs] = similarity;
00568 
00569     }
00570 
00571 
00573     inline virtual const char* get_name() const
00574     {
00575         return "MultitaskKernelTreeNormalizer";
00576     }
00577 
00578 
00579 
00580 protected:
00581 
00582 
00584     CTaxonomy taxonomy;
00585 
00587     int32_t num_nodes;
00588 
00590     std::vector<int32_t> task_vector_lhs;
00591 
00593     std::vector<int32_t> task_vector_rhs;
00594 
00596     std::vector<float64_t> dependency_matrix;
00597 
00598 };
00599 }
00600 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation