MultitaskKernelTreeNormalizer.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Christian Widmer
00008  * Copyright (C) 2010 Max-Planck-Society
00009  */
00010 
00011 #ifndef _MULTITASKKERNELTREENORMALIZER_H___
00012 #define _MULTITASKKERNELTREENORMALIZER_H___
00013 
00014 #include <shogun/kernel/KernelNormalizer.h>
00015 #include <shogun/kernel/MultitaskKernelMklNormalizer.h>
00016 #include <shogun/kernel/Kernel.h>
00017 #include <algorithm>
00018 #include <map>
00019 #include <set>
00020 #include <deque>
00021 
00022 namespace shogun
00023 {
00024 
00029 class CNode: public CSGObject
00030 {
00031 public:
00034     CNode()
00035     {
00036         parent = NULL;
00037         beta = 1.0;
00038         node_id = 0;
00039     }
00040 
00044     std::set<CNode*> get_path_root()
00045     {
00046         std::set<CNode*> nodes_on_path = std::set<CNode*>();
00047         CNode *node = this;
00048         while (node != NULL) {
00049             nodes_on_path.insert(node);
00050             node = node->parent;
00051         }
00052         return nodes_on_path;
00053     }
00054 
00058     std::vector<int32_t> get_task_ids_below()
00059     {
00060 
00061         std::vector<int32_t> task_ids;
00062         std::deque<CNode*> grey_nodes;
00063         grey_nodes.push_back(this);
00064 
00065         while(grey_nodes.size() > 0)
00066         {
00067 
00068             CNode *current_node = grey_nodes.front();
00069             grey_nodes.pop_front();
00070 
00071             for(int32_t i = 0; i!=int32_t(current_node->children.size()); i++){
00072                 grey_nodes.push_back(current_node->children[i]);
00073             }
00074 
00075             if(current_node->is_leaf()){
00076                 task_ids.push_back(current_node->getNode_id());
00077             }
00078         }
00079 
00080         return task_ids;
00081     }
00082 
00086     void add_child(CNode *node)
00087     {
00088         node->parent = this;
00089         this->children.push_back(node);
00090     }
00091 
00093     inline virtual const char *get_name() const
00094     {
00095         return "CNode";
00096     }
00097 
00099     bool is_leaf()
00100     {
00101         return children.empty();
00102 
00103     }
00104 
00106     int32_t getNode_id() const
00107     {
00108         return node_id;
00109     }
00110 
00112     void setNode_id(int32_t node_idx)
00113     {
00114         this->node_id = node_idx;
00115     }
00116 
00118     float64_t beta;
00119 
00120 protected:
00121 
00123     CNode* parent;
00124 
00126     std::vector<CNode*> children;
00127 
00129     int32_t node_id;
00130 
00131 };
00132 
00133 
00138 class CTaxonomy : public CSGObject
00139 {
00140 
00141 public:
00142 
00145     CTaxonomy() : CSGObject()
00146     {
00147         root = new CNode();
00148         nodes.push_back(root);
00149 
00150         name2id = std::map<std::string, int32_t>();
00151         name2id["root"] = 0;
00152     }
00153 
00158     CNode* get_node(int32_t task_id) {
00159         return nodes[task_id];
00160     }
00161 
00165     void set_root_beta(float64_t beta)
00166     {
00167         nodes[0]->beta = beta;
00168     }
00169 
00175     CNode* add_node(std::string parent_name, std::string child_name, float64_t beta)
00176     {
00177         if (child_name=="") SG_ERROR("child_name empty");
00178         if (parent_name=="") SG_ERROR("parent_name empty");
00179 
00180 
00181         CNode* child_node = new CNode();
00182 
00183         child_node->beta = beta;
00184 
00185         nodes.push_back(child_node);
00186         int32_t id = nodes.size()-1;
00187 
00188         name2id[child_name] = id;
00189 
00190         child_node->setNode_id(id);
00191 
00192 
00193         //create edge
00194         CNode* parent = nodes[name2id[parent_name]];
00195 
00196         parent->add_child(child_node);
00197 
00198         return child_node;
00199     }
00200 
00205     int32_t get_id(std::string name) {
00206         return name2id[name];
00207     }
00208 
00214     std::set<CNode*> intersect_root_path(CNode* node_lhs, CNode* node_rhs)
00215     {
00216 
00217         std::set<CNode*> root_path_lhs = node_lhs->get_path_root();
00218         std::set<CNode*> root_path_rhs = node_rhs->get_path_root();
00219 
00220         std::set<CNode*> intersection;
00221 
00222         std::set_intersection(root_path_lhs.begin(), root_path_lhs.end(),
00223                               root_path_rhs.begin(), root_path_rhs.end(),
00224                               std::inserter(intersection, intersection.end()));
00225 
00226         return intersection;
00227 
00228     }
00229 
00235     float64_t compute_node_similarity(int32_t task_lhs, int32_t task_rhs)
00236     {
00237 
00238         CNode* node_lhs = get_node(task_lhs);
00239         CNode* node_rhs = get_node(task_rhs);
00240 
00241         // compute intersection of paths to root
00242         std::set<CNode*> intersection = intersect_root_path(node_lhs, node_rhs);
00243 
00244         // sum up weights
00245         float64_t gamma = 0;
00246         for (std::set<CNode*>::const_iterator p = intersection.begin(); p != intersection.end(); ++p) {
00247 
00248             gamma += (*p)->beta;
00249         }
00250 
00251         return gamma;
00252 
00253     }
00254 
00258     void update_task_histogram(std::vector<int32_t> task_vector_lhs) {
00259 
00260         //empty map
00261         task_histogram.clear();
00262 
00263 
00264         //fill map with zeros
00265         for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
00266         {
00267             task_histogram[*it] = 0.0;
00268         }
00269 
00270         //fill map
00271         for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
00272         {
00273             task_histogram[*it] += 1.0;
00274         }
00275 
00276         //compute fractions
00277         for (std::map<int32_t, float64_t>::const_iterator it=task_histogram.begin(); it!=task_histogram.end(); it++)
00278         {
00279             task_histogram[it->first] = task_histogram[it->first] / float64_t(task_vector_lhs.size());
00280         }
00281 
00282     }
00283 
00285     int32_t get_num_nodes()
00286     {
00287         return (int32_t)(nodes.size());
00288     }
00289 
00291     int32_t get_num_leaves()
00292     {
00293         int32_t num_leaves = 0;
00294 
00295         for (int32_t i=0; i!=get_num_nodes(); i++)
00296         {
00297             if (get_node(i)->is_leaf()==true)
00298             {
00299                 num_leaves++;
00300             }
00301         }
00302 
00303         return num_leaves;
00304     }
00305 
00307     float64_t get_node_weight(int32_t idx)
00308     {
00309         CNode* node = get_node(idx);
00310         return node->beta;
00311     }
00312 
00317     void set_node_weight(int32_t idx, float64_t weight)
00318     {
00319         CNode* node = get_node(idx);
00320         node->beta = weight;
00321     }
00322 
00324     inline virtual const char* get_name() const
00325     {
00326         return "CTaxonomy";
00327     }
00328 
00330     std::map<std::string, int32_t> get_name2id() {
00331         return name2id;
00332     }
00333 
00339     int32_t get_id_by_name(std::string name)
00340     {
00341         return name2id[name];
00342     }
00343 
00344 
00345 protected:
00346 
00348     CNode* root;
00350     std::map<std::string, int32_t> name2id;
00352     std::vector<CNode*> nodes;
00354     std::map<int32_t, float64_t> task_histogram;
00355 
00356 };
00357 
00358 
00359 
00360 
00361 class CMultitaskKernelMklNormalizer;
00362 
00366 class CMultitaskKernelTreeNormalizer: public CMultitaskKernelMklNormalizer
00367 {
00368 
00369 
00370 
00371 public:
00372 
00375     CMultitaskKernelTreeNormalizer() : CMultitaskKernelMklNormalizer()
00376     {
00377     }
00378 
00385     CMultitaskKernelTreeNormalizer(std::vector<std::string> task_lhs,
00386                                    std::vector<std::string> task_rhs,
00387                                    CTaxonomy tax) : CMultitaskKernelMklNormalizer()
00388     {
00389 
00390         taxonomy = tax;
00391         set_task_vector_lhs(task_lhs);
00392         set_task_vector_rhs(task_rhs);
00393 
00394         num_nodes = taxonomy.get_num_nodes();
00395 
00396         dependency_matrix = std::vector<float64_t>(num_nodes * num_nodes);
00397 
00398         update_cache();
00399     }
00400 
00401 
00403     virtual ~CMultitaskKernelTreeNormalizer()
00404     {
00405     }
00406 
00407 
00409     void update_cache()
00410     {
00411 
00412 
00413         for (int32_t i=0; i!=num_nodes; i++)
00414         {
00415             for (int32_t j=0; j!=num_nodes; j++)
00416             {
00417 
00418                 float64_t similarity = taxonomy.compute_node_similarity(i, j);
00419                 set_node_similarity(i,j,similarity);
00420 
00421             }
00422 
00423         }
00424     }
00425 
00426 
00427 
00433     inline virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs)
00434     {
00435         //lookup tasks
00436         int32_t task_idx_lhs = task_vector_lhs[idx_lhs];
00437         int32_t task_idx_rhs = task_vector_rhs[idx_rhs];
00438 
00439         //lookup similarity
00440         float64_t task_similarity = get_node_similarity(task_idx_lhs, task_idx_rhs);
00441         //float64_t task_similarity = taxonomy.compute_node_similarity(task_idx_lhs, task_idx_rhs);
00442 
00443         //take task similarity into account
00444         float64_t similarity = (value/scale) * task_similarity;
00445 
00446 
00447         return similarity;
00448     }
00449 
00454     inline virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs)
00455     {
00456         SG_ERROR("normalize_lhs not implemented");
00457         return 0;
00458     }
00459 
00464     inline virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs)
00465     {
00466         SG_ERROR("normalize_rhs not implemented");
00467         return 0;
00468     }
00469 
00470 
00472     void set_task_vector_lhs(std::vector<std::string> vec)
00473     {
00474 
00475         task_vector_lhs.clear();
00476 
00477         for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
00478         {
00479             task_vector_lhs.push_back(taxonomy.get_id(vec[i]));
00480         }
00481 
00482         //update task histogram
00483         taxonomy.update_task_histogram(task_vector_lhs);
00484 
00485     }
00486 
00488     void set_task_vector_rhs(std::vector<std::string> vec)
00489     {
00490 
00491         task_vector_rhs.clear();
00492 
00493         for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
00494         {
00495             task_vector_rhs.push_back(taxonomy.get_id(vec[i]));
00496         }
00497 
00498     }
00499 
00501     void set_task_vector(std::vector<std::string> vec)
00502     {
00503         set_task_vector_lhs(vec);
00504         set_task_vector_rhs(vec);
00505     }
00506 
00508     int32_t get_num_betas()
00509     {
00510 
00511         return taxonomy.get_num_nodes();
00512 
00513     }
00514 
00518     float64_t get_beta(int32_t idx)
00519     {
00520 
00521         return taxonomy.get_node_weight(idx);
00522 
00523     }
00524 
00528     void set_beta(int32_t idx, float64_t weight)
00529     {
00530 
00531         taxonomy.set_node_weight(idx, weight);
00532 
00533         update_cache();
00534 
00535     }
00536 
00537 
00543     float64_t get_node_similarity(int32_t node_lhs, int32_t node_rhs)
00544     {
00545 
00546         ASSERT(node_lhs < num_nodes && node_lhs >= 0);
00547         ASSERT(node_rhs < num_nodes && node_rhs >= 0);
00548 
00549         return dependency_matrix[node_lhs * num_nodes + node_rhs];
00550 
00551     }
00552 
00558     void set_node_similarity(int32_t node_lhs, int32_t node_rhs,
00559             float64_t similarity)
00560     {
00561 
00562         ASSERT(node_lhs < num_nodes && node_lhs >= 0);
00563         ASSERT(node_rhs < num_nodes && node_rhs >= 0);
00564 
00565         dependency_matrix[node_lhs * num_nodes + node_rhs] = similarity;
00566 
00567     }
00568 
00570     inline virtual const char* get_name() const
00571     {
00572         return "MultitaskKernelTreeNormalizer";
00573     }
00574 
00578     CMultitaskKernelTreeNormalizer* KernelNormalizerToMultitaskKernelTreeNormalizer(CKernelNormalizer* n)
00579     {
00580         return dynamic_cast<CMultitaskKernelTreeNormalizer*>(n);
00581     }
00582 
00583 protected:
00585     CTaxonomy taxonomy;
00586 
00588     int32_t num_nodes;
00589 
00591     std::vector<int32_t> task_vector_lhs;
00592 
00594     std::vector<int32_t> task_vector_rhs;
00595 
00597     std::vector<float64_t> dependency_matrix;
00598 };
00599 }
00600 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation