Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef _MULTITASKKERNELTREENORMALIZER_H___
00012 #define _MULTITASKKERNELTREENORMALIZER_H___
00013
00014 #include <shogun/kernel/KernelNormalizer.h>
00015 #include <shogun/kernel/MultitaskKernelMklNormalizer.h>
00016 #include <shogun/kernel/Kernel.h>
00017 #include <algorithm>
00018 #include <map>
00019 #include <set>
00020 #include <deque>
00021
00022 namespace shogun
00023 {
00024
00029 class CNode: public CSGObject
00030 {
00031 public:
00034 CNode()
00035 {
00036 parent = NULL;
00037 beta = 1.0;
00038 node_id = 0;
00039 }
00040
00044 std::set<CNode*> get_path_root()
00045 {
00046 std::set<CNode*> nodes_on_path = std::set<CNode*>();
00047 CNode *node = this;
00048 while (node != NULL) {
00049 nodes_on_path.insert(node);
00050 node = node->parent;
00051 }
00052 return nodes_on_path;
00053 }
00054
00058 std::vector<int32_t> get_task_ids_below()
00059 {
00060
00061 std::vector<int32_t> task_ids;
00062 std::deque<CNode*> grey_nodes;
00063 grey_nodes.push_back(this);
00064
00065 while(grey_nodes.size() > 0)
00066 {
00067
00068 CNode *current_node = grey_nodes.front();
00069 grey_nodes.pop_front();
00070
00071 for(int32_t i = 0; i!=int32_t(current_node->children.size()); i++){
00072 grey_nodes.push_back(current_node->children[i]);
00073 }
00074
00075 if(current_node->is_leaf()){
00076 task_ids.push_back(current_node->getNode_id());
00077 }
00078 }
00079
00080 return task_ids;
00081 }
00082
00086 void add_child(CNode *node)
00087 {
00088 node->parent = this;
00089 this->children.push_back(node);
00090 }
00091
00093 inline virtual const char *get_name() const
00094 {
00095 return "CNode";
00096 }
00097
00099 bool is_leaf()
00100 {
00101 return children.empty();
00102
00103 }
00104
00106 int32_t getNode_id() const
00107 {
00108 return node_id;
00109 }
00110
00112 void setNode_id(int32_t node_idx)
00113 {
00114 this->node_id = node_idx;
00115 }
00116
00118 float64_t beta;
00119
00120 protected:
00121
00123 CNode* parent;
00124
00126 std::vector<CNode*> children;
00127
00129 int32_t node_id;
00130
00131 };
00132
00133
00138 class CTaxonomy : public CSGObject
00139 {
00140
00141 public:
00142
00145 CTaxonomy() : CSGObject()
00146 {
00147 root = new CNode();
00148 nodes.push_back(root);
00149
00150 name2id = std::map<std::string, int32_t>();
00151 name2id["root"] = 0;
00152 }
00153
00158 CNode* get_node(int32_t task_id) {
00159 return nodes[task_id];
00160 }
00161
00165 void set_root_beta(float64_t beta)
00166 {
00167 nodes[0]->beta = beta;
00168 }
00169
00175 CNode* add_node(std::string parent_name, std::string child_name, float64_t beta)
00176 {
00177 if (child_name=="") SG_ERROR("child_name empty");
00178 if (parent_name=="") SG_ERROR("parent_name empty");
00179
00180
00181 CNode* child_node = new CNode();
00182
00183 child_node->beta = beta;
00184
00185 nodes.push_back(child_node);
00186 int32_t id = nodes.size()-1;
00187
00188 name2id[child_name] = id;
00189
00190 child_node->setNode_id(id);
00191
00192
00193
00194 CNode* parent = nodes[name2id[parent_name]];
00195
00196 parent->add_child(child_node);
00197
00198 return child_node;
00199 }
00200
00205 int32_t get_id(std::string name) {
00206 return name2id[name];
00207 }
00208
00214 std::set<CNode*> intersect_root_path(CNode* node_lhs, CNode* node_rhs)
00215 {
00216
00217 std::set<CNode*> root_path_lhs = node_lhs->get_path_root();
00218 std::set<CNode*> root_path_rhs = node_rhs->get_path_root();
00219
00220 std::set<CNode*> intersection;
00221
00222 std::set_intersection(root_path_lhs.begin(), root_path_lhs.end(),
00223 root_path_rhs.begin(), root_path_rhs.end(),
00224 std::inserter(intersection, intersection.end()));
00225
00226 return intersection;
00227
00228 }
00229
00235 float64_t compute_node_similarity(int32_t task_lhs, int32_t task_rhs)
00236 {
00237
00238 CNode* node_lhs = get_node(task_lhs);
00239 CNode* node_rhs = get_node(task_rhs);
00240
00241
00242 std::set<CNode*> intersection = intersect_root_path(node_lhs, node_rhs);
00243
00244
00245 float64_t gamma = 0;
00246 for (std::set<CNode*>::const_iterator p = intersection.begin(); p != intersection.end(); ++p) {
00247
00248 gamma += (*p)->beta;
00249 }
00250
00251 return gamma;
00252
00253 }
00254
00258 void update_task_histogram(std::vector<int32_t> task_vector_lhs) {
00259
00260
00261 task_histogram.clear();
00262
00263
00264
00265 for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
00266 {
00267 task_histogram[*it] = 0.0;
00268 }
00269
00270
00271 for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
00272 {
00273 task_histogram[*it] += 1.0;
00274 }
00275
00276
00277 for (std::map<int32_t, float64_t>::const_iterator it=task_histogram.begin(); it!=task_histogram.end(); it++)
00278 {
00279 task_histogram[it->first] = task_histogram[it->first] / float64_t(task_vector_lhs.size());
00280 }
00281
00282 }
00283
00285 int32_t get_num_nodes()
00286 {
00287 return (int32_t)(nodes.size());
00288 }
00289
00291 int32_t get_num_leaves()
00292 {
00293 int32_t num_leaves = 0;
00294
00295 for (int32_t i=0; i!=get_num_nodes(); i++)
00296 {
00297 if (get_node(i)->is_leaf()==true)
00298 {
00299 num_leaves++;
00300 }
00301 }
00302
00303 return num_leaves;
00304 }
00305
00307 float64_t get_node_weight(int32_t idx)
00308 {
00309 CNode* node = get_node(idx);
00310 return node->beta;
00311 }
00312
00317 void set_node_weight(int32_t idx, float64_t weight)
00318 {
00319 CNode* node = get_node(idx);
00320 node->beta = weight;
00321 }
00322
00324 inline virtual const char* get_name() const
00325 {
00326 return "CTaxonomy";
00327 }
00328
00330 std::map<std::string, int32_t> get_name2id() {
00331 return name2id;
00332 }
00333
00339 int32_t get_id_by_name(std::string name)
00340 {
00341 return name2id[name];
00342 }
00343
00344
00345 protected:
00346
00348 CNode* root;
00350 std::map<std::string, int32_t> name2id;
00352 std::vector<CNode*> nodes;
00354 std::map<int32_t, float64_t> task_histogram;
00355
00356 };
00357
00358
00359
00360
00361 class CMultitaskKernelMklNormalizer;
00362
00366 class CMultitaskKernelTreeNormalizer: public CMultitaskKernelMklNormalizer
00367 {
00368
00369
00370
00371 public:
00372
00375 CMultitaskKernelTreeNormalizer() : CMultitaskKernelMklNormalizer()
00376 {
00377 }
00378
00385 CMultitaskKernelTreeNormalizer(std::vector<std::string> task_lhs,
00386 std::vector<std::string> task_rhs,
00387 CTaxonomy tax) : CMultitaskKernelMklNormalizer()
00388 {
00389
00390 taxonomy = tax;
00391 set_task_vector_lhs(task_lhs);
00392 set_task_vector_rhs(task_rhs);
00393
00394 num_nodes = taxonomy.get_num_nodes();
00395
00396 dependency_matrix = std::vector<float64_t>(num_nodes * num_nodes);
00397
00398 update_cache();
00399 }
00400
00401
00403 virtual ~CMultitaskKernelTreeNormalizer()
00404 {
00405 }
00406
00407
00409 void update_cache()
00410 {
00411
00412
00413 for (int32_t i=0; i!=num_nodes; i++)
00414 {
00415 for (int32_t j=0; j!=num_nodes; j++)
00416 {
00417
00418 float64_t similarity = taxonomy.compute_node_similarity(i, j);
00419 set_node_similarity(i,j,similarity);
00420
00421 }
00422
00423 }
00424 }
00425
00426
00427
00433 inline virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs)
00434 {
00435
00436 int32_t task_idx_lhs = task_vector_lhs[idx_lhs];
00437 int32_t task_idx_rhs = task_vector_rhs[idx_rhs];
00438
00439
00440 float64_t task_similarity = get_node_similarity(task_idx_lhs, task_idx_rhs);
00441
00442
00443
00444 float64_t similarity = (value/scale) * task_similarity;
00445
00446
00447 return similarity;
00448 }
00449
00454 inline virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs)
00455 {
00456 SG_ERROR("normalize_lhs not implemented");
00457 return 0;
00458 }
00459
00464 inline virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs)
00465 {
00466 SG_ERROR("normalize_rhs not implemented");
00467 return 0;
00468 }
00469
00470
00472 void set_task_vector_lhs(std::vector<std::string> vec)
00473 {
00474
00475 task_vector_lhs.clear();
00476
00477 for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
00478 {
00479 task_vector_lhs.push_back(taxonomy.get_id(vec[i]));
00480 }
00481
00482
00483 taxonomy.update_task_histogram(task_vector_lhs);
00484
00485 }
00486
00488 void set_task_vector_rhs(std::vector<std::string> vec)
00489 {
00490
00491 task_vector_rhs.clear();
00492
00493 for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
00494 {
00495 task_vector_rhs.push_back(taxonomy.get_id(vec[i]));
00496 }
00497
00498 }
00499
00501 void set_task_vector(std::vector<std::string> vec)
00502 {
00503 set_task_vector_lhs(vec);
00504 set_task_vector_rhs(vec);
00505 }
00506
00508 int32_t get_num_betas()
00509 {
00510
00511 return taxonomy.get_num_nodes();
00512
00513 }
00514
00518 float64_t get_beta(int32_t idx)
00519 {
00520
00521 return taxonomy.get_node_weight(idx);
00522
00523 }
00524
00528 void set_beta(int32_t idx, float64_t weight)
00529 {
00530
00531 taxonomy.set_node_weight(idx, weight);
00532
00533 update_cache();
00534
00535 }
00536
00537
00543 float64_t get_node_similarity(int32_t node_lhs, int32_t node_rhs)
00544 {
00545
00546 ASSERT(node_lhs < num_nodes && node_lhs >= 0);
00547 ASSERT(node_rhs < num_nodes && node_rhs >= 0);
00548
00549 return dependency_matrix[node_lhs * num_nodes + node_rhs];
00550
00551 }
00552
00558 void set_node_similarity(int32_t node_lhs, int32_t node_rhs,
00559 float64_t similarity)
00560 {
00561
00562 ASSERT(node_lhs < num_nodes && node_lhs >= 0);
00563 ASSERT(node_rhs < num_nodes && node_rhs >= 0);
00564
00565 dependency_matrix[node_lhs * num_nodes + node_rhs] = similarity;
00566
00567 }
00568
00570 inline virtual const char* get_name() const
00571 {
00572 return "MultitaskKernelTreeNormalizer";
00573 }
00574
00578 CMultitaskKernelTreeNormalizer* KernelNormalizerToMultitaskKernelTreeNormalizer(CKernelNormalizer* n)
00579 {
00580 return dynamic_cast<CMultitaskKernelTreeNormalizer*>(n);
00581 }
00582
00583 protected:
00585 CTaxonomy taxonomy;
00586
00588 int32_t num_nodes;
00589
00591 std::vector<int32_t> task_vector_lhs;
00592
00594 std::vector<int32_t> task_vector_rhs;
00595
00597 std::vector<float64_t> dependency_matrix;
00598 };
00599 }
00600 #endif