SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
MultitaskLinearMachine.cpp
浏览该文件的文档.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Sergey Lisitsyn
8  */
9 
13 
14 #include <map>
15 #include <vector>
16 
17 using namespace std;
18 
19 namespace shogun
20 {
21 
/**
 * Default constructor.
 *
 * Constructs the CLinearMachine base, starts with task index 0 and no task
 * relation (NULL), then calls register_parameters().
 */
22 CMultitaskLinearMachine::CMultitaskLinearMachine() :
23  CLinearMachine(), m_current_task(0),
24  m_task_relation(NULL)
25 {
26  register_parameters();
27 }
28 
// Constructor taking training features, training labels and a task relation.
// NOTE(review): the opening line of this definition (listing line 29) is
// missing from this extract; only the parameter list tail is visible.
//
// Members start at the same defaults as in the default constructor
// (task 0, NULL relation); the arguments are then installed through the
// regular setters before the parameters are registered.
30  CDotFeatures* train_features,
31  CLabels* train_labels, CTaskRelation* task_relation) :
32  CLinearMachine(), m_current_task(0), m_task_relation(NULL)
33 {
34  set_features(train_features);
35  set_labels(train_labels);
36  set_task_relation(task_relation);
37  register_parameters();
38 }
39 
// NOTE(review): the signature (listing line 40) and the single body line
// (listing line 42) are missing from this extract. Given its position right
// after the constructors this is presumably the destructor - confirm against
// the upstream file.
41 {
43 }
44 
/**
 * Registers the members of this class with the parameter framework.
 *
 * Only m_task_relation is registered here, under the name "task_relation",
 * with the MS_NOT_AVAILABLE flag.
 */
45 void CMultitaskLinearMachine::register_parameters()
46 {
47  SG_ADD((CSGObject**)&m_task_relation, "task_relation", "task relation", MS_NOT_AVAILABLE);
48 }
49 
// Returns the currently selected task index (m_current_task).
// NOTE(review): the signature (listing line 50) is missing from this extract;
// presumably this is the getter paired with set_current_task() - confirm.
51 {
52  return m_current_task;
53 }
54 
// Selects the task whose weights/bias subsequent calls operate on by storing
// `task` in m_current_task.
// NOTE(review): the signature (listing line 55) is missing from this extract;
// presumably set_current_task(task), which apply_locked_binary calls - confirm.
56 {
 // Negative task indices are rejected.
57  ASSERT(task>=0)
 // NOTE(review): listing line 58 is missing here; possibly an upper-bound
 // check on `task` - confirm against the upstream file.
59  m_current_task = task;
60 }
61 
// Returns the stored task relation pointer (m_task_relation).
// NOTE(review): the signature (listing line 62) is missing from this extract;
// the page index lists `CTaskRelation * get_task_relation() const`.
63 {
 // NOTE(review): listing line 64 is missing here; whether a reference is
 // taken on the returned object cannot be told from the visible lines.
65  return m_task_relation;
66 }
67 
// Stores `task_relation` as the machine's task relation.
// NOTE(review): the signature (listing line 68) is missing from this extract;
// the page index lists `void set_task_relation(CTaskRelation *task_relation)`.
69 {
 // SG_REF is applied to the incoming relation before it is stored.
70  SG_REF(task_relation);
 // NOTE(review): listing line 71 is missing here; presumably it releases the
 // previously held relation - confirm, otherwise the old one would leak.
72  m_task_relation = task_relation;
73 }
74 
// Unconditionally reports failure by returning false.
// NOTE(review): the signature (listing line 75) and one body line (listing
// line 77) are missing from this extract. Presumably this is a
// not-implemented stub (the page index lists SG_NOTIMPLEMENTED and
// train_machine(CFeatures *data=NULL)) - confirm against the upstream file.
76 {
78  return false;
79 }
80 
// Rebuilds the per-task index sets (m_tasks_indices) from the task relation.
// NOTE(review): the signature (listing line 81) is missing from this extract;
// the page index lists `post_lock(CLabels *labels, CFeatures *features_)`.
82 {
 // Install the passed features as this machine's features.
83  set_features((CDotFeatures*)features_);
 // NOTE(review): both C-style casts below are unchecked, and m_task_relation
 // is NULL after construction; this relies on a CTaskGroup relation having
 // been set beforehand - confirm callers guarantee it.
84  int n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
85  SGVector<index_t>* tasks_indices = ((CTaskGroup*)m_task_relation)->get_tasks_indices();
86 
 // One set per task, in task order, holding that task's indices for
 // membership tests via count() in train_locked/apply_locked_binary.
87  m_tasks_indices.clear();
88  for (int32_t i=0; i<n_tasks; i++)
89  {
90  set<index_t> indices_set;
91  SGVector<index_t> task_indices = tasks_indices[i];
92  for (int32_t j=0; j<task_indices.vlen; j++)
93  indices_set.insert(task_indices[j]);
94 
95  m_tasks_indices.push_back(indices_set);
96  }
97 
 // The array returned by get_tasks_indices() is released here; presumably
 // the caller owns it - confirm against CTaskGroup.
98  SG_FREE(tasks_indices);
99 }
100 
// Trains on a subset of the data given by `indices`.
// NOTE(review): the signature (listing line 101) is missing from this
// extract; the page index lists `bool train_locked(SGVector< index_t > indices)`.
//
// The indices are split per task using m_tasks_indices, and the per-task
// vectors are handed to train_locked_implementation(); its result is returned.
102 {
103  int n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
 // m_tasks_indices must hold one set per task; it is filled in the block
 // that iterates get_tasks_indices() above.
104  ASSERT((int)m_tasks_indices.size()==n_tasks)
105  vector< vector<index_t> > cutted_task_indices;
106  for (int32_t i=0; i<n_tasks; i++)
107  cutted_task_indices.push_back(vector<index_t>());
 // Each index goes to the first task whose set contains it; an index that
 // belongs to no task is silently dropped.
108  for (int32_t i=0; i<indices.vlen; i++)
109  {
110  for (int32_t j=0; j<n_tasks; j++)
111  {
112  if (m_tasks_indices[j].count(indices[i]))
113  {
114  cutted_task_indices[j].push_back(indices[i]);
115  break;
116  }
117  }
118  }
 // Copy the per-task std::vectors into an array of SGVectors.
119  SGVector<index_t>* tasks = SG_MALLOC(SGVector<index_t>, n_tasks);
120  for (int32_t i=0; i<n_tasks; i++)
121  {
122  tasks[i]=SGVector<index_t>(cutted_task_indices[i].size());
123  for (int32_t j=0; j<(int)cutted_task_indices[i].size(); j++)
124  tasks[i][j] = cutted_task_indices[i][j];
125  //tasks[i].display_vector();
126  }
127  bool res = train_locked_implementation(tasks);
 // The array is released after the call, so the implementation must not keep
 // the `tasks` pointer.
128  SG_FREE(tasks);
129  return res;
130 }
131 
// Unconditionally returns false.
// NOTE(review): the signature (listing line 132) and one body line (listing
// line 134) are missing from this extract. Presumably this is the base
// train_locked_implementation(SGVector< index_t > *tasks) stub listed in the
// page index - confirm against the upstream file.
133 {
135  return false;
136 }
137 
// Computes one output per entry of `indices` and wraps them in binary labels.
// NOTE(review): the signature (listing line 138) is missing from this
// extract; the page index lists
// `CBinaryLabels * apply_locked_binary(SGVector< index_t > indices)`.
139 {
140  int n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
141  SGVector<float64_t> result(indices.vlen);
 // Entries whose index belongs to no task keep this zero value.
142  result.zero();
143  for (int32_t i=0; i<indices.vlen; i++)
144  {
 // Find the first task whose index set contains indices[i], switch the
 // machine to it and evaluate that example.
145  for (int32_t j=0; j<n_tasks; j++)
146  {
147  if (m_tasks_indices[j].count(indices[i]))
148  {
 // The selected task is not restored after the loops finish.
149  set_current_task(j);
150  result[i] = apply_one(indices[i]);
151  break;
152  }
153  }
154  }
 // Returns a newly allocated CBinaryLabels built from `result`.
155  return new CBinaryLabels(result);
156 }
157 
// Unconditionally returns 0.0.
// NOTE(review): the signature (listing line 158) and one body line (listing
// line 160) are missing from this extract. Presumably this is the base
// apply_one(int32_t i) stub listed in the page index - confirm against the
// upstream file.
159 {
161  return 0.0;
162 }
163 
// Computes apply_one(i) for every vector of the current features.
// NOTE(review): the signature (listing line 164) is missing from this
// extract; the page index lists
// `SGVector< float64_t > apply_get_outputs(CFeatures *data=NULL)`.
165 {
 // When `data` is given it replaces the machine's features, provided it
 // supports dot products (FP_DOT); otherwise SG_ERROR is raised.
166  if (data)
167  {
168  if (!data->has_property(FP_DOT))
169  SG_ERROR("Specified features are not of type CDotFeatures\n")
170 
171  set_features((CDotFeatures*) data);
172  }
173 
 // With no features available, an empty vector is returned.
174  if (!features)
175  return SGVector<float64_t>();
176 
177  int32_t num=features->get_num_vectors();
178  ASSERT(num>0)
179  float64_t* out=SG_MALLOC(float64_t, num);
180  for (int32_t i=0; i<num; i++)
181  out[i] = apply_one(i);
182 
 // The raw buffer is handed to the SGVector; presumably the vector takes
 // ownership and frees it - confirm against SGVector's constructor.
183  return SGVector<float64_t>(out,num);
184 }
185 
// Returns the weight vector of the currently selected task.
// NOTE(review): the signature (listing line 186) is missing from this
// extract; the page index lists `SGVector< float64_t > get_w() const`.
187 {
 // NOTE(review): listing line 188 is missing here; presumably it declares
 // and sizes `w_` - confirm against the upstream file.
 // Copies column m_current_task of m_tasks_w into w_.
189  for (int32_t i=0; i<w_.vlen; i++)
190  w_[i] = m_tasks_w(i,m_current_task);
191  return w_;
192 }
193 
// Overwrites the weight vector of the currently selected task with `src_w`.
// NOTE(review): the signature (listing line 194) is missing from this
// extract; the page index lists `void set_w(const SGVector< float64_t > src_w)`.
195 {
 // Writes column m_current_task of m_tasks_w row by row.
 // NOTE(review): the visible lines do not check src_w.vlen against
 // m_tasks_w.num_rows; a shorter src_w would be read out of bounds.
196  for (int32_t i=0; i<m_tasks_w.num_rows; i++)
197  m_tasks_w(i,m_current_task) = src_w[i];
198 }
199 
// NOTE(review): the signature (listing line 200) and the single body line
// (listing line 202) are missing from this extract. Given its position
// between the weight setter and the bias getter this is presumably the bias
// setter - confirm against the upstream file.
201 {
203 }
204 
// Returns the entry of m_tasks_c for the currently selected task.
// NOTE(review): the signature (listing line 205) is missing from this
// extract; presumably this is the bias getter - confirm.
206 {
207  return m_tasks_c[m_current_task];
208 }
209 
// Builds, for every task, the task's indices restricted to a subset and
// renumbered to positions inside that subset.
// NOTE(review): the signature (listing line 210) is missing from this
// extract; the page index lists `SGVector< index_t > * get_subset_tasks_indices()`.
//
// The returned array is allocated with SG_MALLOC here and is not freed in
// this function.
211 {
212  int n_tasks = ((CTaskGroup*)m_task_relation)->get_num_tasks();
213  SGVector<index_t>* tasks_indices = ((CTaskGroup*)m_task_relation)->get_tasks_indices();
214 
 // NOTE(review): listing line 215 is missing here; presumably it obtains
 // `sstack` via get_subset_stack() (listed in the page index) - confirm.
 // Inverse map: index produced by subset_idx_conversion(i) -> position i.
216  map<index_t,index_t> subset_inv_map = map<index_t,index_t>();
217  for (int32_t i=0; i<sstack->get_size(); i++)
218  subset_inv_map[sstack->subset_idx_conversion(i)] = i;
219 
 // The subset stack is only needed to build the map.
220  SG_UNREF(sstack);
221  sstack=NULL;
222 
223  SGVector<index_t>* subset_tasks_indices = SG_MALLOC(SGVector<index_t>, n_tasks);
224  for (int32_t i=0; i<n_tasks; i++)
225  {
226  SGVector<index_t> task = tasks_indices[i];
227  //task.display_vector("task");
 // Keep only the task indices present in the map, translated to their
 // position in the subset.
228  vector<index_t> cutted = vector<index_t>();
229  for (int32_t j=0; j<task.vlen; j++)
230  {
231  if (subset_inv_map.count(task[j]))
232  cutted.push_back(subset_inv_map[task[j]]);
233  }
234  SGVector<index_t> cutted_task(cutted.size());
235  for (int32_t j=0; j<cutted_task.vlen; j++)
236  cutted_task[j] = cutted[j];
237  //cutted_task.display_vector("cutted");
238  subset_tasks_indices[i] = cutted_task;
239  }
 // Release the array obtained from get_tasks_indices().
240  SG_FREE(tasks_indices);
241 
242  return subset_tasks_indices;
243 }
244 
245 
246 }
virtual SGVector< float64_t > apply_get_outputs(CFeatures *data=NULL)
index_t get_size() const
Definition: SubsetStack.h:80
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
CTaskRelation * get_task_relation() const
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:129
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
index_t num_cols
Definition: SGMatrix.h:378
virtual SGVector< float64_t > get_w() const
Class TaskGroup is used to represent a group of tasks. Tasks in a group do not overlap.
Definition: TaskGroup.h:28
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:51
index_t num_rows
Definition: SGMatrix.h:376
virtual CBinaryLabels * apply_locked_binary(SGVector< index_t > indices)
Class to add subset support to another class. A CSubsetStack instance should be added and wrappe...
Definition: SubsetStack.h:37
void set_task_relation(CTaskRelation *task_relation)
index_t vlen
Definition: SGVector.h:494
virtual CSubsetStack * get_subset_stack()
Definition: Features.cpp:334
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
virtual void set_w(const SGVector< float64_t > src_w)
vector< set< index_t > > m_tasks_indices
virtual bool train_locked(SGVector< index_t > indices)
double float64_t
Definition: common.h:50
virtual void set_features(CDotFeatures *feat)
index_t subset_idx_conversion(index_t idx) const
Definition: SubsetStack.h:105
Class LinearMachine is a generic interface for all kinds of linear machines like classifiers.
Definition: LinearMachine.h:63
virtual void post_lock(CLabels *labels, CFeatures *features_)
CDotFeatures * features
#define SG_UNREF(x)
Definition: SGObject.h:52
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
used to represent tasks in multitask learning
Definition: TaskRelation.h:31
#define SG_ADD(...)
Definition: SGObject.h:81
virtual bool train_machine(CFeatures *data=NULL)
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
virtual void set_labels(CLabels *lab)
Definition: Machine.cpp:65
virtual float64_t apply_one(int32_t i)
virtual bool train_locked_implementation(SGVector< index_t > *tasks)
SGVector< index_t > * get_subset_tasks_indices()

SHOGUN 机器学习工具包 - 项目文档