SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
MultilabelModel.cpp
Go to the documentation of this file.
1 /*
2  * This software is distributed under BSD 3-clause license (see LICENSE file).
3  *
4  * Copyright(C) 2014 Abinash Panda
5  * Written (W) 2014 Abinash Panda
6  */
7 
12 
13 using namespace shogun;
14 
17 {
18  init();
19 }
20 
22  : CStructuredModel(features, labels)
23 {
24  init();
25 }
26 
28 {
29 }
30 
32 {
33  return new CMultilabelSOLabels(num_labels, m_num_classes);
34 }
35 
36 void CMultilabelModel::init()
37 {
38  SG_ADD(&m_false_positive, "false_positive", "Misclassification cost for false positive",
40  SG_ADD(&m_false_negative, "false_negative", "Misclassification cost for false negative",
42  SG_ADD(&m_num_classes, "num_classes", "Number of (binary) class assignment per label",
44  m_false_positive = 1;
45  m_false_negative = 1;
46  m_num_classes = 0;
47 }
48 
50 {
51  int32_t num_classes = ((CMultilabelSOLabels *)m_labels)->get_num_classes();
52  int32_t feats_dim = ((CDotFeatures *)m_features)->get_dim_feature_space();
53 
54  return feats_dim * num_classes;
55 }
56 
57 void CMultilabelModel::set_misclass_cost(float64_t false_positive, float64_t false_negative)
58 {
59  m_false_positive = false_positive;
60  m_false_negative = false_negative;
61 }
62 
64  CStructuredData * y)
65 {
67  psi.zero();
68 
70  get_computed_dot_feature_vector(feat_idx);
72  ASSERT(slabel != NULL);
73  SGVector<int32_t> slabel_data = slabel->get_data();
74 
75  for (index_t i = 0; i < slabel_data.vlen; i++)
76  {
77  for (index_t j = 0, k = slabel_data[i] * x.vlen; j < x.vlen; j++, k++)
78  {
79  psi[k] = x[j];
80  }
81  }
82 
83  return psi;
84 }
85 
87 {
90 
91  ASSERT(y1_slabel != NULL);
92  ASSERT(y2_slabel != NULL);
93 
95  return delta_loss(
97  multi_labels->get_num_classes(), 1, 0),
99  multi_labels->get_num_classes(), 1, 0));
100 }
101 
103 {
104  REQUIRE(y1.vlen == y2.vlen, "Size of both the vectors should be same\n");
105 
106  float64_t loss = 0;
107 
108  for (index_t i = 0; i < y1.vlen; i++)
109  {
110  loss += delta_loss(y1[i], y2[i]);
111  }
112 
113  return loss;
114 }
115 
117 {
118  return y1 > y2 ? m_false_negative : y1 < y2 ? m_false_positive : 0;
119 }
120 
121 SGVector<int32_t> CMultilabelModel::to_sparse(SGVector<float64_t> dense_vec,
122  float64_t d_true, float64_t d_false)
123 {
124  int32_t size = 0;
125 
126  for (index_t i = 0; i < dense_vec.vlen; i++)
127  {
128  REQUIRE(dense_vec[i] == d_true || dense_vec[i] == d_false,
129  "The values of dense vector should be either (%d) or (%d).\n",
130  d_true, d_false);
131 
132  if (dense_vec[i] == d_true)
133  {
134  size++;
135  }
136  }
137 
138  SGVector<int32_t> sparse_vec(size);
139  index_t j = 0;
140 
141  for (index_t i = 0; i < dense_vec.vlen; i++)
142  {
143  if (dense_vec[i] == d_true)
144  {
145  sparse_vec[j] = i;
146  j++;
147  }
148  }
149 
150  return sparse_vec;
151 }
152 
154  bool const training)
155 {
156  CDotFeatures * dot_feats = (CDotFeatures *)m_features;
157  int32_t feats_dim = dot_feats->get_dim_feature_space();
158 
160 
161  if (training)
162  {
163  m_num_classes = multi_labs->get_num_classes();
164  }
165  else
166  {
167  REQUIRE(m_num_classes > 0, "The model needs to be trained before using "
168  "it for prediction\n");
169  }
170 
171  int32_t dim = get_dim();
172  ASSERT(dim == w.vlen);
173 
174  float64_t score = 0, total_score = 0;
175  SGVector<float64_t> y_pred_dense(m_num_classes);
176  y_pred_dense.zero();
177 
178  for (int32_t c = 0; c < m_num_classes; c++)
179  {
180  score = dot_feats->dense_dot(feat_idx, w.vector + c * feats_dim, feats_dim);
181 
182  if (score > 0)
183  {
184  y_pred_dense[c] = 1;
185  total_score += score;
186  }
187 
188  }
189 
190  SGVector<int32_t> y_pred_sparse = to_sparse(y_pred_dense, 1, 0);
191 
192  CResultSet * ret = new CResultSet();
193  SG_REF(ret);
194  ret->psi_computed = true;
195 
196  CSparseMultilabel * y_pred = new CSparseMultilabel(y_pred_sparse);
197  SG_REF(y_pred);
198 
199  ret->psi_pred = get_joint_feature_vector(feat_idx, y_pred);
200  ret->score = total_score;
201  ret->argmax = y_pred;
202 
203  if (training)
204  {
205  ret->delta = CStructuredModel::delta_loss(feat_idx, y_pred);
207  feat_idx, feat_idx);
208  ret->score += (ret->delta - CMath::dot(w.vector,
209  ret->psi_truth.vector, dim));
210  }
211 
212  return ret;
213 }
214 
216  float64_t regularization,
224 {
226 }
227 
SGVector< float64_t > psi_truth
Base class of the labels used in Structured Output (SO) problems.
int32_t index_t
Definition: common.h:62
Class CMultilabelSOLabels used in the application of Structured Output (SO) learning to Multilabel Classification.
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)=0
virtual void init_primal_opt(float64_t regularization, SGMatrix< float64_t > &A, SGVector< float64_t > a, SGMatrix< float64_t > B, SGVector< float64_t > &b, SGVector< float64_t > &lb, SGVector< float64_t > &ub, SGMatrix< float64_t > &C)
#define REQUIRE(x,...)
Definition: SGIO.h:206
SGVector< float64_t > get_joint_feature_vector(int32_t feat_idx, int32_t lab_idx)
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:51
virtual int32_t get_dim_feature_space() const =0
virtual float64_t delta_loss(CStructuredData *y1, CStructuredData *y2)
virtual SGVector< float64_t > get_joint_feature_vector(int32_t feat_idx, CStructuredData *y)
index_t vlen
Definition: SGVector.h:494
#define ASSERT(x)
Definition: SGIO.h:201
double float64_t
Definition: common.h:50
virtual int32_t get_dim() const
float64_t delta_loss(int32_t ytrue_idx, CStructuredData *ypred)
Class CSparseMultilabel to be used in the application of Structured Output (SO) learning to Multilabel Classification.
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
Class CStructuredModel that represents the application specific model and contains most of the applic...
CStructuredLabels * m_labels
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
static SGVector< float64_t > to_dense(CStructuredData *label, int32_t dense_dim, float64_t d_true, float64_t d_false)
virtual void set_misclass_cost(float64_t false_positive, float64_t false_negative)
CStructuredData * argmax
SGVector< float64_t > psi_pred
#define SG_ADD(...)
Definition: SGObject.h:81
virtual int32_t get_num_classes() const
static SGMatrix< T > create_identity_matrix(index_t size, T scale)
Base class of the components of StructuredLabels.
virtual CResultSet * argmax(SGVector< float64_t > w, int32_t feat_idx, bool const training=true)
static CSparseMultilabel * obtain_from_generic(CStructuredData *base_data)
SGVector< int32_t > get_data() const
virtual CStructuredLabels * structured_labels_factory(int32_t num_labels=0)

SHOGUN Machine Learning Toolbox - Documentation