SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
MultilabelLabels.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2013 Zuse-Institute-Berlin (ZIB)
3  * Copyright (C) 2013-2014 Thoralf Klein
4  * Written (W) 2013-2014 Thoralf Klein
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  */
31 
33 #include <shogun/io/SGIO.h> // for REQUIRE, SG_PRINT, etc
34 
35 using namespace shogun;
36 
38  : CLabels()
39 {
40  init(0, 1);
41 }
42 
43 
45  : CLabels()
46 {
47  init(0, num_classes);
48 }
49 
50 
51 CMultilabelLabels::CMultilabelLabels(int32_t num_labels, int32_t num_classes)
52  : CLabels()
53 {
54  init(num_labels, num_classes);
55 }
56 
57 
59 {
60  delete[] m_labels;
61 }
62 
63 
64 void
65 CMultilabelLabels::init(int32_t num_labels, int32_t num_classes)
66 {
67  REQUIRE(num_labels >= 0, "num_labels=%d should be >= 0", num_labels);
68  REQUIRE(num_classes > 0, "num_classes=%d should be > 0", num_classes);
69 
70  // This one does consider the contained labels, so its simply BROKEN
71  // Can be disabled as
72  SG_ADD(&m_num_labels, "m_num_labels", "number of labels", MS_NOT_AVAILABLE);
73  SG_ADD(&m_num_classes, "m_num_classes", "number of classes", MS_NOT_AVAILABLE);
74  // SG_ADD((CSGObject**) &m_labels, "m_labels", "The labels", MS_NOT_AVAILABLE);
75 
76 
77  // Can only be enabled after this issue has been solved:
78  // https://github.com/shogun-toolbox/shogun/issues/1972
79 /* this->m_parameters->add(&m_num_labels, "m_num_labels",
80  "Number of labels.");
81  this->m_parameters->add(&m_num_classes, "m_num_classes",
82  "Number of classes.");
83  this->m_parameters->add_vector(&m_labels, &m_num_labels, "labels_array",
84  "The label vectors for all (num_labels) outputs.");
85 */
86 
87  m_num_labels = num_labels;
88  m_num_classes = num_classes;
90 }
91 
92 
93 void
94 CMultilabelLabels::ensure_valid(const char * context)
95 {
96  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
97  {
98  if (sg_io->get_loglevel() == MSG_DEBUG && !CMath::is_sorted(m_labels[label_j]))
99  {
100  SG_PRINT("m_labels[label_j=%d] not sorted: ", label_j);
101  m_labels[label_j].display_vector("");
102  }
103 
105  "labels[%d] are not sorted!", label_j);
106 
107  int32_t c_len = m_labels[label_j].vlen;
108  if (c_len <= 0)
109  {
110  continue;
111  }
112 
113  REQUIRE(m_labels[label_j].vector[0] >= 0,
114  "first label labels[%d]=%d should be >= 0!",
115  label_j, m_labels[label_j].vector[0]);
116  REQUIRE(m_labels[label_j].vector[c_len - 1] < get_num_classes(),
117  "last label labels[%d]=%d should be < num_classes == %d!",
118  label_j, m_labels[label_j].vector[0], get_num_classes());
119  }
120 }
121 
122 
123 int32_t
125 {
126  return m_num_labels;
127 }
128 
129 
130 int32_t
132 {
133  return m_num_classes;
134 }
135 
136 
137 void
139 {
140  for (int32_t label_j = 0; label_j < m_num_labels; label_j++)
141  {
142  m_labels[label_j] = labels[label_j];
143  }
144  ensure_valid("set_labels()");
145 }
146 
147 
149 {
150  SGVector <int32_t> ** labels_list =
151  SG_MALLOC(SGVector <int32_t> *, get_num_classes());
152  int32_t * num_label_idx =
153  SG_MALLOC(int32_t, get_num_classes());
154 
155  for (int32_t class_i = 0; class_i < get_num_classes(); class_i++)
156  {
157  num_label_idx[class_i] = 0;
158  }
159 
160  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
161  {
162  for (int32_t c_pos = 0; c_pos < m_labels[label_j].vlen; c_pos++)
163  {
164  int32_t class_i = m_labels[label_j][c_pos];
165  REQUIRE(class_i < get_num_classes(),
166  "class_i exceeded number of classes");
167  num_label_idx[class_i]++;
168  }
169  }
170 
171  for (int32_t class_i = 0; class_i < get_num_classes(); class_i++)
172  {
173  labels_list[class_i] =
174  new SGVector <int32_t> (num_label_idx[class_i]);
175  }
176  SG_FREE(num_label_idx);
177 
178  int32_t * next_label_idx = SG_MALLOC(int32_t, get_num_classes());
179  for (int32_t class_i = 0; class_i < get_num_classes(); class_i++)
180  {
181  next_label_idx[class_i] = 0;
182  }
183 
184  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
185  {
186  for (int32_t c_pos = 0; c_pos < m_labels[label_j].vlen; c_pos++)
187  {
188  // get class_i of current position
189  int32_t class_i = m_labels[label_j][c_pos];
190  REQUIRE(class_i < get_num_classes(),
191  "class_i exceeded number of classes");
192  // next free element in m_classes[class_i]:
193  int32_t l_pos = next_label_idx[class_i];
194  REQUIRE(l_pos < labels_list[class_i]->size(),
195  "l_pos exceeded length of label list");
196  next_label_idx[class_i]++;
197  // finally, story label_j into class-column
198  (*labels_list[class_i])[l_pos] = label_j;
199  }
200  }
201 
202  SG_FREE(next_label_idx);
203  return labels_list;
204 }
205 
207 {
208  if (m_num_labels==0)
209  return SGMatrix<int32_t>();
210 
211  int32_t n_outputs = m_labels[0].vlen;
212  SGMatrix<int32_t> labels(m_num_labels, n_outputs);
213 
214  for (int32_t i=0; i<m_num_labels; i++)
215  {
216  REQUIRE(m_labels[i].vlen==n_outputs,
217  "This function is valid only for multiclass multiple output lables.");
218 
219  for (int32_t j=0; j<n_outputs; j++)
220  labels(i,j) = m_labels[i][j];
221  }
222  return labels;
223 }
224 
226 {
227  REQUIRE(j < get_num_labels(),
228  "label index j=%d should be within [%d,%d[",
229  j, 0, get_num_labels());
230  return m_labels[j];
231 }
232 
233 
234 template <class S, class D>
236 (SGVector <S> * sparse, int32_t dense_len, D d_true, D d_false)
237 {
238  SGVector <D> dense(dense_len);
239  dense.set_const(d_false);
240  for (int32_t i = 0; i < sparse->vlen; i++)
241  {
242  S index = (*sparse)[i];
243  REQUIRE(index < dense_len,
244  "class index exceeded length of dense vector");
245  dense[index] = d_true;
246  }
247  return dense;
248 }
249 
250 
251 template
252 SGVector <int32_t> CMultilabelLabels::to_dense <int32_t, int32_t>
253 (SGVector <int32_t> *, int32_t, int32_t, int32_t);
254 
255 template
256 SGVector <float64_t> CMultilabelLabels::to_dense <int32_t, float64_t>
257 (SGVector <int32_t> *, int32_t, float64_t, float64_t);
258 
259 void
260 CMultilabelLabels::set_label(int32_t j, SGVector <int32_t> label)
261 {
262  REQUIRE(j < get_num_labels(),
263  "label index j=%d should be within [%d,%d[",
264  j, 0, get_num_labels());
265  m_labels[j] = label;
266 }
267 
268 
269 void
270 CMultilabelLabels::set_class_labels(SGVector <int32_t> ** labels_list)
271 {
272  int32_t * num_class_idx = SG_MALLOC(int32_t , get_num_labels());
273  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
274  {
275  num_class_idx[label_j] = 0;
276  }
277 
278  for (int32_t class_i = 0; class_i < get_num_classes(); class_i++)
279  {
280  for (int32_t l_pos = 0; l_pos < labels_list[class_i]->vlen; l_pos++)
281  {
282  int32_t label_j = (*labels_list[class_i])[l_pos];
283  REQUIRE(label_j < get_num_labels(),
284  "class_i=%d/%d :: label_j=%d/%d (l_pos=%d)\n",
285  class_i, get_num_classes(), label_j, get_num_labels(),
286  l_pos);
287  num_class_idx[label_j]++;
288  }
289  }
290 
291  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
292  {
293  m_labels[label_j].resize_vector(num_class_idx[label_j]);
294  }
295  SG_FREE(num_class_idx);
296 
297  int32_t * next_class_idx = SG_MALLOC(int32_t , get_num_labels());
298  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
299  {
300  next_class_idx[label_j] = 0;
301  }
302 
303  for (int32_t class_i = 0; class_i < get_num_classes(); class_i++)
304  {
305  for (int32_t l_pos = 0; l_pos < labels_list[class_i]->vlen; l_pos++)
306  {
307  // get class_i of current position
308  int32_t label_j = (*labels_list[class_i])[l_pos];
309  REQUIRE(label_j < get_num_labels(),
310  "class_i=%d/%d :: label_j=%d/%d (l_pos=%d)\n",
311  class_i, get_num_classes(), label_j, get_num_labels(),
312  l_pos);
313 
314  // next free element in m_labels[label_j]:
315  int32_t c_pos = next_class_idx[label_j];
316  REQUIRE(c_pos < m_labels[label_j].size(),
317  "c_pos exceeded length of labels vector");
318  next_class_idx[label_j]++;
319 
320  // finally, story label_j into class-column
321  m_labels[label_j][c_pos] = class_i;
322  }
323  }
324  SG_FREE(next_class_idx);
325 
326  return;
327 }
328 
329 
330 void
332 {
333  SGVector <int32_t> ** labels_list = get_class_labels();
334  SG_PRINT("printing %d binary label vectors for %d multilabels:\n",
336 
337  for (int32_t class_i = 0; class_i < get_num_classes(); class_i++)
338  {
339  SG_PRINT(" yC_{class_i=%d}", class_i);
340  SGVector <float64_t> dense =
341  to_dense <int32_t, float64_t> (labels_list[class_i],
342  get_num_labels(), +1, -1);
343  dense.display_vector("");
344  delete labels_list[class_i];
345  }
346  SG_FREE(labels_list);
347 
348  SG_PRINT("printing %d binary class vectors for %d labels:\n",
350 
351  for (int32_t j = 0; j < get_num_labels(); j++)
352  {
353  SG_PRINT(" y_{j=%d}", j);
354  SGVector <float64_t> dense =
355  to_dense <int32_t , float64_t> (&m_labels[j], get_num_classes(),
356  +1, -1);
357  dense.display_vector("");
358  }
359  return;
360 }
static SGVector< D > to_dense(SGVector< S > *sparse, int32_t dense_len, D d_true, D d_false)
SGMatrix< int32_t > get_labels() const
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const
void set_class_labels(SGVector< int32_t > **labels_list)
#define REQUIRE(x,...)
Definition: SGIO.h:206
void set_labels(SGVector< int32_t > *labels)
void display_vector(const char *name="vector", const char *prefix="") const
Definition: SGVector.cpp:356
SGVector< int32_t > ** get_class_labels() const
index_t vlen
Definition: SGVector.h:494
#define SG_PRINT(...)
Definition: SGIO.h:137
virtual int32_t get_num_classes() const
SGIO * sg_io
Definition: init.cpp:36
double float64_t
Definition: common.h:50
SGVector< int32_t > get_label(int32_t j)
EMessageType get_loglevel() const
Definition: SGIO.cpp:285
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
void set_label(int32_t j, SGVector< int32_t > label)
void ensure_valid(const char *context=NULL)
SGVector< int32_t > * m_labels
void resize_vector(int32_t n)
Definition: SGVector.cpp:259
#define SG_ADD(...)
Definition: SGObject.h:81
static bool is_sorted(SGVector< T > vector)
Definition: Math.h:1617
void set_const(T const_elem)
Definition: SGVector.cpp:152

SHOGUN Machine Learning Toolbox - Documentation