SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
FactorGraphDataGenerator.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2015 Jiaolong Xu
8  * Copyright (C) 2015 Jiaolong Xu
9  */
10 
12 
13 using namespace shogun;
14 
16 {}
17 
19 {
20  // ftype
21  SGVector<int32_t> card(2);
22  card[0] = 2;
23  card[1] = 2;
25  w[0] = 0.0; // 0,0
26  w[1] = 0.3; // 1,0
27  w[2] = 0.2; // 0,1
28  w[3] = 0.0; // 1,1
29  int32_t tid = 0;
30  CTableFactorType* ft_pairwise = new CTableFactorType(tid, card, w);
31  SG_REF(ft_pairwise);
32 
33  SGVector<int32_t> card1(1);
34  card1[0] = 2;
35  SGVector<float64_t> w1(2);
36  w1[0] = 0.1;
37  w1[1] = 0.7;
38  int32_t tid1 = 1;
39  CTableFactorType* ft_unary_1 = new CTableFactorType(tid1, card1, w1);
40  SG_REF(ft_unary_1);
41 
42  SGVector<int32_t> card2(1);
43  card2[0] = 2;
44  SGVector<float64_t> w2(2);
45  w2[0] = 0.3;
46  w2[1] = 0.6;
47  int32_t tid2 = 2;
48  CTableFactorType* ft_unary_2 = new CTableFactorType(tid2, card2, w2);
49  SG_REF(ft_unary_2);
50 
51  // fg
52  SGVector<int32_t> vc(2);
54  CFactorGraph* fg = new CFactorGraph(vc);
55  SG_REF(fg);
56 
57  // add factors
59  SGVector<int32_t> var_index(2);
60  var_index[0] = 0;
61  var_index[1] = 1;
62  CFactor* fac1 = new CFactor(ft_pairwise, var_index, data);
63  fg->add_factor(fac1);
64 
65  SGVector<int32_t> var_index1(1);
66  var_index1[0] = 0;
67  CFactor* fac2 = new CFactor(ft_unary_1, var_index1, data);
68  fg->add_factor(fac2);
69 
70  SGVector<int32_t> var_index2(1);
71  var_index2[0] = 1;
72  CFactor* fac3 = new CFactor(ft_unary_2, var_index2, data);
73  fg->add_factor(fac3);
74 
75  SG_UNREF(ft_pairwise);
76  SG_UNREF(ft_unary_1);
77  SG_UNREF(ft_unary_2);
78 
79  // energy table
80  fg->compute_energies();
81 
82  fg->connect_components();
83 
84  return fg;
85 }
86 
87 int32_t CFactorGraphDataGenerator::grid_to_index(int32_t x, int32_t y, int32_t w)
88 {
89  return x + w * y;
90 }
91 
93 {
94  if (A + D > C + B)
95  {
96  SG_SDEBUG("\nTruncate initialized data to ensure submodularity.\n");
97  float64_t delta = A + D - C - B;
98  float64_t subtrA = delta / 3;
99  A = A - subtrA;
100  C = C + subtrA;
101  B = B + (delta - subtrA * 2) + 0.0001; // for numeric issue
102  }
103 }
104 
105 CFactorGraph* CFactorGraphDataGenerator::random_chain_graph(SGVector<int> &assignment_expect, float64_t &min_energy_expect, int32_t N)
106 {
107  CMath::init_random(17);
108 
109  // ftype
110  SGVector<int32_t> card(2);
111  card[0] = 2;
112  card[1] = 2;
114  int32_t tid = 0;
115  CTableFactorType* factortype = new CTableFactorType(tid, card, w);
116  SG_REF(factortype);
117 
118  SGVector<int32_t> card1(1);
119  card1[0] = 2;
121  int32_t tid1 = 1;
122  CTableFactorType* factortype1 = new CTableFactorType(tid1, card1, w1);
123  SG_REF(factortype1);
124 
125  // fg
126  SGVector<int32_t> vc(N * N);
128  CFactorGraph* fg = new CFactorGraph(vc);
129  SG_REF(fg);
130 
131  // Add factors
132  for (int32_t y = 0; y < N; ++y)
133  for (int32_t x = 0; x < N; ++x)
134  {
135  SGVector<float64_t> data(2);
136  data[0] = CMath::random(0.0, 1.0);
137  data[1] = CMath::random(0.0, 1.0);
138 
139  SGVector<int32_t> var_index(1);
140  var_index[0] = y * N + x;
141 
142  CFactor* fac1 = new CFactor(factortype1, var_index, data);
143  fg->add_factor(fac1);
144  }
145 
146  for (int32_t x = 0; x < N; x++)
147  {
148  for (int32_t y = 0; y < N; y++)
149  {
150  if (x > 0)
151  {
152  SGVector<float64_t> data(4);
153  float64_t A = CMath::random(0.0, 1.0);//E(0,0)->A
154  float64_t C = CMath::random(0.0, 1.0);//E(1,0)->C
155  float64_t B = CMath::random(0.0, 1.0);//E(0,1)->B
156  float64_t D = CMath::random(0.0, 1.0);//E(1,1)->D
157 
158  // Add truncation to ensure submodularity
159  truncate_energy(A, B, C, D);
160 
161  data[0] = A;
162  data[1] = C;
163  data[2] = B;
164  data[3] = D;
165 
166  SGVector<int32_t> var_index(2);
167  var_index[0] = grid_to_index(x, y, N);
168  var_index[1] = grid_to_index(x - 1, y, N);
169  CFactor* fac1 = new CFactor(factortype, var_index, data);
170  fg->add_factor(fac1);
171  }
172 
173  if (x == 0 && y > 0)
174  {
175  SGVector<float64_t> data(4);
176  float64_t A = CMath::random(0.0, 1.0);//E(0,0)->A
177  float64_t C = CMath::random(0.0, 1.0);//E(1,0)->C
178  float64_t B = CMath::random(0.0, 1.0);//E(0,1)->B
179  float64_t D = CMath::random(0.0, 1.0);//E(1,1)->D
180 
181  // Add truncation to ensure submodularity
182  truncate_energy(A, B, C, D);
183 
184  data[0] = A;
185  data[1] = C;
186  data[2] = B;
187  data[3] = D;
188 
189  SGVector<int32_t> var_index(2);
190  var_index[0] = grid_to_index(x, y - 1, N);
191  var_index[1] = grid_to_index(x, y, N);
192  CFactor* fac1 = new CFactor(factortype, var_index, data);
193  fg->add_factor(fac1);
194  }
195  }
196  }
197 
198  SG_UNREF(factortype);
199  SG_UNREF(factortype1);
200 
201  // energy table
202  fg->compute_energies();
203 
204  fg->connect_components();
205 
206  // Find minimum energy state by exhaustive search
207  SGVector<int> test_var(N * N);
208  assignment_expect = SGVector<int>(N * N);
209  min_energy_expect = std::numeric_limits<double>::infinity();
210  for (int v0 = 0; v0 < 2; ++v0)
211  {
212  test_var[0] = v0;
213  for (int v1 = 0; v1 < 2; ++v1)
214  {
215  test_var[1] = v1;
216  for (int v2 = 0; v2 < 2; ++v2)
217  {
218  test_var[2] = v2;
219  for (int v3 = 0; v3 < 2; ++v3)
220  {
221  test_var[3] = v3;
222 
223  double orig_e = fg->evaluate_energy(test_var);
224  if (orig_e < min_energy_expect)
225  {
226  assignment_expect = test_var.clone();
227  min_energy_expect = orig_e;
228  }
229  }
230  }
231  }
232  }
233 
234  return fg;
235 }
236 
238 {
239  // ftype
240  SGVector<int32_t> card(2);
241  card[0] = 3;
242  card[1] = 3;
243  SGVector<float64_t> w(9);
244  w[0] = -0.1; // 0,0
245  w[1] = -0.7; // 1,0
246  w[2] = -0.9; // 2,0
247  w[3] = -0.7; // 0,1
248  w[4] = -0.1; // 1,1
249  w[5] = -0.0; // 2,1
250  w[6] = -0.9; // 0,2
251  w[7] = -0.0; // 1,2
252  w[8] = -0.1; // 2,2
253  int32_t tid = 0;
254  CTableFactorType* factortype = new CTableFactorType(tid, card, w);
255  SG_REF(factortype);
256 
257  SGVector<int32_t> card1(1);
258  card1[0] = 3;
259  SGVector<float64_t> w1(3);
260  w1[0] = -0.1;
261  w1[1] = -0.7;
262  w1[2] = -0.6;
263  int32_t tid1 = 1;
264  CTableFactorType* factortype1 = new CTableFactorType(tid1, card1, w1);
265  SG_REF(factortype1);
266 
267  SGVector<int32_t> card2(1);
268  card2[0] = 3;
269  SGVector<float64_t> w2(3);
270  w2[0] = -0.9;
271  w2[1] = -0.1;
272  w2[2] = -0.2;
273  int32_t tid2 = 2;
274  CTableFactorType* factortype2 = new CTableFactorType(tid2, card2, w2);
275  SG_REF(factortype2);
276 
277  SGVector<int32_t> card3(1);
278  card3[0] = 3;
279  SGVector<float64_t> w3(3);
280  w3[0] = -0.3;
281  w3[1] = -0.4;
282  w3[2] = -0.5;
283  int32_t tid3 = 3;
284  CTableFactorType* factortype3 = new CTableFactorType(tid3, card3, w3);
285  SG_REF(factortype3);
286 
287  // fg
288  SGVector<int32_t> vc(3);
290  CFactorGraph* fg = new CFactorGraph(vc);
291  SG_REF(fg);
292 
293  // add factors
294  SGVector<float64_t> data;
295  SGVector<int32_t> var_index1(1);
296  var_index1[0] = 0;
297  CFactor* fac1 = new CFactor(factortype1, var_index1, data);
298  fg->add_factor(fac1);
299 
300  SGVector<int32_t> var_index2(1);
301  var_index2[0] = 1;
302  CFactor* fac2 = new CFactor(factortype2, var_index2, data);
303  fg->add_factor(fac2);
304 
305  SGVector<int32_t> var_index3(1);
306  var_index3[0] = 2;
307  CFactor* fac3 = new CFactor(factortype3, var_index3, data);
308  fg->add_factor(fac3);
309 
310  SGVector<int32_t> var_index4(2);
311  var_index4[0] = 0;
312  var_index4[1] = 1;
313  CFactor* fac4 = new CFactor(factortype, var_index4, data);
314  fg->add_factor(fac4);
315 
316  SGVector<int32_t> var_index5(2);
317  var_index5[0] = 1;
318  var_index5[1] = 2;
319  CFactor* fac5 = new CFactor(factortype, var_index5, data);
320  fg->add_factor(fac5);
321 
322  // energy table
323  fg->compute_energies();
324  fg->connect_components();
325 
326  SG_UNREF(factortype);
327  SG_UNREF(factortype1);
328  SG_UNREF(factortype2);
329  SG_UNREF(factortype3);
330 
331  return fg;
332 }
333 
334 /*---------------------------------------------------------------------------------
335 Test approximate inference algorithms with SOSVM framework
336 using randomly generated synthetic data
337 ----------------------------------------------------------------------------------*/
338 
339 void CFactorGraphDataGenerator::generate_data(int32_t len_label, int32_t len_feat, int32_t size_data,
340  SGMatrix<float64_t> &feats, SGMatrix<int32_t> &labels)
341 {
342  ASSERT(size_data > len_label);
343 
344  feats = SGMatrix<float64_t>(len_feat, size_data);
345  labels = SGMatrix<int32_t>(len_label, size_data);
346 
347  for (int32_t k = 0; k < size_data; k++)
348  {
349  // generate a label vector
350  SGVector<int32_t> v_label(len_label);
351  v_label.zero();
352  int32_t i = k % len_label;
353  v_label[i] = 1;
354 
355  // generate feature vector
356  SGVector<int32_t> random_indices(len_feat);
357  random_indices.range_fill();
358  CMath::permute(random_indices);
359  SGVector<float64_t> v_feat(len_feat);
360  v_feat.zero();
361 
362  for (int32_t j = 0; j < 3 * (i + 1); j++)
363  {
364  int32_t r = random_indices[j];
365  v_feat[r] = 1;
366  }
367 
368  for (int32_t f = 0; f < len_feat; f++)
369  feats(f, k) = v_feat[f];
370 
371  for (int32_t l = 0; l < len_label; l++)
372  labels(l, k) = v_label[l];
373  }
374 }
375 
377 {
378  // A full-connected graph is defined by a 2-d matrix where
379  // each row stores the indecies of a pair of connected nodes
380  int32_t num_rows = num_classes * (num_classes - 1) / 2;
381  ASSERT(num_rows > 0);
382 
383  SGMatrix< int32_t > mat(num_rows, 2);
384  int32_t k = 0;
385 
386  for (int32_t i = 0; i < num_classes - 1; i++)
387  {
388  for (int32_t j = i + 1; j < num_classes; j++)
389  {
390  mat[num_rows + k] = j;
391  mat[k++] = i;
392  }
393  }
394 
395  return mat;
396 }
397 
399  SGMatrix< int32_t > edge_list, const DynArray<CTableFactorType*> &v_factor_type,
400  CFactorGraphFeatures* fg_feats, CFactorGraphLabels* fg_labels)
401 {
402  int32_t num_sample = labels.num_cols;
403  int32_t num_classes = labels.num_rows;
404  int32_t dim = feats.num_rows;
405  int32_t num_edges = edge_list.num_rows;
406 
407  // prepare features and labels in factor graph
408  for (int32_t n = 0; n < num_sample; n++)
409  {
410  SGVector<int32_t> vc(num_classes);
412 
413  CFactorGraph* fg = new CFactorGraph(vc);
414 
415  float64_t* pfeat = feats.get_column_vector(n);
416  SGVector<float64_t> feat_i(dim);
417  memcpy(feat_i.vector, pfeat, dim * sizeof(float64_t));
418 
419  // add unary factors
420  for (int32_t u = 0; u < num_classes; u++)
421  {
422  SGVector<int32_t> var_index_u(1);
423  var_index_u[0] = u;
424  CFactor* fac_u = new CFactor(v_factor_type[u], var_index_u, feat_i);
425  fg->add_factor(fac_u);
426  }
427 
428  // add pairwised factors
429  for (int32_t t = 0; t < num_edges; t++)
430  {
431  SGVector<float64_t> data_t(1);
432  data_t[0] = 1.0;
433  SGVector<int32_t> var_index_t = edge_list.get_row_vector(t);
434  CFactor* fac_t = new CFactor(v_factor_type[t + num_classes], var_index_t, data_t);
435  fg->add_factor(fac_t);
436  }
437 
438  // add factor graph instance
439  fg_feats->add_sample(fg);
440 
441  // add label
442  int32_t* plabs = labels.get_column_vector(n);
443  SGVector<int32_t> states_gt(num_classes);
444  memcpy(states_gt.vector, plabs, num_classes * sizeof(int32_t));
445  SGVector<float64_t> loss_weights(num_classes);
446  SGVector<float64_t>::fill_vector(loss_weights.vector, loss_weights.vlen, 1.0 / num_classes);
447  CFactorGraphObservation* fg_obs = new CFactorGraphObservation(states_gt, loss_weights);
448  fg_labels->add_label(fg_obs);
449  }
450 }
451 
459 void CFactorGraphDataGenerator::define_factor_types(int32_t num_classes, int32_t dim, int32_t num_edges,
460  DynArray<CTableFactorType*> &v_factor_type)
461 {
462  int32_t tid;
463  // we have l = num_classes different weights: w_1, w_2, ..., w_l
464  // so we create num_classes different unary factor types
465  for (int32_t u = 0; u < num_classes; u++)
466  {
467  tid = u;
468  SGVector<int32_t> card_u(1);
469  card_u[0] = 2;
470  SGVector<float64_t> w_u(dim * 2);
471  w_u.zero();
472  CTableFactorType* ft = new CTableFactorType(tid, card_u, w_u);
473  v_factor_type.append_element(ft);
474  }
475 
476  // define factor type: tree edge factor
477  // note that each edge is a new type
478  for (int32_t t = 0; t < num_edges; t++)
479  {
480  tid = t + num_classes;
481  SGVector<int32_t> card_t(2);
482  card_t[0] = 2;
483  card_t[1] = 2;
484  SGVector<float64_t> w_t(2 * 2);
485  w_t.zero();
486  CTableFactorType* ft = new CTableFactorType(tid, card_t, w_t);
487  v_factor_type.append_element(ft);
488  }
489 }
490 
492 {
493  SGMatrix<int32_t> labels_train;
494  SGMatrix<float64_t> feats_train;
495 
496  // Generate random data
497  sg_rand->set_seed(10); // fix the random seed
498  generate_data(4, 12, 8, feats_train, labels_train);
499 
500  int32_t num_sample_train = labels_train.num_cols;
501  int32_t num_classes = labels_train.num_rows;
502  int32_t dim = feats_train.num_rows;
503 
504  // 1.1 Get edge table
505  SGMatrix< int32_t > edge_table = get_edges_full(num_classes);
506  int32_t num_edges = edge_table.num_rows;
507 
508  // 1.2 Define factor type
509  DynArray<CTableFactorType*> v_factor_type;
510  define_factor_types(num_classes, dim, num_edges, v_factor_type);
511 
512  // 1.3 Prepare features and labels in factor graph
513  CFactorGraphFeatures* fg_feats_train = new CFactorGraphFeatures(num_sample_train);
514  SG_REF(fg_feats_train);
515  CFactorGraphLabels* fg_labels_train = new CFactorGraphLabels(num_sample_train);
516  SG_REF(fg_labels_train);
517 
518  build_factor_graph(feats_train, labels_train, edge_table, v_factor_type, fg_feats_train, fg_labels_train);
519 
520  // 1.4 Create factor graph model
521  CFactorGraphModel* model = new CFactorGraphModel(fg_feats_train, fg_labels_train, infer_type, false);
522  SG_REF(model);
523 
524  // Initialize model parameters
525  for (int32_t u = 0; u < num_classes; u++)
526  model->add_factor_type(v_factor_type[u]);
527 
528  for (int32_t t = 0; t < num_edges; t++)
529  model->add_factor_type(v_factor_type[t + num_classes]);
530 
531  // 2.1 Create SGD solver
532  CStochasticSOSVM* sgd = new CStochasticSOSVM(model, fg_labels_train, true);
533  sgd->set_num_iter(150);
534  sgd->set_lambda(0.0001);
535  SG_REF(sgd);
536 
537  // 2.2 Train SGD
538  sgd->train();
539 
540  // 3.1 Evaluation
542  SG_REF(labels_sgd);
543  float64_t ave_loss_sgd = 0.0;
544  float64_t acc_loss_sgd = 0.0;
545 
546  for (int32_t i = 0; i < num_sample_train; ++i)
547  {
548  CStructuredData* y_pred = labels_sgd->get_label(i);
549  CStructuredData* y_truth = fg_labels_train->get_label(i);
550  acc_loss_sgd += model->delta_loss(y_truth, y_pred);
551 
554 
555  SGVector<int32_t> s_t = y_t->get_data();
556  SGVector<int32_t> s_p = y_p->get_data();
557 
558  // training labels are expected to be correcty predicted for this dataset
559  //EXPECT_TRUE(s_t.equals(s_p));
560 
561  SG_UNREF(y_pred);
562  SG_UNREF(y_truth);
563  }
564 
565  ave_loss_sgd = acc_loss_sgd / static_cast<float64_t>(num_sample_train);
566 
567  SG_UNREF(labels_sgd);
568  SG_UNREF(sgd);
569  SG_UNREF(model);
570  SG_UNREF(fg_feats_train);
571  SG_UNREF(fg_labels_train);
572 
573  return ave_loss_sgd;
574 }
void truncate_energy(float64_t &A, float64_t &B, float64_t &C, float64_t &D)
void range_fill(T start=0)
Definition: SGVector.cpp:173
static void permute(SGVector< T > v, CRandom *rand=NULL)
Definition: Math.h:1144
Base class of the labels used in Structured Output (SO) problems.
float64_t evaluate_energy(const SGVector< int32_t > state) const
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:223
SGVector< int32_t > get_data() const
int32_t grid_to_index(int32_t x, int32_t y, int32_t w=10)
void add_factor(CFactor *factor)
Definition: FactorGraph.cpp:95
bool append_element(T element)
Definition: DynArray.h:244
SGMatrix< int32_t > get_edges_full(const int32_t num_classes)
CFactorGraph * random_chain_graph(SGVector< int > &assignment_expect, float64_t &min_energy_expect, int32_t N=2)
void build_factor_graph(SGMatrix< float64_t > feats, SGMatrix< int32_t > labels, SGMatrix< int32_t > edge_list, const DynArray< CTableFactorType * > &v_factor_type, CFactorGraphFeatures *fg_feats, CFactorGraphLabels *fg_labels)
void add_factor_type(CFactorType *ftype)
index_t num_cols
Definition: SGMatrix.h:378
CRandom * sg_rand
Definition: init.cpp:39
void set_num_iter(int32_t num_iter)
Class FactorGraphLabels used e.g. in the application of Structured Output (SO) learning with the Fact...
void define_factor_types(int32_t num_classes, int32_t dim, int32_t num_edges, DynArray< CTableFactorType * > &v_factor_type)
#define SG_REF(x)
Definition: SGObject.h:51
index_t num_rows
Definition: SGMatrix.h:376
static uint64_t random()
Definition: Math.h:1019
index_t vlen
Definition: SGVector.h:494
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
static void init_random(uint32_t initseed=0)
Definition: Math.h:1006
Template Dynamic array class that creates an array that can be used like a list or an array...
Definition: DynArray.h:32
bool add_sample(CFactorGraph *fg)
double float64_t
Definition: common.h:50
Class CFactorGraphObservation is used as the structured output.
CFactorGraphFeatures maintains an array of factor graphs, each graph is a sample, i...
CFactorGraphModel defines a model in terms of CFactorGraph and CMAPInference, where parameters are as...
T * get_column_vector(index_t col) const
Definition: SGMatrix.h:115
static CFactorGraphObservation * obtain_from_generic(CStructuredData *base_data)
Class CStochasticSOSVM solves SOSVM using stochastic subgradient descent on the SVM primal problem [1...
void generate_data(int32_t len_label, int32_t len_feat, int32_t size_data, SGMatrix< float64_t > &feats, SGMatrix< int32_t > &labels)
#define SG_UNREF(x)
Definition: SGObject.h:52
virtual void add_label(CStructuredData *label)
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define SG_SDEBUG(...)
Definition: SGIO.h:168
Class CFactorGraph a factor graph is a structured input in general.
Definition: FactorGraph.h:27
float64_t test_sosvm(EMAPInferType infer_type)
void set_seed(uint32_t seed)
Definition: Random.cpp:57
Class CTableFactorType the way that store assignments of variables and energies in a table or a multi...
Definition: FactorType.h:122
virtual bool train(CFeatures *data=NULL)
Definition: Machine.cpp:39
SGVector< T > clone() const
Definition: SGVector.cpp:209
void set_lambda(float64_t lbda)
virtual CStructuredData * get_label(int32_t idx)
virtual float64_t delta_loss(CStructuredData *y1, CStructuredData *y2)
static CStructuredLabels * to_structured(CLabels *base_labels)
SGVector< T > get_row_vector(index_t row) const
Definition: SGMatrix.cpp:1088
#define delta
Definition: sfa.cpp:23
Class CFactor A factor is defined on a clique in the factor graph. Each factor can have its own data...
Definition: Factor.h:89
Base class of the components of StructuredLabels.
virtual CLabels * apply(CFeatures *data=NULL)
Definition: Machine.cpp:152

SHOGUN Machine Learning Toolbox - Documentation