SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ECOCRandomSparseEncoder.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Chiyuan Zhang
8  * Copyright (C) 2012 Chiyuan Zhang
9  */
10 
11 #include <vector>
12 #include <limits>
13 #include <algorithm>
14 
17 
18 using namespace shogun;
19 
20 CECOCRandomSparseEncoder::CECOCRandomSparseEncoder(int32_t maxiter, int32_t codelen,
21  float64_t pzero, float64_t pposone, float64_t pnegone)
22  :m_maxiter(maxiter), m_codelen(codelen), m_pzero(pzero), m_pposone(pposone), m_pnegone(pnegone)
23 {
24  if (!check_probability(pzero, pposone, pnegone))
25  SG_ERROR("probability of 0, +1 and -1 must sum to one")
26 
27  init();
28 }
29 
30 void CECOCRandomSparseEncoder::init()
31 {
32  SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE);
33  SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE);
34  SG_ADD(&m_pzero, "pzero", "probability of 0", MS_NOT_AVAILABLE);
35  SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE);
36  SG_ADD(&m_pnegone, "pnegone", "probability of -1", MS_NOT_AVAILABLE);
37 }
38 
40 {
41  if (!check_probability(pzero, pposone, pnegone))
42  SG_ERROR("probability of 0, +1 and -1 must sum to one")
43 
44  m_pzero = pzero;
45  m_pposone = pposone;
46  m_pnegone = pnegone;
47 }
48 
50 {
51  int32_t codelen = m_codelen;
52  if (codelen <= 0)
53  codelen = get_default_code_length(num_classes);
54 
55 
56  SGMatrix<int32_t> best_codebook(codelen, num_classes, true);
57  int32_t best_dist = 0;
58 
59  SGMatrix<int32_t> codebook(codelen, num_classes);
60  std::vector<int32_t> random_sel(num_classes);
61  int32_t n_iter = 0;
62 
63  while (true)
64  {
65  // fill codebook
66  codebook.zero();
67  for (int32_t i=0; i < codelen; ++i)
68  {
69  // randomly select two positions
70  for (int32_t j=0; j < num_classes; ++j)
71  random_sel[j] = j;
72  std::random_shuffle(random_sel.begin(), random_sel.end());
73  if (CMath::random(0.0, 1.0) > 0.5)
74  {
75  codebook(i, random_sel[0]) = +1;
76  codebook(i, random_sel[1]) = -1;
77  }
78  else
79  {
80  codebook(i, random_sel[0]) = -1;
81  codebook(i, random_sel[1]) = +1;
82  }
83 
84  // assign the remaining positions
85  for (int32_t j=2; j < num_classes; ++j)
86  {
87  float64_t randval = CMath::random(0.0, 1.0);
88  if (randval > m_pzero)
89  {
90  if (randval > m_pzero+m_pposone)
91  codebook(i, random_sel[j]) = -1;
92  else
93  codebook(i, random_sel[j]) = +1;
94  }
95  }
96  }
97 
98  // see if this is a better codebook
99  // compute the minimum pairwise code distance
100  int32_t min_dist = std::numeric_limits<int32_t>::max();
101  for (int32_t i=0; i < num_classes; ++i)
102  {
103  for (int32_t j=i+1; j < num_classes; ++j)
104  {
105  int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
106  codebook.get_column_vector(j), codelen);
107  if (dist < min_dist)
108  min_dist = dist;
109  }
110  }
111 
112  if (min_dist > best_dist)
113  {
114  best_dist = min_dist;
115  std::copy(codebook.matrix, codebook.matrix + codelen*num_classes,
116  best_codebook.matrix);
117  }
118 
119  if (++n_iter >= m_maxiter)
120  break;
121  }
122 
123  return best_codebook;
124 }
static int32_t hamming_distance(T1 *c1, T2 *c2, int32_t len)
Definition: ECOCUtil.h:31
#define SG_ERROR(...)
Definition: SGIO.h:129
CECOCRandomSparseEncoder(int32_t maxiter=10000, int32_t codelen=0, float64_t pzero=0.5, float64_t pposone=0.25, float64_t pnegone=0.25)
static uint64_t random()
Definition: Math.h:1019
void set_probability(float64_t pzero, float64_t pposone, float64_t pnegone)
virtual SGMatrix< int32_t > create_codebook(int32_t num_classes)
double float64_t
Definition: common.h:50
int32_t get_default_code_length(int32_t num_classes) const
T * get_column_vector(index_t col) const
Definition: SGMatrix.h:113
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68
#define SG_ADD(...)
Definition: SGObject.h:84

SHOGUN Machine Learning Toolbox - Documentation