SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ECOCRandomDenseEncoder.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Chiyuan Zhang
8  * Copyright (C) 2012 Chiyuan Zhang
9  */
10 
11 #include <algorithm>
12 #include <limits>
13 
16 
17 using namespace shogun;
18 
19 CECOCRandomDenseEncoder::CECOCRandomDenseEncoder(int32_t maxiter, int32_t codelen, float64_t pposone)
20  :m_maxiter(maxiter), m_codelen(codelen), m_pposone(pposone)
21 {
22  if (!check_probability(pposone))
23  SG_ERROR("invalid probability of +1");
24 
25  init();
26 }
27 
28 void CECOCRandomDenseEncoder::init()
29 {
30  SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE);
31  SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE);
32  SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE);
33 }
34 
36 {
37  if (!check_probability(pposone))
38  SG_ERROR("probability of 0, +1 and -1 must sum to one");
39 
40  m_pposone = pposone;
41 }
42 
44 {
45  int32_t codelen = m_codelen;
46  if (codelen <= 0)
47  codelen = get_default_code_length(num_classes);
48 
49 
50  SGMatrix<int32_t> best_codebook(codelen, num_classes, true);
51  int32_t best_dist = 0;
52 
53  SGMatrix<int32_t> codebook(codelen, num_classes);
54  int32_t n_iter = 0;
55  while (true)
56  {
57  // fill codebook
58  codebook.zero();
59  for (int32_t i=0; i < codelen; ++i)
60  {
61  for (int32_t j=0; j < num_classes; ++j)
62  {
63  float64_t randval = CMath::random(0.0, 1.0);
64  if (randval > m_pposone)
65  codebook(i, j) = -1;
66  else
67  codebook(i, j) = +1;
68  }
69  }
70 
71  bool valid = true;
72  for (int32_t i=0; i < codelen; ++i)
73  {
74  bool p1_occur = false, n1_occur = false;
75  for (int32_t j=0; j < num_classes; ++j)
76  if (codebook(i, j) == 1)
77  p1_occur = true;
78  else if (codebook(i, j) == -1)
79  n1_occur = true;
80 
81  if (!p1_occur || !n1_occur)
82  {
83  valid = false;
84  break;
85  }
86  }
87 
88  if (valid)
89  {
90  // see if this is a better codebook
91  // compute the minimum pairwise code distance
92  int32_t min_dist = std::numeric_limits<int32_t>::max();
93  for (int32_t i=0; i < num_classes; ++i)
94  {
95  for (int32_t j=i+1; j < num_classes; ++j)
96  {
97  int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
98  codebook.get_column_vector(j), codelen);
99  if (dist < min_dist)
100  min_dist = dist;
101  }
102  }
103 
104  if (min_dist > best_dist)
105  {
106  best_dist = min_dist;
107  std::copy(codebook.matrix, codebook.matrix + codelen*num_classes,
108  best_codebook.matrix);
109  }
110  }
111 
112  if (++n_iter >= m_maxiter)
113  if (best_dist > 0) // already obtained a good codebook
114  break;
115  }
116 
117  return best_codebook;
118 }

SHOGUN Machine Learning Toolbox - Documentation