Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <algorithm>
00012 #include <limits>
00013
00014 #include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
00015 #include <shogun/multiclass/ecoc/ECOCUtil.h>
00016
00017 using namespace shogun;
00018
00019 CECOCRandomDenseEncoder::CECOCRandomDenseEncoder(int32_t maxiter, int32_t codelen, float64_t pposone)
00020 :m_maxiter(maxiter), m_codelen(codelen), m_pposone(pposone)
00021 {
00022 if (!check_probability(pposone))
00023 SG_ERROR("invalid probability of +1");
00024
00025 init();
00026 }
00027
00028 void CECOCRandomDenseEncoder::init()
00029 {
00030 SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE);
00031 SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE);
00032 SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE);
00033 }
00034
00035 void CECOCRandomDenseEncoder::set_probability(float64_t pposone)
00036 {
00037 if (!check_probability(pposone))
00038 SG_ERROR("probability of 0, +1 and -1 must sum to one");
00039
00040 m_pposone = pposone;
00041 }
00042
00043 SGMatrix<int32_t> CECOCRandomDenseEncoder::create_codebook(int32_t num_classes)
00044 {
00045 int32_t codelen = m_codelen;
00046 if (codelen <= 0)
00047 codelen = get_default_code_length(num_classes);
00048
00049
00050 SGMatrix<int32_t> best_codebook(codelen, num_classes, true);
00051 int32_t best_dist = 0;
00052
00053 SGMatrix<int32_t> codebook(codelen, num_classes);
00054 int32_t n_iter = 0;
00055 while (true)
00056 {
00057
00058 codebook.zero();
00059 for (int32_t i=0; i < codelen; ++i)
00060 {
00061 for (int32_t j=0; j < num_classes; ++j)
00062 {
00063 float64_t randval = CMath::random(0.0, 1.0);
00064 if (randval > m_pposone)
00065 codebook(i, j) = -1;
00066 else
00067 codebook(i, j) = +1;
00068 }
00069 }
00070
00071 bool valid = true;
00072 for (int32_t i=0; i < codelen; ++i)
00073 {
00074 bool p1_occur = false, n1_occur = false;
00075 for (int32_t j=0; j < num_classes; ++j)
00076 if (codebook(i, j) == 1)
00077 p1_occur = true;
00078 else if (codebook(i, j) == -1)
00079 n1_occur = true;
00080
00081 if (!p1_occur || !n1_occur)
00082 {
00083 valid = false;
00084 break;
00085 }
00086 }
00087
00088 if (valid)
00089 {
00090
00091
00092 int32_t min_dist = std::numeric_limits<int32_t>::max();
00093 for (int32_t i=0; i < num_classes; ++i)
00094 {
00095 for (int32_t j=i+1; j < num_classes; ++j)
00096 {
00097 int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
00098 codebook.get_column_vector(j), codelen);
00099 if (dist < min_dist)
00100 min_dist = dist;
00101 }
00102 }
00103
00104 if (min_dist > best_dist)
00105 {
00106 best_dist = min_dist;
00107 std::copy(codebook.matrix, codebook.matrix + codelen*num_classes,
00108 best_codebook.matrix);
00109 }
00110 }
00111
00112 if (++n_iter >= m_maxiter)
00113 if (best_dist > 0)
00114 break;
00115 }
00116
00117 return best_codebook;
00118 }