00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <vector>
00012 #include <limits>
00013 #include <algorithm>
00014
00015 #include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
00016 #include <shogun/multiclass/ecoc/ECOCUtil.h>
00017
00018 using namespace shogun;
00019
00020 CECOCRandomSparseEncoder::CECOCRandomSparseEncoder(int32_t maxiter, int32_t codelen,
00021 float64_t pzero, float64_t pposone, float64_t pnegone)
00022 :m_maxiter(maxiter), m_codelen(codelen), m_pzero(pzero), m_pposone(pposone), m_pnegone(pnegone)
00023 {
00024 if (!check_probability(pzero, pposone, pnegone))
00025 SG_ERROR("probability of 0, +1 and -1 must sum to one");
00026
00027 init();
00028 }
00029
00030 void CECOCRandomSparseEncoder::init()
00031 {
00032 SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE);
00033 SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE);
00034 SG_ADD(&m_pzero, "pzero", "probability of 0", MS_NOT_AVAILABLE);
00035 SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE);
00036 SG_ADD(&m_pnegone, "pnegone", "probability of -1", MS_NOT_AVAILABLE);
00037 }
00038
00039 void CECOCRandomSparseEncoder::set_probability(float64_t pzero, float64_t pposone, float64_t pnegone)
00040 {
00041 if (!check_probability(pzero, pposone, pnegone))
00042 SG_ERROR("probability of 0, +1 and -1 must sum to one");
00043
00044 m_pzero = pzero;
00045 m_pposone = pposone;
00046 m_pnegone = pnegone;
00047 }
00048
00049 SGMatrix<int32_t> CECOCRandomSparseEncoder::create_codebook(int32_t num_classes)
00050 {
00051 int32_t codelen = m_codelen;
00052 if (codelen <= 0)
00053 codelen = get_default_code_length(num_classes);
00054
00055
00056 SGMatrix<int32_t> best_codebook(codelen, num_classes, true);
00057 int32_t best_dist = 0;
00058
00059 SGMatrix<int32_t> codebook(codelen, num_classes);
00060 std::vector<int32_t> random_sel(num_classes);
00061 int32_t n_iter = 0;
00062
00063 while (true)
00064 {
00065
00066 codebook.zero();
00067 for (int32_t i=0; i < codelen; ++i)
00068 {
00069
00070 for (int32_t j=0; j < num_classes; ++j)
00071 random_sel[j] = j;
00072 std::random_shuffle(random_sel.begin(), random_sel.end());
00073 if (CMath::random(0.0, 1.0) > 0.5)
00074 {
00075 codebook(i, random_sel[0]) = +1;
00076 codebook(i, random_sel[1]) = -1;
00077 }
00078 else
00079 {
00080 codebook(i, random_sel[0]) = -1;
00081 codebook(i, random_sel[1]) = +1;
00082 }
00083
00084
00085 for (int32_t j=2; j < num_classes; ++j)
00086 {
00087 float64_t randval = CMath::random(0.0, 1.0);
00088 if (randval > m_pzero)
00089 {
00090 if (randval > m_pzero+m_pposone)
00091 codebook(i, random_sel[j]) = -1;
00092 else
00093 codebook(i, random_sel[j]) = +1;
00094 }
00095 }
00096 }
00097
00098
00099
00100 int32_t min_dist = std::numeric_limits<int32_t>::max();
00101 for (int32_t i=0; i < num_classes; ++i)
00102 {
00103 for (int32_t j=i+1; j < num_classes; ++j)
00104 {
00105 int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
00106 codebook.get_column_vector(j), codelen);
00107 if (dist < min_dist)
00108 min_dist = dist;
00109 }
00110 }
00111
00112 if (min_dist > best_dist)
00113 {
00114 best_dist = min_dist;
00115 std::copy(codebook.matrix, codebook.matrix + codelen*num_classes,
00116 best_codebook.matrix);
00117 }
00118
00119 if (++n_iter >= m_maxiter)
00120 break;
00121 }
00122
00123 return best_codebook;
00124 }