ECOCRandomSparseEncoder.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Chiyuan Zhang
00008  * Copyright (C) 2012 Chiyuan Zhang
00009  */
00010 
00011 #include <vector>
00012 #include <limits>
00013 #include <algorithm>
00014 
00015 #include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
00016 #include <shogun/multiclass/ecoc/ECOCUtil.h>
00017 
00018 using namespace shogun;
00019 
00020 CECOCRandomSparseEncoder::CECOCRandomSparseEncoder(int32_t maxiter, int32_t codelen,
00021         float64_t pzero, float64_t pposone, float64_t pnegone)
00022     :m_maxiter(maxiter), m_codelen(codelen), m_pzero(pzero), m_pposone(pposone), m_pnegone(pnegone)
00023 {
00024     if (!check_probability(pzero, pposone, pnegone))
00025         SG_ERROR("probability of 0, +1 and -1 must sum to one");
00026 
00027     init();
00028 }
00029 
00030 void CECOCRandomSparseEncoder::init()
00031 {
00032     SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE);
00033     SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE);
00034     SG_ADD(&m_pzero, "pzero", "probability of 0", MS_NOT_AVAILABLE);
00035     SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE);
00036     SG_ADD(&m_pnegone, "pnegone", "probability of -1", MS_NOT_AVAILABLE);
00037 }
00038 
00039 void CECOCRandomSparseEncoder::set_probability(float64_t pzero, float64_t pposone, float64_t pnegone)
00040 {
00041     if (!check_probability(pzero, pposone, pnegone))
00042         SG_ERROR("probability of 0, +1 and -1 must sum to one");
00043 
00044     m_pzero   = pzero;
00045     m_pposone = pposone;
00046     m_pnegone = pnegone;
00047 }
00048 
00049 SGMatrix<int32_t> CECOCRandomSparseEncoder::create_codebook(int32_t num_classes)
00050 {
00051     int32_t codelen = m_codelen;
00052     if (codelen <= 0)
00053         codelen = get_default_code_length(num_classes);
00054 
00055 
00056     SGMatrix<int32_t> best_codebook(codelen, num_classes, true);
00057     int32_t best_dist = 0;
00058 
00059     SGMatrix<int32_t> codebook(codelen, num_classes);
00060     std::vector<int32_t> random_sel(num_classes);
00061     int32_t n_iter = 0;
00062 
00063     while (true)
00064     {
00065         // fill codebook
00066         codebook.zero();
00067         for (int32_t i=0; i < codelen; ++i)
00068         {
00069             // randomly select two positions
00070             for (int32_t j=0; j < num_classes; ++j)
00071                 random_sel[j] = j;
00072             std::random_shuffle(random_sel.begin(), random_sel.end());
00073             if (CMath::random(0.0, 1.0) > 0.5)
00074             {
00075                 codebook(i, random_sel[0]) = +1;
00076                 codebook(i, random_sel[1]) = -1;
00077             }
00078             else
00079             {
00080                 codebook(i, random_sel[0]) = -1;
00081                 codebook(i, random_sel[1]) = +1;
00082             }
00083 
00084             // assign the remaining positions
00085             for (int32_t j=2; j < num_classes; ++j)
00086             {
00087                 float64_t randval = CMath::random(0.0, 1.0);
00088                 if (randval > m_pzero)
00089                 {
00090                     if (randval > m_pzero+m_pposone)
00091                         codebook(i, random_sel[j]) = -1;
00092                     else
00093                         codebook(i, random_sel[j]) = +1;
00094                 }
00095             }
00096         }
00097 
00098         // see if this is a better codebook
00099         // compute the minimum pairwise code distance
00100         int32_t min_dist = std::numeric_limits<int32_t>::max();
00101         for (int32_t i=0; i < num_classes; ++i)
00102         {
00103             for (int32_t j=i+1; j < num_classes; ++j)
00104             {
00105                 int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
00106                         codebook.get_column_vector(j), codelen);
00107                 if (dist < min_dist)
00108                     min_dist = dist;
00109             }
00110         }
00111 
00112         if (min_dist > best_dist)
00113         {
00114             best_dist = min_dist;
00115             std::copy(codebook.matrix, codebook.matrix + codelen*num_classes,
00116                     best_codebook.matrix);
00117         }
00118 
00119         if (++n_iter >= m_maxiter)
00120             break;
00121     }
00122 
00123     return best_codebook;
00124 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation