ECOCRandomDenseEncoder.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Chiyuan Zhang
00008  * Copyright (C) 2012 Chiyuan Zhang
00009  */
00010 
00011 #include <algorithm>
00012 #include <limits>
00013 
00014 #include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
00015 #include <shogun/multiclass/ecoc/ECOCUtil.h>
00016 
00017 using namespace shogun;
00018 
00019 CECOCRandomDenseEncoder::CECOCRandomDenseEncoder(int32_t maxiter, int32_t codelen, float64_t pposone)
00020     :m_maxiter(maxiter), m_codelen(codelen), m_pposone(pposone)
00021 {
00022     if (!check_probability(pposone))
00023         SG_ERROR("invalid probability of +1");
00024 
00025     init();
00026 }
00027 
00028 void CECOCRandomDenseEncoder::init()
00029 {
00030     SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE);
00031     SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE);
00032     SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE);
00033 }
00034 
00035 void CECOCRandomDenseEncoder::set_probability(float64_t pposone)
00036 {
00037     if (!check_probability(pposone))
00038         SG_ERROR("probability of 0, +1 and -1 must sum to one");
00039 
00040     m_pposone = pposone;
00041 }
00042 
00043 SGMatrix<int32_t> CECOCRandomDenseEncoder::create_codebook(int32_t num_classes)
00044 {
00045     int32_t codelen = m_codelen;
00046     if (codelen <= 0)
00047         codelen = get_default_code_length(num_classes);
00048 
00049 
00050     SGMatrix<int32_t> best_codebook(codelen, num_classes, true);
00051     int32_t best_dist = 0;
00052 
00053     SGMatrix<int32_t> codebook(codelen, num_classes);
00054     int32_t n_iter = 0;
00055     while (true)
00056     {
00057         // fill codebook
00058         codebook.zero();
00059         for (int32_t i=0; i < codelen; ++i)
00060         {
00061             for (int32_t j=0; j < num_classes; ++j)
00062             {
00063                 float64_t randval = CMath::random(0.0, 1.0);
00064                 if (randval > m_pposone)
00065                     codebook(i, j) = -1;
00066                 else
00067                     codebook(i, j) = +1;
00068             }
00069         }
00070 
00071         bool valid = true;
00072         for (int32_t i=0; i < codelen; ++i)
00073         {
00074             bool p1_occur = false, n1_occur = false;
00075             for (int32_t j=0; j < num_classes; ++j)
00076                 if (codebook(i, j) == 1)
00077                     p1_occur = true;
00078                 else if (codebook(i, j) == -1)
00079                     n1_occur = true;
00080 
00081             if (!p1_occur || !n1_occur)
00082             {
00083                 valid = false;
00084                 break;
00085             }
00086         }
00087 
00088         if (valid)
00089         {
00090             // see if this is a better codebook
00091             // compute the minimum pairwise code distance
00092             int32_t min_dist = std::numeric_limits<int32_t>::max();
00093             for (int32_t i=0; i < num_classes; ++i)
00094             {
00095                 for (int32_t j=i+1; j < num_classes; ++j)
00096                 {
00097                     int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
00098                             codebook.get_column_vector(j), codelen);
00099                     if (dist < min_dist)
00100                         min_dist = dist;
00101                 }
00102             }
00103 
00104             if (min_dist > best_dist)
00105             {
00106                 best_dist = min_dist;
00107                 std::copy(codebook.matrix, codebook.matrix + codelen*num_classes,
00108                         best_codebook.matrix);
00109             }
00110         }
00111 
00112         if (++n_iter >= m_maxiter)
00113             if (best_dist > 0) // already obtained a good codebook
00114                 break;
00115     }
00116 
00117     return best_codebook;
00118 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation