DiffusionMaps.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Sergey Lisitsyn
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/converter/DiffusionMaps.h>
00012 #include <shogun/converter/EmbeddingConverter.h>
00013 #include <shogun/lib/config.h>
00014 #ifdef HAVE_LAPACK
00015 #include <shogun/mathematics/arpack.h>
00016 #include <shogun/mathematics/lapack.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/kernel/GaussianKernel.h>
00019 #include <shogun/io/SGIO.h>
00020 #include <shogun/kernel/Kernel.h>
00021 #include <shogun/lib/Signal.h>
00022 #include <shogun/lib/Time.h>
00023 
00024 using namespace shogun;
00025 
00026 CDiffusionMaps::CDiffusionMaps() :
00027         CEmbeddingConverter()
00028 {
00029     m_t = 10;
00030     set_kernel(new CGaussianKernel(10,1.0));
00031 
00032     init();
00033 }
00034 
00035 void CDiffusionMaps::init()
00036 {
00037     SG_ADD(&m_t, "t", "number of steps", MS_AVAILABLE);
00038 }
00039 
00040 CDiffusionMaps::~CDiffusionMaps()
00041 {
00042 }
00043 
00044 void CDiffusionMaps::set_t(int32_t t)
00045 {
00046     m_t = t;
00047 }
00048 
00049 int32_t CDiffusionMaps::get_t() const
00050 {
00051     return m_t;
00052 }
00053 
00054 const char* CDiffusionMaps::get_name() const
00055 {
00056     return "DiffusionMaps";
00057 };
00058 
00059 CFeatures* CDiffusionMaps::apply(CFeatures* features)
00060 {
00061     ASSERT(features);
00062     // shorthand for simplefeatures
00063     SG_REF(features);
00064     // compute distance matrix
00065     ASSERT(m_kernel);
00066     m_kernel->init(features,features);
00067     CDenseFeatures<float64_t>* embedding = embed_kernel(m_kernel);
00068     m_kernel->cleanup();
00069     SG_UNREF(features);
00070     return (CFeatures*)embedding;
00071 }
00072 
00073 CDenseFeatures<float64_t>* CDiffusionMaps::embed_kernel(CKernel* kernel)
00074 {
00075 #ifdef HAVE_ARPACK
00076     bool use_arpack = true;
00077 #else
00078     bool use_arpack = false;
00079 #endif
00080     int32_t i,j;
00081     SGMatrix<float64_t> kernel_matrix = kernel->get_kernel_matrix();
00082     ASSERT(kernel_matrix.num_rows==kernel_matrix.num_cols);
00083     int32_t N = kernel_matrix.num_rows;
00084 
00085     float64_t* p_vector = SG_CALLOC(float64_t, N);
00086     for (i=0; i<N; i++)
00087     {
00088         for (j=0; j<N; j++)
00089         {
00090             p_vector[i] += kernel_matrix.matrix[j*N+i];
00091         }
00092     }
00093     //CMath::display_matrix(kernel_matrix.matrix,N,N,"K");
00094     for (i=0; i<N; i++)
00095     {
00096         for (j=0; j<N; j++)
00097         {
00098             kernel_matrix.matrix[i*N+j] /= CMath::pow(p_vector[i]*p_vector[j], m_t);
00099         }
00100     }
00101     //CMath::display_matrix(kernel_matrix.matrix,N,N,"K");
00102 
00103     for (i=0; i<N; i++)
00104     {
00105         p_vector[i] = 0.0;
00106         for (j=0; j<N; j++)
00107         {
00108             p_vector[i] += kernel_matrix.matrix[j*N+i];
00109         }
00110         p_vector[i] = CMath::sqrt(p_vector[i]);
00111     }
00112 
00113     for (i=0; i<N; i++)
00114     {
00115         for (j=0; j<N; j++)
00116         {
00117             kernel_matrix.matrix[i*N+j] /= p_vector[i]*p_vector[j];
00118         }
00119     }
00120 
00121     SG_FREE(p_vector);
00122     float64_t* s_values = SG_MALLOC(float64_t, N);
00123 
00124     int32_t info = 0;
00125     SGMatrix<float64_t> new_feature_matrix = SGMatrix<float64_t>(m_target_dim,N);
00126 
00127     if (use_arpack)
00128     {
00129 #ifdef HAVE_ARPACK
00130         arpack_dsxupd(kernel_matrix.matrix,NULL,false,N,m_target_dim,"LA",false,1,false,true,0.0,0.0,s_values,kernel_matrix.matrix,info);
00131 #endif /* HAVE_ARPACK */
00132         for (i=0; i<m_target_dim; i++)
00133         {
00134             for (j=0; j<N; j++)
00135                 new_feature_matrix[j*m_target_dim+i] = kernel_matrix[j*m_target_dim+i];
00136         }
00137     }
00138     else
00139     {
00140         SG_WARNING("LAPACK does not provide efficient routines to construct embedding (this may take time). Consider installing ARPACK.");
00141         wrap_dgesvd('O','N',N,N,kernel_matrix.matrix,N,s_values,NULL,1,NULL,1,&info);
00142         for (i=0; i<m_target_dim; i++)
00143         {
00144             for (j=0; j<N; j++)
00145                 new_feature_matrix[j*m_target_dim+i] =
00146                     kernel_matrix[(m_target_dim-i-1)*N+j];
00147         }
00148     }
00149     if (info)
00150         SG_ERROR("Eigenproblem solving  failed with %d code", info);
00151 
00152     SG_FREE(s_values);
00153 
00154     return new CDenseFeatures<float64_t>(new_feature_matrix);
00155 }
00156 #endif /* HAVE_LAPACK */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation