DiffusionMaps.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Sergey Lisitsyn
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/converter/DiffusionMaps.h>
00012 #include <shogun/converter/EmbeddingConverter.h>
00013 #include <shogun/lib/config.h>
00014 #ifdef HAVE_LAPACK
00015 #include <shogun/mathematics/lapack.h>
00016 #include <shogun/mathematics/Math.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/kernel/Kernel.h>
00019 #include <shogun/lib/Signal.h>
00020 #include <shogun/lib/Time.h>
00021 
00022 using namespace shogun;
00023 
00024 CDiffusionMaps::CDiffusionMaps() :
00025         CEmbeddingConverter()
00026 {
00027     m_t = 10;
00028     
00029     init();
00030 }
00031 
00032 void CDiffusionMaps::init()
00033 {
00034     m_parameters->add(&m_t, "t", "number of steps");
00035 }
00036 
00037 CDiffusionMaps::~CDiffusionMaps()
00038 {
00039 }
00040 
00041 void CDiffusionMaps::set_t(int32_t t)
00042 {
00043     m_t = t;
00044 }
00045 
00046 int32_t CDiffusionMaps::get_t() const
00047 {
00048     return m_t;
00049 }
00050 
00051 const char* CDiffusionMaps::get_name() const
00052 {
00053     return "DiffusionMaps";
00054 };
00055 
00056 CFeatures* CDiffusionMaps::apply(CFeatures* features)
00057 {
00058     ASSERT(features);
00059     // shorthand for simplefeatures
00060     SG_REF(features);
00061     // compute distance matrix
00062     ASSERT(m_kernel);
00063     m_kernel->init(features,features);
00064     CSimpleFeatures<float64_t>* embedding = embed_kernel(m_kernel);
00065     m_kernel->cleanup();
00066     SG_UNREF(features);
00067     return (CFeatures*)embedding;
00068 }
00069 
00070 CSimpleFeatures<float64_t>* CDiffusionMaps::embed_kernel(CKernel* kernel)
00071 {
00072     int32_t i,j;
00073     SGMatrix<float64_t> kernel_matrix = kernel->get_kernel_matrix();
00074     ASSERT(kernel_matrix.num_rows==kernel_matrix.num_cols);
00075     int32_t N = kernel_matrix.num_rows;
00076 
00077     float64_t* p_vector = SG_CALLOC(float64_t, N);
00078     for (i=0; i<N; i++)
00079     {
00080         for (j=0; j<N; j++)
00081         {
00082             p_vector[i] += kernel_matrix.matrix[j*N+i];
00083         }
00084     }
00085 
00086     float64_t* p_matrix = SG_CALLOC(float64_t, N*N);
00087     cblas_dger(CblasColMajor,N,N,1.0,p_vector,1,p_vector,1,p_matrix,N);
00088     for (i=0; i<N*N; i++)
00089     {
00090         kernel_matrix.matrix[i] /= CMath::pow(p_matrix[i], m_t);
00091     }
00092     SG_FREE(p_matrix);
00093 
00094     for (i=0; i<N; i++)
00095     {
00096         p_vector[i] = 0.0;
00097         for (j=0; j<N; j++)
00098         {
00099             p_vector[i] += kernel_matrix.matrix[j*N+i];
00100         }
00101         p_vector[i] = CMath::sqrt(p_vector[i]);
00102     }
00103     float64_t ppt = cblas_ddot(N,p_vector,1,p_vector,1);
00104     SG_FREE(p_vector);
00105 
00106     for (i=0; i<N*N; i++)
00107     {
00108         kernel_matrix.matrix[i] /= ppt;
00109     }
00110 
00111 
00112     float64_t* s_values = SG_MALLOC(float64_t, N);
00113 
00114     float64_t* kkt_matrix = SG_MALLOC(float64_t, N*N);
00115 
00116     cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,
00117                 N,N,N,
00118                 1.0,kernel_matrix.matrix,N,
00119                 kernel_matrix.matrix,N,
00120                 0.0,kkt_matrix,N);
00121 
00122     int32_t info = 0;
00123 
00124     wrap_dsyevr('V','U',N,kkt_matrix,N,N-m_target_dim,N,s_values,kernel_matrix.matrix,&info);
00125     if (info)
00126         SG_ERROR("DGESVD failed with %d code", info);
00127 
00128     SG_FREE(s_values);
00129 /*
00130     int32_t info = 0;
00131     wrap_dgesvd('O','N',N,N,kernel_matrix.matrix,N,s_values,NULL,1,NULL,1,&info);
00132 */
00133 
00134     SG_FREE(kkt_matrix);
00135     SGMatrix<float64_t> new_feature_matrix = SGMatrix<float64_t>(m_target_dim,N);
00136 
00137     for (i=0; i<m_target_dim; i++)
00138     {
00139         for (j=0; j<N; j++)
00140             new_feature_matrix[j*m_target_dim+i] = kernel_matrix.matrix[(m_target_dim-i-1)*N+j]/kernel_matrix.matrix[(m_target_dim)*N+j];
00141     }
00142     kernel_matrix.destroy_matrix();
00143 
00144     return new CSimpleFeatures<float64_t>(new_feature_matrix);
00145 }
00146 #endif /* HAVE_LAPACK */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation