Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/converter/DiffusionMaps.h>
00012 #include <shogun/converter/EmbeddingConverter.h>
00013 #include <shogun/lib/config.h>
00014 #ifdef HAVE_LAPACK
00015 #include <shogun/mathematics/arpack.h>
00016 #include <shogun/mathematics/lapack.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/kernel/GaussianKernel.h>
00019 #include <shogun/io/SGIO.h>
00020 #include <shogun/kernel/Kernel.h>
00021 #include <shogun/lib/Signal.h>
00022 #include <shogun/lib/Time.h>
00023
00024 using namespace shogun;
00025
00026 CDiffusionMaps::CDiffusionMaps() :
00027 CEmbeddingConverter()
00028 {
00029 m_t = 10;
00030 set_kernel(new CGaussianKernel(10,1.0));
00031
00032 init();
00033 }
00034
00035 void CDiffusionMaps::init()
00036 {
00037 SG_ADD(&m_t, "t", "number of steps", MS_AVAILABLE);
00038 }
00039
00040 CDiffusionMaps::~CDiffusionMaps()
00041 {
00042 }
00043
00044 void CDiffusionMaps::set_t(int32_t t)
00045 {
00046 m_t = t;
00047 }
00048
00049 int32_t CDiffusionMaps::get_t() const
00050 {
00051 return m_t;
00052 }
00053
00054 const char* CDiffusionMaps::get_name() const
00055 {
00056 return "DiffusionMaps";
00057 };
00058
00059 CFeatures* CDiffusionMaps::apply(CFeatures* features)
00060 {
00061 ASSERT(features);
00062
00063 SG_REF(features);
00064
00065 ASSERT(m_kernel);
00066 m_kernel->init(features,features);
00067 CDenseFeatures<float64_t>* embedding = embed_kernel(m_kernel);
00068 m_kernel->cleanup();
00069 SG_UNREF(features);
00070 return (CFeatures*)embedding;
00071 }
00072
00073 CDenseFeatures<float64_t>* CDiffusionMaps::embed_kernel(CKernel* kernel)
00074 {
00075 #ifdef HAVE_ARPACK
00076 bool use_arpack = true;
00077 #else
00078 bool use_arpack = false;
00079 #endif
00080 int32_t i,j;
00081 SGMatrix<float64_t> kernel_matrix = kernel->get_kernel_matrix();
00082 ASSERT(kernel_matrix.num_rows==kernel_matrix.num_cols);
00083 int32_t N = kernel_matrix.num_rows;
00084
00085 float64_t* p_vector = SG_CALLOC(float64_t, N);
00086 for (i=0; i<N; i++)
00087 {
00088 for (j=0; j<N; j++)
00089 {
00090 p_vector[i] += kernel_matrix.matrix[j*N+i];
00091 }
00092 }
00093
00094 for (i=0; i<N; i++)
00095 {
00096 for (j=0; j<N; j++)
00097 {
00098 kernel_matrix.matrix[i*N+j] /= CMath::pow(p_vector[i]*p_vector[j], m_t);
00099 }
00100 }
00101
00102
00103 for (i=0; i<N; i++)
00104 {
00105 p_vector[i] = 0.0;
00106 for (j=0; j<N; j++)
00107 {
00108 p_vector[i] += kernel_matrix.matrix[j*N+i];
00109 }
00110 p_vector[i] = CMath::sqrt(p_vector[i]);
00111 }
00112
00113 for (i=0; i<N; i++)
00114 {
00115 for (j=0; j<N; j++)
00116 {
00117 kernel_matrix.matrix[i*N+j] /= p_vector[i]*p_vector[j];
00118 }
00119 }
00120
00121 SG_FREE(p_vector);
00122 float64_t* s_values = SG_MALLOC(float64_t, N);
00123
00124 int32_t info = 0;
00125 SGMatrix<float64_t> new_feature_matrix = SGMatrix<float64_t>(m_target_dim,N);
00126
00127 if (use_arpack)
00128 {
00129 #ifdef HAVE_ARPACK
00130 arpack_dsxupd(kernel_matrix.matrix,NULL,false,N,m_target_dim,"LA",false,1,false,true,0.0,0.0,s_values,kernel_matrix.matrix,info);
00131 #endif
00132 for (i=0; i<m_target_dim; i++)
00133 {
00134 for (j=0; j<N; j++)
00135 new_feature_matrix[j*m_target_dim+i] = kernel_matrix[j*m_target_dim+i];
00136 }
00137 }
00138 else
00139 {
00140 SG_WARNING("LAPACK does not provide efficient routines to construct embedding (this may take time). Consider installing ARPACK.");
00141 wrap_dgesvd('O','N',N,N,kernel_matrix.matrix,N,s_values,NULL,1,NULL,1,&info);
00142 for (i=0; i<m_target_dim; i++)
00143 {
00144 for (j=0; j<N; j++)
00145 new_feature_matrix[j*m_target_dim+i] =
00146 kernel_matrix[(m_target_dim-i-1)*N+j];
00147 }
00148 }
00149 if (info)
00150 SG_ERROR("Eigenproblem solving failed with %d code", info);
00151
00152 SG_FREE(s_values);
00153
00154 return new CDenseFeatures<float64_t>(new_feature_matrix);
00155 }
00156 #endif