00001 #include <shogun/preprocessor/DimensionReductionPreprocessor.h> 00002 #include <shogun/converter/EmbeddingConverter.h> 00003 #include <shogun/kernel/LinearKernel.h> 00004 #include <shogun/distance/EuclidianDistance.h> 00005 00006 using namespace shogun; 00007 00008 namespace shogun 00009 { 00010 CDimensionReductionPreprocessor::CDimensionReductionPreprocessor() 00011 : CSimplePreprocessor<float64_t>() 00012 { 00013 m_target_dim = 1; 00014 m_distance = new CEuclidianDistance(); 00015 m_kernel = new CLinearKernel(); 00016 m_converter = NULL; 00017 00018 init(); 00019 } 00020 00021 CDimensionReductionPreprocessor::CDimensionReductionPreprocessor(CEmbeddingConverter* converter) 00022 : CSimplePreprocessor<float64_t>() 00023 { 00024 SG_REF(converter); 00025 m_target_dim = 1; 00026 m_distance = new CEuclidianDistance(); 00027 m_kernel = new CLinearKernel(); 00028 m_converter = converter; 00029 00030 init(); 00031 } 00032 00033 CDimensionReductionPreprocessor::~CDimensionReductionPreprocessor() 00034 { 00035 SG_UNREF(m_distance); 00036 SG_UNREF(m_kernel); 00037 SG_UNREF(m_converter); 00038 } 00039 00040 SGMatrix<float64_t> CDimensionReductionPreprocessor::apply_to_feature_matrix(CFeatures* features) 00041 { 00042 if (m_converter) 00043 { 00044 m_converter->set_target_dim(m_target_dim); 00045 CSimpleFeatures<float64_t>* embedding = m_converter->embed(features); 00046 SGMatrix<float64_t> embedding_feature_matrix = embedding->steal_feature_matrix(); 00047 ((CSimpleFeatures<float64_t>*)features)->set_feature_matrix(embedding_feature_matrix); 00048 delete embedding; 00049 return embedding_feature_matrix; 00050 } 00051 else 00052 { 00053 SG_WARNING("Converter to process was not set.\n"); 00054 return ((CSimpleFeatures<float64_t>*)features)->get_feature_matrix(); 00055 } 00056 } 00057 00058 bool CDimensionReductionPreprocessor::init(CFeatures* data) 00059 { 00060 return true; 00061 } 00062 00063 void CDimensionReductionPreprocessor::cleanup() 00064 { 00065 00066 } 00067 00068 EPreprocessorType CDimensionReductionPreprocessor::get_type() const { return P_DIMENSIONREDUCTIONPREPROCESSOR; }; 00069 00070 void CDimensionReductionPreprocessor::set_target_dim(int32_t dim) 00071 { 00072 ASSERT(dim>0); 00073 m_target_dim = dim; 00074 } 00075 00076 int32_t CDimensionReductionPreprocessor::get_target_dim() const 00077 { 00078 return m_target_dim; 00079 } 00080 00081 void CDimensionReductionPreprocessor::set_distance(CDistance* distance) 00082 { 00083 SG_UNREF(m_distance); 00084 SG_REF(distance); 00085 m_distance = distance; 00086 } 00087 00088 CDistance* CDimensionReductionPreprocessor::get_distance() const 00089 { 00090 SG_REF(m_distance); 00091 return m_distance; 00092 } 00093 00094 void CDimensionReductionPreprocessor::set_kernel(CKernel* kernel) 00095 { 00096 SG_UNREF(m_kernel); 00097 SG_REF(kernel); 00098 m_kernel = kernel; 00099 } 00100 00101 CKernel* CDimensionReductionPreprocessor::get_kernel() const 00102 { 00103 SG_REF(m_kernel); 00104 return m_kernel; 00105 } 00106 00107 void CDimensionReductionPreprocessor::init() 00108 { 00109 this->m_parameters->add((CSGObject**)&m_converter, "converter", 00110 "embedding converter used to apply to data"); 00111 this->m_parameters->add(&m_target_dim, "target_dim", 00112 "target dimensionality of preprocessor"); 00113 this->m_parameters->add((CSGObject**)&m_distance, "distance", 00114 "distance to be used for embedding"); 00115 this->m_parameters->add((CSGObject**)&m_kernel, "kernel", 00116 "kernel to be used for embedding"); 00117 } 00118 }