00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Soeren Sonnenburg 00008 * Copyright (C) 2009 Berlin Institute of Technology 00009 */ 00010 00011 #ifndef _CSTRINGFILEFEATURES__H__ 00012 #define _CSTRINGFILEFEATURES__H__ 00013 00014 #include <shogun/features/StringFeatures.h> 00015 #include <shogun/features/Alphabet.h> 00016 #include <shogun/io/MemoryMappedFile.h> 00017 #include <shogun/mathematics/Math.h> 00018 #include <shogun/io/SGIO.h> 00019 00020 namespace shogun 00021 { 00022 class CAlphabet; 00023 template <class T> class CMemoryMappedFile; 00024 00034 template <class ST> class CStringFileFeatures : public CStringFeatures<ST> 00035 { 00036 public: 00037 00041 CStringFileFeatures(); 00042 00048 CStringFileFeatures(const char* fname, EAlphabet alpha); 00049 00053 virtual ~CStringFileFeatures(); 00054 00055 protected: 00070 ST* get_line(uint64_t& len, uint64_t& offs, int32_t& line_nr, uint64_t file_length); 00071 00073 virtual void cleanup(); 00074 00076 virtual void cleanup_feature_vector(int32_t num); 00077 00082 void fetch_meta_info_from_file(int32_t granularity=1048576); 00083 00084 protected: 00086 CMemoryMappedFile<ST>* file; 00087 }; 00088 } 00089 #endif // _CSTRINGFILEFEATURES__H__