MemoryMappedFile.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #ifndef __MEMORYMAPPEDFILE_H__
00012 #define __MEMORYMAPPEDFILE_H__
00013 
00014 #include <shogun/io/SGIO.h>
00015 #include <shogun/base/SGObject.h>
00016 
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <sys/mman.h>
00020 #include <sys/stat.h>
00021 #include <sys/types.h>
00022 #include <fcntl.h>
00023 #include <unistd.h>
00024 
00025 namespace shogun
00026 {
00031 template <class T> class CMemoryMappedFile : public CSGObject
00032 {
00033     public:
00035         CMemoryMappedFile() :CSGObject()
00036         {
00037             SG_UNSTABLE("CMemoryMappedFile::CMemoryMappedFile()",
00038                         "\n");
00039 
00040             fd = 0;
00041             length = 0;
00042             address = NULL;
00043             rw = 'r';
00044             last_written_byte = 0;
00045         }
00046 
00060         CMemoryMappedFile(const char* fname, char flag='r', int64_t fsize=0)
00061         : CSGObject()
00062         {
00063             last_written_byte=0;
00064             rw=flag;
00065 
00066             int open_flags;
00067             int mmap_prot;
00068             int mmap_flags;
00069 
00070             if (rw=='w')
00071             {
00072                 open_flags=O_RDWR | O_CREAT;
00073                 mmap_prot=PROT_READ|PROT_WRITE;
00074                 mmap_flags=MAP_SHARED;
00075             }
00076             else if (rw=='r')
00077             {
00078                 open_flags=O_RDONLY;
00079                 mmap_prot=PROT_READ;
00080                 mmap_flags=MAP_PRIVATE;
00081             }
00082             else
00083                 SG_ERROR("Unknown flags\n");
00084 
00085             fd = open(fname, open_flags, S_IRWXU | S_IRWXG | S_IRWXO);
00086             if (fd == -1)
00087                 SG_ERROR("Error opening file\n");
00088 
00089             if (rw=='w' && fsize)
00090             {
00091                 uint8_t byte=0;
00092                 if (lseek(fd, fsize, SEEK_SET) != fsize || write(fd, &byte, 1) != 1)
00093                     SG_ERROR("Error creating file of size %ld bytes\n", fsize);
00094             }
00095 
00096             struct stat sb;
00097             if (fstat(fd, &sb) == -1)
00098                 SG_ERROR("Error determining file size\n");
00099 
00100             length = sb.st_size;
00101             address = mmap(NULL, length, mmap_prot, mmap_flags, fd, 0);
00102             if (address == MAP_FAILED)
00103                 SG_ERROR("Error mapping file");
00104         }
00105 
00107         virtual ~CMemoryMappedFile()
00108         {
00109             munmap(address, length);
00110             if (rw=='w' && last_written_byte && ftruncate(fd, last_written_byte) == -1)
00111 
00112             {
00113                 close(fd);
00114                 SG_ERROR("Error Truncating file to %ld bytes\n", last_written_byte);
00115             }
00116             close(fd);
00117         }
00118 
00128         inline T* get_map()
00129         {
00130             return (T*) address;
00131         }
00132 
00137         uint64_t get_length()
00138         {
00139             return length/sizeof(T);
00140         }
00141 
00146         uint64_t get_size()
00147         {
00148             return length;
00149         }
00150 
00162         char* get_line(uint64_t& len, uint64_t& offs)
00163         {
00164             char* s = (char*) address;
00165             for (uint64_t i=offs; i<length; i++)
00166             {
00167                 if (s[i] == '\n')
00168                 {
00169                     char* line=&s[offs];
00170                     len=i-offs;
00171                     offs=i+1;
00172                     return line;
00173                 }
00174             }
00175 
00176             len=0;
00177             offs=length;
00178             return NULL;
00179         }
00180 
00191         void write_line(const char* line, uint64_t len, uint64_t& offs)
00192         {
00193             char* s = ((char*) address) + offs;
00194             if (len+1+offs > length)
00195                 SG_ERROR("Writing beyond size of file\n");
00196 
00197             for (uint64_t i=0; i<len; i++)
00198                 s[i] = line[i];
00199 
00200             s[len]='\n';
00201             offs+=length+1;
00202             last_written_byte=offs-1;
00203         }
00204 
00216         inline void set_truncate_size(uint64_t sz=0)
00217         {
00218             last_written_byte=sz;
00219         }
00220 
00225         int32_t get_num_lines()
00226         {
00227             char* s = (char*) address;
00228             int32_t linecount=0;
00229             for (uint64_t i=0; i<length; i++)
00230             {
00231                 if (s[i] == '\n')
00232                     linecount++;
00233             }
00234 
00235             return linecount;
00236         }
00237 
00245         inline T operator[](uint64_t index) const
00246         {
00247           return ((T*)address)[index];
00248         }
00249 
00257         inline T operator[](int32_t index) const
00258         {
00259           return ((T*)address)[index];
00260         }
00261         
00263         inline virtual const char* get_name() const { return "MemoryMappedFile"; }
00264 
00265     protected:
00267         int fd;
00269         uint64_t length;
00271         void* address;
00273         char rw;
00274 
00276         uint64_t last_written_byte;
00277 };
00278 }
00279 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation