IOBuffer.cpp

Go to the documentation of this file.
00001 /*
00002   Copyright (c) 2009 Yahoo! Inc.  All rights reserved.  The copyrights
00003   embodied in the content of this file are licensed under the BSD
00004   (revised) open source license.
00005 
00006   Copyright (c) 2011 Berlin Institute of Technology and Max-Planck-Society.
00007 
00008   This program is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation; either version 3 of the License, or
00011   (at your option) any later version.
00012 
00013   Shogun adjustments (w) 2011 Shashwat Lal Das
00014 */
00015 
00016 #include <string.h>
00017 #include <shogun/io/IOBuffer.h>
00018 
00019 using namespace shogun;
00020 
00021 CIOBuffer::CIOBuffer()
00022 {
00023     init();
00024 }
00025 
00026 CIOBuffer::CIOBuffer(int fd)
00027 {
00028     init();
00029     working_file = fd;
00030 }
00031 
00032 CIOBuffer::~CIOBuffer()
00033 {
00034 }
00035 
00036 void CIOBuffer::init()
00037 {
00038     size_t s = 1 << 16;
00039     space.reserve(s);
00040     endloaded = space.begin;
00041 }
00042 
00043 void CIOBuffer::use_file(int fd)
00044 {
00045     working_file = fd;
00046 }
00047 
00048 int CIOBuffer::open_file(const char* name, char flag)
00049 {
00050     int ret=1;
00051     switch(flag)
00052     {
00053     case 'r':
00054         working_file = open(name, O_RDONLY|O_LARGEFILE);
00055         break;
00056 
00057     case 'w':
00058         working_file = open(name, O_CREAT|O_TRUNC|O_WRONLY, 0666);
00059         break;
00060 
00061     default:
00062         SG_ERROR("Unknown file operation. Something other than 'r'/'w' specified.\n");
00063         ret = 0;
00064     }
00065     return ret;
00066 }
00067 
00068 void CIOBuffer::reset_file()
00069 {
00070     lseek(working_file, 0, SEEK_SET);
00071     endloaded = space.begin;
00072     space.end = space.begin;
00073 }
00074 
00075 void CIOBuffer::set(char *p)
00076 {
00077     space.end = p;
00078 }
00079 
00080 ssize_t CIOBuffer::read_file(void* buf, size_t nbytes)
00081 {
00082     return read(working_file, buf, nbytes);
00083 }
00084 
00085 size_t CIOBuffer::fill()
00086 {
00087     if (space.end_array - endloaded == 0)
00088     {
00089         size_t offset = endloaded - space.begin;
00090         space.reserve(2 * (space.end_array - space.begin));
00091         endloaded = space.begin+offset;
00092     }
00093     ssize_t num_read = read_file(endloaded, space.end_array - endloaded);
00094     if (num_read >= 0)
00095     {
00096         endloaded = endloaded+num_read;
00097         return num_read;
00098     }
00099     else
00100         return 0;
00101 }
00102 
00103 ssize_t CIOBuffer::write_file(const void* buf, size_t nbytes)
00104 {
00105     return write(working_file, buf, nbytes);
00106 }
00107 
00108 void CIOBuffer::flush()
00109 {
00110     if (write_file(space.begin, space.index()) != (int) space.index())
00111         SG_ERROR("Error, failed to write example!\n");
00112     space.end = space.begin;
00113     fsync(working_file);
00114 }
00115 
00116 bool CIOBuffer::close_file()
00117 {
00118     if (working_file < 0)
00119         return false;
00120     else
00121     {
00122         int r = close(working_file);
00123         if (r < 0)
00124             SG_ERROR("Error closing the file!\n");
00125         return true;
00126     }
00127 }
00128 
00129 ssize_t CIOBuffer::readto(char* &pointer, char terminal)
00130 {
00131 //Return a pointer to the bytes before the terminal.  Must be less
00132 //than the buffer size.
00133     pointer = space.end;
00134     while (pointer != endloaded && *pointer != terminal)
00135         pointer++;
00136     if (pointer != endloaded)
00137     {
00138         size_t n = pointer - space.end;
00139         space.end = pointer+1;
00140         pointer -= n;
00141         return n;
00142     }
00143     else
00144     {
00145         if (endloaded == space.end_array)
00146         {
00147             size_t left = endloaded - space.end;
00148             memmove(space.begin, space.end, left);
00149             space.end = space.begin;
00150             endloaded = space.begin+left;
00151             pointer = endloaded;
00152         }
00153         if (fill() > 0)// more bytes are read.
00154             return readto(pointer,terminal);
00155         else //no more bytes to read, return nothing.
00156             return 0;
00157     }
00158 }
00159 
00160 void CIOBuffer::buf_write(char* &pointer, int n)
00161 {
00162     if (space.end + n <= space.end_array)
00163     {
00164         pointer = space.end;
00165         space.end += n;
00166     }
00167     else // Time to dump the file
00168     {
00169         if (space.end != space.begin)
00170             flush();
00171         else // Array is short, so increase size.
00172         {
00173             space.reserve(2 * (space.end_array - space.begin));
00174             endloaded = space.begin;
00175         }
00176         buf_write(pointer,n);
00177     }
00178 }
00179 
00180 unsigned int CIOBuffer::buf_read(char* &pointer, int n)
00181 {
00182     // Return a pointer to the next n bytes.
00183     // n must be smaller than the maximum size.
00184     if (space.end + n <= endloaded)
00185     {
00186         pointer = space.end;
00187         space.end += n;
00188         return n;
00189     }
00190     else // out of bytes, so refill.
00191     {
00192         if (space.end != space.begin) //There exists room to shift.
00193         {
00194             // Out of buffer so swap to beginning.
00195             int left = endloaded - space.end;
00196             memmove(space.begin, space.end, left);
00197             space.end = space.begin;
00198             endloaded = space.begin+left;
00199         }
00200         if (fill() > 0)
00201             return buf_read(pointer,n);// more bytes are read.
00202         else
00203         {
00204             // No more bytes to read, return all that we have left.
00205             pointer = space.end;
00206             space.end = endloaded;
00207             return endloaded - pointer;
00208         }
00209     }
00210 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation