IOBuffer.cpp

Go to the documentation of this file.
00001 /*
00002   Copyright (c) 2009 Yahoo! Inc.  All rights reserved.  The copyrights
00003   embodied in the content of this file are licensed under the BSD
00004   (revised) open source license.
00005 
00006   Copyright (c) 2011 Berlin Institute of Technology and Max-Planck-Society.
00007 
00008   This program is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation; either version 3 of the License, or
00011   (at your option) any later version.
00012 
00013   Shogun adjustments (w) 2011 Shashwat Lal Das
00014 */
00015 
00016 #include <string.h>
00017 #include <shogun/io/IOBuffer.h>
00018 
00019 using namespace shogun;
00020 
00021 CIOBuffer::CIOBuffer()
00022 {
00023     init();
00024 }
00025 
00026 CIOBuffer::CIOBuffer(int fd)
00027 {
00028     init();
00029     working_file = fd;
00030 }
00031 
00032 CIOBuffer::~CIOBuffer()
00033 {
00034     free(space.begin);
00035 }
00036 
00037 void CIOBuffer::init()
00038 {
00039     size_t s = 1 << 16;
00040     space.reserve(s);
00041     endloaded = space.begin;
00042 }
00043 
00044 void CIOBuffer::use_file(int fd)
00045 {
00046     working_file = fd;
00047 }
00048 
00049 int CIOBuffer::open_file(const char* name, char flag)
00050 {
00051     int ret=1;
00052     switch(flag)
00053     {
00054     case 'r':
00055         working_file = open(name, O_RDONLY|O_LARGEFILE);
00056         break;
00057 
00058     case 'w':
00059         working_file = open(name, O_CREAT|O_TRUNC|O_WRONLY, 0666);
00060         break;
00061 
00062     default:
00063         SG_ERROR("Unknown file operation. Something other than 'r'/'w' specified.\n");
00064         ret = 0;
00065     }
00066     return ret;
00067 }
00068 
00069 void CIOBuffer::reset_file()
00070 {
00071     lseek(working_file, 0, SEEK_SET);
00072     endloaded = space.begin;
00073     space.end = space.begin;
00074 }
00075 
00076 void CIOBuffer::set(char *p)
00077 {
00078     space.end = p;
00079 }
00080 
00081 ssize_t CIOBuffer::read_file(void* buf, size_t nbytes)
00082 {
00083     return read(working_file, buf, nbytes);
00084 }
00085 
00086 size_t CIOBuffer::fill()
00087 {
00088     if (space.end_array - endloaded == 0)
00089     {
00090         size_t offset = endloaded - space.begin;
00091         space.reserve(2 * (space.end_array - space.begin));
00092         endloaded = space.begin+offset;
00093     }
00094     ssize_t num_read = read_file(endloaded, space.end_array - endloaded);
00095     if (num_read >= 0)
00096     {
00097         endloaded = endloaded+num_read;
00098         return num_read;
00099     }
00100     else
00101         return 0;
00102 }
00103 
00104 ssize_t CIOBuffer::write_file(const void* buf, size_t nbytes)
00105 {
00106     return write(working_file, buf, nbytes);
00107 }
00108 
00109 void CIOBuffer::flush()
00110 {
00111     if (write_file(space.begin, space.index()) != (int) space.index())
00112         SG_ERROR("Error, failed to write example!\n");
00113     space.end = space.begin;
00114     fsync(working_file);
00115 }
00116 
00117 bool CIOBuffer::close_file()
00118 {
00119     if (working_file < 0)
00120         return false;
00121     else
00122     {
00123         int r = close(working_file);
00124         if (r < 0)
00125             SG_ERROR("Error closing the file!\n");
00126         return true;
00127     }
00128 }
00129 
00130 ssize_t CIOBuffer::readto(char* &pointer, char terminal)
00131 {
00132 //Return a pointer to the bytes before the terminal.  Must be less
00133 //than the buffer size.
00134     pointer = space.end;
00135     while (pointer != endloaded && *pointer != terminal)
00136         pointer++;
00137     if (pointer != endloaded)
00138     {
00139         size_t n = pointer - space.end;
00140         space.end = pointer+1;
00141         pointer -= n;
00142         return n;
00143     }
00144     else
00145     {
00146         if (endloaded == space.end_array)
00147         {
00148             size_t left = endloaded - space.end;
00149             memmove(space.begin, space.end, left);
00150             space.end = space.begin;
00151             endloaded = space.begin+left;
00152             pointer = endloaded;
00153         }
00154         if (fill() > 0)// more bytes are read.
00155             return readto(pointer,terminal);
00156         else //no more bytes to read, return nothing.
00157             return 0;
00158     }
00159 }
00160 
00161 void CIOBuffer::buf_write(char* &pointer, int n)
00162 {
00163     if (space.end + n <= space.end_array)
00164     {
00165         pointer = space.end;
00166         space.end += n;
00167     }
00168     else // Time to dump the file
00169     {
00170         if (space.end != space.begin)
00171             flush();
00172         else // Array is short, so increase size.
00173         {
00174             space.reserve(2 * (space.end_array - space.begin));
00175             endloaded = space.begin;
00176         }
00177         buf_write(pointer,n);
00178     }
00179 }
00180 
00181 unsigned int CIOBuffer::buf_read(char* &pointer, int n)
00182 {
00183     // Return a pointer to the next n bytes.
00184     // n must be smaller than the maximum size.
00185     if (space.end + n <= endloaded)
00186     {
00187         pointer = space.end;
00188         space.end += n;
00189         return n;
00190     }
00191     else // out of bytes, so refill.
00192     {
00193         if (space.end != space.begin) //There exists room to shift.
00194         {
00195             // Out of buffer so swap to beginning.
00196             int left = endloaded - space.end;
00197             memmove(space.begin, space.end, left);
00198             space.end = space.begin;
00199             endloaded = space.begin+left;
00200         }
00201         if (fill() > 0)
00202             return buf_read(pointer,n);// more bytes are read.
00203         else
00204         {
00205             // No more bytes to read, return all that we have left.
00206             pointer = space.end;
00207             space.end = endloaded;
00208             return endloaded - pointer;
00209         }
00210     }
00211 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation