SHOGUN  6.1.3
StreamingDataFetcher.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2016 - 2017 Soumyajit De
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 #include <algorithm>
32 #include <shogun/io/SGIO.h>
37 
38 using namespace shogun;
39 using namespace internal;
40 
41 StreamingDataFetcher::StreamingDataFetcher(CStreamingFeatures* samples)
42 : DataFetcher(), parser_running(false)
43 {
44  REQUIRE(samples!=nullptr, "Samples cannot be null!\n");
45  m_samples=samples;
46  SG_REF(m_samples);
47  m_num_samples=0;
48 }
49 
50 StreamingDataFetcher::~StreamingDataFetcher()
51 {
52  end();
53  SG_UNREF(m_samples);
54 }
55 
56 void StreamingDataFetcher::set_num_samples(index_t num_samples)
57 {
58  m_num_samples=num_samples;
59 }
60 
61 void StreamingDataFetcher::shuffle_features()
62 {
63 }
64 
65 void StreamingDataFetcher::unshuffle_features()
66 {
67 }
68 
69 void StreamingDataFetcher::use_fold(index_t i)
70 {
71 }
72 
73 void StreamingDataFetcher::init_active_subset()
74 {
75 }
76 
77 index_t StreamingDataFetcher::get_num_samples() const
78 {
79  if (train_test_mode)
80  {
81  if (train_mode)
82  return m_num_samples*train_test_ratio/(train_test_ratio+1);
83  else
84  return m_num_samples/(train_test_ratio+1);
85  }
86  return m_num_samples;
87 }
88 
89 void StreamingDataFetcher::start()
90 {
91  REQUIRE(get_num_samples()>0, "Number of samples is not set! It is MANDATORY for streaming features!\n");
92  if (m_block_details.m_full_data || m_block_details.m_blocksize>get_num_samples())
93  {
94  SG_SINFO("Fetching entire data (%d samples)!\n", get_num_samples());
95  m_block_details.with_blocksize(get_num_samples());
96  }
97  m_block_details.m_total_num_blocks=get_num_samples()/m_block_details.m_blocksize;
98  m_block_details.m_next_block_index=0;
99  if (!parser_running)
100  {
101  m_samples->start_parser();
102  parser_running=true;
103  }
104 }
105 
106 CFeatures* StreamingDataFetcher::next()
107 {
108  CFeatures* next_samples=nullptr;
109  // figure out how many samples to fetch in this burst
110  auto num_already_fetched=m_block_details.m_next_block_index*m_block_details.m_blocksize;
111  auto num_more_samples=get_num_samples()-num_already_fetched;
112  if (num_more_samples>0)
113  {
114  auto num_samples_this_burst=std::min(m_block_details.m_max_num_samples_per_burst, num_more_samples);
115  next_samples=m_samples->get_streamed_features(num_samples_this_burst);
116  SG_REF(next_samples);
117  m_block_details.m_next_block_index+=m_block_details.m_num_blocks_per_burst;
118  }
119  return next_samples;
120 }
121 
122 void StreamingDataFetcher::reset()
123 {
124  m_block_details.m_next_block_index=0;
125  m_samples->reset_stream();
126 }
127 
128 void StreamingDataFetcher::end()
129 {
130  if (parser_running)
131  {
132  m_samples->end_parser();
133  parser_running=false;
134  }
135 }
int32_t index_t
Definition: common.h:72
#define REQUIRE(x,...)
Definition: SGIO.h:181
#define SG_REF(x)
Definition: SGObject.h:52
#define SG_UNREF(x)
Definition: SGObject.h:53
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:69
#define SG_SINFO(...)
Definition: SGIO.h:158
Streaming features are features which are used for online algorithms.

SHOGUN Machine Learning Toolbox - Documentation