SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UAIFile.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2014 Abinash Panda
8  */
9 
10 #include <shogun/io/UAIFile.h>
11 
12 #include <shogun/lib/SGVector.h>
13 #include <shogun/lib/SGMatrix.h>
14 
15 using namespace shogun;
16 
18 {
19  init();
20 }
21 
22 CUAIFile::CUAIFile(FILE* f, const char* name) :
23  CFile(f, name)
24 {
25  init();
26  init_with_defaults();
27 }
28 
29 CUAIFile::CUAIFile(int fd, const char* mode, const char* name) :
30  CFile(fd, mode, name)
31 {
32  init();
33  init_with_defaults();
34 }
35 
36 CUAIFile::CUAIFile(const char* fname, char rw, const char* name) :
37  CFile(fname, rw, name)
38 {
39  init();
40  init_with_defaults();
41 }
42 
44 {
49 
50  SG_FREE(m_factors_table);
51  SG_FREE(m_factors_scope);
52 }
53 
54 void CUAIFile::init()
55 {
56  SG_ADD((CSGObject**)&m_line_reader, "line_reader", "line reader used to read lines from file", MS_NOT_AVAILABLE);
57  SG_ADD((CSGObject**)&m_parser, "parser", "parser used to parse file", MS_NOT_AVAILABLE);
58  SG_ADD((CSGObject**)&m_line_tokenizer, "line_tokenizer", "line tokenizer used to parse file", MS_NOT_AVAILABLE);
59  SG_ADD((CSGObject**)&m_tokenizer, "tokenizer", "tokenizer used to parse file", MS_NOT_AVAILABLE);
60  SG_ADD(&m_delimiter, "delimiter", "delimiter used in get_vector function", MS_NOT_AVAILABLE);
61 
62  SG_ADD(&m_num_vars, "num_vars", "number of variables", MS_NOT_AVAILABLE);
63  SG_ADD(&m_num_factors, "num_factors", "number of factors", MS_NOT_AVAILABLE);
64  SG_ADD(&m_net_type, "net_type", "network type (either BAYES or MARKOV)", MS_NOT_AVAILABLE);
65  SG_ADD(&m_vars_card, "vars_card", "cardinality of all the variables", MS_NOT_AVAILABLE);
66 
73  m_delimiter = ' ';
74  m_tokenizer = NULL;
75  m_line_tokenizer = NULL;
76  m_parser = NULL;
77  m_line_reader = NULL;
78 
79  m_num_vars = 0;
80  m_num_factors = 0;
81  m_factors_table = NULL;
82  m_factors_scope = NULL;
83 }
84 
85 void CUAIFile::init_with_defaults()
86 {
87  m_delimiter=' ';
88 
92 
96 
97  m_parser=new CParser();
100 
103 }
104 
105 #define GET_VECTOR(read_func, sg_type) \
106 void CUAIFile::get_vector(sg_type*& vector, int32_t& len) \
107 { \
108  if (!m_line_reader->has_next()) \
109  return; \
110  \
111  SGVector<char> line; \
112  int32_t num_elements = 0; \
113  \
114  line = m_line_reader->read_line(); \
115  m_tokenizer->set_text(line); \
116  while (m_tokenizer->has_next()) \
117  { \
118  int32_t temp_start; \
119  m_tokenizer->next_token_idx(temp_start); \
120  num_elements++; \
121  } \
122  \
123  vector = SG_MALLOC(sg_type, num_elements); \
124  m_parser->set_text(line); \
125  for (int32_t i=0; i<num_elements; i++) \
126  vector[i] = m_parser->read_func(); \
127  len = num_elements; \
128 }
129 
130 GET_VECTOR(read_char, int8_t)
131 GET_VECTOR(read_byte, uint8_t)
132 GET_VECTOR(read_char, char)
133 GET_VECTOR(read_int, int32_t)
134 GET_VECTOR(read_uint, uint32_t)
135 GET_VECTOR(read_short_real, float32_t)
136 GET_VECTOR(read_real, float64_t)
137 GET_VECTOR(read_long_real, floatmax_t)
138 GET_VECTOR(read_short, int16_t)
139 GET_VECTOR(read_word, uint16_t)
140 GET_VECTOR(read_long, int64_t)
141 GET_VECTOR(read_ulong, uint64_t)
142 #undef GET_VECTOR
143 
144 #define SET_VECTOR(format, sg_type) \
145 void CUAIFile::set_vector(const sg_type* vector, int32_t len) \
146 { \
147  SG_SET_LOCALE_C; \
148  \
149  int32_t i; \
150  for (i=0; i<len-1; i++) \
151  fprintf(file, "%" format "%c", vector[i], m_delimiter); \
152  fprintf(file, "%" format "\n", vector[i]); \
153  \
154  SG_RESET_LOCALE; \
155 }
156 
157 SET_VECTOR(SCNi8, int8_t)
158 SET_VECTOR(SCNu8, uint8_t)
159 SET_VECTOR(SCNu8, char)
160 SET_VECTOR(SCNi32, int32_t)
161 SET_VECTOR(SCNu32, uint32_t)
162 SET_VECTOR(SCNi64, int64_t)
163 SET_VECTOR(SCNu64, uint64_t)
164 SET_VECTOR(".16g", float32_t)
165 SET_VECTOR(".16g", float64_t)
166 SET_VECTOR(".16Lg", floatmax_t)
167 SET_VECTOR(SCNi16, int16_t)
168 SET_VECTOR(SCNu16, uint16_t)
169 #undef SET_VECTOR
170 
172 {
173  if (!file)
174  SG_SERROR("No file specified");
175 
176  SGVector<char> line, n_type;
177 
178  line = m_line_reader->read_line();
179  m_parser->set_text(line);
181 
182  line = m_line_reader->read_line();
183  m_parser->set_text(line);
185 
187 
188  line = m_line_reader->read_line();
189  m_parser->set_text(line);
191 
193  for (int32_t i=0; i<m_num_factors; i++)
194  {
195  int32_t num_elems;
196  line = m_line_reader->read_line();
197  m_parser->set_text(line);
198  num_elems = m_parser->read_int();
199  SGVector<int32_t> vars_index(num_elems);
200  for (int32_t j=0; j<num_elems; j++)
201  vars_index[j] = m_parser->read_int();
202  m_factors_scope[i] = vars_index;
203  }
204 
206  for (int32_t i=0; i<m_num_factors; i++)
207  {
208  int32_t data_size;
209  line=m_line_reader->read_line();
210  m_parser->set_text(line);
211  data_size = m_parser->read_int();
212  SGVector<float64_t> data;
213  get_vector(data.vector, data.vlen);
214  if (data_size != data.vlen)
215  SG_SERROR("Data size mismatch. Expected %d size data; \
216  got %d size data\n", data_size, data.vlen);
217  m_factors_table[i] = data;
218  }
219 }
220 
221 void CUAIFile::set_net_type(const char* net_type)
222 {
223  REQUIRE ((strncmp(net_type, "BAYES", 5) == 0 || strncmp(net_type, "MARKOV", 6) == 0),
224  "Network type should be either MARKOV or BAYES");
225 
226  m_net_type = SGVector<char>(strlen(net_type));
227  for (uint32_t i=0; i<strlen(net_type); i++)
228  m_net_type[i] = net_type[i];
229 
230  fprintf(file, "%s\n", net_type);
231 }
232 
233 void CUAIFile::set_num_vars(int32_t num_vars)
234 {
235  m_num_vars = num_vars;
236  fprintf(file, "%d\n", num_vars);
237 }
238 
240 {
241  REQUIRE (m_num_vars == vars_card.vlen,
242  "Variables mismatch. Expected %d variables, got %d variables",
243  m_num_vars, vars_card.vlen);
244 
245  m_vars_card = vars_card;
246  set_vector(vars_card.vector, vars_card.vlen);
247 }
248 
249 void CUAIFile::set_num_factors(int32_t num_factors)
250 {
251  m_num_factors = num_factors;
252  fprintf(file, "%d\n", num_factors);
253 }
254 
255 void CUAIFile::set_factors_scope(int num_factors,
256  const SGVector<int32_t>* factors_scope)
257 {
258  REQUIRE(num_factors == m_num_factors, "Factors mismatch. Expected %d factors; \
259  got %d factors", m_num_factors, num_factors)
260 
262  for (int32_t i=0; i<m_num_factors; i++)
263  {
264  SGVector<int32_t> scope = factors_scope[i];
265  m_factors_scope[i] = scope;
266  fprintf(file, "%d ", scope.vlen);
267  for (int32_t j=0; j<scope.vlen; j++)
268  fprintf(file, "%d ", scope[j]);
269  fprintf(file, "\n");
270  }
271 }
272 
273 void CUAIFile::set_factors_table(int32_t num_factors,
274  const SGVector<float64_t>* factors_table)
275 {
276  REQUIRE(num_factors == m_num_factors, "Factors mismatch. Expected %d factors; \
277  got %d factors", m_num_factors, num_factors);
278 
280  for (int32_t i=0; i<m_num_factors; i++)
281  {
282  fprintf(file, "\n");
283  SGVector<float64_t> data = factors_table[i];
284  m_factors_table[i] = data;
285  fprintf(file, "%d\n", data.size());
286  set_vector(data.vector, data.vlen);
287  }
288 }
289 
291  int32_t& num_vars,
292  SGVector<int32_t>& vars_card,
293  int32_t& num_factors,
294  SGVector<int32_t>*& factors_scope)
295 {
296  net_type = m_net_type;
297  num_vars = m_num_vars;
298  vars_card = m_vars_card;
299  num_factors = m_num_factors;
300 
301  factors_scope = new SGVector<int32_t> [m_num_factors];
302  for (int32_t i=0; i<m_num_factors; i++)
303  factors_scope[i] = m_factors_scope[i];
304 }
305 
307 {
308  factors_table = new SGVector<float64_t> [m_num_factors];
309  for (int32_t i=0; i<m_num_factors; i++)
310  factors_table[i] = m_factors_table[i];
311 }
312 

SHOGUN Machine Learning Toolbox - Documentation