SHOGUN  4.1.0
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
UAIFile.cpp
浏览该文件的文档.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2014 Abinash Panda
8  */
9 
10 #include <shogun/io/UAIFile.h>
11 
12 #include <shogun/lib/SGVector.h>
13 #include <shogun/lib/SGMatrix.h>
14 
15 using namespace shogun;
16 
// NOTE(review): the signature for this body (listing line 17) is not visible
// in this view; presumably the default constructor of CUAIFile - confirm.
// Only init() runs here; the three constructors below additionally call
// init_with_defaults().
18 {
19  init();
20 }
21 
22 CUAIFile::CUAIFile(FILE* f, const char* name) :
23  CFile(f, name)
24 {
25  init();
26  init_with_defaults();
27 }
28 
#ifdef HAVE_FDOPEN
/**
 * Constructor taking a file descriptor; only compiled when HAVE_FDOPEN is
 * defined.
 *
 * Forwards all arguments to the CFile base constructor, then runs init()
 * followed by init_with_defaults().
 *
 * @param fd   file descriptor passed on to CFile
 * @param mode mode string passed on to CFile
 * @param name name passed on to CFile
 */
CUAIFile::CUAIFile(int fd, const char* mode, const char* name) : CFile(fd, mode, name)
{
	// order matters: init() resets the members that init_with_defaults() fills in
	init();
	init_with_defaults();
}
#endif
37 
38 CUAIFile::CUAIFile(const char* fname, char rw, const char* name) :
39  CFile(fname, rw, name)
40 {
41  init();
42  init_with_defaults();
43 }
44 
// Body of CUAIFile::~CUAIFile() (the cross-reference index places its
// definition at UAIFile.cpp:45). The signature and listing lines 47-50 are
// not visible in this view.
46 {
51 
// Release the two per-factor arrays.
// NOTE(review): get_preamble()/get_factors_table() build their arrays with
// new[]; if m_factors_table / m_factors_scope are allocated the same way (the
// allocation lines are not visible here), SG_FREE would not be the matching
// deallocator - confirm.
52  SG_FREE(m_factors_table);
53  SG_FREE(m_factors_scope);
54 }
55 
/**
 * Registers members through SG_ADD and resets them to their empty state.
 *
 * Called first by every constructor visible in this file.
 */
56 void CUAIFile::init()
57 {
// helper objects, registered as CSGObject pointers
58  SG_ADD((CSGObject**)&m_line_reader, "line_reader", "line reader used to read lines from file", MS_NOT_AVAILABLE);
59  SG_ADD((CSGObject**)&m_parser, "parser", "parser used to parse file", MS_NOT_AVAILABLE);
60  SG_ADD((CSGObject**)&m_line_tokenizer, "line_tokenizer", "line tokenizer used to parse file", MS_NOT_AVAILABLE);
61  SG_ADD((CSGObject**)&m_tokenizer, "tokenizer", "tokenizer used to parse file", MS_NOT_AVAILABLE);
62  SG_ADD(&m_delimiter, "delimiter", "delimiter used in get_vector function", MS_NOT_AVAILABLE);
63 
// scalar / vector description of the network
64  SG_ADD(&m_num_vars, "num_vars", "number of variables", MS_NOT_AVAILABLE);
65  SG_ADD(&m_num_factors, "num_factors", "number of factors", MS_NOT_AVAILABLE);
66  SG_ADD(&m_net_type, "net_type", "network type (either BAYES or MARKOV)", MS_NOT_AVAILABLE);
67  SG_ADD(&m_vars_card, "vars_card", "cardinality of all the variables", MS_NOT_AVAILABLE);
68 
// NOTE(review): the listing numbers jump from 68 to 75 here; whatever stood on
// those lines is not visible in this view. m_factors_table and m_factors_scope
// are not registered by any visible SG_ADD call.
// defaults: space delimiter, no helper objects yet
75  m_delimiter = ' ';
76  m_tokenizer = NULL;
77  m_line_tokenizer = NULL;
78  m_parser = NULL;
79  m_line_reader = NULL;
80 
// empty network: zero counts, no per-factor arrays
81  m_num_vars = 0;
82  m_num_factors = 0;
83  m_factors_table = NULL;
84  m_factors_scope = NULL;
85 }
86 
/**
 * Sets the default delimiter and creates the helper objects.
 *
 * NOTE(review): listing lines 91-93, 95-97, 100 and 103-104 are not visible in
 * this view. Presumably they create the tokenizers and the line reader and
 * attach the tokenizer to the parser - confirm against the full source.
 */
87 void CUAIFile::init_with_defaults()
88 {
// elements on a line are separated by a single space
89  m_delimiter=' ';
90 
94 
98 
// fresh parser; SG_REF takes a reference on it
99  m_parser=new CParser();
101  SG_REF(m_parser);
102 
105 }
106 
107 #define GET_VECTOR(read_func, sg_type) \
108 void CUAIFile::get_vector(sg_type*& vector, int32_t& len) \
109 { \
110  if (!m_line_reader->has_next()) \
111  return; \
112  \
113  SGVector<char> line; \
114  int32_t num_elements = 0; \
115  \
116  line = m_line_reader->read_line(); \
117  m_tokenizer->set_text(line); \
118  while (m_tokenizer->has_next()) \
119  { \
120  int32_t temp_start; \
121  m_tokenizer->next_token_idx(temp_start); \
122  num_elements++; \
123  } \
124  \
125  vector = SG_MALLOC(sg_type, num_elements); \
126  m_parser->set_text(line); \
127  for (int32_t i=0; i<num_elements; i++) \
128  vector[i] = m_parser->read_func(); \
129  len = num_elements; \
130 }
131 
132 GET_VECTOR(read_char, int8_t)
133 GET_VECTOR(read_byte, uint8_t)
134 GET_VECTOR(read_char, char)
135 GET_VECTOR(read_int, int32_t)
136 GET_VECTOR(read_uint, uint32_t)
137 GET_VECTOR(read_short_real, float32_t)
138 GET_VECTOR(read_real, float64_t)
139 GET_VECTOR(read_long_real, floatmax_t)
140 GET_VECTOR(read_short, int16_t)
141 GET_VECTOR(read_word, uint16_t)
142 GET_VECTOR(read_long, int64_t)
143 GET_VECTOR(read_ulong, uint64_t)
144 #undef GET_VECTOR
145 
146 #define SET_VECTOR(format, sg_type) \
147 void CUAIFile::set_vector(const sg_type* vector, int32_t len) \
148 { \
149  SG_SET_LOCALE_C; \
150  \
151  int32_t i; \
152  for (i=0; i<len-1; i++) \
153  fprintf(file, "%" format "%c", vector[i], m_delimiter); \
154  fprintf(file, "%" format "\n", vector[i]); \
155  \
156  SG_RESET_LOCALE; \
157 }
158 
159 SET_VECTOR(SCNi8, int8_t)
160 SET_VECTOR(SCNu8, uint8_t)
161 SET_VECTOR(SCNu8, char)
162 SET_VECTOR(SCNi32, int32_t)
163 SET_VECTOR(SCNu32, uint32_t)
164 SET_VECTOR(SCNi64, int64_t)
165 SET_VECTOR(SCNu64, uint64_t)
166 SET_VECTOR(".16g", float32_t)
167 SET_VECTOR(".16g", float64_t)
168 SET_VECTOR(".16Lg", floatmax_t)
169 SET_VECTOR(SCNi16, int16_t)
170 SET_VECTOR(SCNu16, uint16_t)
171 #undef SET_VECTOR
172 
// Body of CUAIFile::parse() (the cross-reference index places its definition
// at UAIFile.cpp:173). The signature line and listing lines 182, 186, 188,
// 192, 194 and 207 are not visible in this view, so the statements that
// consume the first three parsed lines and allocate the per-factor arrays
// cannot be seen here.
174 {
// parsing needs an open file
175  if (!file)
176  SG_SERROR("No file specified");
177 
178  SGVector<char> line, n_type;
179 
// first line is handed to the parser (the statement reading from it is not visible)
180  line = m_line_reader->read_line();
181  m_parser->set_text(line);
183 
// second line, same pattern
184  line = m_line_reader->read_line();
185  m_parser->set_text(line);
187 
189 
// third line, same pattern
190  line = m_line_reader->read_line();
191  m_parser->set_text(line);
193 
// one line per factor: an element count followed by that many variable indices
195  for (int32_t i=0; i<m_num_factors; i++)
196  {
197  int32_t num_elems;
198  line = m_line_reader->read_line();
199  m_parser->set_text(line);
200  num_elems = m_parser->read_int();
201  SGVector<int32_t> vars_index(num_elems);
202  for (int32_t j=0; j<num_elems; j++)
203  vars_index[j] = m_parser->read_int();
204  m_factors_scope[i] = vars_index;
205  }
206 
// per factor: one line holding the expected size, then the values read by
// get_vector() from the following line
208  for (int32_t i=0; i<m_num_factors; i++)
209  {
210  int32_t data_size;
211  line=m_line_reader->read_line();
212  m_parser->set_text(line);
213  data_size = m_parser->read_int();
214  SGVector<float64_t> data;
215  get_vector(data.vector, data.vlen);
// the declared size must match the number of values actually read
216  if (data_size != data.vlen)
217  SG_SERROR("Data size mismatch. Expected %d size data; \
218  got %d size data\n", data_size, data.vlen);
219  m_factors_table[i] = data;
220  }
221 }
222 
223 void CUAIFile::set_net_type(const char* net_type)
224 {
225  REQUIRE ((strncmp(net_type, "BAYES", 5) == 0 || strncmp(net_type, "MARKOV", 6) == 0),
226  "Network type should be either MARKOV or BAYES");
227 
228  m_net_type = SGVector<char>(strlen(net_type));
229  for (uint32_t i=0; i<strlen(net_type); i++)
230  m_net_type[i] = net_type[i];
231 
232  fprintf(file, "%s\n", net_type);
233 }
234 
235 void CUAIFile::set_num_vars(int32_t num_vars)
236 {
237  m_num_vars = num_vars;
238  fprintf(file, "%d\n", num_vars);
239 }
240 
// Body of CUAIFile::set_vars_card(SGVector<int32_t> vars_card) (the
// cross-reference index places its definition at UAIFile.cpp:241); the
// signature line is not visible in this view.
242 {
// one cardinality per variable: the length must equal the stored variable count
243  REQUIRE (m_num_vars == vars_card.vlen,
244  "Variables mismatch. Expected %d variables, got %d variables",
245  m_num_vars, vars_card.vlen);
246 
// keep the vector and write it as one delimiter-separated line
247  m_vars_card = vars_card;
248  set_vector(vars_card.vector, vars_card.vlen);
249 }
250 
251 void CUAIFile::set_num_factors(int32_t num_factors)
252 {
253  m_num_factors = num_factors;
254  fprintf(file, "%d\n", num_factors);
255 }
256 
/**
 * Stores the scope of every factor and writes one line per factor.
 *
 * Each line holds the scope length followed by its entries, every number
 * followed by a single space, so the line ends with a space before the
 * newline.
 *
 * @param num_factors   number of entries in factors_scope; must equal m_num_factors
 * @param factors_scope array of num_factors scope vectors
 *
 * NOTE(review): listing line 263 is not visible in this view; presumably it
 * allocates m_factors_scope before the loop writes into it - confirm.
 */
257 void CUAIFile::set_factors_scope(int num_factors,
258  const SGVector<int32_t>* factors_scope)
259 {
260  REQUIRE(num_factors == m_num_factors, "Factors mismatch. Expected %d factors; \
261  got %d factors", m_num_factors, num_factors)
262 
264  for (int32_t i=0; i<m_num_factors; i++)
265  {
266  SGVector<int32_t> scope = factors_scope[i];
267  m_factors_scope[i] = scope;
// "<length> <entry> <entry> ... \n"
268  fprintf(file, "%d ", scope.vlen);
269  for (int32_t j=0; j<scope.vlen; j++)
270  fprintf(file, "%d ", scope[j]);
271  fprintf(file, "\n");
272  }
273 }
274 
/**
 * Stores the table of every factor and writes it to the file.
 *
 * For each factor an empty line is written, then the table size on its own
 * line, then the values on one line through set_vector().
 *
 * @param num_factors   number of entries in factors_table; must equal m_num_factors
 * @param factors_table array of num_factors table vectors
 *
 * NOTE(review): listing line 281 is not visible in this view; presumably it
 * allocates m_factors_table before the loop writes into it - confirm.
 */
275 void CUAIFile::set_factors_table(int32_t num_factors,
276  const SGVector<float64_t>* factors_table)
277 {
278  REQUIRE(num_factors == m_num_factors, "Factors mismatch. Expected %d factors; \
279  got %d factors", m_num_factors, num_factors);
280 
282  for (int32_t i=0; i<m_num_factors; i++)
283  {
// blank separator line before each table
284  fprintf(file, "\n");
285  SGVector<float64_t> data = factors_table[i];
286  m_factors_table[i] = data;
// size line, then the values
287  fprintf(file, "%d\n", data.size());
288  set_vector(data.vector, data.vlen);
289  }
290 }
291 
// Remaining parameters and body of CUAIFile::get_preamble(SGVector<char>&
// net_type, ...) (the cross-reference index places its definition at
// UAIFile.cpp:292); the first signature line is not visible in this view.
// Copies the stored network description into the output arguments.
293  int32_t& num_vars,
294  SGVector<int32_t>& vars_card,
295  int32_t& num_factors,
296  SGVector<int32_t>*& factors_scope)
297 {
298  net_type = m_net_type;
299  num_vars = m_num_vars;
300  vars_card = m_vars_card;
301  num_factors = m_num_factors;
302 
// a new array of m_num_factors vectors is allocated with new[] on every call
// NOTE(review): presumably the caller has to delete[] it - confirm.
303  factors_scope = new SGVector<int32_t> [m_num_factors];
304  for (int32_t i=0; i<m_num_factors; i++)
305  factors_scope[i] = m_factors_scope[i];
306 }
307 
// Body of CUAIFile::get_factors_table(SGVector<float64_t>*& factors_table)
// (the cross-reference index places its definition at UAIFile.cpp:308); the
// signature line is not visible in this view.
309 {
// a new array of m_num_factors vectors is allocated with new[] on every call
// NOTE(review): presumably the caller has to delete[] it - confirm.
310  factors_table = new SGVector<float64_t> [m_num_factors];
311  for (int32_t i=0; i<m_num_factors; i++)
312  factors_table[i] = m_factors_table[i];
313 }
314 
virtual void set_factors_table(int32_t num_factors, const SGVector< float64_t > *factors_table)
Definition: UAIFile.cpp:275
void set_text(SGVector< char > text)
Definition: Parser.cpp:138
SGVector< float64_t > * m_factors_table
Definition: UAIFile.h:217
SGVector< int32_t > m_vars_card
Definition: UAIFile.h:211
virtual void set_vector(const int8_t *vector, int32_t len)
virtual ~CUAIFile()
Definition: UAIFile.cpp:45
virtual void get_preamble(SGVector< char > &net_type, int32_t &num_vars, SGVector< int32_t > &vars_card, int32_t &num_factors, SGVector< int32_t > *&factors_scope)
Definition: UAIFile.cpp:292
#define SET_VECTOR(format, sg_type)
Definition: UAIFile.cpp:146
CLineReader * m_line_reader
Definition: UAIFile.h:187
#define GET_VECTOR(read_func, sg_type)
Definition: UAIFile.cpp:107
FILE * file
Definition: File.h:505
#define REQUIRE(x,...)
Definition: SGIO.h:206
SGVector< char > m_net_type
Definition: UAIFile.h:208
Class for buffered reading from an ASCII file.
Definition: LineReader.h:24
char m_delimiter
Definition: UAIFile.h:199
#define SG_REF(x)
Definition: SGObject.h:51
virtual void get_factors_table(SGVector< float64_t > *&factors_table)
Definition: UAIFile.cpp:308
virtual void get_vector(int8_t *&vector, int32_t &len)
virtual int32_t read_int()
CDelimiterTokenizer * m_tokenizer
Definition: UAIFile.h:196
virtual SGVector< char > read_string()
Definition: Parser.cpp:53
virtual SGVector< char > read_line()
Definition: LineReader.cpp:89
SGVector< int32_t > * m_factors_scope
Definition: UAIFile.h:214
int32_t size() const
Definition: SGVector.h:115
index_t vlen
Definition: SGVector.h:494
Class for reading from a string.
Definition: Parser.h:23
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
double float64_t
Definition: common.h:50
long double floatmax_t
Definition: common.h:51
A File access base class.
Definition: File.h:34
int32_t m_num_vars
Definition: UAIFile.h:202
virtual void set_num_vars(int32_t num_vars)
Definition: UAIFile.cpp:235
void set_tokenizer(CTokenizer *tokenizer)
Definition: Parser.cpp:146
float float32_t
Definition: common.h:49
CDelimiterTokenizer * m_line_tokenizer
Definition: UAIFile.h:193
virtual void set_vars_card(SGVector< int32_t > vars_card)
Definition: UAIFile.cpp:241
virtual void set_factors_scope(int32_t num_factors, const SGVector< int32_t > *factors_scope)
Definition: UAIFile.cpp:257
CParser * m_parser
Definition: UAIFile.h:190
#define SG_UNREF(x)
Definition: SGObject.h:52
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class CDelimiterTokenizer is used to tokenize a SGVector into tokens using custom chars as ...
virtual void set_num_factors(int32_t num_vars)
Definition: UAIFile.cpp:251
#define SG_SERROR(...)
Definition: SGIO.h:179
virtual void parse()
Definition: UAIFile.cpp:173
#define SG_ADD(...)
Definition: SGObject.h:81
int32_t m_num_factors
Definition: UAIFile.h:205
virtual void set_net_type(const char *net_type)
Definition: UAIFile.cpp:223

SHOGUN 机器学习工具包 - 项目文档