SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Histogram.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/lib/common.h>
15 #include <shogun/io/SGIO.h>
17 
18 using namespace shogun;
19 
// Constructor body. The signature line (listing line 20) is not present in
// this listing; presumably this is the no-argument CHistogram constructor --
// TODO confirm against the header.
// Invokes the CDistribution base constructor and allocates the histogram
// table `hist` with 1<<16 (65536) float64_t entries, one per possible 16-bit
// symbol value. SG_CALLOC is presumably zero-initializing like calloc --
// confirm against the SG_CALLOC macro definition.
21 : CDistribution()
22 {
23  hist=SG_CALLOC(float64_t, 1<<16);
24 }
25 
// Constructor body for the overload that receives a features pointer `f`.
// The signature line (listing line 26) is not present in this listing, so the
// exact parameter type cannot be confirmed from here.
// Invokes the CDistribution base constructor, allocates the 1<<16-entry
// histogram table, and stores `f` in the `features` member.
27 : CDistribution()
28 {
29  hist=SG_CALLOC(float64_t, 1<<16);
// NOTE(review): plain pointer assignment; no reference counting or ownership
// transfer is visible in this block -- confirm who keeps `f` alive.
30  features=f;
31 }
32 
// Body that releases the histogram table allocated by the constructors.
// The signature line (listing line 33) is not present in this listing;
// presumably this is the CHistogram destructor -- TODO confirm.
34 {
35  SG_FREE(hist);
36 }
37 
// Training routine body: fills `hist` with the logarithm of the number of
// times each 16-bit symbol occurs across all feature vectors.
// The signature line (listing line 38) is not present in this listing; the
// body uses a pointer named `data` and always returns true.
//
// Steps visible here:
//   1. If `data` is non-null, require string features of word type, then
//      install them via set_features(data).
//   2. Reset all 65536 bins to 0.
//   3. For every vector in `features`, increment the bin of each symbol.
//   4. Replace every bin by log(bin).
39 {
40  int32_t vec;
41  int32_t feat;
42  int32_t i;
43 
44  if (data)
45  {
// Reject anything that is not (class C_STRING, type F_WORD).
46  if (data->get_feature_class() != C_STRING ||
47  data->get_feature_type() != F_WORD)
48  {
49  SG_ERROR("Expected features of class string type word\n");
50  }
51  set_features(data);
52  }
53 
// NOTE(review): listing lines 54-56 are absent from this listing, so any
// checks on `features` performed there cannot be seen from here.
57 
// Clear the counts left over from any previous training run.
58  for (i=0; i< (int32_t) (1<<16); i++)
59  hist[i]=0;
60 
61  for (vec=0; vec<features->get_num_vectors(); vec++)
62  {
63  int32_t len;
64  bool free_vec;
65 
// The features pointer is cast to CStringFeatures<uint16_t>; `len` and
// `free_vec` are filled in by get_feature_vector.
66  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
67  get_feature_vector(vec, len, free_vec);
68 
// Each symbol value indexes directly into the 65536-entry table.
69  for (feat=0; feat<len ; feat++)
70  hist[vector[feat]]++;
71 
// NOTE(review): listing line 72 (presumably the receiver expression of this
// call) is absent from this listing -- confirm against the original file.
73  free_feature_vector(vector, vec, free_vec);
74  }
75 
// Convert raw counts to log-counts. A bin that was never incremented
// evaluates log(0) (negative infinity with the C library log -- confirm which
// log is in scope). NOTE(review): the counts are not normalized before the
// log is taken -- confirm that log-counts rather than log-probabilities are
// intended.
76  for (i=0; i< (int32_t) (1<<16); i++)
77  hist[i]=log(hist[i]);
78 
79  return true;
80 }
81 
// Body that scores one example: returns the sum of hist[symbol] over every
// symbol of feature vector `num_example`.
// The signature line (listing line 82) is not present in this listing.
83 {
// NOTE(review): listing lines 84-86 are absent from this listing, so any
// precondition checks made there cannot be seen from here.
87 
88  int32_t len;
89  bool free_vec;
90  float64_t loglik=0;
91 
92  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
93  get_feature_vector(num_example, len, free_vec);
94 
// Accumulate the stored per-symbol values for this example.
95  for (int32_t i=0; i<len; i++)
96  loglik+=hist[vector[i]];
97 
// NOTE(review): listing line 98 (presumably the receiver expression of this
// call) is absent from this listing -- confirm against the original file.
99  free_feature_vector(vector, num_example, free_vec);
100 
101  return loglik;
102 }
103 
// Computes a log-derivative value for parameter `num_param` on example
// `num_example`.
//
// num_param   - index into `hist`; also compared against the symbols of the
//               example.
// num_example - index of the feature vector to evaluate.
//
// Returns -CMath::INFTY when hist[num_param] is below
// CMath::ALMOST_NEG_INFTY, or when the symbol `num_param` does not occur in
// the example. Otherwise returns
//   sum_i hist[vector[i]] + log(count) - hist[num_param]
// where `count` is the number of occurrences of `num_param` in the example.
//
// NOTE(review): hist[num_param] is read without a range check in this block;
// confirm callers keep num_param inside the table.
104 float64_t CHistogram::get_log_derivative(int32_t num_param, int32_t num_example)
105 {
106  if (hist[num_param] < CMath::ALMOST_NEG_INFTY)
107  return -CMath::INFTY;
108  else
109  {
110  ASSERT(features);
// NOTE(review): listing lines 111-112 are absent from this listing, so any
// further checks made there cannot be seen from here.
113 
114  int32_t len;
115  bool free_vec;
116  float64_t deriv=0;
117 
118  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
119  get_feature_vector(num_example, len, free_vec);
120 
121  int32_t num_occurences=0;
122 
// Single pass: accumulate the per-symbol values and count how often the
// symbol equal to num_param appears.
123  for (int32_t i=0; i<len; i++)
124  {
125  deriv+=hist[vector[i]];
126 
127  if (vector[i]==num_param)
128  num_occurences++;
129  }
130 
// NOTE(review): listing line 131 (presumably the receiver expression of this
// call) is absent from this listing -- confirm against the original file.
132  free_feature_vector(vector, num_example, free_vec);
133 
// The accumulated sum is discarded when the symbol never occurs.
134  if (num_occurences>0)
135  deriv+=CMath::log((float64_t) num_occurences)-hist[num_param];
136  else
137  deriv=-CMath::INFTY;
138 
139  return deriv;
140  }
141 }
142 
// Body that returns the stored table entry hist[num_param] unchanged.
// The signature line (listing line 143) is not present in this listing.
// NOTE(review): no range check on num_param is visible in this block.
144 {
145  return hist[num_param];
146 }
147 
// Body that replaces the histogram table with a copy of `histogram`.
// The signature line (listing line 148) is not present in this listing; the
// body reads `histogram.vlen` and `histogram.vector`.
// Always returns true; a length mismatch is handled by the ASSERT below.
149 {
// The supplied vector must have exactly as many entries as the model has
// parameters.
150  ASSERT(histogram.vlen==get_num_model_parameters());
151 
// Drop the old table and allocate a fresh one of the supplied length.
152  SG_FREE(hist);
153  hist=SG_MALLOC(float64_t, histogram.vlen);
// Element-wise copy, so `hist` does not alias the caller's storage.
154  for (int32_t i=0; i<histogram.vlen; i++)
155  hist[i]=histogram.vector[i];
156 
157  return true;
158 }
159 
// NOTE(review): both the signature line (listing line 160) and the only
// statement of this body (listing line 162) are absent from this listing.
// Presumably this is the getter counterpart of the histogram setter above --
// confirm against the original file.
161 {
163 }
164 

SHOGUN Machine Learning Toolbox - Documentation