SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BinaryLabels.cpp
Go to the documentation of this file.
3 
4 using namespace shogun;
5 
7 {
8 }
9 
10 CBinaryLabels::CBinaryLabels(int32_t num_labels) : CDenseLabels(num_labels)
11 {
12 }
13 
15 {
16  SGVector<float64_t> labels(src.vlen);
17  for (int32_t i=0; i<labels.vlen; i++)
18  labels[i] = src[i]+threshold>=0 ? +1.0 : -1.0;
19  set_labels(labels);
20  set_confidences(src);
21 }
22 
24 {
25 }
26 
28 {
29  if ( base_labels->get_label_type() == LT_BINARY )
30  return (CBinaryLabels*) base_labels;
31  else
32  SG_SERROR("base_labels must be of dynamic type CBinaryLabels");
33 
34  return NULL;
35 }
36 
37 
38 void CBinaryLabels::ensure_valid(const char* context)
39 {
41  bool found_plus_one=false;
42  bool found_minus_one=false;
43 
44  int32_t subset_size=get_num_labels();
45  for (int32_t i=0; i<subset_size; i++)
46  {
47  int32_t real_i=m_subset_stack->subset_idx_conversion(i);
48  if (m_labels[real_i]==+1.0)
49  found_plus_one=true;
50  else if (m_labels[real_i]==-1.0)
51  found_minus_one=true;
52  else
53  {
54  SG_ERROR("%s%sNot a two class labeling label[%d]=%f (only +1/-1 "
55  "allowed)\n", context?context:"", context?": ":"", i, m_labels[real_i]);
56  }
57  }
58 
59  if (!found_plus_one)
60  {
61  SG_ERROR("%s%sNot a two class labeling - no positively labeled examples found\n",
62  context?context:"", context?": ":"");
63  }
64 
65  if (!found_minus_one)
66  {
67  SG_ERROR("%s%sNot a two class labeling - no negatively labeled examples found\n",
68  context?context:"", context?": ":"");
69  }
70 }
71 
73 {
74  return LT_BINARY;
75 }
76 
78 {
79  SG_DEBUG("entering CBinaryLabels::scores_to_probabilities()\n");
80 
81  REQUIRE(m_confidences.vector, "%s::scores_to_probabilities() requires "
82  "confidences vector!\n", get_name());
83 
84  /* count prior0 and prior1 if needed */
85  int32_t prior0=0;
86  int32_t prior1=0;
87  SG_DEBUG("counting number of positive and negative labels\n");
88  {
89  for (index_t i=0; i<m_confidences.vlen; ++i)
90  {
91  if (m_confidences[i]>0)
92  prior1++;
93  else
94  prior0++;
95  }
96  }
97  SG_DEBUG("%d pos; %d neg\n", prior1, prior0);
98 
99  /* parameter setting */
100  /* maximum number of iterations */
101  index_t maxiter=100;
102 
103  /* minimum step taken in line search */
104  float64_t minstep=1E-10;
105 
106  /* for numerically strict pd of hessian */
107  float64_t sigma=1E-12;
108  float64_t eps=1E-5;
109 
110  /* construct target support */
111  float64_t hiTarget=(prior1+1.0)/(prior1+2.0);
112  float64_t loTarget=1/(prior0+2.0);
113  index_t length=prior1+prior0;
114 
115  SGVector<float64_t> t(length);
116  for (index_t i=0; i<length; ++i)
117  {
118  if (m_confidences[i]>0)
119  t[i]=hiTarget;
120  else
121  t[i]=loTarget;
122  }
123 
124  /* initial Point and Initial Fun Value */
125  /* result parameters of sigmoid */
126  float64_t a=0;
127  float64_t b=CMath::log((prior0+1.0)/(prior1+1.0));
128  float64_t fval=0.0;
129 
130  for (index_t i=0; i<length; ++i)
131  {
132  float64_t fApB=m_confidences[i]*a+b;
133  if (fApB>=0)
134  fval+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB));
135  else
136  fval+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB));
137  }
138 
139  index_t it;
140  float64_t g1;
141  float64_t g2;
142  for (it=0; it<maxiter; ++it)
143  {
144  SG_DEBUG("Iteration %d, a=%f, b=%f, fval=%f\n", it, a, b, fval);
145 
146  /* Update Gradient and Hessian (use H' = H + sigma I) */
147  float64_t h11=sigma; //Numerically ensures strict PD
148  float64_t h22=h11;
149  float64_t h21=0;
150  g1=0;
151  g2=0;
152 
153  for (index_t i=0; i<length; ++i)
154  {
155  float64_t fApB=m_confidences[i]*a+b;
156  float64_t p;
157  float64_t q;
158  if (fApB>=0)
159  {
160  p=CMath::exp(-fApB)/(1.0+CMath::exp(-fApB));
161  q=1.0/(1.0+CMath::exp(-fApB));
162  }
163  else
164  {
165  p=1.0/(1.0+CMath::exp(fApB));
166  q=CMath::exp(fApB)/(1.0+CMath::exp(fApB));
167  }
168 
169  float64_t d2=p*q;
170  h11+=m_confidences[i]*m_confidences[i]*d2;
171  h22+=d2;
172  h21+=m_confidences[i]*d2;
173  float64_t d1=t[i]-p;
174  g1+=m_confidences[i]*d1;
175  g2+=d1;
176  }
177 
178  /* Stopping Criteria */
179  if (CMath::abs(g1)<eps && CMath::abs(g2)<eps)
180  break;
181 
182  /* Finding Newton direction: -inv(H') * g */
183  float64_t det=h11*h22-h21*h21;
184  float64_t dA=-(h22*g1-h21*g2)/det;
185  float64_t dB=-(-h21*g1+h11*g2)/det;
186  float64_t gd=g1*dA+g2*dB;
187 
188  /* Line Search */
189  float64_t stepsize=1;
190 
191  while (stepsize>=minstep)
192  {
193  float64_t newA=a+stepsize*dA;
194  float64_t newB=b+stepsize*dB;
195 
196  /* New function value */
197  float64_t newf=0.0;
198  for (index_t i=0; i<length; ++i)
199  {
200  float64_t fApB=m_confidences[i]*newA+newB;
201  if (fApB>=0)
202  newf+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB));
203  else
204  newf+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB));
205  }
206 
207  /* Check sufficient decrease */
208  if (newf<fval+0.0001*stepsize*gd)
209  {
210  a=newA;
211  b=newB;
212  fval=newf;
213  break;
214  }
215  else
216  stepsize=stepsize/2.0;
217  }
218 
219  if (stepsize<minstep)
220  {
221  SG_WARNING("%s::scores_to_probabilities(): line search fails, A=%f, "
222  "B=%f, g1=%f, g2=%f, dA=%f, dB=%f, gd=%f\n",
223  get_name(), a, b, g1, g2, dA, dB, gd);
224  }
225  }
226 
227  if (it>=maxiter-1)
228  {
229  SG_WARNING("%s::scores_to_probabilities(): reaching maximal iterations,"
230  " g1=%f, g2=%f\n", get_name(), g1, g2);
231  }
232 
233  SG_DEBUG("fitted sigmoid: a=%f, b=%f\n", a, b);
234 
235  /* now the sigmoid is fitted, convert all confidences to probabilities */
236  for (index_t i=0; i<m_confidences.vlen; ++i)
237  {
238  float64_t fApB=m_confidences[i]*a+b;
239  m_confidences[i]=fApB>=0 ? CMath::exp(-fApB)/(1.0+exp(-fApB)) :
240  1.0/(1+CMath::exp(fApB));
241  }
242 
243  SG_DEBUG("leaving CBinaryLabels::scores_to_probabilities()\n");
244 }

SHOGUN Machine Learning Toolbox - Documentation