00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016
00017 #include "lib/common.h"
00018 #include "lib/Mathematics.h"
00019 #include "lib/lapack.h"
00020 #include "lib/io.h"
00021
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <math.h>
00025
00026 using namespace shogun;
00027
00028 #ifdef USE_LOGCACHE
00029 #ifdef USE_HMMDEBUG
00030 #define MAX_LOG_TABLE_SIZE 10*1024*1024
00031 #define LOG_TABLE_PRECISION 1e-6
00032 #else //USE_HMMDEBUG
00033 #define MAX_LOG_TABLE_SIZE 123*1024*1024
00034 #define LOG_TABLE_PRECISION 1e-15
00035 #endif //USE_HMMDEBUG
00036 int32_t CMath::LOGACCURACY = 0;
00037 #endif // USE_LOGCACHE
00038
00039 int32_t CMath::LOGRANGE = 0;
00040
00041 const float64_t CMath::INFTY = -log(0.0);
00042 const float64_t CMath::ALMOST_INFTY = +1e+20;
00043 const float64_t CMath::ALMOST_NEG_INFTY = -1000;
00044 #ifdef USE_LOGCACHE
00045 float64_t* CMath::logtable = NULL;
00046 #endif
00047 char* CMath::rand_state = NULL;
00048 uint32_t CMath::seed = 0;
00049
00050 CMath::CMath()
00051 : CSGObject()
00052 {
00053 CMath::rand_state=new char[RNG_SEED_SIZE];
00054 init_random();
00055
00056 #ifdef USE_LOGCACHE
00057 LOGRANGE=CMath::determine_logrange();
00058 LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
00059 CMath::logtable=new float64_t[LOGRANGE*LOGACCURACY];
00060 init_log_table();
00061 #else
00062 int32_t i=0;
00063 while ((float64_t)log(1+((float64_t)exp(-float64_t(i)))))
00064 i++;
00065
00066 LOGRANGE=i;
00067 #endif
00068 }
00069
00070 CMath::~CMath()
00071 {
00072 delete[] CMath::rand_state;
00073 CMath::rand_state=NULL;
00074 #ifdef USE_LOGCACHE
00075 delete[] CMath::logtable;
00076 CMath::logtable=NULL;
00077 #endif
00078 }
00079
00080 #ifdef USE_LOGCACHE
00081 int32_t CMath::determine_logrange()
00082 {
00083 int32_t i;
00084 float64_t acc=0;
00085 for (i=0; i<50; i++)
00086 {
00087 acc=((float64_t)log(1+((float64_t)exp(-float64_t(i)))));
00088 if (acc<=(float64_t)LOG_TABLE_PRECISION)
00089 break;
00090 }
00091
00092 SG_SINFO( "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
00093 return i;
00094 }
00095
00096 int32_t CMath::determine_logaccuracy(int32_t range)
00097 {
00098 range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(float64_t));
00099 SG_SINFO( "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
00100 return range;
00101 }
00102
00103
00104 void CMath::init_log_table()
00105 {
00106 for (int32_t i=0; i< LOGACCURACY*LOGRANGE; i++)
00107 logtable[i]=log(1+exp(float64_t(-i)/float64_t(LOGACCURACY)));
00108 }
00109 #endif
00110
00111 void CMath::sort(int32_t *a, int32_t cols, int32_t sort_col)
00112 {
00113 int32_t changed=1;
00114 if (a[0]==-1) return ;
00115 while (changed)
00116 {
00117 changed=0; int32_t i=0 ;
00118 while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1))
00119 {
00120 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
00121 {
00122 for (int32_t j=0; j<cols; j++)
00123 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
00124 changed=1 ;
00125 } ;
00126 i++ ;
00127 } ;
00128 } ;
00129 }
00130
00131 void CMath::sort(float64_t *a, int32_t* idx, int32_t N)
00132 {
00133 int32_t changed=1;
00134 while (changed)
00135 {
00136 changed=0;
00137 for (int32_t i=0; i<N-1; i++)
00138 {
00139 if (a[i]>a[i+1])
00140 {
00141 swap(a[i],a[i+1]) ;
00142 swap(idx[i],idx[i+1]) ;
00143 changed=1 ;
00144 } ;
00145 } ;
00146 } ;
00147
00148 }
00149
00150 float64_t CMath::Align(
00151 char* seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost)
00152 {
00153 float64_t actCost=0 ;
00154 int32_t i1, i2 ;
00155 float64_t* const gapCosts1 = new float64_t[ l1 ];
00156 float64_t* const gapCosts2 = new float64_t[ l2 ];
00157 float64_t* costs2_0 = new float64_t[ l2 + 1 ];
00158 float64_t* costs2_1 = new float64_t[ l2 + 1 ];
00159
00160
00161 for( i1 = 0; i1 < l1; ++i1 ) {
00162 gapCosts1[ i1 ] = gapCost * i1;
00163 }
00164 costs2_1[ 0 ] = 0;
00165 for( i2 = 0; i2 < l2; ++i2 ) {
00166 gapCosts2[ i2 ] = gapCost * i2;
00167 costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ];
00168 }
00169
00170 for( i1 = 0; i1 < l1; ++i1 ) {
00171 swap( costs2_0, costs2_1 );
00172 actCost = costs2_0[ 0 ] + gapCosts1[ i1 ];
00173 costs2_1[ 0 ] = actCost;
00174 for( i2 = 0; i2 < l2; ++i2 ) {
00175 const float64_t actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] );
00176 const float64_t actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ];
00177 const float64_t actGap2 = actCost + gapCosts2[ i2 ];
00178 const float64_t actGap = min( actGap1, actGap2 );
00179 actCost = min( actMatch, actGap );
00180 costs2_1[ i2+1 ] = actCost;
00181 }
00182 }
00183
00184 delete [] gapCosts1;
00185 delete [] gapCosts2;
00186 delete [] costs2_0;
00187 delete [] costs2_1;
00188
00189
00190 return actCost;
00191 }
00192
00193
00194
00195 int32_t CMath::calcroc(
00196 float64_t* fp, float64_t* tp, float64_t* output, int32_t* label,
00197 int32_t& size, int32_t& possize, int32_t& negsize, float64_t& tresh,
00198 FILE* rocfile)
00199 {
00200 int32_t left=0;
00201 int32_t right=size-1;
00202 int32_t i;
00203
00204 for (i=0; i<size; i++)
00205 {
00206 if (!(label[i]== -1 || label[i]==1))
00207 return -1;
00208 }
00209
00210
00211 while (left<right)
00212 {
00213 while ((label[left] < 0) && (left<right))
00214 left++;
00215 while ((label[right] > 0) && (left<right))
00216 right--;
00217
00218 swap(output[left],output[right]);
00219 swap(label[left], label[right]);
00220 }
00221
00222
00223 negsize=left;
00224 possize=size-left;
00225 float64_t* negout=output;
00226 float64_t* posout=&output[left];
00227
00228
00229 qsort(negout, negsize);
00230 qsort(posout, possize);
00231
00232
00233 float64_t minimum=min(negout[0], posout[0]);
00234 float64_t maximum=minimum;
00235 if (negsize>0)
00236 maximum=max(maximum,negout[negsize-1]);
00237 if (possize>0)
00238 maximum=max(maximum,posout[possize-1]);
00239
00240 float64_t treshhold=minimum;
00241 float64_t old_treshhold=treshhold;
00242
00243
00244 for (i=0; i<size; i++)
00245 {
00246 fp[i]=1.0;
00247 tp[i]=1.0;
00248 }
00249
00250
00251
00252 int32_t posidx=0;
00253 int32_t negidx=0;
00254 int32_t iteration=1;
00255 int32_t returnidx=-1;
00256
00257 float64_t minerr=10;
00258
00259 while (iteration < size && treshhold<=maximum)
00260 {
00261 old_treshhold=treshhold;
00262
00263 while (treshhold==old_treshhold && treshhold<=maximum)
00264 {
00265 if (posidx<possize && negidx<negsize)
00266 {
00267 if (posout[posidx]<negout[negidx])
00268 {
00269 if (posout[posidx]==treshhold)
00270 posidx++;
00271 else
00272 treshhold=posout[posidx];
00273 }
00274 else
00275 {
00276 if (negout[negidx]==treshhold)
00277 negidx++;
00278 else
00279 treshhold=negout[negidx];
00280 }
00281 }
00282 else
00283 {
00284 if (posidx>=possize && negidx<negsize-1)
00285 {
00286 negidx++;
00287 treshhold=negout[negidx];
00288 }
00289 else if (negidx>=negsize && posidx<possize-1)
00290 {
00291 posidx++;
00292 treshhold=posout[posidx];
00293 }
00294 else if (negidx<negsize && treshhold!=negout[negidx])
00295 treshhold=negout[negidx];
00296 else if (posidx<possize && treshhold!=posout[posidx])
00297 treshhold=posout[posidx];
00298 else
00299 {
00300 treshhold=2*(maximum+100);
00301 posidx=possize;
00302 negidx=negsize;
00303 break;
00304 }
00305 }
00306 }
00307
00308
00309 tp[iteration]=(possize-posidx)/(float64_t (possize));
00310 fp[iteration]=(negsize-negidx)/(float64_t (negsize));
00311
00312
00313 if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
00314 {
00315 minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
00316 tresh=(old_treshhold+treshhold)/2;
00317 returnidx=iteration;
00318 }
00319
00320 iteration++;
00321 }
00322
00323
00324 size=iteration;
00325
00326 if (rocfile)
00327 {
00328 const char id[]="ROC";
00329 fwrite(id, sizeof(char), sizeof(id), rocfile);
00330 fwrite(fp, sizeof(float64_t), size, rocfile);
00331 fwrite(tp, sizeof(float64_t), size, rocfile);
00332 }
00333
00334 return returnidx;
00335 }
00336
00337 float64_t CMath::mutual_info(float64_t* p1, float64_t* p2, int32_t len)
00338 {
00339 double e=0;
00340
00341 for (int32_t i=0; i<len; i++)
00342 for (int32_t j=0; j<len; j++)
00343 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
00344
00345 return (float64_t) e;
00346 }
00347
00348 float64_t CMath::relative_entropy(float64_t* p, float64_t* q, int32_t len)
00349 {
00350 double e=0;
00351
00352 for (int32_t i=0; i<len; i++)
00353 e+=exp(p[i])*(p[i]-q[i]);
00354
00355 return (float64_t) e;
00356 }
00357
00358 float64_t CMath::entropy(float64_t* p, int32_t len)
00359 {
00360 double e=0;
00361
00362 for (int32_t i=0; i<len; i++)
00363 e-=exp(p[i])*p[i];
00364
00365 return (float64_t) e;
00366 }
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378 #ifdef HAVE_LAPACK
00379 float64_t* CMath::pinv(
00380 float64_t* matrix, int32_t rows, int32_t cols, float64_t* target)
00381 {
00382 if (!target)
00383 target=new float64_t[rows*cols];
00384
00385 char jobu='A';
00386 char jobvt='A';
00387 int m=rows;
00388 int n=cols;
00389 int lda=m;
00390 int ldu=m;
00391 int ldvt=n;
00392 int info=-1;
00393 int32_t lsize=CMath::min((int32_t) m, (int32_t) n);
00394 double* s=new double[lsize];
00395 double* u=new double[m*m];
00396 double* vt=new double[n*n];
00397
00398 wrap_dgesvd(jobu, jobvt, m, n, matrix, lda, s, u, ldu, vt, ldvt, &info);
00399 ASSERT(info==0);
00400
00401 for (int32_t i=0; i<n; i++)
00402 {
00403 for (int32_t j=0; j<lsize; j++)
00404 vt[i*n+j]=vt[i*n+j]/s[j];
00405 }
00406
00407 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m, n, m, 1.0, vt, ldvt, u, ldu, 0, target, m);
00408
00409 delete[] u;
00410 delete[] vt;
00411 delete[] s;
00412
00413 return target;
00414 }
00415 #endif
00416
00417 namespace shogun
00418 {
00419 template <>
00420 void CMath::display_vector(const uint8_t* vector, int32_t n, const char* name)
00421 {
00422 ASSERT(n>=0);
00423 SG_SPRINT("%s=[", name);
00424 for (int32_t i=0; i<n; i++)
00425 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00426 SG_SPRINT("]\n");
00427 }
00428
00429 template <>
00430 void CMath::display_vector(const int32_t* vector, int32_t n, const char* name)
00431 {
00432 ASSERT(n>=0);
00433 SG_SPRINT("%s=[", name);
00434 for (int32_t i=0; i<n; i++)
00435 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00436 SG_SPRINT("]\n");
00437 }
00438
00439 template <>
00440 void CMath::display_vector(const int64_t* vector, int32_t n, const char* name)
00441 {
00442 ASSERT(n>=0);
00443 SG_SPRINT("%s=[", name);
00444 for (int32_t i=0; i<n; i++)
00445 SG_SPRINT("%lld%s", vector[i], i==n-1? "" : ",");
00446 SG_SPRINT("]\n");
00447 }
00448
00449 template <>
00450 void CMath::display_vector(const uint64_t* vector, int32_t n, const char* name)
00451 {
00452 ASSERT(n>=0);
00453 SG_SPRINT("%s=[", name);
00454 for (int32_t i=0; i<n; i++)
00455 SG_SPRINT("%llu%s", vector[i], i==n-1? "" : ",");
00456 SG_SPRINT("]\n");
00457 }
00458
00459 template <>
00460 void CMath::display_vector(const float32_t* vector, int32_t n, const char* name)
00461 {
00462 ASSERT(n>=0);
00463 SG_SPRINT("%s=[", name);
00464 for (int32_t i=0; i<n; i++)
00465 SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00466 SG_SPRINT("]\n");
00467 }
00468
00469 template <>
00470 void CMath::display_vector(const float64_t* vector, int32_t n, const char* name)
00471 {
00472 ASSERT(n>=0);
00473 SG_SPRINT("%s=[", name);
00474 for (int32_t i=0; i<n; i++)
00475 SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00476 SG_SPRINT("]\n");
00477 }
00478
00479 template <>
00480 void CMath::display_matrix(
00481 const int32_t* matrix, int32_t rows, int32_t cols, const char* name)
00482 {
00483 ASSERT(rows>=0 && cols>=0);
00484 SG_SPRINT("%s=[\n", name);
00485 for (int32_t i=0; i<rows; i++)
00486 {
00487 SG_SPRINT("[");
00488 for (int32_t j=0; j<cols; j++)
00489 SG_SPRINT("\t%d%s", matrix[j*rows+i],
00490 j==cols-1? "" : ",");
00491 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00492 }
00493 SG_SPRINT("]\n");
00494 }
00495
00496 template <>
00497 void CMath::display_matrix(
00498 const float64_t* matrix, int32_t rows, int32_t cols, const char* name)
00499 {
00500 ASSERT(rows>=0 && cols>=0);
00501 SG_SPRINT("%s=[\n", name);
00502 for (int32_t i=0; i<rows; i++)
00503 {
00504 SG_SPRINT("[");
00505 for (int32_t j=0; j<cols; j++)
00506 SG_SPRINT("\t%lf%s", (double) matrix[j*rows+i],
00507 j==cols-1? "" : ",");
00508 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00509 }
00510 SG_SPRINT("]\n");
00511 }
00512 }