00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016
00017 #include <shogun/lib/common.h>
00018 #include <shogun/mathematics/Math.h>
00019 #include <shogun/mathematics/lapack.h>
00020 #include <shogun/io/SGIO.h>
00021
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <math.h>
00025
00026 using namespace shogun;
00027
00028 #ifdef USE_LOGCACHE
00029 #ifdef USE_HMMDEBUG
00030 #define MAX_LOG_TABLE_SIZE 10*1024*1024
00031 #define LOG_TABLE_PRECISION 1e-6
00032 #else //USE_HMMDEBUG
00033 #define MAX_LOG_TABLE_SIZE 123*1024*1024
00034 #define LOG_TABLE_PRECISION 1e-15
00035 #endif //USE_HMMDEBUG
00036 int32_t CMath::LOGACCURACY = 0;
00037 #endif // USE_LOGCACHE
00038
00039 int32_t CMath::LOGRANGE = 0;
00040
00041 const float64_t CMath::INFTY = -log(0.0);
00042 const float64_t CMath::ALMOST_INFTY = +1e+20;
00043 const float64_t CMath::ALMOST_NEG_INFTY = -1000;
00044 const float64_t CMath::PI=PI;
00045 const float64_t CMath::MACHINE_EPSILON=5E-16;
00046 const float64_t CMath::MAX_REAL_NUMBER=1E300;
00047 const float64_t CMath::MIN_REAL_NUMBER=1E-300;
00048
00049 #ifdef USE_LOGCACHE
00050 float64_t* CMath::logtable = NULL;
00051 #endif
00052 char* CMath::rand_state = NULL;
00053 uint32_t CMath::seed = 0;
00054
00055 CMath::CMath()
00056 : CSGObject()
00057 {
00058 CMath::rand_state=SG_MALLOC(char, RNG_SEED_SIZE);
00059 init_random();
00060
00061 #ifdef USE_LOGCACHE
00062 LOGRANGE=CMath::determine_logrange();
00063 LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
00064 CMath::logtable=SG_MALLOC(float64_t, LOGRANGE*LOGACCURACY);
00065 init_log_table();
00066 #else
00067 int32_t i=0;
00068 while ((float64_t)log(1+((float64_t)exp(-float64_t(i)))))
00069 i++;
00070
00071 LOGRANGE=i;
00072 #endif
00073 }
00074
00075 CMath::~CMath()
00076 {
00077 SG_FREE(CMath::rand_state);
00078 CMath::rand_state=NULL;
00079 #ifdef USE_LOGCACHE
00080 SG_FREE(CMath::logtable);
00081 CMath::logtable=NULL;
00082 #endif
00083 }
00084
00085 namespace shogun
00086 {
00087 template <>
00088 void CMath::display_vector(const uint8_t* vector, int32_t n, const char* name)
00089 {
00090 ASSERT(n>=0);
00091 SG_SPRINT("%s=[", name);
00092 for (int32_t i=0; i<n; i++)
00093 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00094 SG_SPRINT("]\n");
00095 }
00096
00097 template <>
00098 void CMath::display_vector(const int32_t* vector, int32_t n, const char* name)
00099 {
00100 ASSERT(n>=0);
00101 SG_SPRINT("%s=[", name);
00102 for (int32_t i=0; i<n; i++)
00103 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00104 SG_SPRINT("]\n");
00105 }
00106
00107 template <>
00108 void CMath::display_vector(const int64_t* vector, int32_t n, const char* name)
00109 {
00110 ASSERT(n>=0);
00111 SG_SPRINT("%s=[", name);
00112 for (int32_t i=0; i<n; i++)
00113 SG_SPRINT("%lld%s", vector[i], i==n-1? "" : ",");
00114 SG_SPRINT("]\n");
00115 }
00116
00117 template <>
00118 void CMath::display_vector(const uint64_t* vector, int32_t n, const char* name)
00119 {
00120 ASSERT(n>=0);
00121 SG_SPRINT("%s=[", name);
00122 for (int32_t i=0; i<n; i++)
00123 SG_SPRINT("%llu%s", vector[i], i==n-1? "" : ",");
00124 SG_SPRINT("]\n");
00125 }
00126
00127 template <>
00128 void CMath::display_vector(const float32_t* vector, int32_t n, const char* name)
00129 {
00130 ASSERT(n>=0);
00131 SG_SPRINT("%s=[", name);
00132 for (int32_t i=0; i<n; i++)
00133 SG_SPRINT("%g%s", vector[i], i==n-1? "" : ",");
00134 SG_SPRINT("]\n");
00135 }
00136
00137 template <>
00138 void CMath::display_vector(const float64_t* vector, int32_t n, const char* name)
00139 {
00140 ASSERT(n>=0);
00141 SG_SPRINT("%s=[", name);
00142 for (int32_t i=0; i<n; i++)
00143 SG_SPRINT("%.18g%s", vector[i], i==n-1? "" : ",");
00144 SG_SPRINT("]\n");
00145 }
00146
00147 template <>
00148 void CMath::display_vector(const floatmax_t* vector, int32_t n, const char* name)
00149 {
00150 ASSERT(n>=0);
00151 SG_SPRINT("%s=[", name);
00152 for (int32_t i=0; i<n; i++)
00153 SG_SPRINT("%.36Lg%s", (long double) vector[i], i==n-1? "" : ",");
00154 SG_SPRINT("]\n");
00155 }
00156
00157 template <>
00158 void CMath::display_matrix(
00159 const int32_t* matrix, int32_t rows, int32_t cols, const char* name)
00160 {
00161 ASSERT(rows>=0 && cols>=0);
00162 SG_SPRINT("%s=[\n", name);
00163 for (int32_t i=0; i<rows; i++)
00164 {
00165 SG_SPRINT("[");
00166 for (int32_t j=0; j<cols; j++)
00167 SG_SPRINT("\t%d%s", matrix[j*rows+i],
00168 j==cols-1? "" : ",");
00169 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00170 }
00171 SG_SPRINT("]\n");
00172 }
00173
00174 template <>
00175 void CMath::display_matrix(
00176 const float64_t* matrix, int32_t rows, int32_t cols, const char* name)
00177 {
00178 ASSERT(rows>=0 && cols>=0);
00179 SG_SPRINT("%s=[\n", name);
00180 for (int32_t i=0; i<rows; i++)
00181 {
00182 SG_SPRINT("[");
00183 for (int32_t j=0; j<cols; j++)
00184 SG_SPRINT("\t%.18g%s", (double) matrix[j*rows+i],
00185 j==cols-1? "" : ",");
00186 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00187 }
00188 SG_SPRINT("]\n");
00189 }
00190
00191 template <>
00192 void CMath::display_matrix(
00193 const float32_t* matrix, int32_t rows, int32_t cols, const char* name)
00194 {
00195 ASSERT(rows>=0 && cols>=0);
00196 SG_SPRINT("%s=[\n", name);
00197 for (int32_t i=0; i<rows; i++)
00198 {
00199 SG_SPRINT("[");
00200 for (int32_t j=0; j<cols; j++)
00201 SG_SPRINT("\t%.18g%s", (float) matrix[j*rows+i],
00202 j==cols-1? "" : ",");
00203 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00204 }
00205 SG_SPRINT("]\n");
00206 }
00207
00208 }
00209
00210 SGVector<float64_t> CMath::fishers_exact_test_for_multiple_2x3_tables(SGMatrix<float64_t> tables)
00211 {
00212 SGMatrix<float64_t> table(NULL,2,3,false);
00213 int32_t len=tables.num_cols/3;
00214
00215 SGVector<float64_t> v(len);
00216 for (int32_t i=0; i<len; i++)
00217 {
00218 table.matrix=&tables.matrix[2*3*i];
00219 v.vector[i]=fishers_exact_test_for_2x3_table(table);
00220 }
00221 return v;
00222 }
00223
00224 float64_t CMath::fishers_exact_test_for_2x3_table(SGMatrix<float64_t> table)
00225 {
00226 ASSERT(table.num_rows==2);
00227 ASSERT(table.num_cols==3);
00228
00229 int32_t m_len=3+2;
00230 float64_t* m=SG_MALLOC(float64_t, 3+2);
00231 m[0]=table.matrix[0]+table.matrix[2]+table.matrix[4];
00232 m[1]=table.matrix[1]+table.matrix[3]+table.matrix[5];
00233 m[2]=table.matrix[0]+table.matrix[1];
00234 m[3]=table.matrix[2]+table.matrix[3];
00235 m[4]=table.matrix[4]+table.matrix[5];
00236
00237 float64_t n = CMath::sum(m, m_len) / 2.0;
00238 int32_t x_len=2*3* CMath::sq(CMath::max(m, m_len));
00239 float64_t* x = SG_MALLOC(float64_t, x_len);
00240 CMath::fill_vector(x, x_len, 0.0);
00241
00242 float64_t log_nom=0.0;
00243 for (int32_t i=0; i<3+2; i++)
00244 log_nom+=CMath::lgamma(m[i]+1);
00245 log_nom-=CMath::lgamma(n+1.0);
00246
00247 float64_t log_denomf=0;
00248 floatmax_t log_denom=0;
00249
00250 for (int32_t i=0; i<3*2; i++)
00251 {
00252 log_denom+=CMath::lgammal((floatmax_t) table.matrix[i]+1);
00253 log_denomf+=gamma(table.matrix[i]+1);
00254 }
00255
00256 floatmax_t prob_table_log=log_nom - log_denom;
00257
00258 int32_t dim1 = CMath::min(m[0], m[2]);
00259
00260
00261 int32_t counter = 0;
00262 for (int32_t k=0; k<=dim1; k++)
00263 {
00264 for (int32_t l=CMath::max(0.0,m[0]-m[4]-k); l<=CMath::min(m[0]-k, m[3]); l++)
00265 {
00266 x[0 + 0*2 + counter*2*3] = k;
00267 x[0 + 1*2 + counter*2*3] = l;
00268 x[0 + 2*2 + counter*2*3] = m[0] - x[0 + 0*2 + counter*2*3] - x[0 + 1*2 + counter*2*3];
00269 x[1 + 0*2 + counter*2*3] = m[2] - x[0 + 0*2 + counter*2*3];
00270 x[1 + 1*2 + counter*2*3] = m[3] - x[0 + 1*2 + counter*2*3];
00271 x[1 + 2*2 + counter*2*3] = m[4] - x[0 + 2*2 + counter*2*3];
00272
00273 counter++;
00274 }
00275 }
00276
00277
00278 #ifdef DEBUG_FISHER_TABLE
00279 SG_SPRINT("counter=%d\n", counter);
00280 SG_SPRINT("dim1=%d\n", dim1);
00281 SG_SPRINT("l=%g...%g\n", CMath::max(0.0,m[0]-m[4]-0), CMath::min(m[0]-0, m[3]));
00282 SG_SPRINT("n=%g\n", n);
00283 SG_SPRINT("prob_table_log=%.18Lg\n", prob_table_log);
00284 SG_SPRINT("log_denomf=%.18g\n", log_denomf);
00285 SG_SPRINT("log_denom=%.18Lg\n", log_denom);
00286 SG_SPRINT("log_nom=%.18g\n", log_nom);
00287 display_vector(m, m_len, "marginals");
00288 display_vector(x, 2*3*counter, "x");
00289 #endif // DEBUG_FISHER_TABLE
00290
00291
00292 floatmax_t* log_denom_vec=SG_MALLOC(floatmax_t, counter);
00293 CMath::fill_vector(log_denom_vec, counter, (floatmax_t) 0.0);
00294
00295 for (int32_t k=0; k<counter; k++)
00296 {
00297 for (int32_t j=0; j<3; j++)
00298 {
00299 for (int32_t i=0; i<2; i++)
00300 log_denom_vec[k]+=CMath::lgammal(x[i + j*2 + k*2*3]+1.0);
00301 }
00302 }
00303
00304 for (int32_t i=0; i<counter; i++)
00305 log_denom_vec[i]=log_nom-log_denom_vec[i];
00306
00307 #ifdef DEBUG_FISHER_TABLE
00308 display_vector(log_denom_vec, counter, "log_denom_vec");
00309 #endif // DEBUG_FISHER_TABLE
00310
00311
00312 float64_t nonrand_p=-CMath::INFTY;
00313 for (int32_t i=0; i<counter; i++)
00314 {
00315 if (log_denom_vec[i]<=prob_table_log)
00316 nonrand_p=CMath::logarithmic_sum(nonrand_p, log_denom_vec[i]);
00317 }
00318
00319 #ifdef DEBUG_FISHER_TABLE
00320 SG_SPRINT("nonrand_p=%.18g\n", nonrand_p);
00321 SG_SPRINT("exp_nonrand_p=%.18g\n", CMath::exp(nonrand_p));
00322 #endif // DEBUG_FISHER_TABLE
00323
00324 nonrand_p=CMath::exp(nonrand_p);
00325
00326 SG_FREE(log_denom_vec);
00327 SG_FREE(x);
00328 SG_FREE(m);
00329
00330 return nonrand_p;
00331 }
00332
00333
00334 #ifdef USE_LOGCACHE
00335 int32_t CMath::determine_logrange()
00336 {
00337 int32_t i;
00338 float64_t acc=0;
00339 for (i=0; i<50; i++)
00340 {
00341 acc=((float64_t)log(1+((float64_t)exp(-float64_t(i)))));
00342 if (acc<=(float64_t)LOG_TABLE_PRECISION)
00343 break;
00344 }
00345
00346 SG_SINFO( "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
00347 return i;
00348 }
00349
00350 int32_t CMath::determine_logaccuracy(int32_t range)
00351 {
00352 range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(float64_t));
00353 SG_SINFO( "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
00354 return range;
00355 }
00356
00357
00358 void CMath::init_log_table()
00359 {
00360 for (int32_t i=0; i< LOGACCURACY*LOGRANGE; i++)
00361 logtable[i]=log(1+exp(float64_t(-i)/float64_t(LOGACCURACY)));
00362 }
00363 #endif
00364
00365 void CMath::sort(int32_t *a, int32_t cols, int32_t sort_col)
00366 {
00367 int32_t changed=1;
00368 if (a[0]==-1) return ;
00369 while (changed)
00370 {
00371 changed=0; int32_t i=0 ;
00372 while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1))
00373 {
00374 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
00375 {
00376 for (int32_t j=0; j<cols; j++)
00377 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
00378 changed=1 ;
00379 } ;
00380 i++ ;
00381 } ;
00382 } ;
00383 }
00384
00385 void CMath::sort(float64_t *a, int32_t* idx, int32_t N)
00386 {
00387 int32_t changed=1;
00388 while (changed)
00389 {
00390 changed=0;
00391 for (int32_t i=0; i<N-1; i++)
00392 {
00393 if (a[i]>a[i+1])
00394 {
00395 swap(a[i],a[i+1]) ;
00396 swap(idx[i],idx[i+1]) ;
00397 changed=1 ;
00398 } ;
00399 } ;
00400 } ;
00401
00402 }
00403
00404 float64_t CMath::Align(
00405 char* seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost)
00406 {
00407 float64_t actCost=0 ;
00408 int32_t i1, i2 ;
00409 float64_t* const gapCosts1 = SG_MALLOC(float64_t, l1 );
00410 float64_t* const gapCosts2 = SG_MALLOC(float64_t, l2 );
00411 float64_t* costs2_0 = SG_MALLOC(float64_t, l2 + 1 );
00412 float64_t* costs2_1 = SG_MALLOC(float64_t, l2 + 1 );
00413
00414
00415 for( i1 = 0; i1 < l1; ++i1 ) {
00416 gapCosts1[ i1 ] = gapCost * i1;
00417 }
00418 costs2_1[ 0 ] = 0;
00419 for( i2 = 0; i2 < l2; ++i2 ) {
00420 gapCosts2[ i2 ] = gapCost * i2;
00421 costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ];
00422 }
00423
00424 for( i1 = 0; i1 < l1; ++i1 ) {
00425 swap( costs2_0, costs2_1 );
00426 actCost = costs2_0[ 0 ] + gapCosts1[ i1 ];
00427 costs2_1[ 0 ] = actCost;
00428 for( i2 = 0; i2 < l2; ++i2 ) {
00429 const float64_t actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] );
00430 const float64_t actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ];
00431 const float64_t actGap2 = actCost + gapCosts2[ i2 ];
00432 const float64_t actGap = min( actGap1, actGap2 );
00433 actCost = min( actMatch, actGap );
00434 costs2_1[ i2+1 ] = actCost;
00435 }
00436 }
00437
00438 delete [] gapCosts1;
00439 delete [] gapCosts2;
00440 delete [] costs2_0;
00441 delete [] costs2_1;
00442
00443
00444 return actCost;
00445 }
00446
00447 float64_t CMath::mutual_info(float64_t* p1, float64_t* p2, int32_t len)
00448 {
00449 double e=0;
00450
00451 for (int32_t i=0; i<len; i++)
00452 for (int32_t j=0; j<len; j++)
00453 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
00454
00455 return (float64_t) e;
00456 }
00457
00458 float64_t CMath::relative_entropy(float64_t* p, float64_t* q, int32_t len)
00459 {
00460 double e=0;
00461
00462 for (int32_t i=0; i<len; i++)
00463 e+=exp(p[i])*(p[i]-q[i]);
00464
00465 return (float64_t) e;
00466 }
00467
00468 float64_t CMath::entropy(float64_t* p, int32_t len)
00469 {
00470 double e=0;
00471
00472 for (int32_t i=0; i<len; i++)
00473 e-=exp(p[i])*p[i];
00474
00475 return (float64_t) e;
00476 }
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488 #ifdef HAVE_LAPACK
00489 float64_t* CMath::pinv(
00490 float64_t* matrix, int32_t rows, int32_t cols, float64_t* target)
00491 {
00492 if (!target)
00493 target=SG_MALLOC(float64_t, rows*cols);
00494
00495 char jobu='A';
00496 char jobvt='A';
00497 int m=rows;
00498 int n=cols;
00499 int lda=m;
00500 int ldu=m;
00501 int ldvt=n;
00502 int info=-1;
00503 int32_t lsize=CMath::min((int32_t) m, (int32_t) n);
00504 double* s=SG_MALLOC(double, lsize);
00505 double* u=SG_MALLOC(double, m*m);
00506 double* vt=SG_MALLOC(double, n*n);
00507
00508 wrap_dgesvd(jobu, jobvt, m, n, matrix, lda, s, u, ldu, vt, ldvt, &info);
00509 ASSERT(info==0);
00510
00511 for (int32_t i=0; i<n; i++)
00512 {
00513 for (int32_t j=0; j<lsize; j++)
00514 vt[i*n+j]=vt[i*n+j]/s[j];
00515 }
00516
00517 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m, n, m, 1.0, vt, ldvt, u, ldu, 0, target, m);
00518
00519 SG_FREE(u);
00520 SG_FREE(vt);
00521 SG_FREE(s);
00522
00523 return target;
00524 }
00525 #endif