SHOGUN: Math.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 #include <shogun/base/SGObject.h>
00012 #include <shogun/lib/common.h>
00013 #include <shogun/mathematics/Math.h>
00014 #include <shogun/mathematics/lapack.h>
00015 #include <shogun/io/SGIO.h>
00016 
00017 #include <stdio.h>
00018 #include <stdlib.h>
00019 #include <math.h>
00020 
00021 using namespace shogun;
00022 
00023 #ifdef USE_LOGCACHE
00024 #ifdef USE_HMMDEBUG
00025 #define MAX_LOG_TABLE_SIZE 10*1024*1024
00026 #define LOG_TABLE_PRECISION 1e-6
00027 #else //USE_HMMDEBUG
00028 #define MAX_LOG_TABLE_SIZE 123*1024*1024
00029 #define LOG_TABLE_PRECISION 1e-15
00030 #endif //USE_HMMDEBUG
00031 int32_t CMath::LOGACCURACY         = 0; // 100000 steps per integer
00032 #endif // USE_LOGCACHE
00033 
00034 int32_t CMath::LOGRANGE            = 0; // range for logtable: log(1+exp(x))  -25 <= x <= 0
00035 
00036 const float64_t CMath::INFTY            =  -log(0.0);   // infinity
00037 const float64_t CMath::ALMOST_INFTY     =  +1e+20;      //a large number
00038 const float64_t CMath::ALMOST_NEG_INFTY =  -1000;
00039 const float64_t CMath::PI=M_PI;
00040 const float64_t CMath::MACHINE_EPSILON=5E-16;
00041 const float64_t CMath::MAX_REAL_NUMBER=1E300;
00042 const float64_t CMath::MIN_REAL_NUMBER=1E-300;
00043 
00044 #ifdef USE_LOGCACHE
00045 float64_t* CMath::logtable = NULL;
00046 #endif
00047 char* CMath::rand_state = NULL;
00048 uint32_t CMath::seed = 0;
00049 
00050 CMath::CMath()
00051 : CSGObject()
00052 {
00053     CMath::rand_state=SG_MALLOC(char, RNG_SEED_SIZE);
00054     init_random();
00055 
00056 #ifdef USE_LOGCACHE
00057     LOGRANGE=CMath::determine_logrange();
00058     LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
00059     CMath::logtable=SG_MALLOC(float64_t, LOGRANGE*LOGACCURACY);
00060     init_log_table();
00061 #else
00062     int32_t i=0;
00063     while ((float64_t)log(1+((float64_t)exp(-float64_t(i)))))
00064         i++;
00065 
00066     LOGRANGE=i;
00067 #endif
00068 }
00069 
00070 CMath::~CMath()
00071 {
00072     SG_FREE(CMath::rand_state);
00073     CMath::rand_state=NULL;
00074 #ifdef USE_LOGCACHE
00075     SG_FREE(CMath::logtable);
00076     CMath::logtable=NULL;
00077 #endif
00078 }
00079 
00080 #ifdef USE_LOGCACHE
00081 int32_t CMath::determine_logrange()
00082 {
00083     int32_t i;
00084     float64_t acc=0;
00085     for (i=0; i<50; i++)
00086     {
00087         acc=((float64_t)log(1+((float64_t)exp(-float64_t(i)))));
00088         if (acc<=(float64_t)LOG_TABLE_PRECISION)
00089             break;
00090     }
00091 
00092     SG_SINFO( "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
00093     return i;
00094 }
00095 
00096 int32_t CMath::determine_logaccuracy(int32_t range)
00097 {
00098     range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(float64_t));
00099     SG_SINFO( "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
00100     return range;
00101 }
00102 
00103 //init log table of form log(1+exp(x))
00104 void CMath::init_log_table()
00105 {
00106   for (int32_t i=0; i< LOGACCURACY*LOGRANGE; i++)
00107     logtable[i]=log(1+exp(float64_t(-i)/float64_t(LOGACCURACY)));
00108 }
00109 #endif
00110 
00111 void CMath::sort(int32_t *a, int32_t cols, int32_t sort_col)
00112 {
00113   int32_t changed=1;
00114   if (a[0]==-1) return ;
00115   while (changed)
00116   {
00117       changed=0; int32_t i=0 ;
00118       while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure
00119       {
00120           if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
00121           {
00122               for (int32_t j=0; j<cols; j++)
00123                   CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
00124               changed=1 ;
00125           } ;
00126           i++ ;
00127       } ;
00128   } ;
00129 }
00130 
00131 void CMath::sort(float64_t *a, int32_t* idx, int32_t N)
00132 {
00133     int32_t changed=1;
00134     while (changed)
00135     {
00136         changed=0;
00137         for (int32_t i=0; i<N-1; i++)
00138         {
00139             if (a[i]>a[i+1])
00140             {
00141                 swap(a[i],a[i+1]) ;
00142                 swap(idx[i],idx[i+1]) ;
00143                 changed=1 ;
00144             } ;
00145         } ;
00146     } ;
00147 
00148 }
00149 
00150 float64_t CMath::Align(
00151     char* seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost)
00152 {
00153   float64_t actCost=0 ;
00154   int32_t i1, i2 ;
00155   float64_t* const gapCosts1 = SG_MALLOC(float64_t,  l1 );
00156   float64_t* const gapCosts2 = SG_MALLOC(float64_t,  l2 );
00157   float64_t* costs2_0 = SG_MALLOC(float64_t,  l2 + 1 );
00158   float64_t* costs2_1 = SG_MALLOC(float64_t,  l2 + 1 );
00159 
00160   // initialize borders
00161   for( i1 = 0; i1 < l1; ++i1 ) {
00162     gapCosts1[ i1 ] = gapCost * i1;
00163   }
00164   costs2_1[ 0 ] = 0;
00165   for( i2 = 0; i2 < l2; ++i2 ) {
00166     gapCosts2[ i2 ] = gapCost * i2;
00167     costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ];
00168   }
00169   // compute alignment
00170   for( i1 = 0; i1 < l1; ++i1 ) {
00171     swap( costs2_0, costs2_1 );
00172     actCost = costs2_0[ 0 ] + gapCosts1[ i1 ];
00173     costs2_1[ 0 ] = actCost;
00174     for( i2 = 0; i2 < l2; ++i2 ) {
00175       const float64_t actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] );
00176       const float64_t actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ];
00177       const float64_t actGap2 = actCost + gapCosts2[ i2 ];
00178       const float64_t actGap = min( actGap1, actGap2 );
00179       actCost = min( actMatch, actGap );
00180       costs2_1[ i2+1 ] = actCost;
00181     }
00182   }
00183 
00184   SG_FREE(gapCosts1);
00185   SG_FREE(gapCosts2);
00186   SG_FREE(costs2_0);
00187   SG_FREE(costs2_1);
00188 
00189   // return the final cost
00190   return actCost;
00191 }