SHOGUN: Mathematics.h Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Written (W) 2007 Konrad Rieck
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #ifndef __MATHEMATICS_H_
00014 #define __MATHEMATICS_H_
00015 
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "lib/lapack.h"
00019 #include "base/SGObject.h"
00020 #include "base/Parallel.h"
00021 
00022 #include <math.h>
00023 #include <stdio.h>
00024 #include <float.h>
00025 #include <pthread.h>
00026 #include <unistd.h>
00027 #include <sys/types.h>
00028 #include <sys/time.h>
00029 #include <time.h>
00030 
00031 #ifdef SUNOS
00032 #include <ieeefp.h>
00033 #endif
00034 
00036 #ifdef log2
00037 #define cygwin_log2 log2
00038 #undef log2
00039 #endif
00040 
00041 
00042 
00044 #ifdef _GLIBCXX_CMATH
00045 #if _GLIBCXX_USE_C99_MATH
00046 #if !_GLIBCXX_USE_C99_FP_MACROS_DYNAMIC
00047 
00049   using std::signbit;
00050 
00051   using std::fpclassify;
00052 
00053   using std::isfinite;
00054   using std::isinf;
00055   using std::isnan;
00056   using std::isnormal;
00057 
00058   using std::isgreater;
00059   using std::isgreaterequal;
00060   using std::isless;
00061   using std::islessequal;
00062   using std::islessgreater;
00063   using std::isunordered;
00064 #endif
00065 #endif
00066 #endif
00067 
00068 
00069 #ifdef _WIN32
00070 #ifndef isnan
00071 #define isnan _isnan
00072 #endif
00073 
00074 #ifndef isfinite
00075 #define isfinite _isfinite
00076 #endif
00077 #endif //_WIN32
00078 
00079 #ifndef NAN
00080 #include <stdlib.h>
00081 #define NAN (strtod("NAN",NULL))
00082 #endif
00083 
00084 /* Size of RNG seed */
00085 #define RNG_SEED_SIZE 256
00086 
00087 /* Maximum stack size */
00088 #define RADIX_STACK_SIZE        512
00089 
00090 /* Stack macros */
00091 #define radix_push(a, n, i)     sp->sa = a, sp->sn = n, (sp++)->si = i
00092 #define radix_pop(a, n, i)      a = (--sp)->sa, n = sp->sn, i = sp->si
00093 
00094 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00095 
00096 template <class T> struct radix_stack_t
00097 {
00099     T *sa;
00101     size_t sn;
00103     uint16_t si;
00104 };
00105 
00107 
00109 template <class T1, class T2> struct thread_qsort
00110 {
00112     T1* output;
00114     T2* index;
00116     uint32_t size;
00117 
00119     int32_t* qsort_threads;
00121     int32_t sort_limit;
00123     int32_t num_threads;
00124 };
00125 #endif // DOXYGEN_SHOULD_SKIP_THIS
00126 
00127 namespace shogun
00128 {
00131 class CMath : public CSGObject
00132 {
00133     public:
00137 
00138         CMath();
00139 
00141         virtual ~CMath();
00143 
00147 
00149         //
00150         template <class T>
00151             static inline T min(T a, T b)
00152             {
00153                 return (a<=b) ? a : b;
00154             }
00155 
00157         template <class T>
00158             static inline T max(T a, T b) 
00159             {
00160                 return (a>=b) ? a : b;
00161             }
00162 
00164         template <class T>
00165             static inline T clamp(T value, T lb, T ub) 
00166             {
00167                 if (value<=lb)
00168                     return lb;
00169                 else if (value>=ub)
00170                     return ub;
00171                 else
00172                     return value;
00173             }
00174 
00176         template <class T>
00177             static inline T abs(T a)
00178             {
00179                 // can't be a>=0?(a):(-a), because compiler complains about
00180                 // 'comparison always true' when T is unsigned
00181                 if (a==0)
00182                     return 0;
00183                 else if (a>0)
00184                     return a;
00185                 else
00186                     return -a;
00187             }
00189 
00192 
00193         static inline float64_t round(float64_t d)
00194         {
00195             return ::floor(d+0.5);
00196         }
00197 
00198         static inline float64_t floor(float64_t d)
00199         {
00200             return ::floor(d);
00201         }
00202 
00203         static inline float64_t ceil(float64_t d)
00204         {
00205             return ::ceil(d);
00206         }
00207 
00209         template <class T>
00210             static inline T sign(T a)
00211             {
00212                 if (a==0)
00213                     return 0;
00214                 else return (a<0) ? (-1) : (+1);
00215             }
00216 
00218         template <class T>
00219             static inline void swap(T &a,T &b)
00220             {
00221                 T c=a;
00222                 a=b;
00223                 b=c;
00224             }
00225 
00229         template <class T>
00230             static inline void resize(T* &data, int64_t old_size, int64_t new_size)
00231             {
00232                 if (old_size==new_size)
00233                     return;
00234                 T* new_data = new T[new_size];
00235                 for (int64_t i=0; i<old_size && i<new_size; i++)
00236                     new_data[i]=data[i];
00237                 delete[] data;
00238                 data=new_data;
00239             }
00240 
00242         template <class T>
00243             static inline T twonorm(T* x, int32_t len)
00244             {
00245                 float64_t result=0;
00246                 for (int32_t i=0; i<len; i++)
00247                     result+=x[i]*x[i];
00248 
00249                 return CMath::sqrt(result);
00250             }
00251 
00253         template <class T>
00254             static inline T qsq(T* x, int32_t len, float64_t q)
00255             {
00256                 float64_t result=0;
00257                 for (int32_t i=0; i<len; i++)
00258                     result+=CMath::pow(x[i], q);
00259 
00260                 return result;
00261             }
00262 
00264         template <class T>
00265             static inline T qnorm(T* x, int32_t len, float64_t q)
00266             {
00267                 ASSERT(q!=0);
00268                 return CMath::pow(qsq(x, len, q), 1/q);
00269             }
00270 
00272         template <class T>
00273             static inline T sq(T x)
00274             {
00275                 return x*x;
00276             }
00277 
00279         static inline float32_t sqrt(float32_t x)
00280         {
00281             return ::sqrtf(x);
00282         }
00283 
00285         static inline float64_t sqrt(float64_t x)
00286         {
00287             return ::sqrt(x);
00288         }
00289 
00291         static inline floatmax_t sqrt(floatmax_t x)
00292         {
00293             //fall back to double precision sqrt if sqrtl is not
00294             //available
00295 #ifdef HAVE_SQRTL
00296             return ::sqrtl(x);
00297 #else
00298             return ::sqrt(x);
00299 #endif
00300         }
00301 
00302 
00304         static inline floatmax_t powl(floatmax_t x, floatmax_t n)
00305         {
00306             //fall back to double precision pow if powl is not
00307             //available
00308 #ifdef HAVE_POWL
00309             return ::powl((long double) x, (long double) n);
00310 #else
00311             return ::pow((double) x, (double) n);
00312 #endif
00313         }
00314 
00315         static inline int32_t pow(int32_t x, int32_t n)
00316         {
00317             ASSERT(n>=0);
00318             int32_t result=1;
00319             while (n--)
00320                 result*=x;
00321 
00322             return result;
00323         }
00324 
00325         static inline float64_t pow(float64_t x, int32_t n)
00326         {
00327             ASSERT(n>=0);
00328             float64_t result=1;
00329             while (n--)
00330                 result*=x;
00331 
00332             return result;
00333         }
00334 
00335         static inline float64_t pow(float64_t x, float64_t n)
00336         {
00337             return ::pow((double) x, (double) n);
00338         }
00339 
00340         static inline float64_t exp(float64_t x)
00341         {
00342             return ::exp((double) x);
00343         }
00344 
00345         static inline float64_t log10(float64_t v)
00346         {
00347             return ::log(v)/::log(10.0);
00348         }
00349 
00350         static inline float64_t log2(float64_t v)
00351         {
00352 #ifdef HAVE_LOG2
00353             return ::log2(v);
00354 #else
00355             return ::log(v)/::log(2.0);
00356 #endif //HAVE_LOG2
00357         }
00358 
00359         static inline float64_t log(float64_t v)
00360         {
00361             return ::log(v);
00362         }
00363 
00364         template <class T>
00365         static void transpose_matrix(
00366             T*& matrix, int32_t& num_feat, int32_t& num_vec)
00367         {
00368             T* transposed=new T[num_vec*num_feat];
00369             for (int32_t i=0; i<num_vec; i++)
00370             {
00371                 for (int32_t j=0; j<num_feat; j++)
00372                     transposed[i+j*num_vec]=matrix[i*num_feat+j];
00373             }
00374 
00375             delete[] matrix;
00376             matrix=transposed;
00377 
00378             CMath::swap(num_feat, num_vec);
00379         }
00380 
00381 #ifdef HAVE_LAPACK
00382 
00383 
00384         static float64_t* pinv(
00385             float64_t* matrix, int32_t rows, int32_t cols,
00386             float64_t* target=NULL);
00387 
00388 
00389         //C := alpha*op( A )*op( B ) + beta*C
00390         //op( X ) = X   or   op( X ) = X',
00391         static inline void dgemm(
00392             double alpha, const double* A, int rows, int cols,
00393             CBLAS_TRANSPOSE transposeA, double *B, int cols_B,
00394             CBLAS_TRANSPOSE transposeB, double beta, double *C)
00395         {
00396             cblas_dgemm(CblasColMajor, transposeA, transposeB, rows, cols, cols_B,
00397                     alpha, A, cols, B, cols_B, beta, C, cols);
00398         }
00399 
00400         //y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
00401         static inline void dgemv(
00402             double alpha, const double *A, int rows, int cols,
00403             const CBLAS_TRANSPOSE transposeA, const double* X, double beta,
00404             double* Y)
00405         {
00406             cblas_dgemv(CblasColMajor, transposeA,
00407                     rows, cols, alpha, A, cols,
00408                     X, 1, beta, Y, 1);
00409         }
00410 #endif
00411 
00412         static inline int64_t factorial(int32_t n)
00413         {
00414             int64_t res=1;
00415             for (int i=2; i<=n; i++)
00416                 res*=i ;
00417             return res ;
00418         }
00419 
00420         static void init_random(uint32_t initseed=0)
00421         {
00422             if (initseed==0)
00423             {
00424                 struct timeval tv;
00425                 gettimeofday(&tv, NULL);
00426                 seed=(uint32_t) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
00427             }
00428             else
00429                 seed=initseed;
00430 #if !defined(CYGWIN) && !defined(__INTERIX)
00431             //seed=42
00432             //SG_SPRINT("initializing random number generator with %d (seed size %d)\n", seed, RNG_SEED_SIZE);
00433             initstate(seed, CMath::rand_state, RNG_SEED_SIZE);
00434 #endif
00435         }
00436 
00437         static inline int64_t random()
00438         {
00439 #if defined(CYGWIN) || defined(__INTERIX)
00440             return rand();
00441 #else
00442             return ::random();
00443 #endif
00444         }
00445 
00446         static inline int32_t random(int32_t min_value, int32_t max_value)
00447         {
00448             int32_t ret = min_value + (int32_t) ((max_value-min_value+1) * (random() / (RAND_MAX+1.0)));
00449             ASSERT(ret>=min_value && ret<=max_value);
00450             return ret ;
00451         }
00452 
00453         static inline float32_t random(float32_t min_value, float32_t max_value)
00454         {
00455             float32_t ret = min_value + ((max_value-min_value) * (random() / (1.0*RAND_MAX)));
00456 
00457             if (ret<min_value || ret>max_value)
00458                 SG_SPRINT("min_value:%10.10f value: %10.10f max_value:%10.10f", min_value, ret, max_value);
00459             ASSERT(ret>=min_value && ret<=max_value);
00460             return ret;
00461         }
00462 
00463         static inline float64_t random(float64_t min_value, float64_t max_value)
00464         {
00465             float64_t ret = min_value + ((max_value-min_value) * (random() / (1.0*RAND_MAX)));
00466 
00467             if (ret<min_value || ret>max_value)
00468                 SG_SPRINT("min_value:%10.10f value: %10.10f max_value:%10.10f", min_value, ret, max_value);
00469             ASSERT(ret>=min_value && ret<=max_value);
00470             return ret;
00471         }
00472 
00473         template <class T>
00474             static T* clone_vector(const T* vec, int32_t len)
00475             {
00476                 T* result = new T[len];
00477                 for (int32_t i=0; i<len; i++)
00478                     result[i]=vec[i];
00479 
00480                 return result;
00481             }
00482         template <class T>
00483             static void fill_vector(T* vec, int32_t len, T value)
00484             {
00485                 for (int32_t i=0; i<len; i++)
00486                     vec[i]=value;
00487             }
00488         template <class T>
00489             static void range_fill_vector(T* vec, int32_t len, T start=0)
00490             {
00491                 for (int32_t i=0; i<len; i++)
00492                     vec[i]=i+start;
00493             }
00494 
00495         template <class T>
00496             static void random_vector(T* vec, int32_t len, T min_value, T max_value)
00497             {
00498                 for (int32_t i=0; i<len; i++)
00499                     vec[i]=CMath::random(min_value, max_value);
00500             }
00501 
00502         static inline int32_t* randperm(int32_t n)
00503         {
00504             int32_t* perm = new int32_t[n];
00505 
00506             if (!perm)
00507                 return NULL;
00508             for (int32_t i = 0; i < n; i++)
00509                 perm[i] = i;
00510             for (int32_t i = 0; i < n; i++)
00511                 swap(perm[random(0, n - 1)], perm[i]);
00512             return perm;
00513         }
00514 
00515         static inline int64_t nchoosek(int32_t n, int32_t k)
00516         {
00517             int64_t res=1;
00518 
00519             for (int32_t i=n-k+1; i<=n; i++)
00520                 res*=i;
00521 
00522             return res/factorial(k);
00523         }
00524 
00526         template <class T>
00527             static inline void vec1_plus_scalar_times_vec2(T* vec1,
00528                     T scalar, const T* vec2, int32_t n)
00529             {
00530                 for (int32_t i=0; i<n; i++)
00531                     vec1[i]+=scalar*vec2[i];
00532             }
00533 
00535         static inline float64_t dot(const bool* v1, const bool* v2, int32_t n)
00536         {
00537             float64_t r=0;
00538             for (int32_t i=0; i<n; i++)
00539                 r+=((v1[i]) ? 1 : 0) * ((v2[i]) ? 1 : 0);
00540             return r;
00541         }
00542 
00544         static inline floatmax_t dot(const floatmax_t* v1, const floatmax_t* v2, int32_t n)
00545         {
00546             floatmax_t r=0;
00547             for (int32_t i=0; i<n; i++)
00548                 r+=v1[i]*v2[i];
00549             return r;
00550         }
00551 
00553         static inline float64_t dot(const float64_t* v1, const float64_t* v2, int32_t n)
00554         {
00555             float64_t r=0;
00556 #ifdef HAVE_LAPACK
00557             int32_t skip=1;
00558             r = cblas_ddot(n, v1, skip, v2, skip);
00559 #else
00560             for (int32_t i=0; i<n; i++)
00561                 r+=v1[i]*v2[i];
00562 #endif
00563             return r;
00564         }
00565 
00567         static inline float32_t dot(
00568             const float32_t* v1, const float32_t* v2, int32_t n)
00569         {
00570             float64_t r=0;
00571 #ifdef HAVE_LAPACK
00572             int32_t skip=1;
00573             r = cblas_sdot(n, v1, skip, v2, skip);
00574 #else
00575             for (int32_t i=0; i<n; i++)
00576                 r+=v1[i]*v2[i];
00577 #endif
00578             return r;
00579         }
00580 
00582         static inline float64_t dot(
00583             const uint64_t* v1, const uint64_t* v2, int32_t n)
00584         {
00585             float64_t r=0;
00586             for (int32_t i=0; i<n; i++)
00587                 r+=((float64_t) v1[i])*v2[i];
00588 
00589             return r;
00590         }
00592         static inline float64_t dot(
00593             const int64_t* v1, const int64_t* v2, int32_t n)
00594         {
00595             float64_t r=0;
00596             for (int32_t i=0; i<n; i++)
00597                 r+=((float64_t) v1[i])*v2[i];
00598 
00599             return r;
00600         }
00601 
00603         static inline float64_t dot(
00604             const int32_t* v1, const int32_t* v2, int32_t n)
00605         {
00606             float64_t r=0;
00607             for (int32_t i=0; i<n; i++)
00608                 r+=((float64_t) v1[i])*v2[i];
00609 
00610             return r;
00611         }
00612 
00614         static inline float64_t dot(
00615             const uint32_t* v1, const uint32_t* v2, int32_t n)
00616         {
00617             float64_t r=0;
00618             for (int32_t i=0; i<n; i++)
00619                 r+=((float64_t) v1[i])*v2[i];
00620 
00621             return r;
00622         }
00623 
00625         static inline float64_t dot(
00626             const uint16_t* v1, const uint16_t* v2, int32_t n)
00627         {
00628             float64_t r=0;
00629             for (int32_t i=0; i<n; i++)
00630                 r+=((float64_t) v1[i])*v2[i];
00631 
00632             return r;
00633         }
00634 
00636         static inline float64_t dot(
00637             const int16_t* v1, const int16_t* v2, int32_t n)
00638         {
00639             float64_t r=0;
00640             for (int32_t i=0; i<n; i++)
00641                 r+=((float64_t) v1[i])*v2[i];
00642 
00643             return r;
00644         }
00645 
00647         static inline float64_t dot(
00648             const char* v1, const char* v2, int32_t n)
00649         {
00650             float64_t r=0;
00651             for (int32_t i=0; i<n; i++)
00652                 r+=((float64_t) v1[i])*v2[i];
00653 
00654             return r;
00655         }
00656 
00658         static inline float64_t dot(
00659             const uint8_t* v1, const uint8_t* v2, int32_t n)
00660         {
00661             float64_t r=0;
00662             for (int32_t i=0; i<n; i++)
00663                 r+=((float64_t) v1[i])*v2[i];
00664 
00665             return r;
00666         }
00667 
00669         static inline float64_t dot(
00670             const int8_t* v1, const int8_t* v2, int32_t n)
00671         {
00672             float64_t r=0;
00673             for (int32_t i=0; i<n; i++)
00674                 r+=((float64_t) v1[i])*v2[i];
00675 
00676             return r;
00677         }
00678 
00680         static inline float64_t dot(
00681             const float64_t* v1, const char* v2, int32_t n)
00682         {
00683             float64_t r=0;
00684             for (int32_t i=0; i<n; i++)
00685                 r+=((float64_t) v1[i])*v2[i];
00686 
00687             return r;
00688         }
00689 
00691         template <class T>
00692             static inline void add(
00693                 T* target, T alpha, const T* v1, T beta, const T* v2,
00694                 int32_t len)
00695             {
00696                 for (int32_t i=0; i<len; i++)
00697                     target[i]=alpha*v1[i]+beta*v2[i];
00698             }
00699 
00701         template <class T>
00702             static inline void add_scalar(T alpha, T* vec, int32_t len)
00703             {
00704                 for (int32_t i=0; i<len; i++)
00705                     vec[i]+=alpha;
00706             }
00707 
00709         template <class T>
00710             static inline void scale_vector(T alpha, T* vec, int32_t len)
00711             {
00712                 for (int32_t i=0; i<len; i++)
00713                     vec[i]*=alpha;
00714             }
00715 
00717         template <class T>
00718             static inline T sum(T* vec, int32_t len)
00719             {
00720                 T result=0;
00721                 for (int32_t i=0; i<len; i++)
00722                     result+=vec[i];
00723 
00724                 return result;
00725             }
00726 
00728         template <class T>
00729             static inline T max(T* vec, int32_t len)
00730             {
00731                 ASSERT(len>0);
00732                 T maxv=vec[0];
00733 
00734                 for (int32_t i=1; i<len; i++)
00735                     maxv=CMath::max(vec[i], maxv);
00736 
00737                 return maxv;
00738             }
00739 
00741         template <class T>
00742             static inline T sum_abs(T* vec, int32_t len)
00743             {
00744                 T result=0;
00745                 for (int32_t i=0; i<len; i++)
00746                     result+=CMath::abs(vec[i]);
00747 
00748                 return result;
00749             }
00750 
00752         template <class T>
00753             static inline bool fequal(T x, T y, float64_t precision=1e-6)
00754             {
00755                 return CMath::abs(x-y)<precision;
00756             }
00757 
00758         static inline float64_t mean(float64_t* vec, int32_t len)
00759         {
00760             ASSERT(vec);
00761             ASSERT(len>0);
00762 
00763             float64_t mean=0;
00764             for (int32_t i=0; i<len; i++)
00765                 mean+=vec[i];
00766             return mean/len;
00767         }
00768 
00769         static inline float64_t trace(
00770             float64_t* mat, int32_t cols, int32_t rows)
00771         {
00772             float64_t trace=0;
00773             for (int32_t i=0; i<rows; i++)
00774                 trace+=mat[i*cols+i];
00775             return trace;
00776         }
00777 
00781         static void sort(int32_t *a, int32_t cols, int32_t sort_col=0);
00782         static void sort(float64_t *a, int32_t*idx, int32_t N);
00783 
00784         /*
00785          * Inline function to extract the byte at position p (from left)
00786          * of an 64 bit integer. The function is somewhat identical to 
00787          * accessing an array of characters via [].
00788          */
00789 
00791         template <class T>
00792             inline static void radix_sort(T* array, int32_t size)
00793             {
00794                 radix_sort_helper(array,size,0);
00795             }
00796 
00797         template <class T>
00798             static inline uint8_t byte(T word, uint16_t p)
00799             {
00800                 return (word >> (sizeof(T)-p-1) * 8) & 0xff;
00801             }
00802 
00803         template <class T>
00804             static void radix_sort_helper(T* array, int32_t size, uint16_t i)
00805             {
00806                 static size_t count[256], nc, cmin;
00807                 T *ak;
00808                 uint8_t c=0;
00809                 radix_stack_t<T> s[RADIX_STACK_SIZE], *sp, *olds, *bigs;
00810                 T *an, *aj, *pile[256];
00811                 size_t *cp, cmax;
00812 
00813                 /* Push initial array to stack */
00814                 sp = s;
00815                 radix_push(array, size, i);
00816 
00817                 /* Loop until all digits have been sorted */
00818                 while (sp>s) {
00819                     radix_pop(array, size, i);
00820                     an = array + size;
00821 
00822                     /* Make character histogram */
00823                     if (nc == 0) {
00824                         cmin = 0xff;
00825                         for (ak = array; ak < an; ak++) {
00826                             c = byte(*ak, i);
00827                             count[c]++;
00828                             if (count[c] == 1) {
00829                                 /* Determine smallest character */
00830                                 if (c < cmin)
00831                                     cmin = c;
00832                                 nc++;
00833                             }
00834                         }
00835 
00836                         /* Sort recursively if stack size too small */
00837                         if (sp + nc > s + RADIX_STACK_SIZE) {
00838                             radix_sort_helper(array, size, i);
00839                             continue;
00840                         }
00841                     }
00842 
00843                     /*
00844                      * Set pile[]; push incompletely sorted bins onto stack.
00845                      * pile[] = pointers to last out-of-place element in bins.
00846                      * Before permuting: pile[c-1] + count[c] = pile[c];
00847                      * during deal: pile[c] counts down to pile[c-1].
00848                      */
00849                     olds = bigs = sp;
00850                     cmax = 2;
00851                     ak = array;
00852                     pile[0xff] = an;
00853                     for (cp = count + cmin; nc > 0; cp++) {
00854                         /* Find next non-empty pile */
00855                         while (*cp == 0)
00856                             cp++;
00857                         /* Pile with several entries */
00858                         if (*cp > 1) {
00859                             /* Determine biggest pile */
00860                             if (*cp > cmax) {
00861                                 cmax = *cp;
00862                                 bigs = sp;
00863                             }
00864 
00865                             if (i < sizeof(T)-1)
00866                                 radix_push(ak, *cp, (uint16_t) (i + 1));
00867                         }
00868                         pile[cp - count] = ak += *cp;
00869                         nc--;
00870                     }
00871 
00872                     /* Play it safe -- biggest bin last. */
00873                     swap(*olds, *bigs);
00874 
00875                     /*
00876                      * Permute misplacements home. Already home: everything
00877                      * before aj, and in pile[c], items from pile[c] on.
00878                      * Inner loop:
00879                      *      r = next element to put in place;
00880                      *      ak = pile[r[i]] = location to put the next element.
00881                      *      aj = bottom of 1st disordered bin.
00882                      * Outer loop:
00883                      *      Once the 1st disordered bin is done, ie. aj >= ak,
00884                      *      aj<-aj + count[c] connects the bins in array linked list;
00885                      *      reset count[c].
00886                      */
00887                     aj = array;
00888                     while (aj<an)
00889                     {
00890                         T r;
00891 
00892                         for (r = *aj; aj < (ak = --pile[c = byte(r, i)]);)
00893                             swap(*ak, r);
00894 
00895                         *aj = r;
00896                         aj += count[c];
00897                         count[c] = 0;
00898                     }
00899                 }
00900             }
00901 
00904         template <class T>
00905             static void insertion_sort(T* output, int32_t size)
00906             {
00907                 for (int32_t i=0; i<size-1; i++)
00908                 {
00909                     int32_t j=i-1;
00910                     T value=output[i];
00911                     while (j >= 0 && output[j] > value)
00912                     {
00913                         output[j+1] = output[j];
00914                         j--;
00915                     }
00916                     output[j+1]=value;
00917                 }
00918             }
00919 
00920 
00923         template <class T>
00924             static void qsort(T* output, int32_t size)
00925             {
00926                 if (size==1)
00927                     return;
00928 
00929                 if (size==2)
00930                 {
00931                     if (output[0] > output [1])
00932                         swap(output[0],output[1]);
00933                     return;
00934                 }
00935                 //T split=output[random(0,size-1)];
00936                 T split=output[size/2];
00937 
00938                 int32_t left=0;
00939                 int32_t right=size-1;
00940 
00941                 while (left<=right)
00942                 {
00943                     while (output[left] < split)
00944                         left++;
00945                     while (output[right] > split)
00946                         right--;
00947 
00948                     if (left<=right)
00949                     {
00950                         swap(output[left],output[right]);
00951                         left++;
00952                         right--;
00953                     }
00954                 }
00955 
00956                 if (right+1> 1)
00957                     qsort(output,right+1);
00958 
00959                 if (size-left> 1)
00960                     qsort(&output[left],size-left);
00961             }
00962 
00964         template <class T> static void display_bits(T word, int32_t width=8*sizeof(T))
00965         {
00966             ASSERT(width>=0);
00967             for (int i=0; i<width; i++)
00968             {
00969                 T mask = ((T) 1)<<(sizeof(T)*8-1);
00970                 while (mask)
00971                 {
00972                     if (mask & word)
00973                         SG_SPRINT("1");
00974                     else
00975                         SG_SPRINT("0");
00976 
00977                     mask>>=1;
00978                 }
00979             }
00980         }
00981 
00983         template <class T> static void display_vector(
00984             const T* vector, int32_t n, const char* name="vector");
00985 
00987         template <class T> static void display_matrix(
00988             const T* matrix, int32_t rows, int32_t cols, const char* name="matrix");
00989 
00995         template <class T1,class T2>
00996             static void qsort_index(T1* output, T2* index, uint32_t size);
00997 
01003         template <class T1,class T2>
01004             static void qsort_backward_index(
01005                 T1* output, T2* index, int32_t size);
01006 
01014         template <class T1,class T2>
01015             inline static void parallel_qsort_index(T1* output, T2* index, uint32_t size, int32_t n_threads, int32_t limit=262144)
01016             {
01017                 int32_t n=0;
01018                 thread_qsort<T1,T2> t;
01019                 t.output=output;
01020                 t.index=index;
01021                 t.size=size;
01022                 t.qsort_threads=&n;
01023                 t.sort_limit=limit;
01024                 t.num_threads=n_threads;
01025                 parallel_qsort_index<T1,T2>(&t);
01026             }
01027 
01028 
01029         template <class T1,class T2>
01030             static void* parallel_qsort_index(void* p);
01031 
01032 
01033         /* finds the smallest element in output and puts that element as the 
01034            first element  */
01035         template <class T>
01036             static void min(float64_t* output, T* index, int32_t size);
01037 
01038         /* finds the n smallest elements in output and puts these elements as the 
01039            first n elements  */
01040         template <class T>
01041             static void nmin(
01042                 float64_t* output, T* index, int32_t size, int32_t n);
01043 
01044         /* performs a inplace unique of a vector of type T using quicksort
01045          * returns the new number of elements */
01046         template <class T>
01047             static int32_t unique(T* output, int32_t size)
01048             {
01049                 qsort(output, size);
01050                 int32_t i,j=0 ;
01051                 for (i=0; i<size; i++)
01052                     if (i==0 || output[i]!=output[i-1])
01053                         output[j++]=output[i];
01054                 return j ;
01055             }
01056 
01057         /* finds an element in a sorted array via binary search
01058          * returns -1 if not found */
01059         template <class T>
01060             static int32_t binary_search_helper(T* output, int32_t size, T elem)
01061             {
01062                 int32_t start=0;
01063                 int32_t end=size-1;
01064 
01065                 if (size<1)
01066                     return 0;
01067 
01068                 while (start<end)
01069                 {
01070                     int32_t middle=(start+end)/2; 
01071 
01072                     if (output[middle]>elem)
01073                         end=middle-1;
01074                     else if (output[middle]<elem)
01075                         start=middle+1;
01076                     else
01077                         return middle;
01078                 }
01079 
01080                 return start;
01081             }
01082 
01083         /* finds an element in a sorted array via binary search
01084          *     * returns -1 if not found */
01085         template <class T>
01086             static inline int32_t binary_search(T* output, int32_t size, T elem)
01087             {
01088                 int32_t ind = binary_search_helper(output, size, elem);
01089                 if (ind >= 0 && output[ind] == elem)
01090                     return ind;
01091                 return -1;
01092             }
01093 
01094         /* finds an element in a sorted array via binary search 
01095          * if it exists, else the index the largest smaller element
01096          * is returned
01097          * note: a successor is not mandatory */
01098         template <class T>
01099             static int32_t binary_search_max_lower_equal(
01100                 T* output, int32_t size, T elem)
01101             {
01102                 int32_t ind = binary_search_helper(output, size, elem);
01103 
01104                 if (output[ind]<=elem)
01105                     return ind;
01106                 if (ind>0 && output[ind-1] <= elem)
01107                     return ind-1;
01108                 return -1;
01109             }
01110 
01113         static float64_t Align(
01114             char * seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost);
01115 
01120         static int32_t calcroc(
01121             float64_t* fp, float64_t* tp, float64_t* output, int32_t* label,
01122             int32_t& size, int32_t& possize, int32_t& negsize,
01123             float64_t& tresh, FILE* rocfile);
01125 
01128         static float64_t mutual_info(float64_t* p1, float64_t* p2, int32_t len);
01129 
01132         static float64_t relative_entropy(
01133             float64_t* p, float64_t* q, int32_t len);
01134 
01136         static float64_t entropy(float64_t* p, int32_t len);
01137 
01139         inline static uint32_t get_seed()
01140         {
01141             return CMath::seed;
01142         }
01143 
01145         inline static int is_finite(double f)
01146         {
01147 #if defined(isfinite) && !defined(SUNOS)
01148             return isfinite(f);
01149 #else
01150             return finite(f);
01151 #endif
01152         }
01153 
01155         inline static int is_infinity(double f)
01156         {
01157 #ifdef SUNOS
01158             if (fpclass(f) == FP_NINF || fpclass(f) == FP_PINF)
01159                 return 1;
01160             else
01161                 return 0;
01162 #else
01163             return isinf(f);
01164 #endif
01165         }
01166 
01168         inline static int is_nan(double f)
01169         {
01170 #ifdef SUNOS
01171             return isnand(f);
01172 #else
01173             return isnan(f);
01174 #endif
01175         }
01176 
01177 
01188 #ifdef USE_LOGCACHE
01189         static inline float64_t logarithmic_sum(float64_t p, float64_t q)
01190         {
01191             float64_t diff;
01192 
01193             if (!CMath::finite(p))
01194                 return q;
01195 
01196             if (!CMath::finite(q))
01197             {
01198                 SG_SWARNING("INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q);
01199                 return NAN;
01200             }
01201             diff = p - q;
01202             if (diff > 0)
01203                 return diff > LOGRANGE? p : p + logtable[(int)(diff * LOGACCURACY)];
01204             return -diff > LOGRANGE? q : q + logtable[(int)(-diff * LOGACCURACY)];
01205         }
01206 
01208         static void init_log_table();
01209 
01211         static int32_t determine_logrange();
01212 
01214         static int32_t determine_logaccuracy(int32_t range);
01215 #else
01216         static inline float64_t logarithmic_sum(
01217                 float64_t p, float64_t q)
01218         {
01219             float64_t diff;
01220 
01221             if (!CMath::is_finite(p))
01222                 return q;
01223             if (!CMath::is_finite(q))
01224                 return p;
01225             diff = p - q;
01226             if (diff > 0)
01227                 return diff > LOGRANGE? p : p + log(1 + exp(-diff));
01228             return -diff > LOGRANGE? q : q + log(1 + exp(diff));
01229         }
01230 #endif
01231 #ifdef LOG_SUM_ARRAY
01232 
01237                 static inline float64_t logarithmic_sum_array(
01238                     float64_t *p, int32_t len)
01239                 {
01240                     if (len<=2)
01241                     {
01242                         if (len==2)
01243                             return logarithmic_sum(p[0],p[1]) ;
01244                         if (len==1)
01245                             return p[0];
01246                         return -INFTY ;
01247                     }
01248                     else
01249                     {
01250                         register float64_t *pp=p ;
01251                         if (len%2==1) pp++ ;
01252                         for (register int32_t j=0; j < len>>1; j++)
01253                             pp[j]=logarithmic_sum(pp[j<<1], pp[1+(j<<1)]) ;
01254                     }
01255                     return logarithmic_sum_array(p,len%2+len>>1) ;
01256                 } 
01257 #endif
01258 
01259 
01261                 inline virtual const char* get_name() const { return "Mathematics"; }
01262     public:
01265 
01266                 static const float64_t INFTY;
01267                 static const float64_t ALMOST_INFTY;
01268 
01270                 static const float64_t ALMOST_NEG_INFTY;
01271 
01273                 static int32_t LOGRANGE;
01274 
01276                 static uint32_t seed;
01277                 static char* rand_state;
01278 
01279 #ifdef USE_LOGCACHE 
01280 
01282                 static int32_t LOGACCURACY;
01284     protected:
01286                 static float64_t* logtable; 
01287 #endif
01288 };
01289 
01290 //implementations of template functions
01291 template <class T1,class T2>
01292 void* CMath::parallel_qsort_index(void* p)
01293     {
01294         struct thread_qsort<T1,T2>* ps=(thread_qsort<T1,T2>*) p;
01295         T1* output=ps->output;
01296         T2* index=ps->index;
01297         uint32_t size=ps->size;
01298         int32_t* qsort_threads=ps->qsort_threads;
01299         int32_t sort_limit=ps->sort_limit;
01300         int32_t num_threads=ps->num_threads;
01301 
01302         if (size==2)
01303         {
01304             if (output[0] > output [1])
01305             {
01306                 swap(output[0], output[1]);
01307                 swap(index[0], index[1]);
01308             }
01309             return NULL;
01310         }
01311         //T1 split=output[random(0,size-1)];
01312         T1 split=output[size/2];
01313 
01314         int32_t left=0;
01315         int32_t right=size-1;
01316 
01317         while (left<=right)
01318         {
01319             while (output[left] < split)
01320                 left++;
01321             while (output[right] > split)
01322                 right--;
01323 
01324             if (left<=right)
01325             {
01326                 swap(output[left], output[right]);
01327                 swap(index[left], index[right]);
01328                 left++;
01329                 right--;
01330             }
01331         }
01332         bool lthread_start=false;
01333         bool rthread_start=false;
01334         pthread_t lthread;
01335         pthread_t rthread;
01336         struct thread_qsort<T1,T2> t1;
01337         struct thread_qsort<T1,T2> t2;
01338 
01339         if (right+1> 1 && (right+1< sort_limit || *qsort_threads >= num_threads-1))
01340             qsort_index(output,index,right+1);
01341         else if (right+1> 1)
01342         {
01343             (*qsort_threads)++;
01344             lthread_start=true;
01345             t1.output=output;
01346             t1.index=index;
01347             t1.size=right+1;
01348             t1.qsort_threads=qsort_threads;
01349             t1.sort_limit=sort_limit;
01350             t1.num_threads=num_threads;
01351             if (pthread_create(&lthread, NULL, parallel_qsort_index<T1,T2>, &t1) != 0)
01352             {
01353                 lthread_start=false;
01354                 (*qsort_threads)--;
01355                 qsort_index(output,index,right+1);
01356             }
01357         }
01358 
01359 
01360         if (size-left> 1 && (size-left< sort_limit || *qsort_threads >= num_threads-1))
01361             qsort_index(&output[left],&index[left], size-left);
01362         else if (size-left> 1)
01363         {
01364             (*qsort_threads)++;
01365             rthread_start=true;
01366             t2.output=&output[left];
01367             t2.index=&index[left];
01368             t2.size=size-left;
01369             t2.qsort_threads=qsort_threads;
01370             t2.sort_limit=sort_limit;
01371             t2.num_threads=num_threads;
01372             if (pthread_create(&rthread, NULL, parallel_qsort_index<T1,T2>, &t2) != 0)
01373             {
01374                 rthread_start=false;
01375                 (*qsort_threads)--;
01376                 qsort_index(&output[left],&index[left], size-left);
01377             }
01378         }
01379 
01380         if (lthread_start)
01381         {
01382             pthread_join(lthread, NULL);
01383             (*qsort_threads)--;
01384         }
01385 
01386         if (rthread_start)
01387         {
01388             pthread_join(rthread, NULL);
01389             (*qsort_threads)--;
01390         }
01391 
01392         return NULL;
01393     }
01394 
01395     template <class T1,class T2>
01396 void CMath::qsort_index(T1* output, T2* index, uint32_t size)
01397 {
01398     if (size==1)
01399         return;
01400 
01401     if (size==2)
01402     {
01403         if (output[0] > output [1])
01404         {
01405             swap(output[0],output[1]);
01406             swap(index[0],index[1]);
01407         }
01408         return;
01409     }
01410     //T1 split=output[random(0,size-1)];
01411     T1 split=output[size/2];
01412 
01413     int32_t left=0;
01414     int32_t right=size-1;
01415 
01416     while (left<=right)
01417     {
01418         while (output[left] < split)
01419             left++;
01420         while (output[right] > split)
01421             right--;
01422 
01423         if (left<=right)
01424         {
01425             swap(output[left],output[right]);
01426             swap(index[left],index[right]);
01427             left++;
01428             right--;
01429         }
01430     }
01431 
01432     if (right+1> 1)
01433         qsort_index(output,index,right+1);
01434 
01435     if (size-left> 1)
01436         qsort_index(&output[left],&index[left], size-left);
01437 }
01438 
01439     template <class T1,class T2>
01440 void CMath::qsort_backward_index(T1* output, T2* index, int32_t size)
01441 {
01442     if (size==2)
01443     {
01444         if (output[0] < output [1])
01445         {
01446             swap(output[0],output[1]);
01447             swap(index[0],index[1]);
01448         }
01449         return;
01450     }
01451 
01452     //T1 split=output[random(0,size-1)];
01453     T1 split=output[size/2];
01454 
01455     int32_t left=0;
01456     int32_t right=size-1;
01457 
01458     while (left<=right)
01459     {
01460         while (output[left] > split)
01461             left++;
01462         while (output[right] < split)
01463             right--;
01464 
01465         if (left<=right)
01466         {
01467             swap(output[left],output[right]);
01468             swap(index[left],index[right]);
01469             left++;
01470             right--;
01471         }
01472     }
01473 
01474     if (right+1> 1)
01475         qsort_backward_index(output,index,right+1);
01476 
01477     if (size-left> 1)
01478         qsort_backward_index(&output[left],&index[left], size-left);
01479 }
01480 
01481     template <class T> 
01482 void CMath::nmin(float64_t* output, T* index, int32_t size, int32_t n)
01483 {
01484     if (6*n*size<13*size*CMath::log(size))
01485         for (int32_t i=0; i<n; i++)
01486             min(&output[i], &index[i], size-i) ;
01487     else
01488         qsort_index(output, index, size) ;
01489 }
01490 
01491 /* move the smallest entry in the array to the beginning */
01492     template <class T>
01493 void CMath::min(float64_t* output, T* index, int32_t size)
01494 {
01495     if (size<=1)
01496         return;
01497     float64_t min_elem=output[0];
01498     int32_t min_index=0;
01499     for (int32_t i=1; i<size; i++)
01500     {
01501         if (output[i]<min_elem)
01502         {
01503             min_index=i;
01504             min_elem=output[i];
01505         }
01506     }
01507     swap(output[0], output[min_index]);
01508     swap(index[0], index[min_index]);
01509 }
01510 }
01511 #endif