/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Written (W) 2012 Fernando José Iglesias García
 * Copyright (c) 2011 Berlin Institute of Technology and Max-Planck-Society.
 */

#include <shogun/loss/LogLoss.h>

using namespace shogun;

float64_t CLogLoss::loss(float64_t z)
{
	/* log(1 + exp(-z)), with the branch chosen so that exp() is only
	 * ever evaluated at a non-positive argument (avoids overflow) */
	return (z >= 0) ? log(1 + exp(-z)) : -z + log(1 + exp(z));
}

float64_t CLogLoss::first_derivative(float64_t z)
{
	/* d/dz log(1 + exp(-z)) = -1/(exp(z) + 1), again evaluated so
	 * that exp() never overflows */
	if (z < 0)
		return -1 / (exp(z) + 1);

	float64_t ez = exp(-z);
	return -ez / (ez + 1);
}

float64_t CLogLoss::second_derivative(float64_t z)
{
	/* exp(z)/(1 + exp(z))^2 is symmetric in z, so evaluate it with
	 * exp(-|z|) to keep exp() from overflowing for large positive z */
	float64_t ez = (z > 0) ? exp(-z) : exp(z);
	return ez / (ez*(ez + 2) + 1);
}

float64_t CLogLoss::get_update(float64_t prediction, float64_t label, float64_t eta_t, float64_t norm)
{
	float64_t w, x;
	float64_t d = exp(label * prediction);
	if (eta_t < 1e-6)
	{
		/* As with squared loss, for small eta_t we replace the update
		 * with its first-order Taylor expansion to avoid numerical
		 * problems */
		return label*eta_t/((1 + d)*norm);
	}
	x = eta_t + label*prediction + d;

	/* This block approximates W(exp(x)) - x, where W is the Lambert W
	 * function: W(z)*exp(W(z)) = z. The absolute error of this
	 * approximation is less than 9e-5. Faster/better approximations
	 * can be substituted here.
	 */
	float64_t W = x >= 1. ? 0.86*x + 0.01 : exp(0.8*x - 0.65); // initial guess for W(exp(x))
	float64_t r = x >= 1. ? x - log(W) - W : 0.2*x + 0.65 - W; // residual of the guess
	float64_t t = 1. + W;
	float64_t u = 2.*t*(t + 2.*r/3.);        // single Fritsch-style correction step ...
	w = W*(1. + r/t*(u - r)/(u - 2.*r)) - x; // ... yielding the refined W(exp(x)) - x

	return -(label*w + prediction)/norm;
}

float64_t CLogLoss::get_square_grad(float64_t prediction, float64_t label)
{
	float64_t d = CLossFunction::first_derivative(prediction, label);
	return d*d;
}
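
/* --------------------------------------------------------------------
 * The standalone snippet below is NOT part of Shogun; it is a minimal,
 * hypothetical sketch (its own translation unit, C++11) checking the
 * comment in get_update() that the fast approximation of W(exp(x)) - x
 * has absolute error below 9e-5. The reference value exploits the fact
 * that v = W(exp(x)) is the unique positive root of v + log(v) = x,
 * solved here by Newton's method. The helper names approx_w_minus_x()
 * and reference_w_minus_x() are invented for this illustration.
 * -------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>

// Same closed-form approximation as in CLogLoss::get_update() above
static double approx_w_minus_x(double x)
{
	double W = x >= 1. ? 0.86*x + 0.01 : std::exp(0.8*x - 0.65); // initial guess
	double r = x >= 1. ? x - std::log(W) - W : 0.2*x + 0.65 - W; // residual
	double t = 1. + W;
	double u = 2.*t*(t + 2.*r/3.);
	return W*(1. + r/t*(u - r)/(u - 2.*r)) - x;
}

// Reference: Newton iteration on f(v) = v + log(v) - x, whose positive
// root is v = W(exp(x)); returns v - x
static double reference_w_minus_x(double x)
{
	double v = x >= 1. ? x : std::exp(x); // W(exp(x)) ~ x for large x, ~ exp(x) for small x
	for (int i = 0; i < 50; i++)
		v -= (v + std::log(v) - x) / (1. + 1./v);
	return v - x;
}

int main()
{
	double max_err = 0;
	for (double x = -20; x <= 20; x += 0.01)
		max_err = std::fmax(max_err,
		                    std::fabs(approx_w_minus_x(x) - reference_w_minus_x(x)));
	std::printf("max abs error on [-20, 20]: %g\n", max_err); // should stay below 9e-5
	return 0;
}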