18 #include <shogun/lib/external/libqp.h>
20 using namespace shogun;
51 void CLinearTimeMMD::init()
70 bool multiple_kernels)
75 "features p required!\n",
get_name());
77 "features q required!\n",
get_name());
79 REQUIRE(
m_kernel,
"%s::compute_statistic_and_variance: kernel needed!\n",
84 "%s::compute_statistic_and_variance: multiple kernels specified,"
85 "but underlying kernel is not of type K_COMBINED\n",
get_name());
98 SG_DEBUG(
"computing MMD and variance for %d sub-kernels\n",
110 REQUIRE(statistic.
vlen==num_kernels,
"%s::compute_statistic_and_variance: "
111 "statistic vector size (%d) does not match number of kernels (%d)\n",
114 REQUIRE(variance.
vlen==num_kernels,
"%s::compute_statistic_and_variance: "
115 "variance vector size (%d) does not match number of kernels (%d)\n",
131 index_t num_examples_processed=0;
132 while (num_examples_processed<m_2)
137 SG_DEBUG(
"processing %d more examples. %d so far processed. Blocksize "
138 "is %d\n", num_this_run, num_examples_processed,
m_blocksize);
172 copy.
add(num_this_run);
174 copy.
add(num_this_run);
176 copy.
add(num_this_run);
195 if (multiple_kernels)
197 SG_DEBUG(
"using multiple kernels\n");
201 for (
index_t i=0; i<num_kernels; ++i)
204 if (multiple_kernels)
210 kernel->
init(p1, p2);
213 kernel->
init(q1, q2);
216 kernel->
init(p1, q2);
219 kernel->
init(q1, p2);
225 for (
index_t j=0; j<num_this_run; ++j)
228 current=pp[j]+qq[j]-pq[j]-qp[j];
231 delta=current-statistic[i];
232 statistic[i]+=delta/term_counters[i]++;
233 variance[i]+=delta*(current-statistic[i]);
235 SG_DEBUG(
"burst: current=%f, delta=%f, statistic=%f, "
236 "variance=%f, kernel_idx=%d\n", current, delta,
237 statistic[i], variance[i], i);
240 if (multiple_kernels)
253 num_examples_processed+=num_this_run;
255 SG_DEBUG(
"Done compouting statistic, processed 2*%d examples.\n",
256 num_examples_processed);
265 for (
index_t i=0; i<num_kernels; ++i)
266 variance[i]=variance[i]/(m_2-1)/m_2;
280 "features p required!\n",
get_name());
282 "features q required!\n",
get_name());
289 "%s::compute_statistic_and_Q: underlying kernel is not of "
296 REQUIRE(
m_m>=4,
"%s::compute_statistic_and_Q: Need at least m>=4\n",
303 index_t num_kernels=combined->get_num_subkernels();
304 REQUIRE(num_kernels>0,
"%s::compute_statistic_and_Q: At least one kernel "
315 REQUIRE(statistic.
vlen==num_kernels,
"%s::compute_statistic_and_variance: "
316 "statistic vector size (%d) does not match number of kernels (%d)\n",
319 REQUIRE(Q.
num_rows==num_kernels,
"%s::compute_statistic_and_variance: "
320 "Q number of rows does (%d) not match number of kernels (%d)\n",
323 REQUIRE(Q.
num_cols==num_kernels,
"%s::compute_statistic_and_variance: "
324 "Q number of columns (%d) does not match number of kernels (%d)\n",
335 for (
index_t k_idx=0; k_idx<combined->get_num_kernels(); k_idx++)
337 CKernel* kernel = combined->get_kernel(k_idx);
349 index_t num_examples_processed=0;
350 while (num_examples_processed<m_4)
355 SG_DEBUG(
"processing %d more examples. %d so far processed. Blocksize "
356 "is %d\n", num_this_run, num_examples_processed,
m_blocksize);
402 copy.
add(num_this_run);
404 copy.
add(num_this_run);
406 copy.
add(num_this_run);
408 copy.
add(num_this_run);
410 copy.
add(num_this_run);
412 copy.
add(num_this_run);
414 copy.
add(num_this_run);
448 for (
index_t i=0; i<num_kernels; ++i)
455 kernel_i->
init(p1a, p2a);
457 kernel_i->
init(q1a, q2a);
459 kernel_i->
init(p1a, q2a);
461 kernel_i->
init(q1a, p2a);
463 for (
index_t it=0; it<num_this_run; ++it)
464 h_i_a[it]=pp[it]+qq[it]-pq[it]-qp[it];
467 kernel_i->
init(p1b, p2b);
469 kernel_i->
init(q1b, q2b);
471 kernel_i->
init(p1b, q2b);
473 kernel_i->
init(q1b, p2b);
475 for (
index_t it=0; it<num_this_run; ++it)
476 h_i_b[it]=pp[it]+qq[it]-pq[it]-qp[it];
488 kernel_j->
init(p1a, p2a);
490 kernel_j->
init(q1a, q2a);
492 kernel_j->
init(p1a, q2a);
494 kernel_j->
init(q1a, p2a);
496 for (
index_t it=0; it<num_this_run; ++it)
497 h_j_a[it]=pp[it]+qq[it]-pq[it]-qp[it];
500 kernel_j->
init(p1b, p2b);
502 kernel_j->
init(q1b, q2b);
504 kernel_j->
init(p1b, q2b);
506 kernel_j->
init(q1b, p2b);
508 for (
index_t it=0; it<num_this_run; ++it)
509 h_j_b[it]=pp[it]+qq[it]-pq[it]-qp[it];
512 for (
index_t it=0; it<num_this_run; ++it)
515 term=(h_i_a[it]-h_i_b[it])*(h_j_a[it]-h_j_b[it]);
520 Q(i, j)+=(term-Q(i, j))/term_counters_Q(i, j)++;
533 for (
index_t it=0; it<num_this_run; ++it)
537 statistic[i]=statistic[i]+
538 (h_i_a[it]-statistic[i])/term_counters_statistic[i]++;
541 statistic[i]=statistic[i]+
542 (h_i_b[it]-statistic[i])/(term_counters_statistic[i]++);
560 num_examples_processed+=num_this_run;
567 SG_DEBUG(
"Done compouting statistic, processed 4*%d examples.\n",
568 num_examples_processed);
584 bool multiple_kernels)
588 "%s::compute_statistic: multiple kernels specified,"
589 "but underlying kernel is not of type K_COMBINED\n",
get_name());
711 SG_ERROR(
"%s::set_p_and_q(): Method not implemented since linear time mmd"
712 " is based on streaming features\n",
get_name());
717 SG_ERROR(
"%s::get_p_and_q(): Method not implemented since linear time mmd"
718 " is based on streaming features\n",
get_name());