22 #ifndef DOXYGEN_SHOULD_SKIP_THIS
23 struct HASHEDWD_THREAD_PARAM
36 #endif // DOXYGEN_SHOULD_SKIP_THIS
42 "CHashedWDFeaturesTransposed::CHashedWDFeaturesTransposed()",
64 int32_t start_order, int32_t order, int32_t from_order,
76 int32_t transposed_num_feat=0;
77 int32_t transposed_num_vec=0;
100 :
CDotFeatures(orig), strings(orig.strings), transposed_strings(orig.transposed_strings),
101 degree(orig.degree), start_degree(orig.start_degree),
102 from_degree(orig.from_degree), m_hash_bits(orig.m_hash_bits),
103 normalization_const(orig.normalization_const)
133 bool free_vec1, free_vec2;
136 uint8_t* vec2=wdf->strings->get_feature_vector(vec_idx2, len2, free_vec2);
142 for (int32_t i=0; i<len1; i++)
144 for (int32_t j=0; (i+j<len1) && (j<
degree); j++)
146 if (vec1[i+j]!=vec2[i+j])
153 wdf->strings->free_feature_vector(vec2, vec_idx2, free_vec2);
159 if (vec2_len !=
w_dim)
160 SG_ERROR(
"Dimensions don't match, vec2_dim=%d, w_dim=%d\n", vec2_len,
w_dim)
166 uint32_t* val=SG_MALLOC(uint32_t, len);
172 for (int32_t i=0; i < len; i++)
178 for (int32_t k=0; k<
degree && i+k<len; k++)
185 #ifdef DEBUG_HASHEDWD
186 SG_PRINT(
"vec[i]=%d, k=%d, offs=%d o=%d h=%d \n", vec[i], k,offs, o, h)
188 sum+=vec2[o+(h &
mask)]*wd;
209 uint32_t* index=SG_MALLOC(uint32_t, stop);
211 int32_t num_vectors=stop-start;
220 SG_ERROR(
"Dimensions don't match, vec_len=%d, w_dim=%d\n", dim,
w_dim)
226 HASHEDWD_THREAD_PARAM params;
228 params.sub_index=NULL;
229 params.output=output;
232 params.alphas=alphas;
235 params.progress=
false;
242 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
243 HASHEDWD_THREAD_PARAM* params = SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads);
244 int32_t step= num_vectors/num_threads;
248 for (t=0; t<num_threads-1; t++)
251 params[t].sub_index=NULL;
252 params[t].output = output;
253 params[t].start = start+t*step;
254 params[t].stop = start+(t+1)*step;
255 params[t].alphas=alphas;
258 params[t].progress =
false;
259 params[t].index=index;
260 pthread_create(&threads[t], NULL,
265 params[t].sub_index=NULL;
266 params[t].output = output;
267 params[t].start = start+t*step;
268 params[t].stop = stop;
269 params[t].alphas=alphas;
272 params[t].progress =
false;
273 params[t].index=index;
276 for (t=0; t<num_threads-1; t++)
277 pthread_join(threads[t], NULL);
287 SG_INFO(
"prematurely stopped. \n")
296 uint32_t* index=SG_MALLOC(uint32_t, num);
304 SG_ERROR(
"Dimensions don't match, vec_len=%d, w_dim=%d\n", dim,
w_dim)
310 HASHEDWD_THREAD_PARAM params;
312 params.sub_index=sub_index;
313 params.output=output;
316 params.alphas=alphas;
319 params.progress=
false;
326 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
327 HASHEDWD_THREAD_PARAM* params = SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads);
328 int32_t step= num/num_threads;
332 for (t=0; t<num_threads-1; t++)
335 params[t].sub_index=sub_index;
336 params[t].output = output;
337 params[t].start = t*step;
338 params[t].stop = (t+1)*step;
339 params[t].alphas=alphas;
342 params[t].progress =
false;
343 params[t].index=index;
344 pthread_create(&threads[t], NULL,
349 params[t].sub_index=sub_index;
350 params[t].output = output;
351 params[t].start = t*step;
352 params[t].stop = num;
353 params[t].alphas=alphas;
356 params[t].progress =
false;
357 params[t].index=index;
360 for (t=0; t<num_threads-1; t++)
361 pthread_join(threads[t], NULL);
371 SG_INFO(
"prematurely stopped. \n")
377 HASHEDWD_THREAD_PARAM* par=(HASHEDWD_THREAD_PARAM*) p;
379 int32_t* sub_index=par->sub_index;
381 int32_t start=par->start;
382 int32_t stop=par->stop;
386 bool progress=par->progress;
387 uint32_t* index=par->index;
398 for (int32_t j=start; j<stop; j++)
408 uint8_t* dim=transposed_strings[i+k].
string;
411 for (int32_t j=start; j<stop; j++)
413 uint8_t bval=dim[sub_index[j]];
415 index[j] = 0xDEADBEAF;
422 index[j], carry, chunk);
424 output[j]+=vec[o + (h &
mask)]*wd;
430 index[stop-1], carry, chunk);
434 offs+=partial_w_dim*
degree;
440 for (int32_t j=start; j<stop; j++)
443 output[j]=output[j]*alphas[sub_index[j]]/normalization_const+bias;
445 output[j]=output[j]/normalization_const+bias;
459 uint8_t* dim=transposed_strings[i+k].
string;
463 for (int32_t j=start; j<stop; j++)
465 uint8_t bval=dim[sub_index[j]];
467 index[j] = 0xDEADBEAF;
474 index[j], carry, chunk);
477 output[j]+=vec[o + (h &
mask)]*wd;
481 index[stop-1], carry, chunk);
485 offs+=partial_w_dim*
degree;
491 for (int32_t j=start; j<stop; j++)
494 output[j]=output[j]*alphas[j]/normalization_const+bias;
496 output[j]=output[j]/normalization_const+bias;
505 if (vec2_len !=
w_dim)
506 SG_ERROR(
"Dimensions don't match, vec2_dim=%d, w_dim=%d\n", vec2_len,
w_dim)
511 uint32_t* val=SG_MALLOC(uint32_t, len);
520 for (int32_t i=0; i<len; i++)
526 for (int32_t k=0; k<
degree && i+k<len; k++)
533 #ifdef DEBUG_HASHEDWD
534 SG_PRINT(
"offs=%d o=%d h=%d \n", offs, o, h)
535 SG_PRINT(
"vec[i]=%d, k=%d, offs=%d o=%d\n", vec[i], k,offs, o)
537 vec2[o+(h &
mask)]+=wd;
560 for (int32_t i=0; i<
degree; i++)
563 SG_DEBUG(
"created HashedWDFeaturesTransposed with d=%d (%d), alphabetsize=%d, "
564 "dim=%d partial_dim=%d num=%d, len=%d\n",
575 for (int32_t i=0; i<
degree; i++)
virtual int32_t get_max_vector_length()
SGVector< ST > get_feature_vector(int32_t num)
CHashedWDFeaturesTransposed()
static void fill_vector(T *vec, int32_t len, T value)
virtual void free_feature_iterator(void *iterator)
void set_normalization_const(float64_t n=0)
int32_t get_num_threads() const
float64_t normalization_const
virtual int32_t get_num_vectors() const
virtual int32_t get_num_vectors() const
#define SG_NOTIMPLEMENTED
The class Alphabet implements an alphabet and alphabet utility functions.
static uint32_t FinalizeIncrementalMurmurHash3(uint32_t h, uint32_t carry, uint32_t total_length)
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
Features that support dot products among other operations.
virtual void * get_feature_iterator(int32_t vector_index)
CStringFeatures< ST > * get_transposed()
static void clear_cancel()
virtual EFeatureClass get_feature_class() const
int32_t get_num_symbols() const
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
virtual EFeatureClass get_feature_class() const =0
CAlphabet * get_alphabet()
static bool cancel_computations()
bool have_same_length(int32_t len=-1)
static void IncrementalMurmurHash3(uint32_t *hash, uint32_t *carry, uint8_t *data, int32_t len)
All classes and functions are contained in the shogun namespace.
CStringFeatures< uint8_t > * strings
virtual bool get_next_feature(int32_t &index, float64_t &value, void *iterator)
SGString< uint8_t > * transposed_strings
The class Features is the base class of all feature objects.
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)
static void * dense_dot_range_helper(void *p)
void progress(float64_t current_val, float64_t min_val=0.0, float64_t max_val=1.0, int32_t decimals=1, const char *prefix="PROGRESS:\t")
Features that compute the Weighted Degree Kernel feature space explicitly.
virtual EFeatureType get_feature_type() const
virtual void dense_dot_range_subset(int32_t *sub_index, int32_t num, float64_t *output, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b)
virtual ~CHashedWDFeaturesTransposed()
static float32_t sqrt(float32_t x)
#define SG_UNSTABLE(func,...)
virtual CFeatures * duplicate() const
virtual EFeatureType get_feature_type() const =0
virtual void dense_dot_range(float64_t *output, int32_t start, int32_t stop, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b)