24 if ( (n_grams==1 && skips!=0) || (skips<0))
27 init(hash_bits, docs, tzer, normalize, n_grams, skips);
43 CTokenizer* tzer,
bool normalize, int32_t n_grams, int32_t skips)
59 SG_ADD(&
ngrams,
"ngrams",
"Number of tokens to combine for quadratic feature support",
93 SGVector<char> sv2 = hddf->doc_collection->get_feature_vector(vec_idx2);
102 hddf->doc_collection->free_feature_vector(sv2, vec_idx2);
124 const int32_t seed = 0xdeadbeaf;
128 while (n<ngrams-1+tokens_to_skip && local_tzer->has_next())
171 float64_t* vec2, int32_t vec2_len,
bool abs_val)
186 const int32_t seed = 0xdeadbeaf;
187 local_tzer->set_text(sv);
190 while (n<ngrams-1+tokens_to_skip && local_tzer->has_next())
192 index_t end = local_tzer->next_token_idx(start);
194 hashes->append_element(token_hash);
198 while (local_tzer->has_next())
200 index_t end = local_tzer->next_token_idx(start);
202 hashes->append_element(token_hash);
209 hashes->delete_element(0);
214 while (hashes->get_num_elements()>0)
222 hashes->delete_element(0);
233 int32_t length, int32_t num_bits, uint32_t seed)
236 return hash & ((1 <<
num_bits) - 1);
248 int32_t num_nnz_features = sv.
size();
250 return num_nnz_features;
272 return "HashedDocDotFeatures";