10 #ifndef __BITSTRING_H__
11 #define __BITSTRING_H__
56 word_len = width*nbits;
59 for (int32_t j=0; j<word_len; j++)
60 mask=(mask<<1) | (uint64_t) 1;
61 mask<<=
sizeof(uint64_t)*8-word_len;
64 for (int32_t j=0; j<nbits; j++)
65 mask=(mask<<1) | (uint64_t) 1;
91 uint64_t stream_len=len/
sizeof(uint64_t)+1;
92 string=SG_MALLOC(uint64_t, stream_len);
98 uint64_t nfit=8*
sizeof(w)/nbits;
99 for (uint64_t i=0; i<len; i++)
101 w= (w << nbits) | alphabet->
remap_to_bin((uint8_t) str[j]);
103 if (i % nfit == nfit-1)
137 if (!id_len ||
id[0]!=
'>')
138 SG_SERROR(
"No fasta hunks (lines starting with '>') found\n")
145 int32_t spanned_lines=0;
156 SG_SERROR(
"Error reading fasta entry in line %d len=%ld", spanned_lines+1, len)
159 SG_SERROR(
"Multiple fasta hunks (lines starting with '>') are not supported!\n")
168 uint64_t nfit=8*
sizeof(w)/nbits;
170 len = fasta_len-spanned_lines;
171 uint64_t stream_len=len/(nfit)+1;
172 string=SG_MALLOC(uint64_t, stream_len);
178 for (int32_t j=0; j<fasta_len; j++, k++)
186 w= (w << nbits) | alphabet->
remap_to_bin((uint8_t) fasta[j]);
188 if (k % nfit == nfit-1)
197 string[idx]=w<<(nbits*(nfit - k%nfit));
226 uint64_t stream_len=len/
sizeof(uint64_t)+1;
227 string=SG_MALLOC(uint64_t, stream_len);
270 int32_t ws=8*
sizeof(uint64_t);
271 uint64_t i=bitindex/ws;
272 int32_t j=bitindex % ws;
273 int32_t missing=word_len-(ws-j);
276 uint64_t res= ((
string[i] << j) & mask ) >> (ws-word_len);
279 res|= (
string[i+1] >> (ws-missing) );
296 int32_t ws=8*
sizeof(uint64_t);
297 uint64_t i=bitindex/ws;
298 int32_t j=bitindex % ws;
299 int32_t missing=word_len-(ws-j);
301 uint64_t sl = j-word_len;
319 string[i] = (
string[i] & (~ml) ) | ( wl & ml);
325 string[i+1] = (
string[i+1] & (~mr) ) | ( wr & mr);
/** @return the object name, always the string literal "BitString" */
334 virtual const char*
get_name()
const {
return "BitString"; }
348 uint64_t single_mask;
351 #endif //__BITSTRING_H__
static void fill_vector(T *vec, int32_t len, T value)
a string class embedding a string in a compact bit representation
EAlphabet
Alphabet of charfeatures/observations.
uint64_t get_length() const
The class Alphabet implements an alphabet and alphabet utility functions.
uint8_t remap_to_bin(uint8_t c)
void set_binary_word(uint16_t word, uint64_t index)
virtual const char * get_name() const
char * get_line(uint64_t &len, uint64_t &offs)
Class SGObject is the base class of all shogun objects.
void load_fasta_file(const char *fname, bool ignore_invalid=false)
int32_t get_num_bits() const
void set_string(uint64_t *str, uint64_t len)
all classes and functions are contained in the shogun namespace
void obtain_from_char(char *str, uint64_t len)
void create(uint64_t len)
#define SG_UNSTABLE(func,...)
uint64_t operator[](uint64_t index) const
CBitString(EAlphabet alpha, int32_t width=1)