29 "DNA",
"RAWDNA",
"RNA",
"PROTEIN",
"BINARY",
"ALPHANUM",
30 "CUBE",
"RAW",
"IUPAC_NUCLEIC_ACID",
"IUPAC_AMINO_ACID",
31 "NONE",
"DIGIT",
"DIGIT2",
"RAWDIGIT",
"RAWDIGIT2",
"UNKNOWN",
48 if (len>=(int32_t) strlen(
"DNA") && !strncmp(al,
"DNA", strlen(
"DNA")))
50 else if (len>=(int32_t) strlen(
"RAWDNA") && !strncmp(al,
"RAWDNA", strlen(
"RAWDNA")))
52 else if (len>=(int32_t) strlen(
"RNA") && !strncmp(al,
"RNA", strlen(
"RNA")))
54 else if (len>=(int32_t) strlen(
"PROTEIN") && !strncmp(al,
"PROTEIN", strlen(
"PROTEIN")))
// Match the "BINARY" name by comparing exactly strlen("BINARY") characters,
// like the sibling branches do for their own literals. The previous length,
// strlen("IBINARY"), was one too large: strncmp then also compared al[6]
// against the terminating NUL of "BINARY", so this was not a prefix match and
// could read one byte past len when len equals strlen("BINARY").
56 else if (len>=(int32_t) strlen(
"BINARY") && !strncmp(al,
"BINARY", strlen(
"BINARY")))
58 else if (len>=(int32_t) strlen(
"ALPHANUM") && !strncmp(al,
"ALPHANUM", strlen(
"ALPHANUM")))
60 else if (len>=(int32_t) strlen(
"CUBE") && !strncmp(al,
"CUBE", strlen(
"CUBE")))
62 else if (len>=(int32_t) strlen(
"DIGIT2") && !strncmp(al,
"DIGIT2", strlen(
"DIGIT2")))
64 else if (len>=(int32_t) strlen(
"DIGIT") && !strncmp(al,
"DIGIT", strlen(
"DIGIT")))
66 else if (len>=(int32_t) strlen(
"RAWDIGIT2") && !strncmp(al,
"RAWDIGIT2", strlen(
"RAWDIGIT2")))
68 else if (len>=(int32_t) strlen(
"RAWDIGIT") && !strncmp(al,
"RAWDIGIT", strlen(
"RAWDIGIT")))
70 else if (len>=(int32_t) strlen(
"SNP") && !strncmp(al,
"SNP", strlen(
"SNP")))
72 else if (len>=(int32_t) strlen(
"RAWSNP") && !strncmp(al,
"RAWSNP", strlen(
"RAWSNP")))
74 else if ((len>=(int32_t) strlen(
"BYTE") && !strncmp(al,
"BYTE", strlen(
"BYTE"))) ||
75 (len>=(int32_t) strlen(
"RAW") && !strncmp(al,
"RAW", strlen(
"RAW"))))
77 else if (len>=(int32_t) strlen(
"IUPAC_NUCLEIC_ACID") && !strncmp(al,
"IUPAC_NUCLEIC_ACID", strlen(
"IUPAC_NUCLEIC_ACID")))
79 else if (len>=(int32_t) strlen(
"IUPAC_AMINO_ACID") && !strncmp(al,
"IUPAC_AMINO_ACID", strlen(
"IUPAC_AMINO_ACID")))
82 SG_ERROR(
"unknown alphabet %s\n", al)
99 REQUIRE(a,
"No Alphabet specified!\n");
181 for (int32_t i=0; i<(1<<(8*
sizeof(uint8_t))); i++)
191 for (uint8_t i=0; i<=9; i++)
200 for (uint8_t i=0; i<=2; i++)
252 maptable_to_char[(uint8_t) 0]=
'0';
253 maptable_to_char[(uint8_t) 1]=
'1';
254 maptable_to_char[(uint8_t) 2]=
'2';
272 maptable_to_char[(uint8_t) 0]=
'1';
273 maptable_to_char[(uint8_t) 1]=
'2';
274 maptable_to_char[(uint8_t) 2]=
'3';
275 maptable_to_char[(uint8_t) 3]=
'4';
276 maptable_to_char[(uint8_t) 4]=
'5';
277 maptable_to_char[(uint8_t) 5]=
'6';
283 for (int32_t i=0; i<21; i++)
291 maptable_to_char[i]=
'A'+i+skip ;
303 maptable_to_char[0]=(uint8_t)
'0';
304 maptable_to_char[1]=(uint8_t)
'1';
309 for (int32_t i=0; i<26; i++)
313 maptable_to_char[i]=
'A'+i ;
315 for (int32_t i=0; i<10; i++)
319 maptable_to_char[26+i]=
'0'+i ;
327 for (int32_t i=0; i<256; i++)
331 maptable_to_char[i]=i;
347 maptable_to_char[
B_A]=
'A';
348 maptable_to_char[
B_C]=
'C';
349 maptable_to_char[
B_G]=
'G';
350 maptable_to_char[
B_T]=
'T';
355 for (int32_t i=0; i<4; i++)
359 maptable_to_char[i]=i;
377 maptable_to_char[
B_A]=
'A';
378 maptable_to_char[
B_C]=
'C';
379 maptable_to_char[
B_G]=
'G';
380 maptable_to_char[
B_T]=
'T';
381 maptable_to_char[
B_0]=
'0';
386 for (int32_t i=0; i<5; i++)
390 maptable_to_char[i]=i;
406 maptable_to_char[
B_A]=
'A';
407 maptable_to_char[
B_C]=
'C';
408 maptable_to_char[
B_G]=
'G';
409 maptable_to_char[
B_T]=
'U';
447 maptable_to_char[0]=(uint8_t)
'A';
448 maptable_to_char[1]=(uint8_t)
'C';
449 maptable_to_char[2]=(uint8_t)
'G';
450 maptable_to_char[3]=(uint8_t)
'T';
451 maptable_to_char[4]=(uint8_t)
'U';
452 maptable_to_char[5]=(uint8_t)
'R';
453 maptable_to_char[6]=(uint8_t)
'Y';
454 maptable_to_char[7]=(uint8_t)
'M';
455 maptable_to_char[8]=(uint8_t)
'K';
456 maptable_to_char[9]=(uint8_t)
'W';
457 maptable_to_char[10]=(uint8_t)
'S';
458 maptable_to_char[11]=(uint8_t)
'B';
459 maptable_to_char[12]=(uint8_t)
'D';
460 maptable_to_char[13]=(uint8_t)
'H';
461 maptable_to_char[14]=(uint8_t)
'V';
462 maptable_to_char[15]=(uint8_t)
'N';
514 maptable_to_char[0]=(uint8_t)
'A';
515 maptable_to_char[1]=(uint8_t)
'R';
516 maptable_to_char[2]=(uint8_t)
'N';
517 maptable_to_char[3]=(uint8_t)
'D';
518 maptable_to_char[4]=(uint8_t)
'C';
519 maptable_to_char[5]=(uint8_t)
'Q';
520 maptable_to_char[6]=(uint8_t)
'E';
521 maptable_to_char[7]=(uint8_t)
'G';
522 maptable_to_char[8]=(uint8_t)
'H';
523 maptable_to_char[9]=(uint8_t)
'I';
524 maptable_to_char[10]=(uint8_t)
'L';
525 maptable_to_char[11]=(uint8_t)
'K';
526 maptable_to_char[12]=(uint8_t)
'M';
527 maptable_to_char[13]=(uint8_t)
'F';
528 maptable_to_char[14]=(uint8_t)
'P';
529 maptable_to_char[15]=(uint8_t)
'S';
530 maptable_to_char[16]=(uint8_t)
'T';
531 maptable_to_char[17]=(uint8_t)
'W';
532 maptable_to_char[18]=(uint8_t)
'Y';
533 maptable_to_char[19]=(uint8_t)
'V';
534 maptable_to_char[20]=(uint8_t)
'B';
535 maptable_to_char[21]=(uint8_t)
'Z';
536 maptable_to_char[22]=(uint8_t)
'X';
553 for (int32_t i=(int32_t) (1 <<(
sizeof(uint8_t)*8))-1;i>=0; i--)
568 for (int32_t i=0; i<(int32_t) (1 <<(
sizeof(uint8_t)*8)); i++)
589 for (int32_t i=0; i<(int32_t) (1 <<(
sizeof(uint8_t)*8)); i++)
605 SG_PRINT(
" - Character not in Alphabet.\n")
621 for (int32_t i=0; i<(int32_t) (1 <<(
sizeof(uint8_t)*8)); i++)
630 if (!result && print_error)
633 SG_ERROR(
"ALPHABET does not contain all symbols in histogram\n")
647 SG_ERROR(
"ALPHABET too small to contain all symbols in histogram\n")
662 SG_ERROR(
"Histogram has %d elements, but %d elements where expected\n",
721 void CAlphabet::init()
736 "Number of symbols.");
765 for (i=sequence_length-1; i>= p_order-1; i--)
768 for (j=i; j>=i-p_order+1; j--)
769 value= (value >> max_val) | (obs[j] << (max_val * (p_order-1)));
774 for (i=p_order-2;i>=0;i--)
776 if (i>=sequence_length)
780 for (j=i; j>=i-p_order+1; j--)
782 value= (value >> max_val);
783 if (j>=0 && j<sequence_length)
784 value|=obs[j] << (max_val * (p_order-1));
792 for (i=start; i<sequence_length; i++)
803 for (i=sequence_length-1; i>= p_order-1; i--)
806 for (j=i; j>=i-p_order+1; j--)
807 value= (value << max_val) | obs[j];
812 for (i=p_order-2;i>=0;i--)
814 if (i>=sequence_length)
818 for (j=i; j>=i-p_order+1; j--)
820 value= (value << max_val);
821 if (j>=0 && j<sequence_length)
830 for (i=start; i<sequence_length; i++)
840 const int32_t start_gap=(p_order-gap)/2;
841 const int32_t end_gap=start_gap+gap;
847 for (i=sequence_length-1; i>=p_order-1; i--)
850 for (j=i; j>=i-p_order+1; j--)
854 value= (value >> max_val) | (obs[j] << (max_val * (p_order-1-gap)));
856 else if (i-j>=end_gap)
858 value= (value >> max_val) | (obs[j] << (max_val * (p_order-1-gap)));
865 for (i=p_order-2;i>=0;i--)
867 if (i>=sequence_length)
871 for (j=i; j>=i-p_order+1; j--)
875 value= (value >> max_val);
876 if (j>=0 && j<sequence_length)
877 value|=obs[j] << (max_val * (p_order-1-gap));
879 else if (i-j>=end_gap)
881 value= (value >> max_val);
882 if (j>=0 && j<sequence_length)
883 value|=obs[j] << (max_val * (p_order-1-gap));
892 for (i=start; i<sequence_length; i++)
902 const int32_t start_gap=(p_order-gap)/2;
903 const int32_t end_gap=start_gap+gap;
909 for (i=sequence_length-1; i>=p_order-1; i--)
912 for (j=i; j>=i-p_order+1; j--)
915 value= (value << max_val) | obs[j];
916 else if (i-j>=end_gap)
917 value= (value << max_val) | obs[j];
923 for (i=p_order-2;i>=0;i--)
925 if (i>=sequence_length)
929 for (j=i; j>=i-p_order+1; j--)
933 value= value << max_val;
934 if (j>=0 && j<sequence_length)
937 else if (i-j>=end_gap)
939 value= value << max_val;
940 if (j>=0 && j<sequence_length)
950 for (i=start; i<sequence_length; i++)
// Explicit instantiations of CAlphabet::translate_from_single_order, in the
// form taking (obs, sequence_length, start, p_order, max_val), for bool, char
// and the signed/unsigned 8-, 16-, 32- and 64-bit integer types.
979 template void CAlphabet::translate_from_single_order<bool>(
bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
980 template void CAlphabet::translate_from_single_order<char>(
char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
981 template void CAlphabet::translate_from_single_order<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
982 template void CAlphabet::translate_from_single_order<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
983 template void CAlphabet::translate_from_single_order<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
984 template void CAlphabet::translate_from_single_order<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
985 template void CAlphabet::translate_from_single_order<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
986 template void CAlphabet::translate_from_single_order<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
987 template void CAlphabet::translate_from_single_order<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
988 template void CAlphabet::translate_from_single_order<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
// Explicit instantiations of CAlphabet::translate_from_single_order, in the
// form that takes an additional trailing int32_t gap argument, for bool, char
// and the signed/unsigned 8-, 16-, 32- and 64-bit integer types.
990 template void CAlphabet::translate_from_single_order<bool>(
bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
991 template void CAlphabet::translate_from_single_order<char>(
char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
992 template void CAlphabet::translate_from_single_order<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
993 template void CAlphabet::translate_from_single_order<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
994 template void CAlphabet::translate_from_single_order<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
995 template void CAlphabet::translate_from_single_order<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
996 template void CAlphabet::translate_from_single_order<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
997 template void CAlphabet::translate_from_single_order<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
998 template void CAlphabet::translate_from_single_order<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
999 template void CAlphabet::translate_from_single_order<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
// Explicit instantiations of CAlphabet::translate_from_single_order_reversed,
// in the form taking (obs, sequence_length, start, p_order, max_val), for
// bool, char and the signed/unsigned 8-, 16-, 32- and 64-bit integer types.
1001 template void CAlphabet::translate_from_single_order_reversed<bool>(
bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
1002 template void CAlphabet::translate_from_single_order_reversed<char>(
char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
1003 template void CAlphabet::translate_from_single_order_reversed<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
1004 template void CAlphabet::translate_from_single_order_reversed<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
1005 template void CAlphabet::translate_from_single_order_reversed<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
1006 template void CAlphabet::translate_from_single_order_reversed<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
1007 template void CAlphabet::translate_from_single_order_reversed<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
1008 template void CAlphabet::translate_from_single_order_reversed<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
1009 template void CAlphabet::translate_from_single_order_reversed<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
1010 template void CAlphabet::translate_from_single_order_reversed<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
// Explicit instantiations of CAlphabet::translate_from_single_order_reversed,
// in the form that takes an additional trailing int32_t gap argument, for
// bool, char and the signed/unsigned 8-, 16-, 32- and 64-bit integer types.
// This group also instantiates float32_t, float64_t and floatmax_t.
// NOTE(review): no floating-point instantiations appear in the other three
// groups visible here — confirm whether that asymmetry is intentional.
1012 template void CAlphabet::translate_from_single_order_reversed<bool>(
bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1013 template void CAlphabet::translate_from_single_order_reversed<char>(
char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1014 template void CAlphabet::translate_from_single_order_reversed<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1015 template void CAlphabet::translate_from_single_order_reversed<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1016 template void CAlphabet::translate_from_single_order_reversed<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1017 template void CAlphabet::translate_from_single_order_reversed<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1018 template void CAlphabet::translate_from_single_order_reversed<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1019 template void CAlphabet::translate_from_single_order_reversed<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1020 template void CAlphabet::translate_from_single_order_reversed<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1021 template void CAlphabet::translate_from_single_order_reversed<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1022 template void CAlphabet::translate_from_single_order_reversed<float32_t>(
float32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1023 template void CAlphabet::translate_from_single_order_reversed<float64_t>(
float64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1024 template void CAlphabet::translate_from_single_order_reversed<floatmax_t>(
floatmax_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
bool valid_chars[1<< (sizeof(uint8_t)*8)]
int32_t get_num_symbols_in_histogram()
int32_t get_num_bits_in_histogram()
static const char * get_alphabet_name(EAlphabet alphabet)
RAWDNA - letters 0,1,2,3.
void copy_histogram(CAlphabet *src)
uint8_t maptable_to_bin[1<< (sizeof(uint8_t)*8)]
EAlphabet
Alphabet of charfeatures/observations.
int32_t get_max_value_in_histogram()
bool check_alphabet_size(bool print_error=true)
Class ShogunException defines an exception which is thrown whenever an error inside of shogun occurs...
void print_histogram()
print histogram
EAlphabet get_alphabet() const
static void translate_from_single_order(ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
The class Alphabet implements an alphabet and alphabet utility functions.
static const uint8_t MAPTABLE_UNDEF
static const char * alphabet_names[18]
virtual void load_serializable_post()
static void translate_from_single_order_reversed(ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
void add(bool *param, const char *name, const char *description="")
Class SGObject is the base class of all shogun objects.
RAWSNP - letters 0,1,2,3,4.
uint8_t maptable_to_char[1<< (sizeof(uint8_t)*8)]
bool set_alphabet(EAlphabet alpha)
NONE - type has no alphabet.
void clear_histogram()
clear histogram
bool check_alphabet(bool print_error=true)
SGVector< int64_t > get_histogram()
int32_t get_num_bits() const
virtual void load_serializable_post()
all classes and functions are contained in the shogun namespace
int64_t histogram[1<< (sizeof(uint8_t)*8)]