18 using namespace shogun;
28 "DNA",
"RAWDNA",
"RNA",
"PROTEIN",
"BINARY",
"ALPHANUM",
29 "CUBE",
"RAW",
"IUPAC_NUCLEIC_ACID",
"IUPAC_AMINO_ACID",
30 "NONE",
"DIGIT",
"DIGIT2",
"RAWDIGIT",
"RAWDIGIT2",
"UNKNOWN",
// Match the alphabet name in `al` against each known name. Every branch is a
// prefix test: `len` must be at least the literal's length and the first
// strlen(literal) characters of `al` must equal the literal. The first branch
// that matches wins, so a name that is a prefix of another name has to be
// tested after the longer one.
47 if (len>=(int32_t) strlen(
"DNA") && !strncmp(al,
"DNA", strlen(
"DNA")))
49 else if (len>=(int32_t) strlen(
"RAWDNA") && !strncmp(al,
"RAWDNA", strlen(
"RAWDNA")))
51 else if (len>=(int32_t) strlen(
"RNA") && !strncmp(al,
"RNA", strlen(
"RNA")))
53 else if (len>=(int32_t) strlen(
"PROTEIN") && !strncmp(al,
"PROTEIN", strlen(
"PROTEIN")))
// The strncmp count must be the length of the literal being compared. A larger
// count would also compare the literal's terminating NUL against al[6], which
// reads past the `len` characters guaranteed by the guard and turns this one
// branch into an exact-match test instead of a prefix test.
55 else if (len>=(int32_t) strlen(
"BINARY") && !strncmp(al,
"BINARY", strlen(
"BINARY")))
57 else if (len>=(int32_t) strlen(
"ALPHANUM") && !strncmp(al,
"ALPHANUM", strlen(
"ALPHANUM")))
59 else if (len>=(int32_t) strlen(
"CUBE") && !strncmp(al,
"CUBE", strlen(
"CUBE")))
// "DIGIT" is a prefix of "DIGIT2", so the longer name is checked first.
61 else if (len>=(int32_t) strlen(
"DIGIT2") && !strncmp(al,
"DIGIT2", strlen(
"DIGIT2")))
63 else if (len>=(int32_t) strlen(
"DIGIT") && !strncmp(al,
"DIGIT", strlen(
"DIGIT")))
// Same ordering rule: "RAWDIGIT2" before its prefix "RAWDIGIT".
65 else if (len>=(int32_t) strlen(
"RAWDIGIT2") && !strncmp(al,
"RAWDIGIT2", strlen(
"RAWDIGIT2")))
67 else if (len>=(int32_t) strlen(
"RAWDIGIT") && !strncmp(al,
"RAWDIGIT", strlen(
"RAWDIGIT")))
69 else if (len>=(int32_t) strlen(
"SNP") && !strncmp(al,
"SNP", strlen(
"SNP")))
71 else if (len>=(int32_t) strlen(
"RAWSNP") && !strncmp(al,
"RAWSNP", strlen(
"RAWSNP")))
// "BYTE" and "RAW" share one branch. "RAW" is a prefix of RAWDNA, RAWDIGIT,
// RAWDIGIT2 and RAWSNP, so it is only tested after all of those.
73 else if ((len>=(int32_t) strlen(
"BYTE") && !strncmp(al,
"BYTE", strlen(
"BYTE"))) ||
74 (len>=(int32_t) strlen(
"RAW") && !strncmp(al,
"RAW", strlen(
"RAW"))))
76 else if (len>=(int32_t) strlen(
"IUPAC_NUCLEIC_ACID") && !strncmp(al,
"IUPAC_NUCLEIC_ACID", strlen(
"IUPAC_NUCLEIC_ACID")))
78 else if (len>=(int32_t) strlen(
"IUPAC_AMINO_ACID") && !strncmp(al,
"IUPAC_AMINO_ACID", strlen(
"IUPAC_AMINO_ACID")))
// NOTE(review): presumably the fall-through branch when no name matched; the
// enclosing `else` is not visible here. The %s format expects `al` to be
// NUL-terminated -- confirm against callers that pass an explicit `len`.
81 SG_ERROR(
"unknown alphabet %s\n", al);
180 for (int32_t i=0; i<(1<<(8*
sizeof(uint8_t))); i++)
190 for (uint8_t i=0; i<=9; i++)
199 for (uint8_t i=0; i<=2; i++)
251 maptable_to_char[(uint8_t) 0]=
'0';
252 maptable_to_char[(uint8_t) 1]=
'1';
253 maptable_to_char[(uint8_t) 2]=
'2';
271 maptable_to_char[(uint8_t) 0]=
'1';
272 maptable_to_char[(uint8_t) 1]=
'2';
273 maptable_to_char[(uint8_t) 2]=
'3';
274 maptable_to_char[(uint8_t) 3]=
'4';
275 maptable_to_char[(uint8_t) 4]=
'5';
276 maptable_to_char[(uint8_t) 5]=
'6';
282 for (int32_t i=0; i<21; i++)
290 maptable_to_char[i]=
'A'+i+skip ;
302 maptable_to_char[0]=(uint8_t)
'0';
303 maptable_to_char[1]=(uint8_t)
'1';
308 for (int32_t i=0; i<26; i++)
312 maptable_to_char[i]=
'A'+i ;
314 for (int32_t i=0; i<10; i++)
318 maptable_to_char[26+i]=
'0'+i ;
326 for (int32_t i=0; i<256; i++)
330 maptable_to_char[i]=i;
346 maptable_to_char[
B_A]=
'A';
347 maptable_to_char[
B_C]=
'C';
348 maptable_to_char[
B_G]=
'G';
349 maptable_to_char[
B_T]=
'T';
354 for (int32_t i=0; i<4; i++)
358 maptable_to_char[i]=i;
376 maptable_to_char[
B_A]=
'A';
377 maptable_to_char[
B_C]=
'C';
378 maptable_to_char[
B_G]=
'G';
379 maptable_to_char[
B_T]=
'T';
380 maptable_to_char[
B_0]=
'0';
385 for (int32_t i=0; i<5; i++)
389 maptable_to_char[i]=i;
405 maptable_to_char[
B_A]=
'A';
406 maptable_to_char[
B_C]=
'C';
407 maptable_to_char[
B_G]=
'G';
408 maptable_to_char[
B_T]=
'U';
446 maptable_to_char[0]=(uint8_t)
'A';
447 maptable_to_char[1]=(uint8_t)
'C';
448 maptable_to_char[2]=(uint8_t)
'G';
449 maptable_to_char[3]=(uint8_t)
'T';
450 maptable_to_char[4]=(uint8_t)
'U';
451 maptable_to_char[5]=(uint8_t)
'R';
452 maptable_to_char[6]=(uint8_t)
'Y';
453 maptable_to_char[7]=(uint8_t)
'M';
454 maptable_to_char[8]=(uint8_t)
'K';
455 maptable_to_char[9]=(uint8_t)
'W';
456 maptable_to_char[10]=(uint8_t)
'S';
457 maptable_to_char[11]=(uint8_t)
'B';
458 maptable_to_char[12]=(uint8_t)
'D';
459 maptable_to_char[13]=(uint8_t)
'H';
460 maptable_to_char[14]=(uint8_t)
'V';
461 maptable_to_char[15]=(uint8_t)
'N';
513 maptable_to_char[0]=(uint8_t)
'A';
514 maptable_to_char[1]=(uint8_t)
'R';
515 maptable_to_char[2]=(uint8_t)
'N';
516 maptable_to_char[3]=(uint8_t)
'D';
517 maptable_to_char[4]=(uint8_t)
'C';
518 maptable_to_char[5]=(uint8_t)
'Q';
519 maptable_to_char[6]=(uint8_t)
'E';
520 maptable_to_char[7]=(uint8_t)
'G';
521 maptable_to_char[8]=(uint8_t)
'H';
522 maptable_to_char[9]=(uint8_t)
'I';
523 maptable_to_char[10]=(uint8_t)
'L';
524 maptable_to_char[11]=(uint8_t)
'K';
525 maptable_to_char[12]=(uint8_t)
'M';
526 maptable_to_char[13]=(uint8_t)
'F';
527 maptable_to_char[14]=(uint8_t)
'P';
528 maptable_to_char[15]=(uint8_t)
'S';
529 maptable_to_char[16]=(uint8_t)
'T';
530 maptable_to_char[17]=(uint8_t)
'W';
531 maptable_to_char[18]=(uint8_t)
'Y';
532 maptable_to_char[19]=(uint8_t)
'V';
533 maptable_to_char[20]=(uint8_t)
'B';
534 maptable_to_char[21]=(uint8_t)
'Z';
535 maptable_to_char[22]=(uint8_t)
'X';
552 for (int32_t i=(int32_t) (1 <<(
sizeof(uint8_t)*8))-1;i>=0; i--)
567 for (int32_t i=0; i<(int32_t) (1 <<(
sizeof(uint8_t)*8)); i++)
588 for (int32_t i=0; i<(int32_t) (1 <<(
sizeof(uint8_t)*8)); i++)
604 for (int32_t i=0; i<(int32_t) (1 <<(
sizeof(uint8_t)*8)); i++)
613 if (!result && print_error)
616 SG_ERROR(
"ALPHABET does not contain all symbols in histogram\n");
630 SG_ERROR(
"ALPHABET too small to contain all symbols in histogram\n");
645 SG_ERROR(
"Histogram has %d elements, but %d elements where expected\n",
704 void CAlphabet::init()
719 "Number of symbols.");
748 for (i=sequence_length-1; i>= p_order-1; i--)
751 for (j=i; j>=i-p_order+1; j--)
752 value= (value >> max_val) | (obs[j] << (max_val * (p_order-1)));
757 for (i=p_order-2;i>=0;i--)
759 if (i>=sequence_length)
763 for (j=i; j>=i-p_order+1; j--)
765 value= (value >> max_val);
766 if (j>=0 && j<sequence_length)
767 value|=obs[j] << (max_val * (p_order-1));
775 for (i=start; i<sequence_length; i++)
786 for (i=sequence_length-1; i>= p_order-1; i--)
789 for (j=i; j>=i-p_order+1; j--)
790 value= (value << max_val) | obs[j];
795 for (i=p_order-2;i>=0;i--)
797 if (i>=sequence_length)
801 for (j=i; j>=i-p_order+1; j--)
803 value= (value << max_val);
804 if (j>=0 && j<sequence_length)
813 for (i=start; i<sequence_length; i++)
823 const int32_t start_gap=(p_order-gap)/2;
824 const int32_t end_gap=start_gap+gap;
830 for (i=sequence_length-1; i>=p_order-1; i--)
833 for (j=i; j>=i-p_order+1; j--)
837 value= (value >> max_val) | (obs[j] << (max_val * (p_order-1-gap)));
839 else if (i-j>=end_gap)
841 value= (value >> max_val) | (obs[j] << (max_val * (p_order-1-gap)));
848 for (i=p_order-2;i>=0;i--)
850 if (i>=sequence_length)
854 for (j=i; j>=i-p_order+1; j--)
858 value= (value >> max_val);
859 if (j>=0 && j<sequence_length)
860 value|=obs[j] << (max_val * (p_order-1-gap));
862 else if (i-j>=end_gap)
864 value= (value >> max_val);
865 if (j>=0 && j<sequence_length)
866 value|=obs[j] << (max_val * (p_order-1-gap));
875 for (i=start; i<sequence_length; i++)
885 const int32_t start_gap=(p_order-gap)/2;
886 const int32_t end_gap=start_gap+gap;
892 for (i=sequence_length-1; i>=p_order-1; i--)
895 for (j=i; j>=i-p_order+1; j--)
898 value= (value << max_val) | obs[j];
899 else if (i-j>=end_gap)
900 value= (value << max_val) | obs[j];
906 for (i=p_order-2;i>=0;i--)
908 if (i>=sequence_length)
912 for (j=i; j>=i-p_order+1; j--)
916 value= value << max_val;
917 if (j>=0 && j<sequence_length)
920 else if (i-j>=end_gap)
922 value= value << max_val;
923 if (j>=0 && j<sequence_length)
933 for (i=start; i<sequence_length; i++)
// Explicit instantiations of the five-argument member template
// CAlphabet::translate_from_single_order<T>(obs, sequence_length, start,
// p_order, max_val) for bool, char and the signed/unsigned 8-, 16-, 32- and
// 64-bit integer types.
962 template void CAlphabet::translate_from_single_order<bool>(
bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
963 template void CAlphabet::translate_from_single_order<char>(
char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
964 template void CAlphabet::translate_from_single_order<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
965 template void CAlphabet::translate_from_single_order<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
966 template void CAlphabet::translate_from_single_order<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
967 template void CAlphabet::translate_from_single_order<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
968 template void CAlphabet::translate_from_single_order<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
969 template void CAlphabet::translate_from_single_order<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
970 template void CAlphabet::translate_from_single_order<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
971 template void CAlphabet::translate_from_single_order<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
// Explicit instantiations of the six-argument overload of
// CAlphabet::translate_from_single_order<T>, which takes an additional
// trailing `int32_t gap` parameter, for bool, char and the signed/unsigned
// 8-, 16-, 32- and 64-bit integer types.
973 template void CAlphabet::translate_from_single_order<bool>(
bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
974 template void CAlphabet::translate_from_single_order<char>(
char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
975 template void CAlphabet::translate_from_single_order<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
976 template void CAlphabet::translate_from_single_order<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
977 template void CAlphabet::translate_from_single_order<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
978 template void CAlphabet::translate_from_single_order<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
979 template void CAlphabet::translate_from_single_order<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
980 template void CAlphabet::translate_from_single_order<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
981 template void CAlphabet::translate_from_single_order<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
982 template void CAlphabet::translate_from_single_order<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
// Explicit instantiations of the five-argument member template
// CAlphabet::translate_from_single_order_reversed<T>(obs, sequence_length,
// start, p_order, max_val) for bool, char and the signed/unsigned 8-, 16-,
// 32- and 64-bit integer types.
984 template void CAlphabet::translate_from_single_order_reversed<bool>(
bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
985 template void CAlphabet::translate_from_single_order_reversed<char>(
char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
986 template void CAlphabet::translate_from_single_order_reversed<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
987 template void CAlphabet::translate_from_single_order_reversed<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
988 template void CAlphabet::translate_from_single_order_reversed<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
989 template void CAlphabet::translate_from_single_order_reversed<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
990 template void CAlphabet::translate_from_single_order_reversed<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
991 template void CAlphabet::translate_from_single_order_reversed<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
992 template void CAlphabet::translate_from_single_order_reversed<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
993 template void CAlphabet::translate_from_single_order_reversed<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
// Explicit instantiations of the six-argument overload of
// CAlphabet::translate_from_single_order_reversed<T>, which takes an
// additional trailing `int32_t gap` parameter, for bool, char and the
// signed/unsigned 8-, 16-, 32- and 64-bit integer types, followed by
// float32_t, float64_t and floatmax_t.
// NOTE(review): within the lines visible here, the floating-point types are
// explicitly instantiated only for this overload; check how the other three
// translate_* overloads are provided for those types.
995 template void CAlphabet::translate_from_single_order_reversed<bool>(
bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
996 template void CAlphabet::translate_from_single_order_reversed<char>(
char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
997 template void CAlphabet::translate_from_single_order_reversed<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
998 template void CAlphabet::translate_from_single_order_reversed<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
999 template void CAlphabet::translate_from_single_order_reversed<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1000 template void CAlphabet::translate_from_single_order_reversed<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1001 template void CAlphabet::translate_from_single_order_reversed<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1002 template void CAlphabet::translate_from_single_order_reversed<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1003 template void CAlphabet::translate_from_single_order_reversed<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1004 template void CAlphabet::translate_from_single_order_reversed<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1005 template void CAlphabet::translate_from_single_order_reversed<float32_t>(
float32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1006 template void CAlphabet::translate_from_single_order_reversed<float64_t>(
float64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
1007 template void CAlphabet::translate_from_single_order_reversed<floatmax_t>(
floatmax_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);