Skip to content

Commit

Permalink
#2356 - Library ambiguous peptides loaded as mixtures from FASTA
Browse files Browse the repository at this point in the history
Fix import subtype. Fix UTs
  • Loading branch information
AliaksandrDziarkach committed Sep 14, 2024
1 parent 3f841ab commit 604ff18
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 60 deletions.
4 changes: 2 additions & 2 deletions api/tests/integration/tests/formats/ket_to_helm.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def find_diff(a, b):
"helm_mixed_base": "RNA1{[dR](A)P.[dR](A+G)P.[dR](A)P.[dR](G+C)}$$$$V2.0",
"helm_mixed_custom": "RNA1{[dR](A:10+[Xan]:20+G:30+T:50)P.[dR](A:10+C:20+G:30+T:50)P.[dR](A+C+G+T)}$$$$V2.0",
"helm_aminoacids_variants": "PEPTIDE1{([Dha]+N).(L+I).(E+Q).(A+C+D+E+F+G+H+I+K+L+M+N+O+P+Q+R+S+T+U+V+W+Y)}$$$$V2.0",
"dna_variants": "RNA1{[dR](C+G+T)P.[dR](A+C+G+T)}$$$$V2.0",
"rna_variants": "RNA1{R(G+T)P.R(A+C+G+T)}$$$$V2.0",
"dna_variants": "RNA1{[dR](C,G,T)P.[dR](A,C,G,T)}$$$$V2.0",
"rna_variants": "RNA1{R(G,T)P.R(A,C,G,T)}$$$$V2.0",
"helm_monomer_molecule": "PEPTIDE1{A}|PEPTIDE2{G}|CHEM1{[C(N[*:2])=C[*:1] |$;;_R2;;_R1$|]}$CHEM1,PEPTIDE1,1:R2-1:R1$$$V2.0",
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4667,7 +4667,7 @@
},
"ambiguousMonomerTemplate-B": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"subtype": "alternatives",
"id": "B",
"alias": "B",
"options": [
Expand All @@ -4681,7 +4681,7 @@
},
"ambiguousMonomerTemplate-J": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"subtype": "alternatives",
"id": "J",
"alias": "J",
"options": [
Expand All @@ -4695,7 +4695,7 @@
},
"ambiguousMonomerTemplate-Z": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"subtype": "alternatives",
"id": "Z",
"alias": "Z",
"options": [
Expand All @@ -4709,7 +4709,7 @@
},
"ambiguousMonomerTemplate-X": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"subtype": "alternatives",
"id": "X",
"alias": "X",
"options": [
Expand Down
4 changes: 2 additions & 2 deletions api/tests/integration/tests/formats/ref/dna_variants.ket
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,7 @@
},
"ambiguousMonomerTemplate-B": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"subtype": "alternatives",
"id": "B",
"alias": "B",
"options": [
Expand All @@ -1218,7 +1218,7 @@
},
"ambiguousMonomerTemplate-N": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"subtype": "alternatives",
"id": "N",
"alias": "N",
"options": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,13 @@
"$ref": "ambiguousMonomerTemplate-Var0"
},
{
"$ref": "ambiguousMonomerTemplate-J"
"$ref": "ambiguousMonomerTemplate-J1"
},
{
"$ref": "ambiguousMonomerTemplate-Z"
"$ref": "ambiguousMonomerTemplate-Z2"
},
{
"$ref": "ambiguousMonomerTemplate-X"
"$ref": "ambiguousMonomerTemplate-X3"
}
]
},
Expand All @@ -152,8 +152,8 @@
"y": -0.000000
},
"seqid": 2,
"alias": "J",
"templateId": "J"
"alias": "J1",
"templateId": "J1"
},
"ambiguousMonomer-2": {
"type": "ambiguousMonomer",
Expand All @@ -163,8 +163,8 @@
"y": -0.000000
},
"seqid": 3,
"alias": "Z",
"templateId": "Z"
"alias": "Z2",
"templateId": "Z2"
},
"ambiguousMonomer-3": {
"type": "ambiguousMonomer",
Expand All @@ -174,8 +174,8 @@
"y": -0.000000
},
"seqid": 4,
"alias": "X",
"templateId": "X"
"alias": "X3",
"templateId": "X3"
},
"monomerTemplate-Dha___2-aminoprop-2-enoic acid": {
"type": "monomerTemplate",
Expand Down Expand Up @@ -4813,11 +4813,11 @@
}
]
},
"ambiguousMonomerTemplate-J": {
"ambiguousMonomerTemplate-J1": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"id": "J",
"alias": "J",
"id": "J1",
"alias": "J1",
"options": [
{
"templateId": "L___Leucine"
Expand All @@ -4827,11 +4827,11 @@
}
]
},
"ambiguousMonomerTemplate-Z": {
"ambiguousMonomerTemplate-Z2": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"id": "Z",
"alias": "Z",
"id": "Z2",
"alias": "Z2",
"options": [
{
"templateId": "E___Glutamic acid"
Expand All @@ -4841,11 +4841,11 @@
}
]
},
"ambiguousMonomerTemplate-X": {
"ambiguousMonomerTemplate-X3": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"id": "X",
"alias": "X",
"id": "X3",
"alias": "X3",
"options": [
{
"templateId": "A___Alanine"
Expand Down
24 changes: 12 additions & 12 deletions api/tests/integration/tests/formats/ref/helm_mixed_base.ket
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,10 @@
"$ref": "monomerTemplate-C___Cytosine"
},
{
"$ref": "ambiguousMonomerTemplate-R"
"$ref": "ambiguousMonomerTemplate-Var0"
},
{
"$ref": "ambiguousMonomerTemplate-S"
"$ref": "ambiguousMonomerTemplate-Var1"
}
]
},
Expand Down Expand Up @@ -223,8 +223,8 @@
"y": -1.600000
},
"seqid": 5,
"alias": "R",
"templateId": "R"
"alias": "Var0",
"templateId": "Var0"
},
"monomer5": {
"type": "monomer",
Expand Down Expand Up @@ -289,8 +289,8 @@
"y": -1.600000
},
"seqid": 11,
"alias": "S",
"templateId": "S"
"alias": "Var1",
"templateId": "Var1"
},
"monomerTemplate-dR___Deoxy-Ribose": {
"type": "monomerTemplate",
Expand Down Expand Up @@ -1172,11 +1172,11 @@
}
]
},
"ambiguousMonomerTemplate-R": {
"ambiguousMonomerTemplate-Var0": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"id": "R",
"alias": "R",
"id": "Var0",
"alias": "Var0",
"options": [
{
"templateId": "A___Adenine"
Expand All @@ -1186,11 +1186,11 @@
}
]
},
"ambiguousMonomerTemplate-S": {
"ambiguousMonomerTemplate-Var1": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"id": "S",
"alias": "S",
"id": "Var1",
"alias": "Var1",
"options": [
{
"templateId": "G___Guanine"
Expand Down
24 changes: 12 additions & 12 deletions api/tests/integration/tests/formats/ref/helm_mixed_custom.ket
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,10 @@
"$ref": "ambiguousMonomerTemplate-Var0"
},
{
"$ref": "ambiguousMonomerTemplate-N1"
"$ref": "ambiguousMonomerTemplate-Var1"
},
{
"$ref": "ambiguousMonomerTemplate-N"
"$ref": "ambiguousMonomerTemplate-Var2"
}
]
},
Expand Down Expand Up @@ -190,8 +190,8 @@
"y": -1.600000
},
"seqid": 5,
"alias": "N1",
"templateId": "N1"
"alias": "Var1",
"templateId": "Var1"
},
"monomer5": {
"type": "monomer",
Expand Down Expand Up @@ -223,8 +223,8 @@
"y": -1.600000
},
"seqid": 8,
"alias": "N",
"templateId": "N"
"alias": "Var2",
"templateId": "Var2"
},
"monomerTemplate-dR___Deoxy-Ribose": {
"type": "monomerTemplate",
Expand Down Expand Up @@ -1515,11 +1515,11 @@
}
]
},
"ambiguousMonomerTemplate-N1": {
"ambiguousMonomerTemplate-Var1": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"id": "N1",
"alias": "N1",
"id": "Var1",
"alias": "Var1",
"options": [
{
"templateId": "A___Adenine",
Expand All @@ -1539,11 +1539,11 @@
}
]
},
"ambiguousMonomerTemplate-N": {
"ambiguousMonomerTemplate-Var2": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"id": "N",
"alias": "N",
"id": "Var2",
"alias": "Var2",
"options": [
{
"templateId": "A___Adenine"
Expand Down
4 changes: 2 additions & 2 deletions api/tests/integration/tests/formats/ref/rna_variants.ket
Original file line number Diff line number Diff line change
Expand Up @@ -1218,7 +1218,7 @@
},
"ambiguousMonomerTemplate-K": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"subtype": "alternatives",
"id": "K",
"alias": "K",
"options": [
Expand All @@ -1232,7 +1232,7 @@
},
"ambiguousMonomerTemplate-N": {
"type": "ambiguousMonomerTemplate",
"subtype": "mixture",
"subtype": "alternatives",
"id": "N",
"alias": "N",
"options": [
Expand Down
16 changes: 8 additions & 8 deletions core/indigo-core/molecule/src/sequence_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,24 +369,24 @@ void SequenceLoader::addMonomer(KetDocument& document, const std::string& monome
_alias_to_id.emplace(monomer, checkAddTemplate(document, monomer_class, monomer));
else if (!document.hasVariantMonomerTemplate(monomer))
{
std::optional<std::reference_wrapper<const std::vector<std::string>>> mixture;
std::optional<std::reference_wrapper<const std::vector<std::string>>> alternatives;
if (seq_type == SeqType::PEPTIDESeq)
{
const auto& it = STANDARD_MIXED_PEPTIDES.find(monomer);
if (it == STANDARD_MIXED_PEPTIDES.end())
throw Error("Unknown mixed peptide '%s'", monomer.c_str());
mixture.emplace(std::cref(it->second));
alternatives.emplace(std::cref(it->second));
}
else
{
const auto& it = STANDARD_MIXED_BASES.find(monomer);
if (it == STANDARD_MIXED_BASES.end())
throw Error("Unknown mixed base '%s'", monomer.c_str());
mixture.emplace(std::cref(it->second));
alternatives.emplace(std::cref(it->second));
}

std::vector<KetVariantMonomerOption> options;
for (auto template_alias : mixture.value().get())
for (auto template_alias : alternatives.value().get())
{
auto& template_id = _library.getMonomerTemplateIdByAlias(monomer_class, template_alias);
if (template_id.size() == 0)
Expand All @@ -396,7 +396,7 @@ void SequenceLoader::addMonomer(KetDocument& document, const std::string& monome
checkAddTemplate(document, monomer_template);
_alias_to_id.emplace(template_alias, template_id);
}
auto& templ = document.addVariantMonomerTemplate("mixture", monomer, monomer, IdtAlias(), options);
auto& templ = document.addVariantMonomerTemplate("alternatives", monomer, monomer, IdtAlias(), options);
static const std::map<std::string, KetAttachmentPoint> aa_aps{{"R1", -1}, {"R2", -1}};
static const std::map<std::string, KetAttachmentPoint> base_aps{{"R1", -1}};
if (seq_type == SeqType::PEPTIDESeq)
Expand Down Expand Up @@ -1488,7 +1488,7 @@ SequenceLoader::MonomerInfo SequenceLoader::readHelmMonomer(KetDocument& documen
if (monomer_class == MonomerClass::AminoAcid)
{
if (STANDARD_MIXED_PEPTIDES_TO_ALIAS.count(aliases) > 0)
if (is_mixture && no_counts)
if (!is_mixture && no_counts)
monomer_alias = STANDARD_MIXED_PEPTIDES_TO_ALIAS.at(aliases);
else
monomer_alias = STANDARD_MIXED_PEPTIDES_TO_ALIAS.at(aliases) + std::to_string(_unknown_variants_count++);
Expand All @@ -1497,8 +1497,8 @@ SequenceLoader::MonomerInfo SequenceLoader::readHelmMonomer(KetDocument& documen
}
else if (monomer_class == MonomerClass::Base)
{
if (is_mixture && STANDARD_MIXED_BASES_TO_ALIAS.count(aliases) > 0)
if (is_mixture && no_counts)
if (!is_mixture && STANDARD_MIXED_BASES_TO_ALIAS.count(aliases) > 0)
if (!is_mixture && no_counts)
monomer_alias = STANDARD_MIXED_BASES_TO_ALIAS.at(aliases);
else
monomer_alias = STANDARD_MIXED_BASES_TO_ALIAS.at(aliases) + std::to_string(_unknown_variants_count++);
Expand Down

0 comments on commit 604ff18

Please sign in to comment.