From ce1e3960a22476182e1e7e4b8076c1bda452f3f1 Mon Sep 17 00:00:00 2001 From: Alexander Rose <alexander.rose@weirdbyte.de> Date: Sat, 18 Dec 2021 12:13:36 -0800 Subject: [PATCH] avoid standard polymers misqualified as ligands - overrule erroneous chem comp type in some files --- docs/interesting-pdb-entries.md | 5 ++ src/mol-model/structure/model/types.ts | 6 +++ .../helpers/structure-selection-query.ts | 53 ++++++++++++------- 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/docs/interesting-pdb-entries.md b/docs/interesting-pdb-entries.md index 329566992..a1323f898 100644 --- a/docs/interesting-pdb-entries.md +++ b/docs/interesting-pdb-entries.md @@ -29,6 +29,11 @@ * Long linear sugar chain (4HG6) * Anisotropic B-factors/Ellipsoids (1EJG) * NOS bridges (LYS-CSO in 7B0L, 6ZWJ, 6ZWH) +* Non-polymer components in polymer entities + * PN2 in 1F80 + * ACE (many, e.g. 5AGU, 1E1X) + * ACY in 7ABY + * NH2 (many, e.g. 6Y13) Assembly symmetries * 5M30 (Assembly 1, C3 local and pseudo) diff --git a/src/mol-model/structure/model/types.ts b/src/mol-model/structure/model/types.ts index da3f5333c..e6c7b8080 100644 --- a/src/mol-model/structure/model/types.ts +++ b/src/mol-model/structure/model/types.ts @@ -321,6 +321,12 @@ export function getMoleculeType(compType: string, compId: string): MoleculeType if (SaccharideCompIdMap.has(compId)) { // trust our saccharide table more than given 'non-polymer' or 'other' component type return MoleculeType.Saccharide; + } else if (AminoAcidNames.has(compId)) { + return MoleculeType.Protein; + } else if (RnaBaseNames.has(compId)) { + return MoleculeType.RNA; + } else if (DnaBaseNames.has(compId)) { + return MoleculeType.DNA; } else { return MoleculeType.Other; } diff --git a/src/mol-plugin-state/helpers/structure-selection-query.ts b/src/mol-plugin-state/helpers/structure-selection-query.ts index a969a5f06..a8d3505aa 100644 --- a/src/mol-plugin-state/helpers/structure-selection-query.ts +++ b/src/mol-plugin-state/helpers/structure-selection-query.ts @@ -7,7 +7,7 @@ import { CustomProperty } from '../../mol-model-props/common/custom-property'; import { QueryContext, Structure, StructureQuery, StructureSelection, StructureProperties, StructureElement } from '../../mol-model/structure'; -import { BondType, NucleicBackboneAtoms, ProteinBackboneAtoms, SecondaryStructureType, AminoAcidNamesL, RnaBaseNames, DnaBaseNames, WaterNames, ElementSymbol } from '../../mol-model/structure/model/types'; +import { BondType, NucleicBackboneAtoms, ProteinBackboneAtoms, SecondaryStructureType, AminoAcidNamesL, RnaBaseNames, DnaBaseNames, WaterNames, ElementSymbol, PolymerNames } from '../../mol-model/structure/model/types'; import { PluginContext } from '../../mol-plugin/context'; import { MolScriptBuilder as MS } from '../../mol-script/language/builder'; import { Expression } from '../../mol-script/language/expression'; @@ -320,33 +320,46 @@ const branchedConnectedOnly = StructureSelectionQuery('Connected to Carbohydrate ]), { category: StructureSelectionCategory.Internal, isHidden: true }); const ligand = StructureSelectionQuery('Ligand', MS.struct.modifier.union([ - MS.struct.combinator.merge([ - MS.struct.modifier.union([ - MS.struct.generator.atomGroups({ - 'entity-test': MS.core.logic.and([ - MS.core.logic.or([ - MS.core.rel.eq([MS.ammp('entityType'), 'non-polymer']), - MS.core.rel.neq([MS.ammp('entityPrdId'), '']) - ]), - MS.core.logic.not([MS.core.str.match([ - MS.re('(oligosaccharide|lipid|ion)', 'i'), - MS.ammp('entitySubtype') - ])]) + MS.struct.modifier.exceptBy({ + 0: MS.struct.modifier.union([ + MS.struct.combinator.merge([ + MS.struct.modifier.union([ + MS.struct.generator.atomGroups({ + 'entity-test': MS.core.logic.and([ + MS.core.logic.or([ + MS.core.rel.eq([MS.ammp('entityType'), 'non-polymer']), + MS.core.rel.neq([MS.ammp('entityPrdId'), '']) + ]), + MS.core.logic.not([MS.core.str.match([ + MS.re('(oligosaccharide|lipid|ion)', 'i'), + MS.ammp('entitySubtype') + ])]) + ]), + 'chain-test': MS.core.rel.eq([MS.ammp('objectPrimitive'), 'atomistic']), + 'residue-test': MS.core.logic.not([ + MS.core.str.match([MS.re('saccharide', 'i'), MS.ammp('chemCompType')]) + ]) + }) ]), - 'chain-test': MS.core.rel.eq([MS.ammp('objectPrimitive'), 'atomistic']), - 'residue-test': MS.core.logic.not([ - MS.core.str.match([MS.re('saccharide', 'i'), MS.ammp('chemCompType')]) + MS.struct.modifier.union([ + MS.struct.generator.atomGroups({ + 'entity-test': MS.core.rel.eq([MS.ammp('entityType'), 'polymer']), + 'chain-test': MS.core.rel.eq([MS.ammp('objectPrimitive'), 'atomistic']), + 'residue-test': _nonPolymerResidueTest + }) ]) - }) + ]), ]), - MS.struct.modifier.union([ + by: MS.struct.modifier.union([ MS.struct.generator.atomGroups({ 'entity-test': MS.core.rel.eq([MS.ammp('entityType'), 'polymer']), 'chain-test': MS.core.rel.eq([MS.ammp('objectPrimitive'), 'atomistic']), - 'residue-test': _nonPolymerResidueTest + 'residue-test': MS.core.set.has([ + MS.set(...SetUtils.toArray(PolymerNames)), MS.ammp('label_comp_id') + ]) }) ]) - ]), + }) ]), { category: StructureSelectionCategory.Type }); // don't include branched entities as they have their own link representation -- GitLab