diff --git a/src/mol-model-formats/structure/mmcif/parser.ts b/src/mol-model-formats/structure/mmcif/parser.ts index 29a3c2e3ad5b573d009157cdd73bfcb2474db535..7a35b55203149878a443912a8c3c12e9b744f885 100644 --- a/src/mol-model-formats/structure/mmcif/parser.ts +++ b/src/mol-model-formats/structure/mmcif/parser.ts @@ -144,10 +144,13 @@ function getChemicalComponentMap(format: mmCIF_Format): Model['properties']['che function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap { const map = new Map<string, SaccharideComponent>(); - const { pdbx_chem_comp_identifier } = format.data - if (pdbx_chem_comp_identifier._rowCount > 0) { - const { comp_id, type, identifier } = pdbx_chem_comp_identifier - for (let i = 0, il = pdbx_chem_comp_identifier._rowCount; i < il; ++i) { + + if (format.data.pdbx_chem_comp_identifier._rowCount > 0) { + // note that `pdbx_chem_comp_identifier` does not contain + // a 'SNFG CARB SYMBOL' entry for 'Unknown' saccharide components + // so we always need to check `chem_comp` for those + const { comp_id, type, identifier } = format.data.pdbx_chem_comp_identifier + for (let i = 0, il = comp_id.rowCount; i < il; ++i) { if (type.value(i) === 'SNFG CARB SYMBOL') { const snfgName = identifier.value(i) const saccharideComp = SaccharidesSnfgMap.get(snfgName) @@ -158,21 +161,24 @@ function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap } } } - } else if (format.data.chem_comp._rowCount > 0) { + } + + if (format.data.chem_comp._rowCount > 0) { const { id, type } = format.data.chem_comp for (let i = 0, il = id.rowCount; i < il; ++i) { const _id = id.value(i) + if (map.has(_id)) continue const _type = type.value(i) if (SaccharideCompIdMap.has(_id)) { map.set(_id, SaccharideCompIdMap.get(_id)!) - } else if (!map.has(_id) && getMoleculeType(_type, _id) === MoleculeType.saccharide) { + } else if (getMoleculeType(_type, _id) === MoleculeType.saccharide) { map.set(_id, UnknownSaccharideComponent) } } } else { const uniqueNames = getUniqueComponentNames(format) SaccharideCompIdMap.forEach((v, k) => { - if (uniqueNames.has(k)) map.set(k, v) + if (!map.has(k) && uniqueNames.has(k)) map.set(k, v) }) } return map diff --git a/src/mol-model/structure/model/types.ts b/src/mol-model/structure/model/types.ts index 64863fcf0610b7ac75f9a7456e65db462bcc0a8d..1302c2fe162aab2725f6c6d8de932312db602389 100644 --- a/src/mol-model/structure/model/types.ts +++ b/src/mol-model/structure/model/types.ts @@ -224,7 +224,12 @@ export function getMoleculeType(compType: string, compId: string) { } else if (IonNames.has(compId)) { return MoleculeType.ion } else if (OtherComponentTypeNames.has(compType)) { - return MoleculeType.other + if (SaccharideCompIdMap.has(compId)) { + // trust our saccharide table more than given 'non-polymer' or 'other' component type + return MoleculeType.saccharide + } else { + return MoleculeType.other + } } else { return MoleculeType.unknown } @@ -679,5 +684,5 @@ export const ResidueHydrophobicity = { 'TRP': [ -1.85, -2.09, -0.24 ], 'TYR': [ -0.94, -0.71, 0.23 ], 'VAL': [ 0.07, -0.46, -0.53 ] - } - export const DefaultResidueHydrophobicity = [ 0.00, 0.00, 0.00 ] \ No newline at end of file +} +export const DefaultResidueHydrophobicity = [ 0.00, 0.00, 0.00 ] \ No newline at end of file diff --git a/src/mol-model/structure/structure/carbohydrates/constants.ts b/src/mol-model/structure/structure/carbohydrates/constants.ts index 75d23bb9ab999885943b4b3bf97352732f5415f5..7dcb8b51ad3d98cf012da8b64d85098817915ec3 100644 --- a/src/mol-model/structure/structure/carbohydrates/constants.ts +++ b/src/mol-model/structure/structure/carbohydrates/constants.ts @@ -172,6 +172,19 @@ const Monosaccharides: SaccharideComponent[] = [ { abbr: 'Tag', name: 'Tagatose', color: SaccharideColors.Yellow, type: SaccharideType.Assigned }, { abbr: 'Sor', name: 'Sorbose', color: SaccharideColors.Orange, type: SaccharideType.Assigned }, { abbr: 'Psi', name: 'Psicose', color: SaccharideColors.Pink, type: SaccharideType.Assigned }, + + { abbr: 'Hexose', name: 'Hexose', color: SaccharideColors.Secondary, type: SaccharideType.Hexose }, + { abbr: 'HexNAc', name: 'HexNAc', color: SaccharideColors.Secondary, type: SaccharideType.HexNAc }, + { abbr: 'Hexosamine', name: 'Hexosamine', color: SaccharideColors.Secondary, type: SaccharideType.Hexosamine }, + { abbr: 'Hexuronate', name: 'Hexuronate', color: SaccharideColors.Secondary, type: SaccharideType.Hexuronate }, + { abbr: 'Deoxyhexose', name: 'Deoxyhexose', color: SaccharideColors.Secondary, type: SaccharideType.Deoxyhexose }, + { abbr: 'DeoxyhexNAc', name: 'DeoxyhexNAc', color: SaccharideColors.Secondary, type: SaccharideType.DeoxyhexNAc }, + { abbr: 'Di-deoxyhexose', name: 'Di-deoxyhexose', color: SaccharideColors.Secondary, type: SaccharideType.DiDeoxyhexose }, + { abbr: 'Pentose', name: 'Pentose', color: SaccharideColors.Secondary, type: SaccharideType.Pentose }, + { abbr: 'Deoxynonulosonate', name: 'Deoxynonulosonate', color: SaccharideColors.Secondary, type: SaccharideType.Deoxynonulosonate }, + { abbr: 'Di-deoxynonulosonate', name: 'Di-deoxynonulosonate', color: SaccharideColors.Secondary, type: SaccharideType.DiDeoxynonulosonate }, + { abbr: 'Unknown', name: 'Unknown', color: SaccharideColors.Secondary, type: SaccharideType.Unknown }, + { abbr: 'Assigned', name: 'Assigned', color: SaccharideColors.Secondary, type: SaccharideType.Assigned }, ] export const SaccharidesSnfgMap = (function () { @@ -200,33 +213,27 @@ const CommonSaccharideNames: { [k: string]: string[] } = { // Hexose Glc: [ 'GLC', 'BGC', - 'BOG', // via GlyFinder - 'TRE', // via GlyFinder, di-saccharide but homomer - 'MLR', // via GlyFinder, tri-saccharide but homomer + 'TRE', // di-saccharide but homomer + 'MLR', // tri-saccharide but homomer ], Man: ['MAN', 'BMA'], - Gal: [ - 'GAL', 'GLA', - 'GXL' // via PubChem - ], - Gul: ['GUP', 'GL0'], - Alt: ['ALT'], - All: ['ALL', 'AFD'], - Tal: ['TAL'], - Ido: ['4N2'], + Gal: ['GLA', 'GAL', 'GZL'], + Gul: ['4GL', 'GL0'], + Alt: ['Z6H', '3MK'], + All: ['AFD', 'ALL'], + Tal: [], + Ido: ['Z0F', '4N2'], // HexNAc - GlcNAc: ['NAG', 'NDG'], - ManNAc: ['NGA', 'A2G'], - GulNAc: [], + GlcNAc: ['NDG', 'NAG'], + ManNAc: ['BM3', 'BM7'], + GalNAc: ['A2G', 'NGA'], + GulNAc: ['LXB'], AltNAc: [], AllNAc: ['NAA'], TalNAc: [], - IdoNAc: ['HSQ'], + IdoNAc: [], // Hexosamine - GlcN: [ - 'GCS', 'PA1', - 'IDU', 'SGN', 'SUS', // via GlyFinder - ], + GlcN: ['PA1', 'GCS'], ManN: ['95Z'], GalN: ['X6X', '1GN'], GulN: [], @@ -237,66 +244,76 @@ const CommonSaccharideNames: { [k: string]: string[] } = { // Hexuronate GlcA: ['GCU', 'BDP'], ManA: ['MAV', 'BEM'], - GalA: ['ADA', 'GTR'], - GulA: ['LGU'], + GalA: ['ADA', 'GTR', 'GTK'], + GulA: [], AltA: [], AllA: [], - TalA: ['X0X', 'X1X'], - IdoA: [ - 'IDR', - 'IDS', // via GlyFinder - ], + TalA: ['X1X', 'X0X'], + IdoA: ['IDR'], // Deoxyhexose - Qui: ['G6D'], + Qui: ['G6D', 'YYK'], Rha: ['RAM', 'RM4'], - '6dGul': [], + '6dGul': ['66O'], '6dAlt': [], '6dTal': [], Fuc: ['FUC', 'FUL'], // DeoxyhexNAc - QuiNAc: [], + QuiNAc: ['Z9W'], RhaNAc: [], '6dAltNAc': [], '6dTalNAc': [], FucNAc: [], // Di-deoxyhexose - Oli: ['DDA'], + Oli: ['DDA', 'RAE', 'Z5J'], Tyv: ['TYV'], Abe: ['ABE'], Par: ['PZU'], - Dig: [], + Dig: ['Z3U'], Col: [], // Pentose - Ara: ['ARA', 'ARB'], - Lyx: ['LDY'], - Xyl: ['XYS', 'XYP'], - Rib: ['RIP', '0MK'], + Ara: ['ARA', 'ARB', 'AHR', 'FUB'], + Lyx: ['LDY', 'Z4W'], + Xyl: ['XZS', 'XYP', 'XYZ'], + Rib: ['YYM', 'RIP', 'RIB', 'BDR'], // Deoxynonulosonate - Kdn: ['KDN', 'KDM'], + Kdn: ['KDM', 'KDN'], Neu5Ac: ['SIA', 'SLB'], Neu5Gc: ['NGC', 'NGE'], Neu: [], Sia: [], // Di-deoxynonulosonate - Pse: ['6PZ'], + Pse: [], Leg: [], Aci: [], '4eLeg': [], // Unknown - Bac: ['B6D'], + Bac: [], LDManHep: ['GMH'], Kdo: ['KDO'], Dha: [], - DDManHep: [], - MurNAc: ['AMU'], + DDManHep: ['289'], + MurNAc: ['MUB', 'AMU'], MurNGc: [], - Mur: ['MUR'], + Mur: ['1S4', 'MUR'], // Assigned Api: ['XXM'], - Fru: ['BDF'], + Fru: ['BDF', 'Z9N', 'FRU'], Tag: ['T6T'], Sor: ['SOE'], - Psi: [], + Psi: ['PSV'], + // Generic + Hexose: [], + HexNAc: [], + Hexosamine: [], + Hexuronate: [], + Deoxyhexose: [], + DeoxyhexNAc: [], + 'Di-deoxyhexose': [], + Pentose: [], + Deoxynonulosonate: [], + 'Di-deoxynonulosonate': [], + Unknown: [], + Assigned: ['PUF'], } const UnknownSaccharideNames = [ diff --git a/src/mol-plugin/util/structure-selection-helper.ts b/src/mol-plugin/util/structure-selection-helper.ts index a3416b0e4b812902e75d52d3f77586c7516018f4..1a221e6b715086f4098b0bad5f891df4eefff036 100644 --- a/src/mol-plugin/util/structure-selection-helper.ts +++ b/src/mol-plugin/util/structure-selection-helper.ts @@ -114,8 +114,11 @@ const branchedConnectedOnly = MS.struct.modifier.union([ const ligand = MS.struct.modifier.union([ MS.struct.generator.atomGroups({ 'entity-test': MS.core.logic.and([ - MS.core.rel.neq([MS.ammp('entityType'), 'branched']), - MS.core.rel.eq([MS.ammp('entityType'), 'non-polymer']) + MS.core.rel.eq([MS.ammp('entityType'), 'non-polymer']), + MS.core.logic.not([MS.core.str.match([ + MS.re('oligosaccharide', 'i'), + MS.ammp('entitySubtype') + ])]) ]), 'chain-test': MS.core.rel.eq([MS.ammp('objectPrimitive'), 'atomistic']), 'residue-test': MS.core.logic.not([