From 652f6c651b70bf00ac9a630d3f41a2686509097b Mon Sep 17 00:00:00 2001 From: Alexander Rose <alexander.rose@weirdbyte.de> Date: Sat, 12 Mar 2022 10:58:33 -0800 Subject: [PATCH] fix wrong element assignment --- CHANGELOG.md | 1 + src/mol-model-formats/structure/cif-core.ts | 2 +- src/mol-model-formats/structure/gro.ts | 5 +++- src/mol-model-formats/structure/mol2.ts | 2 +- src/mol-model-formats/structure/psf.ts | 5 +++- src/mol-model-formats/structure/util.ts | 33 +++++++++++---------- 6 files changed, 28 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b6c16297..8df9ea307 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Note that since we don't clearly distinguish between a public and private interf - Add ``CustomImportControls`` to left panel - Add Zenodo import extension (load structures, trajectories, volumes, and zip files) - Fix loading of some compressed files within sessions +- Fix wrong element assignment for atoms with Charmm ion names ## [v3.3.1] - 2022-02-27 diff --git a/src/mol-model-formats/structure/cif-core.ts b/src/mol-model-formats/structure/cif-core.ts index fd87abc7c..486b8450f 100644 --- a/src/mol-model-formats/structure/cif-core.ts +++ b/src/mol-model-formats/structure/cif-core.ts @@ -100,7 +100,7 @@ async function getModels(db: CifCore_Database, format: CifCoreFormat, ctx: Runti const element_symbol = new Array<string>(atomCount); for (let i = 0; i < atomCount; ++i) { // TODO can take as is if type_symbol not given? - element_symbol[i] = guessElementSymbolString(label.value(i)); + element_symbol[i] = guessElementSymbolString(label.value(i), ''); } typeSymbol = Column.ofStringArray(element_symbol); formalCharge = Column.Undefined(atomCount, Column.Schema.int); diff --git a/src/mol-model-formats/structure/gro.ts b/src/mol-model-formats/structure/gro.ts index 33f0efac5..2fb95634b 100644 --- a/src/mol-model-formats/structure/gro.ts +++ b/src/mol-model-formats/structure/gro.ts @@ -27,6 +27,7 @@ function getBasic(atoms: GroAtoms, modelNum: number): BasicData { const asymIds = new Array<string>(atoms.count); const seqIds = new Uint32Array(atoms.count); const ids = new Uint32Array(atoms.count); + const typeSymbol = new Array<string>(atoms.count); const entityBuilder = new EntityBuilder(); const componentBuilder = new ComponentBuilder(atoms.residueNumber, atoms.atomName); @@ -66,6 +67,8 @@ function getBasic(atoms: GroAtoms, modelNum: number): BasicData { asymIds[i] = currentAsymId; seqIds[i] = currentSeqId; ids[i] = i; + + typeSymbol[i] = guessElementSymbolString(atoms.atomName.value(i), atoms.residueName.value(i)); } const auth_asym_id = Column.ofStringArray(asymIds); @@ -87,7 +90,7 @@ function getBasic(atoms: GroAtoms, modelNum: number): BasicData { label_entity_id: Column.ofStringArray(entityIds), occupancy: Column.ofConst(1, atoms.count, Column.Schema.float), - type_symbol: Column.ofStringArray(Column.mapToArray(atoms.atomName, s => guessElementSymbolString(s))), + type_symbol: Column.ofStringArray(typeSymbol), pdbx_PDB_model_num: Column.ofConst(modelNum, atoms.count, Column.Schema.int), }, atoms.count); diff --git a/src/mol-model-formats/structure/mol2.ts b/src/mol-model-formats/structure/mol2.ts index b2bfe635b..ac8b4e75c 100644 --- a/src/mol-model-formats/structure/mol2.ts +++ b/src/mol-model-formats/structure/mol2.ts @@ -41,7 +41,7 @@ async function getModels(mol2: Mol2File, ctx: RuntimeContext) { for (let i = 0; i < atoms.count; ++i) { type_symbol[i] = hasAtomType ? atoms.atom_type.value(i).split('.')[0].toUpperCase() - : guessElementSymbolString(atoms.atom_name.value(i)); + : guessElementSymbolString(atoms.atom_name.value(i), atoms.subst_name.value(i)); } const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, { diff --git a/src/mol-model-formats/structure/psf.ts b/src/mol-model-formats/structure/psf.ts index 362cf9f22..50f1b454b 100644 --- a/src/mol-model-formats/structure/psf.ts +++ b/src/mol-model-formats/structure/psf.ts @@ -21,6 +21,7 @@ function getBasic(atoms: PsfFile['atoms']) { const asymIds = new Array<string>(atoms.count); const seqIds = new Uint32Array(atoms.count); const ids = new Uint32Array(atoms.count); + const typeSymbol = new Array<string>(atoms.count); const entityBuilder = new EntityBuilder(); const componentBuilder = new ComponentBuilder(atoms.residueId, atoms.atomName); @@ -68,6 +69,8 @@ function getBasic(atoms: PsfFile['atoms']) { asymIds[i] = currentAsymId; seqIds[i] = currentSeqId; ids[i] = i; + + typeSymbol[i] = guessElementSymbolString(atoms.atomName.value(i), atoms.residueName.value(i)); } const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, { @@ -84,7 +87,7 @@ function getBasic(atoms: PsfFile['atoms']) { label_entity_id: Column.ofStringArray(entityIds), occupancy: Column.ofConst(1, atoms.count, Column.Schema.float), - type_symbol: Column.ofStringArray(Column.mapToArray(atoms.atomName, s => guessElementSymbolString(s))), + type_symbol: Column.ofStringArray(typeSymbol), pdbx_PDB_model_num: Column.ofConst(1, atoms.count, Column.Schema.int), }, atoms.count); diff --git a/src/mol-model-formats/structure/util.ts b/src/mol-model-formats/structure/util.ts index f75908b05..b0ebfd663 100644 --- a/src/mol-model-formats/structure/util.ts +++ b/src/mol-model-formats/structure/util.ts @@ -47,30 +47,31 @@ export function guessElementSymbolTokens(tokens: Tokens, str: string, start: num } const reTrimSpacesAndNumbers = /^[\s\d]+|[\s\d]+$/g; -export function guessElementSymbolString(str: string) { +export function guessElementSymbolString(atomId: string, compId: string) { // trim spaces and numbers, convert to upper case - str = str.replace(reTrimSpacesAndNumbers, '').toUpperCase(); - const l = str.length; + atomId = atomId.replace(reTrimSpacesAndNumbers, '').toUpperCase(); + const l = atomId.length; - if (l === 0) return str; // empty - if (l === 1) return str; // one char + if (l === 0) return atomId; // empty + if (l === 1) return atomId; // one char if (l === 2) { // two chars - if (str === 'NA' || str === 'CL' || str === 'FE' || str === 'SI' || - str === 'BR' || str === 'AS' - ) return str; + if (atomId === 'NA' || atomId === 'CL' || atomId === 'FE' || atomId === 'SI' || + atomId === 'BR' || atomId === 'AS' + ) return atomId; } - if (l === 3) { // three chars - if (str === 'SOD') return 'NA'; - if (str === 'POT') return 'K'; - if (str === 'CES') return 'CS'; - if (str === 'CAL') return 'CA'; - if (str === 'CLA') return 'CL'; + if (l === 3 && compId === atomId) { // three chars + if (atomId === 'SOD') return 'NA'; + if (atomId === 'POT') return 'K'; + if (atomId === 'CES') return 'CS'; + if (atomId === 'CAL') return 'CA'; + if (atomId === 'CLA') return 'CL'; } - const c = str[0]; + const c = atomId[0]; if (c === 'C' || c === 'H' || c === 'N' || c === 'O' || c === 'P' || c === 'S') return c; return ''; // no reasonable guess, return empty string -} \ No newline at end of file +} + -- GitLab