diff --git a/src/mol-io/reader/_spec/mol.spec.ts b/src/mol-io/reader/_spec/mol.spec.ts index 923db651eebcd8e5725ab9cf69f4c278bb55f5af..897794ea59cbaea3c6abdfe330fe258b2d1c37c2 100644 --- a/src/mol-io/reader/_spec/mol.spec.ts +++ b/src/mol-io/reader/_spec/mol.spec.ts @@ -1,5 +1,12 @@ +/** + * Copyright (c) 2019-2022 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ -import { parseMol } from '../mol/parser'; +import { parseMol, formalChargeMapper } from '../mol/parser'; const MolString = `2244 -OEChem-04072009073D @@ -49,6 +56,48 @@ const MolString = `2244 13 20 1 0 0 0 0 M END`; +const MolStringWithAtomBlockCharge = ` + Ketcher 1 72215442D 1 1.00000 0.00000 0 + + 4 3 0 0 0 0 999 V2000 + 0.0000 0.0000 0.0000 C 0 1 0 0 0 0 0 0 0 0 0 0 + 0.8660 0.5000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + -0.8660 0.5000 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 -1.0000 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0 + 1 4 2 0 0 0 0 + 3 1 1 0 0 0 0 + 2 1 1 0 0 0 0 +M END`; + +const MolStringWithPropertyBlockCharge = ` + Ketcher 1 72215442D 1 1.00000 0.00000 0 + + 4 3 0 0 0 0 999 V2000 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8660 0.5000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + -0.8660 0.5000 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 -1.0000 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0 + 1 4 2 0 0 0 0 + 3 1 1 0 0 0 0 + 2 1 1 0 0 0 0 +M CHG 3 2 -1 3 1 4 1 +M END`; + +const MolStringWithMultipleChargeLines = ` + Ketcher 1 72215442D 1 1.00000 0.00000 0 + + 4 3 0 0 0 0 999 V2000 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8660 0.5000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + -0.8660 0.5000 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 -1.0000 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0 + 1 4 2 0 0 0 0 + 3 1 1 0 0 0 0 + 2 1 1 0 0 0 0 +M CHG 1 2 -1 +M CHG 2 3 1 4 1 +M END`; + describe('mol reader', () => { it('basic', async () => { 
const parsed = await parseMol(MolString).run(); @@ -70,4 +119,63 @@ describe('mol reader', () => { expect(bonds.atomIdxB.value(20)).toBe(20); expect(bonds.order.value(20)).toBe(1); }); + it('property block charges', async () => { + const parsed = await parseMol(MolStringWithPropertyBlockCharge).run(); + if (parsed.isError) { + throw new Error(parsed.message); + } + const { formalCharges } = parsed.result; + + expect(formalCharges.atomIdx.rowCount).toBe(3); + expect(formalCharges.charge.rowCount).toBe(3); + + expect(formalCharges.atomIdx.value(0)).toBe(2); + expect(formalCharges.atomIdx.value(1)).toBe(3); + + expect(formalCharges.charge.value(0)).toBe(-1); + expect(formalCharges.charge.value(1)).toBe(1); + }); + it('multiple charge lines', async () => { + const parsed = await parseMol(MolStringWithMultipleChargeLines).run(); + if (parsed.isError) { + throw new Error(parsed.message); + } + const { formalCharges } = parsed.result; + + expect(formalCharges.atomIdx.rowCount).toBe(3); + expect(formalCharges.charge.rowCount).toBe(3); + + expect(formalCharges.atomIdx.value(0)).toBe(2); + expect(formalCharges.atomIdx.value(1)).toBe(3); + + expect(formalCharges.charge.value(0)).toBe(-1); + expect(formalCharges.charge.value(1)).toBe(1); + }); + + it('atom block charge mapping', async () => { + expect(formalChargeMapper(7)).toBe(-3); + expect(formalChargeMapper(6)).toBe(-2); + expect(formalChargeMapper(5)).toBe(-1); + expect(formalChargeMapper(0)).toBe(0); + expect(formalChargeMapper(3)).toBe(1); + expect(formalChargeMapper(2)).toBe(2); + expect(formalChargeMapper(1)).toBe(3); + expect(formalChargeMapper(4)).toBe(0); + }); + it('atom block charges', async () => { + const parsed = await parseMol(MolStringWithAtomBlockCharge).run(); + if (parsed.isError) { + throw new Error(parsed.message); + } + const { atoms, formalCharges } = parsed.result; + + /* No property block charges */ + expect(formalCharges.atomIdx.rowCount).toBe(0); + expect(formalCharges.charge.rowCount).toBe(0); + 
+ expect(atoms.formal_charge.value(0)).toBe(1); + expect(atoms.formal_charge.value(1)).toBe(0); + expect(atoms.formal_charge.value(2)).toBe(0); + expect(atoms.formal_charge.value(3)).toBe(0); + }); }); diff --git a/src/mol-io/reader/_spec/sdf.spec.ts b/src/mol-io/reader/_spec/sdf.spec.ts index d97644c4e6d687342d10814fbcacb994ae880578..00e9aa8659f8c0de1b40925d49a29651a96e7439 100644 --- a/src/mol-io/reader/_spec/sdf.spec.ts +++ b/src/mol-io/reader/_spec/sdf.spec.ts @@ -1,3 +1,10 @@ +/** + * Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org> + * @author David Sehnal <david.sehnal@gmail.com> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ import { parseSdf } from '../sdf/parser'; @@ -458,6 +465,38 @@ describe('sdf reader', () => { expect(compound3.dataItems.data.value(21)).toBe('2\n5\n10'); }); + it('charge parsing in V2000', async () => { + const parsed = await parseSdf(SdfString).run(); + if (parsed.isError) { + throw new Error(parsed.message); + } + const compound1 = parsed.result.compounds[0]; + const compound2 = parsed.result.compounds[1]; + const compound3 = parsed.result.compounds[2]; + + const formalCharges1 = { + atomIdx: compound1.molFile.formalCharges.atomIdx, + charge: compound1.molFile.formalCharges.charge + }; + const formalCharges2 = { + atomIdx: compound2.molFile.formalCharges.atomIdx, + charge: compound2.molFile.formalCharges.charge + }; + const formalCharges3 = { + atomIdx: compound3.molFile.formalCharges.atomIdx, + charge: compound3.molFile.formalCharges.charge + }; + + expect(formalCharges1.atomIdx.rowCount).toBe(3); + expect(formalCharges2.atomIdx.rowCount).toBe(3); + expect(formalCharges3.atomIdx.rowCount).toBe(0); + + expect(formalCharges1.charge.rowCount === formalCharges1.atomIdx.rowCount).toBe(true); + expect(formalCharges2.charge.rowCount === formalCharges2.atomIdx.rowCount).toBe(true); + 
expect(formalCharges3.charge.rowCount === formalCharges3.atomIdx.rowCount).toBe(true); + }); + + it('v3000', async () => { const parsed = await parseSdf(V3000SdfString).run(); if (parsed.isError) { @@ -486,6 +525,11 @@ describe('sdf reader', () => { expect(compound1.molFile.bonds.atomIdxB.value(10)).toBe(9); expect(compound1.molFile.bonds.order.value(10)).toBe(2); + expect(compound1.molFile.formalCharges.atomIdx.rowCount).toBe(13); + for (let i = 0; i < compound1.molFile.atoms.count; i++) { + expect(compound1.molFile.formalCharges.charge.value(i)).toBe(0); + } + expect(compound1.dataItems.dataHeader.rowCount).toBe(2); expect(compound1.dataItems.data.rowCount).toBe(2); diff --git a/src/mol-io/reader/mol/parser.ts b/src/mol-io/reader/mol/parser.ts index 0874954df64a21d163a131afd14ea41f3b006d8d..f0c3cefccd131da9a20b7a8e087347f93d3e0bb9 100644 --- a/src/mol-io/reader/mol/parser.ts +++ b/src/mol-io/reader/mol/parser.ts @@ -1,7 +1,8 @@ /** - * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info. + * Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info. 
* * @author David Sehnal <david.sehnal@gmail.com> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> */ import { Column } from '../../../mol-data/db'; @@ -10,6 +11,7 @@ import { TokenColumnProvider as TokenColumn } from '../common/text/column/token' import { TokenBuilder, Tokenizer } from '../common/text/tokenizer'; import { ReaderResult as Result } from '../result'; + /** Subset of the MolFile V2000 format */ export interface MolFile { readonly title: string, @@ -20,7 +22,8 @@ export interface MolFile { readonly x: Column<number>, readonly y: Column<number>, readonly z: Column<number>, - readonly type_symbol: Column<string> + readonly type_symbol: Column<string>, + readonly formal_charge: Column<number> }, readonly bonds: { readonly count: number @@ -28,6 +31,57 @@ export interface MolFile { readonly atomIdxB: Column<number>, readonly order: Column<number> } + readonly formalCharges: { + readonly atomIdx: Column<number>; + readonly charge: Column<number>; + } +} + +/* + The atom lines in a .mol file have the following structure: + + xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee + --------------------------------------------------------------------- + + Below is a breakdown of each component and its start/end indices: + + xxxxx.xxxx (X COORDINATE, 0-10) + yyyyy.yyyy (Y COORDINATE, 10-20) + zzzzz.zzzz (Z COORDINATE, 20-30) + _ (30 IS EMPTY) + aaa (ATOM SYMBOL, 31-34) + dd (MASS DIFF, 34-36) + ccc (FORMAL CHARGE, 36-39) + sss (ATOM STEREO PARITY, 39-42) + hhh (HYDROGEN COUNT+1, 42-45) + bbb (STEREO CARE BOX, 45-48) + vvv (VALENCE, 48-51) + HHH (H0 DESIGNATOR, 51-54) + rrr (UNUSED, 54-57) + iii (UNUSED, 57-60) + mmm (ATOM-ATOM MAPPING NUMBER, 60-63) + nnn (INVERSION/RETENTION FLAG, 63-66) + eee (EXACT CHANGE FLAG, 66-69) +*/ + +/** + * @param key - The value found at the atom block. + * @returns The actual formal charge based on the mapping. 
+ */ +export function formalChargeMapper(key: number) { + switch (key) { + case 7: return -3; + case 6: return -2; + case 5: return -1; + case 0: return 0; + case 3: return 1; + case 2: return 2; + case 1: return 3; + case 4: return 0; + default: + console.error(`Value ${key} is outside the 0-7 range, defaulting to 0.`); + return 0; + } } export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms'] { @@ -35,6 +89,7 @@ export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms const y = TokenBuilder.create(tokenizer.data, count * 2); const z = TokenBuilder.create(tokenizer.data, count * 2); const type_symbol = TokenBuilder.create(tokenizer.data, count * 2); + const formal_charge = TokenBuilder.create(tokenizer.data, count * 2); for (let i = 0; i < count; ++i) { Tokenizer.markLine(tokenizer); @@ -47,6 +102,8 @@ export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms TokenBuilder.addUnchecked(z, tokenizer.tokenStart, tokenizer.tokenEnd); Tokenizer.trim(tokenizer, s + 31, s + 34); TokenBuilder.addUnchecked(type_symbol, tokenizer.tokenStart, tokenizer.tokenEnd); + Tokenizer.trim(tokenizer, s + 36, s + 39); + TokenBuilder.addUnchecked(formal_charge, tokenizer.tokenStart, tokenizer.tokenEnd); tokenizer.position = position; } @@ -55,7 +112,8 @@ export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms x: TokenColumn(x)(Column.Schema.float), y: TokenColumn(y)(Column.Schema.float), z: TokenColumn(z)(Column.Schema.float), - type_symbol: TokenColumn(type_symbol)(Column.Schema.str) + type_symbol: TokenColumn(type_symbol)(Column.Schema.str), + formal_charge: TokenColumn(formal_charge)(Column.Schema.int) }; } @@ -84,6 +142,76 @@ export function handleBonds(tokenizer: Tokenizer, count: number): MolFile['bonds }; } +interface FormalChargesRawData { + atomIdx: Array<number>; + charge: Array<number>; +} +export function handleFormalCharges(tokenizer: Tokenizer, lineStart: number, 
formalCharges: FormalChargesRawData) { + + Tokenizer.trim(tokenizer, lineStart + 6, lineStart + 9); + const numOfCharges = parseInt(Tokenizer.getTokenString(tokenizer)); + for (let i = 0; i < numOfCharges; ++i) { + /* + M CHG 3 1 -1 2 0 2 -1 + | | | | | + | | | | |__charge2 (etc.) + | | | | + | | | |__atomIdx2 + | | | + | | |__charge1 + | | + | |__atomIdx1 (cursor at position 12) + | + |___numOfCharges + */ + const offset = 9 + (i * 8); + + Tokenizer.trim(tokenizer, lineStart + offset, lineStart + offset + 4); + const _atomIdx = Tokenizer.getTokenString(tokenizer); + formalCharges.atomIdx.push(+_atomIdx); + Tokenizer.trim(tokenizer, lineStart + offset + 4, lineStart + offset + 8); + const _charge = Tokenizer.getTokenString(tokenizer); + formalCharges.charge.push(+_charge); + } + /* Once the line is read, move to the next one. */ + Tokenizer.eatLine(tokenizer); +} + +/** Call an appropriate handler based on the property type. + * (For now it only calls the formal charge handler, additional handlers can + * be added for other properties.) 
+ */ +export function handlePropertiesBlock(tokenizer: Tokenizer): MolFile['formalCharges'] { + + const _atomIdx: Array<number> = []; + const _charge: Array<number> = []; + const _formalCharges: FormalChargesRawData = { atomIdx: _atomIdx, charge: _charge }; + + while (tokenizer.position < tokenizer.length) { + const { position: s } = tokenizer; + + Tokenizer.trim(tokenizer, s + 3, s + 6); + const propertyType = Tokenizer.getTokenString(tokenizer); + + if (propertyType === 'END') break; + Tokenizer.eatLine(tokenizer); + + switch (propertyType) { + case 'CHG': + handleFormalCharges(tokenizer, s, _formalCharges); + break; + default: + break; + } + } + + const formalCharges: MolFile['formalCharges'] = { + atomIdx: Column.ofIntArray(_formalCharges.atomIdx), + charge: Column.ofIntArray(_formalCharges.charge) + }; + return formalCharges; +} + function parseInternal(data: string): Result<MolFile> { const tokenizer = Tokenizer(data); @@ -98,12 +226,15 @@ function parseInternal(data: string): Result<MolFile> { const atoms = handleAtoms(tokenizer, atomCount); const bonds = handleBonds(tokenizer, bondCount); + const formalCharges = handlePropertiesBlock(tokenizer); + const result: MolFile = { title, program, comment, atoms, - bonds + bonds, + formalCharges, }; return Result.success(result); } diff --git a/src/mol-io/reader/sdf/parser-v3-util.ts b/src/mol-io/reader/sdf/parser-v3-util.ts index 83286e2bf5eb2492e8273220460946e9fa42a880..32573ac8066ab8d2b44c41e140e86b0adec1cecd 100644 --- a/src/mol-io/reader/sdf/parser-v3-util.ts +++ b/src/mol-io/reader/sdf/parser-v3-util.ts @@ -1,3 +1,10 @@ +/** + * Copyright (c) 2021-2022 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author Jason Pattle <jpattle@exscientia.co.uk> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + import { Column } from '../../../mol-data/db'; import { MolFile } from '../mol/parser'; import { Tokenizer, TokenBuilder, Tokens } from '../common/text/tokenizer'; @@ -61,6 +68,9 @@ export function handleAtomsV3( y: TokenColumn(y)(Column.Schema.float), z: TokenColumn(z)(Column.Schema.float), type_symbol: TokenColumn(type_symbol)(Column.Schema.str), + /* No support for formal charge parsing in V3000 molfiles at the moment, + so all charges default to 0.*/ + formal_charge: Column.ofConst(0, atomCount, Column.Schema.int) }; } diff --git a/src/mol-io/reader/sdf/parser.ts b/src/mol-io/reader/sdf/parser.ts index d7e6d069c153495d8cfcadebb1b824b4c570d691..d609a380d3701da6c3da63b7d2a90aedeac5a4fd 100644 --- a/src/mol-io/reader/sdf/parser.ts +++ b/src/mol-io/reader/sdf/parser.ts @@ -1,12 +1,14 @@ /** - * Copyright (c) 2020-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. + * Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info. 
* * @author Sebastian Bittrich <sebastian.bittrich@rcsb.org> * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Jason Pattle <jpattle@exscientia.co.uk> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> */ import { Column } from '../../../mol-data/db'; -import { MolFile, handleAtoms, handleBonds } from '../mol/parser'; +import { MolFile, handleAtoms, handleBonds, handlePropertiesBlock } from '../mol/parser'; import { Task } from '../../../mol-task'; import { ReaderResult as Result } from '../result'; import { Tokenizer, TokenBuilder } from '../common/text/tokenizer'; @@ -29,6 +31,7 @@ export interface SdfFile { const delimiter = '$$$$'; + function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } { const dataHeader = TokenBuilder.create(tokenizer.data, 32); const data = TokenBuilder.create(tokenizer.data, 32); @@ -93,12 +96,20 @@ function handleMolFile(tokenizer: Tokenizer) { return; } + /* No support for formal charge parsing in V3000 molfiles at the moment, + so all charges default to 0.*/ + const nullFormalCharges: MolFile['formalCharges'] = { + atomIdx: Column.ofConst(0, atomCount, Column.Schema.int), + charge: Column.ofConst(0, atomCount, Column.Schema.int) + }; + const atoms = molIsV3 ? handleAtomsV3(tokenizer, atomCount) : handleAtoms(tokenizer, atomCount); const bonds = molIsV3 ? handleBondsV3(tokenizer, bondCount) : handleBonds(tokenizer, bondCount); + const formalCharges = molIsV3 ? 
nullFormalCharges : handlePropertiesBlock(tokenizer); const dataItems = handleDataItems(tokenizer); return { - molFile: { title, program, comment, atoms, bonds }, + molFile: { title, program, comment, atoms, bonds, formalCharges }, dataItems }; } diff --git a/src/mol-model-formats/structure/mol.ts b/src/mol-model-formats/structure/mol.ts index b6847e852adbf7b8faf3e2fed75ffd21f4c37a86..9a873019d2e9efb80718385384a141b0b41a19d0 100644 --- a/src/mol-model-formats/structure/mol.ts +++ b/src/mol-model-formats/structure/mol.ts @@ -1,12 +1,13 @@ /** - * Copyright (c) 2019-2020 mol* contributors, licensed under MIT, See LICENSE file for more info. + * Copyright (c) 2019-2022 mol* contributors, licensed under MIT, See LICENSE file for more info. * * @author David Sehnal <david.sehnal@gmail.com> * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> */ import { Column, Table } from '../../mol-data/db'; -import { MolFile } from '../../mol-io/reader/mol/parser'; +import { MolFile, formalChargeMapper } from '../../mol-io/reader/mol/parser'; import { MoleculeType } from '../../mol-model/structure/model/types'; import { RuntimeContext, Task } from '../../mol-task'; import { createModels } from './basic/parser'; @@ -18,13 +19,24 @@ import { IndexPairBonds } from './property/bonds/index-pair'; import { Trajectory } from '../../mol-model/structure'; export async function getMolModels(mol: MolFile, format: ModelFormat<any> | undefined, ctx: RuntimeContext) { - const { atoms, bonds } = mol; + const { atoms, bonds, formalCharges } = mol; const MOL = Column.ofConst('MOL', mol.atoms.count, Column.Schema.str); const A = Column.ofConst('A', mol.atoms.count, Column.Schema.str); const type_symbol = Column.asArrayColumn(atoms.type_symbol); const seq_id = Column.ofConst(1, atoms.count, Column.Schema.int); + const computedFormalCharges = new Int32Array(mol.atoms.count); + if (formalCharges.atomIdx.rowCount > 0) { + for (let i = 0; i < 
formalCharges.atomIdx.rowCount; i++) { + computedFormalCharges[formalCharges.atomIdx.value(i) - 1] = formalCharges.charge.value(i); + } + } else { + for (let i = 0; i < mol.atoms.count; i++) { + computedFormalCharges[i] = formalChargeMapper(atoms.formal_charge.value(i)); + } + } + const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, { auth_asym_id: A, auth_atom_id: type_symbol, @@ -45,6 +57,7 @@ export async function getMolModels(mol: MolFile, format: ModelFormat<any> | unde type_symbol, pdbx_PDB_model_num: Column.ofConst(1, atoms.count, Column.Schema.int), + pdbx_formal_charge: Column.ofIntArray(computedFormalCharges) }, atoms.count); const entityBuilder = new EntityBuilder();