From 5df55e6bf7a34837738eeb5d2f7e6e8000e311e5 Mon Sep 17 00:00:00 2001 From: dsehnal <david.sehnal@gmail.com> Date: Mon, 22 Mar 2021 17:21:47 +0100 Subject: [PATCH] SDF delimiter bugfix + multi-molecule SDF support in mol-plugin --- src/mol-io/reader/_spec/sdf.spec.ts | 27 ++++++++++++++++-- src/mol-io/reader/sdf/parser.ts | 27 ++++++++++++++---- src/mol-plugin-state/formats/trajectory.ts | 16 +++++++++-- src/mol-plugin-state/transforms/model.ts | 32 ++++++++++++++++++++++ 4 files changed, 90 insertions(+), 12 deletions(-) diff --git a/src/mol-io/reader/_spec/sdf.spec.ts b/src/mol-io/reader/_spec/sdf.spec.ts index 9ab1f281c..8c65d6456 100644 --- a/src/mol-io/reader/_spec/sdf.spec.ts +++ b/src/mol-io/reader/_spec/sdf.spec.ts @@ -112,7 +112,22 @@ Phosphate ion > <SYNONYMS> Orthophosphate; Phosphate -$$$$`; +$$$$ + +Comp 2 + +5 4 0 0 0 0 999 V2000 + 0.0000 0.8250 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0 + -0.8250 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 -0.8250 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0 + 0.8250 0.0000 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0 +4 1 1 0 0 0 0 +4 2 2 0 0 0 0 +4 3 1 0 0 0 0 +4 5 1 0 0 0 0 +M CHG 3 1 -1 3 -1 5 -1 +M END`; describe('sdf reader', () => { it('basic', async () => { @@ -120,14 +135,20 @@ describe('sdf reader', () => { if (parsed.isError) { throw new Error(parsed.message); } - const compound = parsed.result.compounds[0]; - const { molFile, dataItems } = compound; + const compound1 = parsed.result.compounds[0]; + const compound2 = parsed.result.compounds[1]; + const { molFile, dataItems } = compound1; const { atoms, bonds } = molFile; + expect(parsed.result.compounds.length).toBe(2); + // number of structures expect(atoms.count).toBe(5); expect(bonds.count).toBe(4); + expect(compound2.molFile.atoms.count).toBe(5); + expect(compound2.molFile.bonds.count).toBe(4); + expect(atoms.x.value(0)).toBeCloseTo(0, 0.001); expect(atoms.y.value(0)).toBeCloseTo(0.8250, 0.0001); expect(atoms.z.value(0)).toBeCloseTo(0, 0.0001); diff --git a/src/mol-io/reader/sdf/parser.ts b/src/mol-io/reader/sdf/parser.ts index dd787ef01..fa764cde1 100644 --- a/src/mol-io/reader/sdf/parser.ts +++ b/src/mol-io/reader/sdf/parser.ts @@ -22,6 +22,7 @@ export interface SdfFile { }[] } +const delimiter = '$$$$'; function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } { const dataHeader = TokenBuilder.create(tokenizer.data, 32); const data = TokenBuilder.create(tokenizer.data, 32); @@ -29,6 +30,7 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da let sawHeaderToken = false; while (tokenizer.position < tokenizer.length) { const line = Tokenizer.readLine(tokenizer); + if (line.startsWith(delimiter)) break; if (!!line) { if (line.startsWith('> <')) { TokenBuilder.add(dataHeader, tokenizer.tokenStart + 3, tokenizer.tokenEnd - 1); @@ -49,9 +51,7 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da }; } -function handleMolFile(data: string) { - const tokenizer = Tokenizer(data); - +function handleMolFile(tokenizer: Tokenizer) { const title = Tokenizer.readLine(tokenizer).trim(); const program = Tokenizer.readLine(tokenizer).trim(); const comment = Tokenizer.readLine(tokenizer).trim(); @@ -60,6 +60,15 @@ function handleMolFile(data: string) { const atomCount = +counts.substr(0, 3), bondCount = +counts.substr(3, 3); + if (Number.isNaN(atomCount) || Number.isNaN(bondCount)) { + // try to skip to next molecule + while (tokenizer.position < tokenizer.length) { + const line = Tokenizer.readLine(tokenizer); + if (line.startsWith(delimiter)) break; + } + return; + } + const atoms = handleAtoms(tokenizer, atomCount); const bonds = handleBonds(tokenizer, bondCount); const dataItems = handleDataItems(tokenizer); @@ -70,10 +79,16 @@ function handleMolFile(data: string) { }; } -const delimiter = '$$$$'; function parseInternal(data: string): Result<SdfFile> { - const result: SdfFile = { compounds: data.split(delimiter).map(d => handleMolFile(d)) }; - return Result.success(result); + const tokenizer = Tokenizer(data); + + const compounds: SdfFile['compounds'] = []; + while (tokenizer.position < tokenizer.length) { + const c = handleMolFile(tokenizer); + if (c) compounds.push(c); + } + + return Result.success({ compounds }); } export function parseSdf(data: string) { diff --git a/src/mol-plugin-state/formats/trajectory.ts b/src/mol-plugin-state/formats/trajectory.ts index 1e2d8d534..c8eca7e90 100644 --- a/src/mol-plugin-state/formats/trajectory.ts +++ b/src/mol-plugin-state/formats/trajectory.ts @@ -123,14 +123,23 @@ export const GroProvider: TrajectoryFormatProvider = { }; export const MolProvider: TrajectoryFormatProvider = { - label: 'MOL/SDF', - description: 'MOL/SDF', + label: 'MOL', + description: 'MOL', category: TrajectoryFormatCategory, - stringExtensions: ['mol', 'sdf', 'sd'], + stringExtensions: ['mol'], parse: directTrajectory(StateTransforms.Model.TrajectoryFromMOL), visuals: defaultVisuals }; +export const SdfProvider: TrajectoryFormatProvider = { + label: 'SDF', + description: 'SDF', + category: TrajectoryFormatCategory, + stringExtensions: ['sdf', 'sd'], + parse: directTrajectory(StateTransforms.Model.TrajectoryFromSDF), + visuals: defaultVisuals +}; + export const Mol2Provider: TrajectoryFormatProvider = { label: 'MOL2', description: 'MOL2', @@ -148,6 +157,7 @@ export const BuiltInTrajectoryFormats = [ ['gro', GroProvider] as const, ['xyz', XyzProvider] as const, ['mol', MolProvider] as const, + ['sdf', SdfProvider] as const, ['mol2', Mol2Provider] as const, ] as const; diff --git a/src/mol-plugin-state/transforms/model.ts b/src/mol-plugin-state/transforms/model.ts index b50cbe180..d987f7914 100644 --- a/src/mol-plugin-state/transforms/model.ts +++ b/src/mol-plugin-state/transforms/model.ts @@ -39,6 +39,7 @@ import { parseXtc } from '../../mol-io/reader/xtc/parser'; import { coordinatesFromXtc } from '../../mol-model-formats/structure/xtc'; import { parseXyz } from '../../mol-io/reader/xyz/parser'; import { trajectoryFromXyz } from '../../mol-model-formats/structure/xyz'; +import { parseSdf } from '../../mol-io/reader/sdf/parser'; export { CoordinatesFromDcd }; export { CoordinatesFromXtc }; @@ -50,6 +51,7 @@ export { TrajectoryFromPDB }; export { TrajectoryFromGRO }; export { TrajectoryFromXYZ }; export { TrajectoryFromMOL }; +export { TrajectoryFromSDF }; export { TrajectoryFromMOL2 }; export { TrajectoryFromCube }; export { TrajectoryFromCifCore }; @@ -292,6 +294,36 @@ const TrajectoryFromMOL = PluginStateTransform.BuiltIn({ } }); +type TrajectoryFromSDF = typeof TrajectoryFromSDF +const TrajectoryFromSDF = PluginStateTransform.BuiltIn({ + name: 'trajectory-from-sdf', + display: { name: 'Parse SDF', description: 'Parse SDF string and create trajectory.' }, + from: [SO.Data.String], + to: SO.Molecule.Trajectory +})({ + apply({ a }) { + return Task.create('Parse SDF', async ctx => { + const parsed = await parseSdf(a.data).runInContext(ctx); + if (parsed.isError) throw new Error(parsed.message); + + const models: Model[] = []; + + for (const { molFile } of parsed.result.compounds) { + const traj = await trajectoryFromMol(molFile).runInContext(ctx); + for (let i = 0; i < traj.frameCount; i++) { + models.push(await Task.resolveInContext(traj.getFrameAtIndex(i), ctx)); + } + } + + const traj = new ArrayTrajectory(models); + + const props = trajectoryProps(traj); + return new SO.Molecule.Trajectory(traj, props); + }); + } +}); + + type TrajectoryFromMOL2 = typeof TrajectoryFromMOL const TrajectoryFromMOL2 = PluginStateTransform.BuiltIn({ name: 'trajectory-from-mol2', -- GitLab