diff --git a/CHANGELOG.md b/CHANGELOG.md index 494610e16bfbfd0baa9ec169b39599e65a8086bd..397b8354c9c0b742a8754834610f9653426a6316 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Note that since we don't clearly distinguish between a public and private interf ## [Unreleased] - Add `tubularHelices` parameter to Cartoon representation +- Add `SdfFormat` and update SDF parser to be able to parse data headers according to spec (hopefully :)) #230 ## [v2.1.0] - 2021-07-05 diff --git a/src/mol-io/reader/_spec/sdf.spec.ts b/src/mol-io/reader/_spec/sdf.spec.ts index fb6dad9869bb9f9e6d02d283114ddb64b4218dfb..d7e47781990de7b517baa9c2c8c9d402d2499a53 100644 --- a/src/mol-io/reader/_spec/sdf.spec.ts +++ b/src/mol-io/reader/_spec/sdf.spec.ts @@ -22,8 +22,8 @@ M END > <DATABASE_NAME> drugbank -> <SMILES> -[O-]P([O-])([O-])=O +> 5225 <TEST_FIELD> +whatever > <INCHI_IDENTIFIER> InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)/p-3 @@ -362,22 +362,25 @@ describe('sdf reader', () => { expect(bonds.atomIdxB.value(3)).toBe(5); expect(bonds.order.value(3)).toBe(1); - expect(dataItems.dataHeader.value(0)).toBe('DATABASE_ID'); + expect(dataItems.dataHeader.value(0)).toBe('<DATABASE_ID>'); expect(dataItems.data.value(0)).toBe('0'); - expect(dataItems.dataHeader.value(1)).toBe('DATABASE_NAME'); + expect(dataItems.dataHeader.value(1)).toBe('<DATABASE_NAME>'); expect(dataItems.data.value(1)).toBe('drugbank'); - expect(dataItems.dataHeader.value(31)).toBe('SYNONYMS'); + expect(dataItems.dataHeader.value(2)).toBe('5225 <TEST_FIELD>'); + expect(dataItems.data.value(2)).toBe('whatever'); + + expect(dataItems.dataHeader.value(31)).toBe('<SYNONYMS>'); expect(dataItems.data.value(31)).toBe('Orthophosphate; Phosphate'); expect(compound1.dataItems.data.value(0)).toBe('0'); expect(compound2.dataItems.data.value(0)).toBe('1'); - expect(compound3.dataItems.dataHeader.value(2)).toBe('PUBCHEM_CONFORMER_DIVERSEORDER'); + expect(compound3.dataItems.dataHeader.value(2)).toBe('<PUBCHEM_CONFORMER_DIVERSEORDER>'); expect(compound3.dataItems.data.value(2)).toBe('1\n11\n10\n3\n15\n17\n13\n5\n16\n7\n14\n9\n8\n4\n18\n6\n12\n2'); - expect(compound3.dataItems.dataHeader.value(21)).toBe('PUBCHEM_COORDINATE_TYPE'); + expect(compound3.dataItems.dataHeader.value(21)).toBe('<PUBCHEM_COORDINATE_TYPE>'); expect(compound3.dataItems.data.value(21)).toBe('2\n5\n10'); }); }); diff --git a/src/mol-io/reader/sdf/parser.ts b/src/mol-io/reader/sdf/parser.ts index cced43dd6320dde488417cc009448ea24605b77c..71594930183d66cdaf939bb5ff7fa41ccb119fb2 100644 --- a/src/mol-io/reader/sdf/parser.ts +++ b/src/mol-io/reader/sdf/parser.ts @@ -13,16 +13,20 @@ import { Tokenizer, TokenBuilder } from '../common/text/tokenizer'; import { TokenColumnProvider as TokenColumn } from '../common/text/column/token'; /** http://c4.cabrillo.edu/404/ctfile.pdf - page 41 */ + +export interface SdfFileCompound { + readonly molFile: MolFile, + readonly dataItems: { + readonly dataHeader: Column<string>, + readonly data: Column<string> + } +} + export interface SdfFile { - readonly compounds: { - readonly molFile: MolFile, - readonly dataItems: { - readonly dataHeader: Column<string>, - readonly data: Column<string> - } - }[] + readonly compounds: SdfFileCompound[] } + const delimiter = '$$$$'; function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } { const dataHeader = TokenBuilder.create(tokenizer.data, 32); @@ -33,8 +37,8 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da if (line.startsWith(delimiter)) break; if (!line) continue; - if (line.startsWith('> <')) { - TokenBuilder.add(dataHeader, tokenizer.tokenStart + 3, tokenizer.tokenEnd - 1); + if (line.startsWith('> ')) { + TokenBuilder.add(dataHeader, tokenizer.tokenStart + 2, tokenizer.tokenEnd); Tokenizer.markLine(tokenizer); const start = tokenizer.tokenStart; @@ -42,7 +46,7 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da let added = false; while (tokenizer.position < tokenizer.length) { const line2 = Tokenizer.readLine(tokenizer); - if (!line2 || line2.startsWith(delimiter) || line2.startsWith('> <')) { + if (!line2 || line2.startsWith(delimiter) || line2.startsWith('> ')) { TokenBuilder.add(data, start, end); added = true; break; diff --git a/src/mol-model-formats/structure/mol.ts b/src/mol-model-formats/structure/mol.ts index fb66c34e8e2e80da8dce161d42c7f768e10707ad..b6847e852adbf7b8faf3e2fed75ffd21f4c37a86 100644 --- a/src/mol-model-formats/structure/mol.ts +++ b/src/mol-model-formats/structure/mol.ts @@ -17,7 +17,7 @@ import { ModelFormat } from '../format'; import { IndexPairBonds } from './property/bonds/index-pair'; import { Trajectory } from '../../mol-model/structure'; -async function getModels(mol: MolFile, ctx: RuntimeContext) { +export async function getMolModels(mol: MolFile, format: ModelFormat<any> | undefined, ctx: RuntimeContext) { const { atoms, bonds } = mol; const MOL = Column.ofConst('MOL', mol.atoms.count, Column.Schema.str); @@ -61,7 +61,7 @@ async function getModels(mol: MolFile, ctx: RuntimeContext) { atom_site }); - const models = await createModels(basics, MolFormat.create(mol), ctx); + const models = await createModels(basics, format ?? MolFormat.create(mol), ctx); if (models.frameCount > 0) { const indexA = Column.ofIntArray(Column.mapToArray(bonds.atomIdxA, x => x - 1, Int32Array)); @@ -91,5 +91,5 @@ namespace MolFormat { } export function trajectoryFromMol(mol: MolFile): Task<Trajectory> { - return Task.create('Parse MOL', ctx => getModels(mol, ctx)); + return Task.create('Parse MOL', ctx => getMolModels(mol, void 0, ctx)); } diff --git a/src/mol-model-formats/structure/sdf.ts b/src/mol-model-formats/structure/sdf.ts new file mode 100644 index 0000000000000000000000000000000000000000..77f9b6bf8176d6ba14331503f7d868bd8c4f0ff0 --- /dev/null +++ b/src/mol-model-formats/structure/sdf.ts @@ -0,0 +1,29 @@ +/** + * Copyright (c) 2021 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { SdfFileCompound } from '../../mol-io/reader/sdf/parser'; +import { Trajectory } from '../../mol-model/structure'; +import { Task } from '../../mol-task'; +import { ModelFormat } from '../format'; +import { getMolModels } from './mol'; + +export { SdfFormat }; + +type SdfFormat = ModelFormat<SdfFileCompound> + +namespace SdfFormat { + export function is(x?: ModelFormat): x is SdfFormat { + return x?.kind === 'sdf'; + } + + export function create(mol: SdfFileCompound): SdfFormat { + return { kind: 'sdf', name: mol.molFile.title, data: mol }; + } +} + +export function trajectoryFromSdf(mol: SdfFileCompound): Task<Trajectory> { + return Task.create('Parse SDF', ctx => getMolModels(mol.molFile, SdfFormat.create(mol), ctx)); +} diff --git a/src/mol-plugin-state/transforms/model.ts b/src/mol-plugin-state/transforms/model.ts index 68e9dd9676e75cb59a669105af0dd736ce8fe02c..1d5a79d4fa4e0dc71a96052f1b4fafab49ad1ecb 100644 --- a/src/mol-plugin-state/transforms/model.ts +++ b/src/mol-plugin-state/transforms/model.ts @@ -40,6 +40,7 @@ import { coordinatesFromXtc } from '../../mol-model-formats/structure/xtc'; import { parseXyz } from '../../mol-io/reader/xyz/parser'; import { trajectoryFromXyz } from '../../mol-model-formats/structure/xyz'; import { parseSdf } from '../../mol-io/reader/sdf/parser'; +import { trajectoryFromSdf } from '../../mol-model-formats/structure/sdf'; export { CoordinatesFromDcd }; export { CoordinatesFromXtc }; @@ -308,8 +309,8 @@ const TrajectoryFromSDF = PluginStateTransform.BuiltIn({ const models: Model[] = []; - for (const { molFile } of parsed.result.compounds) { - const traj = await trajectoryFromMol(molFile).runInContext(ctx); + for (const compound of parsed.result.compounds) { + const traj = await trajectoryFromSdf(compound).runInContext(ctx); for (let i = 0; i < traj.frameCount; i++) { models.push(await Task.resolveInContext(traj.getFrameAtIndex(i), ctx)); }