diff --git a/CHANGELOG.md b/CHANGELOG.md index 494610e16bfbfd0baa9ec169b39599e65a8086bd..397b8354c9c0b742a8754834610f9653426a6316 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Note that since we don't clearly distinguish between a public and private interf ## [Unreleased] - Add `tubularHelices` parameter to Cartoon representation +- Add `SdfFormat` and update SDF parser to be able to parse data headers according to spec (hopefully :)) #230 ## [v2.1.0] - 2021-07-05 diff --git a/src/mol-io/reader/_spec/sdf.spec.ts b/src/mol-io/reader/_spec/sdf.spec.ts index fb6dad9869bb9f9e6d02d283114ddb64b4218dfb..d7e47781990de7b517baa9c2c8c9d402d2499a53 100644 --- a/src/mol-io/reader/_spec/sdf.spec.ts +++ b/src/mol-io/reader/_spec/sdf.spec.ts @@ -22,8 +22,8 @@ M END > <DATABASE_NAME> drugbank -> <SMILES> -[O-]P([O-])([O-])=O +> 5225 <TEST_FIELD> +whatever > <INCHI_IDENTIFIER> InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)/p-3 @@ -362,22 +362,25 @@ describe('sdf reader', () => { expect(bonds.atomIdxB.value(3)).toBe(5); expect(bonds.order.value(3)).toBe(1); - expect(dataItems.dataHeader.value(0)).toBe('DATABASE_ID'); + expect(dataItems.dataHeader.value(0)).toBe('<DATABASE_ID>'); expect(dataItems.data.value(0)).toBe('0'); - expect(dataItems.dataHeader.value(1)).toBe('DATABASE_NAME'); + expect(dataItems.dataHeader.value(1)).toBe('<DATABASE_NAME>'); expect(dataItems.data.value(1)).toBe('drugbank'); - expect(dataItems.dataHeader.value(31)).toBe('SYNONYMS'); + expect(dataItems.dataHeader.value(2)).toBe('5225 <TEST_FIELD>'); + expect(dataItems.data.value(2)).toBe('whatever'); + + expect(dataItems.dataHeader.value(31)).toBe('<SYNONYMS>'); expect(dataItems.data.value(31)).toBe('Orthophosphate; Phosphate'); expect(compound1.dataItems.data.value(0)).toBe('0'); expect(compound2.dataItems.data.value(0)).toBe('1'); - expect(compound3.dataItems.dataHeader.value(2)).toBe('PUBCHEM_CONFORMER_DIVERSEORDER'); + expect(compound3.dataItems.dataHeader.value(2)).toBe('<PUBCHEM_CONFORMER_DIVERSEORDER>'); expect(compound3.dataItems.data.value(2)).toBe('1\n11\n10\n3\n15\n17\n13\n5\n16\n7\n14\n9\n8\n4\n18\n6\n12\n2'); - expect(compound3.dataItems.dataHeader.value(21)).toBe('PUBCHEM_COORDINATE_TYPE'); + expect(compound3.dataItems.dataHeader.value(21)).toBe('<PUBCHEM_COORDINATE_TYPE>'); expect(compound3.dataItems.data.value(21)).toBe('2\n5\n10'); }); }); diff --git a/src/mol-io/reader/sdf/parser.ts b/src/mol-io/reader/sdf/parser.ts index 1db788b4247be4672b377163d319550cc35b20ce..71594930183d66cdaf939bb5ff7fa41ccb119fb2 100644 --- a/src/mol-io/reader/sdf/parser.ts +++ b/src/mol-io/reader/sdf/parser.ts @@ -26,6 +26,7 @@ export interface SdfFile { readonly compounds: SdfFileCompound[] } + const delimiter = '$$$$'; function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } { const dataHeader = TokenBuilder.create(tokenizer.data, 32); @@ -36,8 +37,8 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da if (line.startsWith(delimiter)) break; if (!line) continue; - if (line.startsWith('> <')) { - TokenBuilder.add(dataHeader, tokenizer.tokenStart + 3, tokenizer.tokenEnd - 1); + if (line.startsWith('> ')) { + TokenBuilder.add(dataHeader, tokenizer.tokenStart + 2, tokenizer.tokenEnd); Tokenizer.markLine(tokenizer); const start = tokenizer.tokenStart; @@ -45,7 +46,7 @@ function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, da let added = false; while (tokenizer.position < tokenizer.length) { const line2 = Tokenizer.readLine(tokenizer); - if (!line2 || line2.startsWith(delimiter) || line2.startsWith('> <')) { + if (!line2 || line2.startsWith(delimiter) || line2.startsWith('> ')) { TokenBuilder.add(data, start, end); added = true; break;