From 11a6df6e19a7da121ebcebeeb4ceb6b8bee30a73 Mon Sep 17 00:00:00 2001
From: Alexander Rose <alexander.rose@weirdbyte.de>
Date: Mon, 13 Jan 2020 16:32:42 -0500
Subject: [PATCH] add basic psf reader

---
Review notes (ignored by git-am; not part of the commit message):

 * Adds a minimal CHARMM/NAMD PSF reader (title, !NATOM and !NBOND sections
   only), its jest spec, and the plugin wiring (PsfProvider, ParsePsf,
   PluginStateObject.Format.Psf).
 * psf.spec.ts: the second argument of toBeCloseTo() is a number of decimal
   digits, not a tolerance; 0.00001 made the float checks nearly vacuous,
   so it is now 5.
 * psf/parser.ts: parseInt() on the section counts now passes radix 10.
 * tokenizer.ts: the readLine() doc comment now says that a single line is
   read (the function takes no line count).
 * The patch text was re-flowed to one record per line; hunk sizes and the
   diffstat are unchanged. The "index" blob ids are kept from the original
   commit and are stale for the three files touched by the notes above.

 src/mol-io/reader/_spec/psf.spec.ts         | 110 ++++++++++
 src/mol-io/reader/common/text/tokenizer.ts  |   2 +-
 src/mol-io/reader/psf/parser.ts             | 213 ++++++++++++++++++++
 src/mol-model-formats/structure/dcd.ts      |   1 -
 src/mol-plugin/state/actions/data-format.ts |   3 +-
 src/mol-plugin/state/actions/structure.ts   |  17 +-
 src/mol-plugin/state/objects.ts             |   2 +
 src/mol-plugin/state/transforms/data.ts     |  18 ++
 8 files changed, 362 insertions(+), 4 deletions(-)
 create mode 100644 src/mol-io/reader/_spec/psf.spec.ts
 create mode 100644 src/mol-io/reader/psf/parser.ts

diff --git a/src/mol-io/reader/_spec/psf.spec.ts b/src/mol-io/reader/_spec/psf.spec.ts
new file mode 100644
index 000000000..86a006093
--- /dev/null
+++ b/src/mol-io/reader/_spec/psf.spec.ts
@@ -0,0 +1,110 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { parsePsf } from '../psf/parser';
+
+const psfString = `PSF CMAP CHEQ
+
+2 !NTITLE
+* BETA HARPIN IN IMPLICIT SOLVENT
+* DATE: 11/22/10 16:54: 9 CREATED BY USER: aokur
+
+42 !NATOM
+1 ALA3 1 ALA CAY 24 -0.270000 12.0110 0 0.00000 -0.301140E-02
+2 ALA3 1 ALA HY1 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+3 ALA3 1 ALA HY2 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+4 ALA3 1 ALA HY3 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+5 ALA3 1 ALA CY 20 0.510000 12.0110 0 0.00000 -0.301140E-02
+6 ALA3 1 ALA OY 70 -0.510000 15.9990 0 0.00000 -0.301140E-02
+7 ALA3 1 ALA N 54 -0.470000 14.0070 0 0.00000 -0.301140E-02
+8 ALA3 1 ALA HN 1 0.310000 1.00800 0 0.00000 -0.301140E-02
+9 ALA3 1 ALA CA 22 0.700000E-01 12.0110 0 0.00000 -0.301140E-02
+10 ALA3 1 ALA HA 6 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+11 ALA3 1 ALA CB 24 -0.270000 12.0110 0 0.00000 -0.301140E-02
+12 ALA3 1 ALA HB1 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+13 ALA3 1 ALA HB2 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+14 ALA3 1 ALA HB3 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+15 ALA3 1 ALA C 20 0.510000 12.0110 0 0.00000 -0.301140E-02
+16 ALA3 1 ALA O 70 -0.510000 15.9990 0 0.00000 -0.301140E-02
+17 ALA3 2 ALA N 54 -0.470000 14.0070 0 0.00000 -0.301140E-02
+18 ALA3 2 ALA HN 1 0.310000 1.00800 0 0.00000 -0.301140E-02
+19 ALA3 2 ALA CA 22 0.700000E-01 12.0110 0 0.00000 -0.301140E-02
+20 ALA3 2 ALA HA 6 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+21 ALA3 2 ALA CB 24 -0.270000 12.0110 0 0.00000 -0.301140E-02
+22 ALA3 2 ALA HB1 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+23 ALA3 2 ALA HB2 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+24 ALA3 2 ALA HB3 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+25 ALA3 2 ALA C 20 0.510000 12.0110 0 0.00000 -0.301140E-02
+26 ALA3 2 ALA O 70 -0.510000 15.9990 0 0.00000 -0.301140E-02
+27 ALA3 3 ALA N 54 -0.470000 14.0070 0 0.00000 -0.301140E-02
+28 ALA3 3 ALA HN 1 0.310000 1.00800 0 0.00000 -0.301140E-02
+29 ALA3 3 ALA CA 22 0.700000E-01 12.0110 0 0.00000 -0.301140E-02
+30 ALA3 3 ALA HA 6 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+31 ALA3 3 ALA CB 24 -0.270000 12.0110 0 0.00000 -0.301140E-02
+32 ALA3 3 ALA HB1 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+33 ALA3 3 ALA HB2 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+34 ALA3 3 ALA HB3 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+35 ALA3 3 ALA C 20 0.510000 12.0110 0 0.00000 -0.301140E-02
+36 ALA3 3 ALA O 70 -0.510000 15.9990 0 0.00000 -0.301140E-02
+37 ALA3 3 ALA NT 54 -0.470000 14.0070 0 0.00000 -0.301140E-02
+38 ALA3 3 ALA HNT 1 0.310000 1.00800 0 0.00000 -0.301140E-02
+39 ALA3 3 ALA CAT 24 -0.110000 12.0110 0 0.00000 -0.301140E-02
+40 ALA3 3 ALA HT1 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+41 ALA3 3 ALA HT2 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+42 ALA3 3 ALA HT3 3 0.900000E-01 1.00800 0 0.00000 -0.301140E-02
+
+41 !NBOND: bonds
+5 1 5 7 1 2 1 3
+1 4 6 5 11 9 7 8
+7 9 15 9 15 17 9 10
+11 12 11 13 11 14 16 15
+21 19 17 18 17 19 25 19
+25 27 19 20 21 22 21 23
+21 24 26 25 31 29 27 28
+27 29 35 29 29 30 31 32
+31 33 31 34 36 35 35 37
+37 38 37 39 39 40 39 41
+39 42
+`
+
+describe('psf reader', () => {
+    it('basic', async () => {
+        const parsed = await parsePsf(psfString).run();
+
+        if (parsed.isError) {
+            throw new Error(parsed.message)
+        }
+
+        const psfFile = parsed.result;
+        const { id, title, atoms, bonds } = psfFile;
+
+        expect(id).toBe('PSF CMAP CHEQ')
+        expect(title).toEqual([
+            'BETA HARPIN IN IMPLICIT SOLVENT',
+            'DATE: 11/22/10 16:54: 9 CREATED BY USER: aokur'
+        ])
+
+        expect(atoms.atomId.value(0)).toBe(1)
+        expect(atoms.atomId.value(41)).toBe(42)
+        expect(atoms.segmentName.value(0)).toBe('ALA3')
+        expect(atoms.residueId.value(0)).toBe(1)
+        expect(atoms.residueId.value(41)).toBe(3)
+        expect(atoms.residueName.value(0)).toBe('ALA')
+        expect(atoms.atomName.value(0)).toBe('CAY')
+        expect(atoms.atomName.value(41)).toBe('HT3')
+        expect(atoms.atomType.value(0)).toBe('24')
+        expect(atoms.atomType.value(41)).toBe('3')
+        expect(atoms.charge.value(0)).toBeCloseTo(-0.270000, 5)
+        expect(atoms.charge.value(41)).toBeCloseTo(0.090000, 5)
+        expect(atoms.mass.value(0)).toBeCloseTo(12.0110, 5)
+        expect(atoms.mass.value(41)).toBeCloseTo(1.00800, 5)
+
+        expect(bonds.atomIdA.value(0)).toBe(5)
+        expect(bonds.atomIdB.value(0)).toBe(1)
+        expect(bonds.atomIdA.value(40)).toBe(39)
+        expect(bonds.atomIdB.value(40)).toBe(42)
+    });
+});
diff --git a/src/mol-io/reader/common/text/tokenizer.ts b/src/mol-io/reader/common/text/tokenizer.ts
index f1f476a44..8d8cb2d41 100644
--- a/src/mol-io/reader/common/text/tokenizer.ts
+++ b/src/mol-io/reader/common/text/tokenizer.ts
@@ -91,7 +91,7 @@ namespace Tokenizer {
         return eatLine(state);
     }
 
-    /** Advance the state by the given number of lines and return line starts/ends as tokens. */
+    /** Advance the state by one line and return that line as a string. */
     export function readLine(state: Tokenizer): string {
         markLine(state);
         return getTokenString(state);
diff --git a/src/mol-io/reader/psf/parser.ts b/src/mol-io/reader/psf/parser.ts
new file mode 100644
index 000000000..2a86e2068
--- /dev/null
+++ b/src/mol-io/reader/psf/parser.ts
@@ -0,0 +1,213 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Task, RuntimeContext, chunkedSubtask } from '../../../mol-task'
+import { Tokenizer, TokenBuilder } from '../common/text/tokenizer'
+import { ReaderResult as Result } from '../result'
+import TokenColumn from '../common/text/column/token';
+import { Column } from '../../../mol-data/db';
+
+// http://www.ks.uiuc.edu/Training/Tutorials/namd/namd-tutorial-unix-html/node23.html
+
+export interface PsfFile {
+    readonly id: string
+    readonly title: string[]
+    readonly atoms: {
+        readonly count: number
+        readonly atomId: Column<number>
+        readonly segmentName: Column<string>
+        readonly residueId: Column<number>
+        readonly residueName: Column<string>
+        readonly atomName: Column<string>
+        readonly atomType: Column<string>
+        readonly charge: Column<number>
+        readonly mass: Column<number>
+    }
+    readonly bonds: {
+        readonly count: number
+        readonly atomIdA: Column<number>
+        readonly atomIdB: Column<number>
+    }
+}
+
+const { readLine, skipWhitespace, eatValue, eatLine, markStart } = Tokenizer;
+
+const reWhitespace = /\s+/
+const reTitle = /(^\*|REMARK)*/
+
+function State(tokenizer: Tokenizer, runtimeCtx: RuntimeContext) {
+    return {
+        tokenizer,
+        runtimeCtx,
+    }
+}
+type State = ReturnType<typeof State>
+
+async function handleAtoms(state: State, count: number): Promise<PsfFile['atoms']> {
+    const { tokenizer } = state
+
+    const atomId = TokenBuilder.create(tokenizer.data, count * 2)
+    const segmentName = TokenBuilder.create(tokenizer.data, count * 2)
+    const residueId = TokenBuilder.create(tokenizer.data, count * 2)
+    const residueName = TokenBuilder.create(tokenizer.data, count * 2)
+    const atomName = TokenBuilder.create(tokenizer.data, count * 2)
+    const atomType = TokenBuilder.create(tokenizer.data, count * 2)
+    const charge = TokenBuilder.create(tokenizer.data, count * 2)
+    const mass = TokenBuilder.create(tokenizer.data, count * 2)
+
+    const { length } = tokenizer
+    let linesAlreadyRead = 0
+    await chunkedSubtask(state.runtimeCtx, 10, void 0, chunkSize => {
+        const linesToRead = Math.min(count - linesAlreadyRead, chunkSize)
+        for (let i = 0; i < linesToRead; ++i) {
+            for (let j = 0; j < 8; ++j) {
+                skipWhitespace(tokenizer)
+                markStart(tokenizer)
+                eatValue(tokenizer)
+                switch (j) {
+                    case 0: TokenBuilder.addUnchecked(atomId, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                    case 1: TokenBuilder.addUnchecked(segmentName, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                    case 2: TokenBuilder.addUnchecked(residueId, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                    case 3: TokenBuilder.addUnchecked(residueName, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                    case 4: TokenBuilder.addUnchecked(atomName, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                    case 5: TokenBuilder.addUnchecked(atomType, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                    case 6: TokenBuilder.addUnchecked(charge, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                    case 7: TokenBuilder.addUnchecked(mass, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                }
+            }
+            // ignore any extra columns
+            eatLine(tokenizer)
+            markStart(tokenizer)
+        }
+        linesAlreadyRead += linesToRead
+        return linesToRead
+    }, ctx => ctx.update({ message: 'Parsing...', current: tokenizer.position, max: length }))
+
+    return {
+        count,
+        atomId: TokenColumn(atomId)(Column.Schema.int),
+        segmentName: TokenColumn(segmentName)(Column.Schema.str),
+        residueId: TokenColumn(residueId)(Column.Schema.int),
+        residueName: TokenColumn(residueName)(Column.Schema.str),
+        atomName: TokenColumn(atomName)(Column.Schema.str),
+        atomType: TokenColumn(atomType)(Column.Schema.str),
+        charge: TokenColumn(charge)(Column.Schema.float),
+        mass: TokenColumn(mass)(Column.Schema.float)
+    }
+}
+
+async function handleBonds(state: State, count: number): Promise<PsfFile['bonds']> {
+    const { tokenizer } = state
+
+    const atomIdA = TokenBuilder.create(tokenizer.data, count * 2)
+    const atomIdB = TokenBuilder.create(tokenizer.data, count * 2)
+
+    const { length } = tokenizer
+    let bondsAlreadyRead = 0
+    await chunkedSubtask(state.runtimeCtx, 10, void 0, chunkSize => {
+        const bondsToRead = Math.min(count - bondsAlreadyRead, chunkSize)
+        for (let i = 0; i < bondsToRead; ++i) {
+            for (let j = 0; j < 2; ++j) {
+                skipWhitespace(tokenizer)
+                markStart(tokenizer)
+                eatValue(tokenizer)
+                switch (j) {
+                    case 0: TokenBuilder.addUnchecked(atomIdA, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                    case 1: TokenBuilder.addUnchecked(atomIdB, tokenizer.tokenStart, tokenizer.tokenEnd); break
+                }
+            }
+        }
+        bondsAlreadyRead += bondsToRead
+        return bondsToRead
+    }, ctx => ctx.update({ message: 'Parsing...', current: tokenizer.position, max: length }))
+
+    return {
+        count,
+        atomIdA: TokenColumn(atomIdA)(Column.Schema.int),
+        atomIdB: TokenColumn(atomIdB)(Column.Schema.int),
+    }
+}
+
+function parseTitle(state: State, count: number) {
+    const title: string[] = []
+    for (let i = 0; i < count; ++i) {
+        const line = readLine(state.tokenizer)
+        title.push(line.replace(reTitle, '').trim())
+    }
+    return title
+}
+
+async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<PsfFile>> {
+    const tokenizer = Tokenizer(data);
+    const state = State(tokenizer, ctx);
+
+    let title = undefined as string[] | undefined
+    let atoms = undefined as PsfFile['atoms'] | undefined
+    let bonds = undefined as PsfFile['bonds'] | undefined
+
+    const id = readLine(state.tokenizer).trim()
+
+    while(tokenizer.tokenEnd < tokenizer.length) {
+        const line = readLine(state.tokenizer)
+        if (line.includes('!NTITLE')) {
+            const numTitle = parseInt(line.split(reWhitespace)[0], 10)
+            title = parseTitle(state, numTitle)
+        } else if (line.includes('!NATOM')) {
+            const numAtoms = parseInt(line.split(reWhitespace)[0], 10)
+            atoms = await handleAtoms(state, numAtoms)
+        } else if (line.includes('!NBOND')) {
+            const numBonds = parseInt(line.split(reWhitespace)[0], 10)
+            bonds = await handleBonds(state, numBonds)
+            break // TODO: don't break when the below are implemented
+        } else if (line.includes('!NTHETA')) {
+            // TODO
+        } else if (line.includes('!NPHI')) {
+            // TODO
+        } else if (line.includes('!NIMPHI')) {
+            // TODO
+        } else if (line.includes('!NDON')) {
+            // TODO
+        } else if (line.includes('!NACC')) {
+            // TODO
+        } else if (line.includes('!NNB')) {
+            // TODO
+        } else if (line.includes('!NGRP NST2')) {
+            // TODO
+        } else if (line.includes('!MOLNT')) {
+            // TODO
+        } else if (line.includes('!NUMLP NUMLPH')) {
+            // TODO
+        } else if (line.includes('!NCRTERM')) {
+            // TODO
+        }
+    }
+
+    if (title === undefined) {
+        title = []
+    }
+
+    if (atoms === undefined) {
+        return Result.error('no atoms data')
+    }
+
+    if (bonds === undefined) {
+        return Result.error('no bonds data')
+    }
+
+    const result: PsfFile = {
+        id,
+        title,
+        atoms,
+        bonds
+    }
+    return Result.success(result);
+}
+
+export function parsePsf(data: string) {
+    return Task.create<Result<PsfFile>>('Parse PSF', async ctx => {
+        return await parseInternal(data, ctx)
+    });
+}
\ No newline at end of file
diff --git a/src/mol-model-formats/structure/dcd.ts b/src/mol-model-formats/structure/dcd.ts
index 2f1dd1f19..f2e02cd79 100644
--- a/src/mol-model-formats/structure/dcd.ts
+++ b/src/mol-model-formats/structure/dcd.ts
@@ -11,7 +11,6 @@ import { Coordinates, Frame, Time } from '../../mol-model/structure/coordinates'
 const charmmTimeUnitFactor = 20.45482949774598
 
 export function coordinatesFromDcd(dcdFile: DcdFile): Task<Coordinates> {
-    console.log('coordinatesFromDcd', dcdFile)
     return Task.create('Parse DCD', async ctx => {
         await ctx.update('Converting to coordinates');
 
diff --git a/src/mol-plugin/state/actions/data-format.ts b/src/mol-plugin/state/actions/data-format.ts
index 4159b65f0..1c841267f 100644
--- a/src/mol-plugin/state/actions/data-format.ts
+++ b/src/mol-plugin/state/actions/data-format.ts
@@ -12,7 +12,7 @@ import { PluginStateObject } from '../objects';
 import { ParamDefinition as PD } from '../../../mol-util/param-definition';
 import { Ccp4Provider, Dsn6Provider, DscifProvider } from './volume';
 import { StateTransforms } from '../transforms';
-import { MmcifProvider, PdbProvider, GroProvider, Provider3dg, DcdProvider } from './structure';
+import { MmcifProvider, PdbProvider, GroProvider, Provider3dg, DcdProvider, PsfProvider } from './structure';
 import msgpackDecode from '../../../mol-io/common/msgpack/decode'
 import { PlyProvider } from './shape';
 
@@ -64,6 +64,7 @@ export class DataFormatRegistry<D extends PluginStateObject.Data.Binary | Plugin
         this.add('mmcif', MmcifProvider)
         this.add('pdb', PdbProvider)
         this.add('ply', PlyProvider)
+        this.add('psf', PsfProvider)
     };
 
     private _clear() {
diff --git a/src/mol-plugin/state/actions/structure.ts b/src/mol-plugin/state/actions/structure.ts
index f75e68944..a2991425f 100644
--- a/src/mol-plugin/state/actions/structure.ts
+++ b/src/mol-plugin/state/actions/structure.ts
@@ -10,7 +10,7 @@ import { StateAction, StateBuilder, StateSelection, StateTransformer, State } fr
 import { ParamDefinition as PD } from '../../../mol-util/param-definition';
 import { PluginStateObject } from '../objects';
 import { StateTransforms } from '../transforms';
-import { Download } from '../transforms/data';
+import { Download, ParsePsf } from '../transforms/data';
 import { CustomModelProperties, StructureSelectionFromExpression, CustomStructureProperties, CoordinatesFromDcd, TrajectoryFromModelAndCoordinates } from '../transforms/model';
 import { DataFormatProvider, guessCifVariant, DataFormatBuilderOptions } from './data-format';
 import { FileInfo } from '../../../mol-util/file-info';
@@ -86,6 +86,21 @@ export const Provider3dg: DataFormatProvider<any> = {
     }
 }
 
+export const PsfProvider: DataFormatProvider<any> = {
+    label: 'PSF',
+    description: 'PSF',
+    stringExtensions: ['psf'],
+    binaryExtensions: [],
+    isApplicable: (info: FileInfo, data: string) => {
+        return info.ext === 'psf'
+    },
+    getDefaultBuilder: (ctx: PluginContext, data: StateBuilder.To<PluginStateObject.Data.String>, options: DataFormatBuilderOptions, state: State) => {
+        return Task.create('PSF default builder', async taskCtx => {
+            await state.updateTree(data.apply(ParsePsf)).runInContext(taskCtx)
+        })
+    }
+}
+
 export const DcdProvider: DataFormatProvider<any> = {
     label: 'DCD',
     description: 'DCD',
diff --git a/src/mol-plugin/state/objects.ts b/src/mol-plugin/state/objects.ts
index 4f59657aa..9f32b3dea 100644
--- a/src/mol-plugin/state/objects.ts
+++ b/src/mol-plugin/state/objects.ts
@@ -22,6 +22,7 @@ import { Shape as _Shape } from '../../mol-model/shape';
 import { ShapeProvider } from '../../mol-model/shape/provider';
 import { File3DG } from '../../mol-io/reader/3dg/parser';
 import { DcdFile } from '../../mol-io/reader/dcd/parser';
+import { PsfFile } from '../../mol-io/reader/psf/parser';
 
 export type TypeClass = 'root' | 'data' | 'prop'
 
@@ -70,6 +71,7 @@ export namespace PluginStateObject {
         export class Pdb extends Create<CifFile>({ name: 'PDB File', typeClass: 'Data' }) { }
         export class Gro extends Create<CifFile>({ name: 'GRO File', typeClass: 'Data' }) { }
         export class _3dg extends Create<CifFile>({ name: '3DG File', typeClass: 'Data' }) { }
+        export class Psf extends Create<PsfFile>({ name: 'PSF File', typeClass: 'Data' }) { }
         export class Dcd extends Create<CifFile>({ name: 'DCD File', typeClass: 'Data' }) { }
         export class Ply extends Create<PlyFile>({ name: 'PLY File', typeClass: 'Data' }) { }
         export class Ccp4 extends Create<Ccp4File>({ name: 'CCP4/MRC/MAP File', typeClass: 'Data' }) { }
diff --git a/src/mol-plugin/state/transforms/data.ts b/src/mol-plugin/state/transforms/data.ts
index 31ac6fabe..0d1d19dad 100644
--- a/src/mol-plugin/state/transforms/data.ts
+++ b/src/mol-plugin/state/transforms/data.ts
@@ -16,6 +16,7 @@ import { readFromFile, ajaxGetMany } from '../../../mol-util/data-source';
 import * as CCP4 from '../../../mol-io/reader/ccp4/parser'
 import * as DSN6 from '../../../mol-io/reader/dsn6/parser'
 import * as PLY from '../../../mol-io/reader/ply/parser'
+import { parsePsf } from '../../../mol-io/reader/psf/parser';
 
 export { Download }
 type Download = typeof Download
@@ -186,6 +187,23 @@ const ParseCif = PluginStateTransform.BuiltIn({
     }
 });
 
+export { ParsePsf }
+type ParsePsf = typeof ParsePsf
+const ParsePsf = PluginStateTransform.BuiltIn({
+    name: 'parse-psf',
+    display: { name: 'Parse PSF', description: 'Parse PSF from String data' },
+    from: [SO.Data.String],
+    to: SO.Format.Psf
+})({
+    apply({ a }) {
+        return Task.create('Parse PSF', async ctx => {
+            const parsed = await parsePsf(a.data).runInContext(ctx);
+            if (parsed.isError) throw new Error(parsed.message);
+            return new SO.Format.Psf(parsed.result);
+        });
+    }
+});
+
 export { ParsePly }
 type ParsePly = typeof ParsePly
 const ParsePly = PluginStateTransform.BuiltIn({
-- 
GitLab