From 261e96068a104051eb9fe56c51f5cc73eccf8b18 Mon Sep 17 00:00:00 2001 From: Alexander Rose <alex.rose@rcsb.org> Date: Tue, 3 Oct 2017 22:35:42 -0700 Subject: [PATCH] Gro parser using computation --- src/reader/common/text/tokenizer.ts | 16 ++++- src/reader/gro/parser.ts | 15 ++-- src/reader/spec/gro.spec.ts | 10 +-- src/script.ts | 103 +++++++++++++++------------- 4 files changed, 84 insertions(+), 60 deletions(-) diff --git a/src/reader/common/text/tokenizer.ts b/src/reader/common/text/tokenizer.ts index 326497591..e4fac8678 100644 --- a/src/reader/common/text/tokenizer.ts +++ b/src/reader/common/text/tokenizer.ts @@ -6,6 +6,8 @@ * @author Alexander Rose <alexander.rose@weirdbyte.de> */ +import Computation from '../../../utils/computation' + export interface Tokenizer { data: string @@ -15,6 +17,8 @@ export interface Tokenizer { currentLineNumber: number currentTokenStart: number currentTokenEnd: number + + computation: Computation.Chunked } export interface Tokens { @@ -23,14 +27,15 @@ export interface Tokens { indices: ArrayLike<number> } -export function Tokenizer(data: string): Tokenizer { +export function Tokenizer(data: string, ctx: Computation.Context): Tokenizer { return { data, position: 0, length: data.length, currentLineNumber: 1, currentTokenStart: 0, - currentTokenEnd: 0 + currentTokenEnd: 0, + computation: new Computation.Chunked(ctx, 1000000) }; } @@ -86,12 +91,17 @@ export namespace Tokenizer { } /** Advance the state by the given number of lines and return line starts/ends as tokens. 
*/ - export function readLines(state: Tokenizer, count: number): Tokens { + export async function readLines(state: Tokenizer, count: number): Promise<Tokens> { + const { computation, length } = state const lineTokens = TokenBuilder.create(state, count * 2); for (let i = 0; i < count; i++) { markLine(state); TokenBuilder.addUnchecked(lineTokens, state.currentTokenStart, state.currentTokenEnd); + + if (computation.requiresUpdate) { + await computation.updateProgress('Parsing...', void 0, state.position, length); + } } return { data: state.data, count, indices: lineTokens.indices }; diff --git a/src/reader/gro/parser.ts b/src/reader/gro/parser.ts index 28a1e26ca..7573b6d4c 100644 --- a/src/reader/gro/parser.ts +++ b/src/reader/gro/parser.ts @@ -10,6 +10,7 @@ import FixedColumn from '../common/text/column/fixed' import { ColumnType, UndefinedColumn } from '../common/column' import * as Schema from './schema' import Result from '../result' +import Computation from '../../utils/computation' interface State { tokenizer: Tokenizer, @@ -85,9 +86,9 @@ function handleNumberOfAtoms(state: State) { * position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places) * velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places) */ -function handleAtoms(state: State): Schema.Atoms { +async function handleAtoms(state: State): Promise<Schema.Atoms> { const { tokenizer, numberOfAtoms } = state; - const lines = Tokenizer.readLines(tokenizer, numberOfAtoms); + const lines = await Tokenizer.readLines(tokenizer, numberOfAtoms); const positionSample = tokenizer.data.substring(lines.indices[0], lines.indices[1]).substring(20); const precisions = positionSample.match(/\.\d+/g)!; @@ -134,15 +135,15 @@ function handleBoxVectors(state: State) { state.header.box = [+values[0], +values[1], +values[2]]; } -function parseInternal(data: string): Result<Schema.File> { - const tokenizer = Tokenizer(data); +async function parseInternal(data: string, ctx:
Computation.Context): Promise<Result<Schema.File>> { + const tokenizer = Tokenizer(data, ctx); const structures: Schema.Structure[] = []; while (tokenizer.position < data.length) { const state = State(tokenizer); handleTitleString(state); handleNumberOfAtoms(state); - const atoms = handleAtoms(state); + const atoms = await handleAtoms(state); handleBoxVectors(state); structures.push({ header: state.header, atoms }); } @@ -152,7 +153,9 @@ function parseInternal(data: string): Result<Schema.File> { } export function parse(data: string) { - return parseInternal(data); + return new Computation<Result<Schema.File>>(async ctx => { + return await parseInternal(data, ctx); + }); } export default parse; \ No newline at end of file diff --git a/src/reader/spec/gro.spec.ts b/src/reader/spec/gro.spec.ts index 67dd4b21d..157688fa4 100644 --- a/src/reader/spec/gro.spec.ts +++ b/src/reader/spec/gro.spec.ts @@ -25,8 +25,9 @@ const groStringHighPrecision = `Generated by trjconv : 2168 system t= 15.00000 1.82060 2.82060 3.82060` describe('gro reader', () => { - it('basic', () => { - const parsed = Gro(groString) + it('basic', async () => { + const comp = Gro(groString) + const parsed = await comp.run() if (parsed.isError) { console.log(parsed) @@ -57,8 +58,9 @@ describe('gro reader', () => { expect(atoms.vz.value(5)).toBeCloseTo(-0.0244, 0.0001); }); - it('high precision', () => { - const parsed = Gro(groStringHighPrecision); + it('high precision', async () => { + const comp = Gro(groStringHighPrecision) + const parsed = await comp.run() if (parsed.isError) { console.log(parsed) diff --git a/src/script.ts b/src/script.ts index 9db88b748..3a040acf6 100644 --- a/src/script.ts +++ b/src/script.ts @@ -2,6 +2,7 @@ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
* * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author David Sehnal <david.sehnal@gmail.com> */ // import * as util from 'util' @@ -15,69 +16,77 @@ import CIF from './reader/cif/index' // const file = 'test.gro' const file = 'md_1u19_trj.gro' -export function _gro() { - fs.readFile(`./examples/${file}`, 'utf8', function (err, input) { - if (err) { - return console.log(err); - } - // console.log(data); - - console.time('parse') - const parsed = Gro(input) - console.timeEnd('parse') - if (parsed.isError) { - console.log(parsed) - return; - } +async function runGro(input: string) { + console.time('parseGro'); + const comp = Gro(input); + const running = comp.runWithContext(new Computation.ObservableContext({ updateRateMs: 250 })); + running.subscribe(p => console.log(`[Gro] ${(p.current / p.max * 100).toFixed(2)} (${p.elapsedMs | 0}ms)`)); + const parsed = await running.result; + console.timeEnd('parseGro'); + + if (parsed.isError) { + console.log(parsed); + return; + } - const groFile = parsed.result + const groFile = parsed.result - console.log('structure count: ', groFile.structures.length); + console.log('structure count: ', groFile.structures.length); - const data = groFile.structures[0]; + const data = groFile.structures[0]; - // const header = groFile.blocks[0].getCategory('header') - const { header, atoms } = data; - console.log(JSON.stringify(header, null, 2)); - console.log('number of atoms:', atoms.count); + // const header = groFile.blocks[0].getCategory('header') + const { header, atoms } = data; + console.log(JSON.stringify(header, null, 2)); + console.log('number of atoms:', atoms.count); - console.log(`'${atoms.residueNumber.value(1)}'`) - console.log(`'${atoms.residueName.value(1)}'`) - console.log(`'${atoms.atomName.value(1)}'`) - console.log(atoms.z.value(1)) - console.log(`'${atoms.z.value(1)}'`) + console.log(`'${atoms.residueNumber.value(1)}'`) + console.log(`'${atoms.residueName.value(1)}'`) + 
console.log(`'${atoms.atomName.value(1)}'`) + console.log(atoms.z.value(1)) + console.log(`'${atoms.z.value(1)}'`) - const n = atoms.count; - console.log('rowCount', n) + const n = atoms.count; + console.log('rowCount', n) - console.time('getFloatArray x') - const x = atoms.x.toArray({ array: Float32Array }) - console.timeEnd('getFloatArray x') - console.log(x.length, x[0], x[x.length - 1]) + console.time('getFloatArray x') + const x = atoms.x.toArray({ array: Float32Array }) + console.timeEnd('getFloatArray x') + console.log(x.length, x[0], x[x.length - 1]) - console.time('getFloatArray y') - const y = atoms.y.toArray({ array: Float32Array }) - console.timeEnd('getFloatArray y') - console.log(y.length, y[0], y[y.length - 1]) + console.time('getFloatArray y') + const y = atoms.y.toArray({ array: Float32Array }) + console.timeEnd('getFloatArray y') + console.log(y.length, y[0], y[y.length - 1]) - console.time('getFloatArray z') - const z = atoms.z.toArray({ array: Float32Array }) - console.timeEnd('getFloatArray z') - console.log(z.length, z[0], z[z.length - 1]) + console.time('getFloatArray z') + const z = atoms.z.toArray({ array: Float32Array }) + console.timeEnd('getFloatArray z') + console.log(z.length, z[0], z[z.length - 1]) - console.time('getIntArray residueNumber') - const residueNumber = atoms.residueNumber.toArray({ array: Int32Array }) - console.timeEnd('getIntArray residueNumber') - console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1]) + console.time('getIntArray residueNumber') + const residueNumber = atoms.residueNumber.toArray({ array: Int32Array }) + console.timeEnd('getIntArray residueNumber') + console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1]) +} + +async function _gro() { + fs.readFile(`./examples/${file}`, 'utf8', function (err, input) { + if (err) { + return console.log(err); + } + runGro(input) }); } +_gro() + async function runCIF(input: string | Uint8Array) { 
console.time('parseCIF'); const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input); - + const running = comp.runWithContext(new Computation.ObservableContext({ updateRateMs: 250 })); - running.subscribe(p => console.log(`${(p.current / p.max * 100).toFixed(2)} (${p.elapsedMs | 0}ms)`)); + running.subscribe(p => console.log(`[CIF] ${(p.current / p.max * 100).toFixed(2)} (${p.elapsedMs | 0}ms)`)); const parsed = await running.result; console.timeEnd('parseCIF'); if (parsed.isError) { @@ -98,7 +107,7 @@ async function runCIF(input: string | Uint8Array) { export function _cif() { let path = `./examples/1cbs_updated.cif`; - path = 'c:/test/quick/3j3q.cif'; + path = '../test/3j3q.cif'; fs.readFile(path, 'utf8', function (err, input) { if (err) { return console.log(err); -- GitLab