From 261e96068a104051eb9fe56c51f5cc73eccf8b18 Mon Sep 17 00:00:00 2001
From: Alexander Rose <alex.rose@rcsb.org>
Date: Tue, 3 Oct 2017 22:35:42 -0700
Subject: [PATCH] Gro parser using computation

---
 src/reader/common/text/tokenizer.ts |  16 ++++-
 src/reader/gro/parser.ts            |  15 ++--
 src/reader/spec/gro.spec.ts         |  10 +--
 src/script.ts                       | 103 +++++++++++++++-------------
 4 files changed, 84 insertions(+), 60 deletions(-)

diff --git a/src/reader/common/text/tokenizer.ts b/src/reader/common/text/tokenizer.ts
index 326497591..e4fac8678 100644
--- a/src/reader/common/text/tokenizer.ts
+++ b/src/reader/common/text/tokenizer.ts
@@ -6,6 +6,8 @@
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
  */
 
+import Computation from '../../../utils/computation'
+
 export interface Tokenizer {
     data: string
 
@@ -15,6 +17,8 @@ export interface Tokenizer {
     currentLineNumber: number
     currentTokenStart: number
     currentTokenEnd: number
+
+    computation: Computation.Chunked
 }
 
 export interface Tokens {
@@ -23,14 +27,15 @@ export interface Tokens {
     indices: ArrayLike<number>
 }
 
-export function Tokenizer(data: string): Tokenizer {
+export function Tokenizer(data: string, ctx: Computation.Context): Tokenizer {
     return {
         data,
         position: 0,
         length: data.length,
         currentLineNumber: 1,
         currentTokenStart: 0,
-        currentTokenEnd: 0
+        currentTokenEnd: 0,
+        computation: new Computation.Chunked(ctx, 1000000)
     };
 }
 
@@ -86,12 +91,17 @@ export namespace Tokenizer {
     }
 
     /** Advance the state by the given number of lines and return line starts/ends as tokens. */
-    export function readLines(state: Tokenizer, count: number): Tokens {
+    export async function readLines(state: Tokenizer, count: number): Promise<Tokens> {
+        const { computation, length } = state
         const lineTokens = TokenBuilder.create(state, count * 2);
 
         for (let i = 0; i < count; i++) {
             markLine(state);
             TokenBuilder.addUnchecked(lineTokens, state.currentTokenStart, state.currentTokenEnd);
+
+            if (computation.requiresUpdate) {
+                await computation.updateProgress('Parsing...', void 0, state.position, length); // live read: a pre-loop copy of position never changes
+            }
         }
 
         return { data: state.data, count, indices: lineTokens.indices };
diff --git a/src/reader/gro/parser.ts b/src/reader/gro/parser.ts
index 28a1e26ca..7573b6d4c 100644
--- a/src/reader/gro/parser.ts
+++ b/src/reader/gro/parser.ts
@@ -10,6 +10,7 @@ import FixedColumn from '../common/text/column/fixed'
 import { ColumnType, UndefinedColumn } from '../common/column'
 import * as Schema from './schema'
 import Result from '../result'
+import Computation from '../../utils/computation'
 
 interface State {
     tokenizer: Tokenizer,
@@ -85,9 +86,9 @@ function handleNumberOfAtoms(state: State) {
  *     position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places)
  *     velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places)
  */
-function handleAtoms(state: State): Schema.Atoms {
+async function handleAtoms(state: State): Promise<Schema.Atoms> {
     const { tokenizer, numberOfAtoms } = state;
-    const lines = Tokenizer.readLines(tokenizer, numberOfAtoms);
+    const lines = await Tokenizer.readLines(tokenizer, numberOfAtoms);
 
     const positionSample = tokenizer.data.substring(lines.indices[0], lines.indices[1]).substring(20);
     const precisions = positionSample.match(/\.\d+/g)!;
@@ -134,15 +135,15 @@ function handleBoxVectors(state: State) {
     state.header.box = [+values[0], +values[1], +values[2]];
 }
 
-function parseInternal(data: string): Result<Schema.File> {
-    const tokenizer = Tokenizer(data);
+async function parseInternal(data: string, ctx: Computation.Context): Promise<Result<Schema.File>> {
+    const tokenizer = Tokenizer(data, ctx);
 
     const structures: Schema.Structure[] = [];
     while (tokenizer.position < data.length) {
         const state = State(tokenizer);
         handleTitleString(state);
         handleNumberOfAtoms(state);
-        const atoms = handleAtoms(state);
+        const atoms = await handleAtoms(state);
         handleBoxVectors(state);
         structures.push({ header: state.header, atoms });
     }
@@ -152,7 +153,9 @@ function parseInternal(data: string): Result<Schema.File> {
 }
 
 export function parse(data: string) {
-    return parseInternal(data);
+    return new Computation<Result<Schema.File>>(async ctx => {
+        return await parseInternal(data, ctx);
+    });
 }
 
 export default parse;
\ No newline at end of file
diff --git a/src/reader/spec/gro.spec.ts b/src/reader/spec/gro.spec.ts
index 67dd4b21d..157688fa4 100644
--- a/src/reader/spec/gro.spec.ts
+++ b/src/reader/spec/gro.spec.ts
@@ -25,8 +25,9 @@ const groStringHighPrecision = `Generated by trjconv : 2168 system t=  15.00000
    1.82060   2.82060   3.82060`
 
 describe('gro reader', () => {
-    it('basic', () => {
-        const parsed = Gro(groString)
+    it('basic', async () => {
+        const comp = Gro(groString)
+        const parsed = await comp.run()
 
         if (parsed.isError) {
             console.log(parsed)
@@ -57,8 +58,9 @@ describe('gro reader', () => {
         expect(atoms.vz.value(5)).toBeCloseTo(-0.0244, 0.0001);
     });
 
-    it('high precision', () => {
-        const parsed = Gro(groStringHighPrecision);
+    it('high precision', async () => {
+        const comp = Gro(groStringHighPrecision)
+        const parsed = await comp.run()
 
         if (parsed.isError) {
             console.log(parsed)
diff --git a/src/script.ts b/src/script.ts
index 9db88b748..3a040acf6 100644
--- a/src/script.ts
+++ b/src/script.ts
@@ -2,6 +2,7 @@
  * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author David Sehnal <david.sehnal@gmail.com>
  */
 
 // import * as util from 'util'
@@ -15,69 +16,77 @@ import CIF from './reader/cif/index'
 // const file = 'test.gro'
 const file = 'md_1u19_trj.gro'
 
-export function _gro() {
-    fs.readFile(`./examples/${file}`, 'utf8', function (err, input) {
-        if (err) {
-            return console.log(err);
-        }
-        // console.log(data);
-
-        console.time('parse')
-        const parsed = Gro(input)
-        console.timeEnd('parse')
-        if (parsed.isError) {
-            console.log(parsed)
-            return;
-        }
+async function runGro(input: string) {
+    console.time('parseGro');
+    const comp = Gro(input);
+    const running = comp.runWithContext(new Computation.ObservableContext({ updateRateMs: 250 }));
+    running.subscribe(p => console.log(`[Gro] ${(p.current / p.max * 100).toFixed(2)} (${p.elapsedMs | 0}ms)`));
+    const parsed = await running.result;
+    console.timeEnd('parseGro');
+
+    if (parsed.isError) {
+        console.log(parsed);
+        return;
+    }
 
-        const groFile = parsed.result
+    const groFile = parsed.result
 
-        console.log('structure count: ', groFile.structures.length);
+    console.log('structure count: ', groFile.structures.length);
 
-        const data = groFile.structures[0];
+    const data = groFile.structures[0];
 
-        // const header = groFile.blocks[0].getCategory('header')
-        const { header, atoms } = data;
-        console.log(JSON.stringify(header, null, 2));
-        console.log('number of atoms:', atoms.count);
+    // const header = groFile.blocks[0].getCategory('header')
+    const { header, atoms } = data;
+    console.log(JSON.stringify(header, null, 2));
+    console.log('number of atoms:', atoms.count);
 
-        console.log(`'${atoms.residueNumber.value(1)}'`)
-        console.log(`'${atoms.residueName.value(1)}'`)
-        console.log(`'${atoms.atomName.value(1)}'`)
-        console.log(atoms.z.value(1))
-        console.log(`'${atoms.z.value(1)}'`)
+    console.log(`'${atoms.residueNumber.value(1)}'`)
+    console.log(`'${atoms.residueName.value(1)}'`)
+    console.log(`'${atoms.atomName.value(1)}'`)
+    console.log(atoms.z.value(1))
+    console.log(`'${atoms.z.value(1)}'`)
 
-        const n = atoms.count;
-        console.log('rowCount', n)
+    const n = atoms.count;
+    console.log('rowCount', n)
 
-        console.time('getFloatArray x')
-        const x = atoms.x.toArray({ array: Float32Array })
-        console.timeEnd('getFloatArray x')
-        console.log(x.length, x[0], x[x.length - 1])
+    console.time('getFloatArray x')
+    const x = atoms.x.toArray({ array: Float32Array })
+    console.timeEnd('getFloatArray x')
+    console.log(x.length, x[0], x[x.length - 1])
 
-        console.time('getFloatArray y')
-        const y = atoms.y.toArray({ array: Float32Array })
-        console.timeEnd('getFloatArray y')
-        console.log(y.length, y[0], y[y.length - 1])
+    console.time('getFloatArray y')
+    const y = atoms.y.toArray({ array: Float32Array })
+    console.timeEnd('getFloatArray y')
+    console.log(y.length, y[0], y[y.length - 1])
 
-        console.time('getFloatArray z')
-        const z = atoms.z.toArray({ array: Float32Array })
-        console.timeEnd('getFloatArray z')
-        console.log(z.length, z[0], z[z.length - 1])
+    console.time('getFloatArray z')
+    const z = atoms.z.toArray({ array: Float32Array })
+    console.timeEnd('getFloatArray z')
+    console.log(z.length, z[0], z[z.length - 1])
 
-        console.time('getIntArray residueNumber')
-        const residueNumber = atoms.residueNumber.toArray({ array: Int32Array })
-        console.timeEnd('getIntArray residueNumber')
-        console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1])
+    console.time('getIntArray residueNumber')
+    const residueNumber = atoms.residueNumber.toArray({ array: Int32Array })
+    console.timeEnd('getIntArray residueNumber')
+    console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1])
+}
+
+async function _gro() {
+    fs.readFile(`./examples/${file}`, 'utf8', function (err, input) {
+        if (err) {
+            return console.log(err);
+        }
+        runGro(input).catch(e => console.error(e)) // fire-and-forget: log failures instead of leaving the rejection unhandled
     });
 }
 
+_gro()
+
 async function runCIF(input: string | Uint8Array) {
     console.time('parseCIF');
     const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input);
-    
+
     const running = comp.runWithContext(new Computation.ObservableContext({ updateRateMs: 250 }));
-    running.subscribe(p => console.log(`${(p.current / p.max * 100).toFixed(2)} (${p.elapsedMs | 0}ms)`));
+    running.subscribe(p => console.log(`[CIF] ${(p.current / p.max * 100).toFixed(2)} (${p.elapsedMs | 0}ms)`));
     const parsed = await running.result;
     console.timeEnd('parseCIF');
     if (parsed.isError) {
@@ -98,7 +107,7 @@ async function runCIF(input: string | Uint8Array) {
 
 export function _cif() {
     let path = `./examples/1cbs_updated.cif`;
-    path = 'c:/test/quick/3j3q.cif';
+    path = '../test/3j3q.cif';
     fs.readFile(path, 'utf8', function (err, input) {
         if (err) {
             return console.log(err);
-- 
GitLab