// parser.ts

/**
 * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 * @author David Sehnal <david.sehnal@gmail.com>
 */

import { Column } from 'mol-data/db'
import { Tokenizer } from '../common/text/tokenizer'
import FixedColumn from '../common/text/column/fixed'
import * as Schema from './schema'
import { ReaderResult as Result } from '../result'
import { Task, RuntimeContext } from 'mol-task'

/** Working state shared by the handlers while one GRO structure is being parsed. */
interface State {
    /** Tokenizer over the GRO text; the handlers read lines from it. */
    tokenizer: Tokenizer;
    /** Header that the title, atom and box-vector handlers fill in. */
    header: Schema.GroHeader;
    /** Atom count parsed by `handleNumberOfAtoms`. */
    numberOfAtoms: number;
    /** Task context forwarded to the asynchronous line reader. */
    runtimeCtx: RuntimeContext;
}

/**
 * Builds a fresh header with neutral defaults: empty title, time 0,
 * no velocities, zero precision and a zero-sized box.
 *
 * @returns a new header object (nothing is shared between calls)
 */
function createEmptyHeader(): Schema.GroHeader {
    const precision = { position: 0, velocity: 0 };
    const header: Schema.GroHeader = {
        title: '',
        timeInPs: 0,
        hasVelocities: false,
        precision,
        box: [0, 0, 0]
    };
    return header;
}

/**
 * Creates the parsing state for one structure.
 *
 * @param tokenizer tokenizer over the GRO text
 * @param runtimeCtx task context used by the asynchronous line reader
 * @returns a state with an empty header and an atom count of 0
 */
function State(tokenizer: Tokenizer, runtimeCtx: RuntimeContext): State {
    const header = createEmptyHeader();
    const state: State = { tokenizer, header, numberOfAtoms: 0, runtimeCtx };
    return state;
}

/**
 * Reads the title line of a structure into `state.header`.
 *
 * title string (free format string, optional time in ps after 't=')
 *
 * All blank lines preceding the title are skipped. When the line contains
 * a 't=' marker followed by a number, that number is stored as
 * `timeInPs` and the text before the marker (minus one trailing comma)
 * becomes the title. Otherwise the whole trimmed line is the title and
 * `timeInPs` keeps its current value.
 *
 * @param state parsing state; `header.title` and `header.timeInPs` are updated
 */
function handleTitleString(state: State) {
    const { tokenizer, header } = state;
    let line = Tokenizer.readLine(tokenizer);

    // Skip every leading blank line, not only the first one. The position
    // check ends the loop once the input is exhausted.
    while (line.trim().length === 0 && tokenizer.position < tokenizer.data.length) {
        line = Tokenizer.readLine(tokenizer);
    }

    const timeOffset = line.lastIndexOf('t=');
    const timeInPs = timeOffset >= 0 ? parseFloat(line.substring(timeOffset + 2)) : NaN;

    // A 't=' that is not followed by a number is part of the free-format
    // title, not a time stamp: keep the full line and do not store NaN.
    if (isNaN(timeInPs)) {
        header.title = line.trim();
        return;
    }

    header.timeInPs = timeInPs;
    header.title = line.substring(0, timeOffset).trim();
    // Drop the comma that conventionally separates the title from 't='.
    if (header.title && header.title[header.title.length - 1] === ',') {
        header.title = header.title.substring(0, header.title.length - 1);
    }
}

/**
 * Reads the atom-count line and stores it in `state.numberOfAtoms`.
 *
 * number of atoms (free format integer)
 *
 * @param state parsing state; `numberOfAtoms` is overwritten (NaN when the
 *              line does not start with an integer)
 */
function handleNumberOfAtoms(state: State) {
    const { tokenizer } = state;
    Tokenizer.markLine(tokenizer);
    const line = Tokenizer.getTokenString(tokenizer);
    // Explicit radix: the count is a decimal integer, so a '0x' prefix must
    // not be interpreted as hexadecimal.
    state.numberOfAtoms = parseInt(line, 10);
}

/**
 * Reads the atom records of one structure and exposes them as fixed-width columns.
 *
 * This format is fixed, i.e. all columns are in a fixed position.
 * Optionally (for now only with trjconv) gro files can be written
 * with any number of decimal places; the format will then be n+5
 * positions with n decimal places (n+1 for velocities) instead
 * of 8 with 3 (with 4 for velocities). Upon reading, the precision
 * is inferred from the number of digits following each decimal point
 * in the first atom line. Columns contain the following information
 * (from left to right):
 *     residue number (5 positions, integer)
 *     residue name (5 characters)
 *     atom name (5 characters)
 *     atom number (5 positions, integer)
 *     position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places)
 *     velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places)
 *
 * When there is no atom line to sample, or the sampled line contains no
 * decimal values, the standard position precision (3) is assumed and the
 * structure is treated as having no velocities.
 *
 * @param state parsing state; `header.hasVelocities` and `header.precision` are updated
 * @returns the atom columns; vx/vy/vz are undefined columns when no velocities are present
 */
async function handleAtoms(state: State): Promise<Schema.GroAtoms> {
    const { tokenizer, numberOfAtoms, header } = state;
    const lines = await Tokenizer.readLinesAsync(tokenizer, numberOfAtoms, state.runtimeCtx, 100000);

    // Sample the first atom line past the four 5-character leading columns.
    // Without any atom line there is nothing to sample.
    const positionSample = numberOfAtoms > 0
        ? tokenizer.data.substring(lines.indices[0], lines.indices[1]).substring(20)
        : '';
    // `match` yields null when nothing matches; fall back to an empty list
    // instead of asserting non-null.
    const precisions = positionSample.match(/\.\d+/g) || [];
    // Three position values plus three velocity values.
    const hasVelocities = precisions.length === 6;

    header.hasVelocities = hasVelocities;
    // Each match includes the leading '.', hence the -1.
    header.precision.position = precisions.length > 0 ? precisions[0].length - 1 : 3;
    header.precision.velocity = hasVelocities ? precisions[3].length - 1 : 0;

    const pO = 20;                               // offset of the first position column
    const pW = header.precision.position + 5;    // width of a position column (n + 5)
    const vO = pO + 3 * pW;                      // offset of the first velocity column
    const vW = header.precision.velocity + 4;    // width of a velocity column ((n + 1) + 4)

    const col = FixedColumn(lines);
    const undef = Column.Undefined(numberOfAtoms, Column.Schema.float);

    return {
        count: numberOfAtoms,
        residueNumber: col(0, 5, Column.Schema.int),
        residueName: col(5, 5, Column.Schema.str),
        atomName: col(10, 5, Column.Schema.str),
        atomNumber: col(15, 5, Column.Schema.int),
        x: col(pO, pW, Column.Schema.float),
        y: col(pO + pW, pW, Column.Schema.float),
        z: col(pO + 2 * pW, pW, Column.Schema.float),
        vx: hasVelocities ? col(vO, vW, Column.Schema.float) : undef,
        vy: hasVelocities ? col(vO + vW, vW, Column.Schema.float) : undef,
        vz: hasVelocities ? col(vO + 2 * vW, vW, Column.Schema.float) : undef,
    };
}

/**
 * Reads the box-vector line that closes a structure.
 *
 * box vectors (free format, space separated reals), values:
 * v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y),
 * the last 6 values may be omitted.
 * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0.
 *
 * Only the first three values are stored in `header.box`; any further
 * values on the line are ignored.
 *
 * @param state parsing state; `header.box` is overwritten
 */
function handleBoxVectors(state: State) {
    const boxLine = Tokenizer.readLine(state.tokenizer);
    const [x, y, z] = boxLine.trim().split(/\s+/g);
    state.header.box = [+x, +y, +z];
}

/** Returns true when `data` contains a non-whitespace character at or after `start`. */
function hasNonWhitespaceFrom(data: string, start: number): boolean {
    for (let i = start; i < data.length; i++) {
        const code = data.charCodeAt(i);
        // 32 = space, 9 = tab, 10 = line feed, 13 = carriage return
        if (code !== 32 && code !== 9 && code !== 10 && code !== 13) return true;
    }
    return false;
}

/**
 * Parses every structure in the given GRO text.
 *
 * Each structure is read in order: title line, atom count, atom records,
 * box vectors. Parsing stops once only whitespace is left, so trailing
 * blank lines do not produce an extra, empty structure.
 *
 * @param data full text of the GRO file
 * @param ctx task context used for progress updates and passed to the handlers
 * @returns a successful result holding the parsed structures
 */
async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<Schema.GroFile>> {
    const tokenizer = Tokenizer(data);

    await ctx.update({ message: 'Parsing...', current: 0, max: data.length });
    const structures: Schema.GroStructure[] = [];
    // Checking only `position < data.length` would start another structure
    // on trailing blank lines; require real content instead.
    while (hasNonWhitespaceFrom(data, tokenizer.position)) {
        const state = State(tokenizer, ctx);
        handleTitleString(state);
        handleNumberOfAtoms(state);
        const atoms = await handleAtoms(state);
        handleBoxVectors(state);
        structures.push({ header: state.header, atoms });
    }

    const result: Schema.GroFile = { structures };
    return Result.success(result);
}

/**
 * Creates a task named 'Parse GRO' that parses the given GRO text when run.
 *
 * @param data full text of the GRO file
 * @returns a task resolving to the parse result
 */
export function parseGRO(data: string) {
    const run = async (ctx: RuntimeContext) => parseInternal(data, ctx);
    return Task.create<Result<Schema.GroFile>>('Parse GRO', run);
}