-
Alexander Rose authoredAlexander Rose authored
parser.ts 5.60 KiB
/**
* Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { Column } from 'mol-data/db'
import { Tokenizer } from '../common/text/tokenizer'
import FixedColumn from '../common/text/column/fixed'
import * as Schema from './schema'
import { ReaderResult as Result } from '../result'
import { Task, RuntimeContext } from 'mol-task'
/** Mutable state threaded through the handlers while a single structure is parsed. */
interface State {
    /** Tokenizer over the GRO text; the handlers read lines from it. */
    tokenizer: Tokenizer;
    /** Header record that the handlers fill in (title, time, precision, box). */
    header: Schema.GroHeader;
    /** Atom count as parsed by `handleNumberOfAtoms`; 0 until then. */
    numberOfAtoms: number;
    /** Task context passed on to the asynchronous line reader. */
    runtimeCtx: RuntimeContext;
}
/**
 * Builds a header whose fields all hold neutral defaults.
 *
 * @returns a fresh object (including fresh `precision` and `box` values) on every call,
 *          so the handlers can mutate it without affecting other structures.
 */
function createEmptyHeader(): Schema.GroHeader {
    const precision = { position: 0, velocity: 0 };
    const box: [number, number, number] = [0, 0, 0];
    return { title: '', timeInPs: 0, hasVelocities: false, precision, box };
}
/**
 * Creates the parsing state for one structure.
 *
 * @param tokenizer tokenizer over the GRO text
 * @param runtimeCtx task context forwarded to asynchronous reads
 * @returns a state with an empty header and an atom count of 0
 */
function State(tokenizer: Tokenizer, runtimeCtx: RuntimeContext): State {
    const header = createEmptyHeader();
    return { tokenizer, header, numberOfAtoms: 0, runtimeCtx };
}
/**
 * Reads the title line of a structure into `state.header`.
 *
 * The title is a free-format string, optionally followed by the time in ps
 * after a `t=` marker (e.g. `MD of 2 waters, t= 4.2`). When a time is found,
 * `header.timeInPs` is set and `header.title` becomes the trimmed text before
 * the marker, without one trailing comma. Otherwise the whole line is the
 * title and `header.timeInPs` is left untouched.
 *
 * A `t=` only counts as the time marker when it starts a token (beginning of
 * the line, or preceded by whitespace or a comma) and is followed by a number.
 * This keeps titles such as `offset=3 run` from being cut at a `t=` that merely
 * ends a word.
 */
function handleTitleString(state: State) {
    const { tokenizer, header } = state;
    let line = Tokenizer.readLine(tokenizer);
    // A single blank line in front of the title is tolerated and skipped.
    if (line.trim().length === 0) {
        line = Tokenizer.readLine(tokenizer);
    }

    // Walk backwards over the `t=` occurrences until one starts a token.
    let timeOffset = line.lastIndexOf('t=');
    while (timeOffset > 0 && !/[\s,]/.test(line.charAt(timeOffset - 1))) {
        timeOffset = line.lastIndexOf('t=', timeOffset - 1);
    }
    const timeInPs = timeOffset >= 0 ? parseFloat(line.substring(timeOffset + 2)) : NaN;

    if (isNaN(timeInPs)) {
        // No usable time stamp: the complete line is the title.
        header.title = line;
        return;
    }

    header.timeInPs = timeInPs;
    let title = line.substring(0, timeOffset).trim();
    // Drop the comma that separates the title from the time stamp.
    if (title.charAt(title.length - 1) === ',') {
        title = title.substring(0, title.length - 1);
    }
    header.title = title;
}
/**
 * Reads the atom-count line (free format integer) into `state.numberOfAtoms`.
 *
 * The line is marked as the current token and parsed as a base-10 integer;
 * the radix is given explicitly so that input such as `0x10` is not read as
 * hexadecimal. A line that does not start with a number yields `NaN`.
 */
function handleNumberOfAtoms(state: State) {
    const { tokenizer } = state;
    Tokenizer.markLine(tokenizer);
    state.numberOfAtoms = parseInt(Tokenizer.getTokenString(tokenizer), 10);
}
/**
 * Reads the atom lines of a structure and exposes them as fixed-width columns.
 *
 * The format is fixed, i.e. all columns are in a fixed position. Optionally
 * (according to the format description, currently only when written by
 * trjconv) gro files may use any number of decimal places; the format is then
 * n+5 positions with n decimal places (n+1 for velocities) instead of 8 with 3
 * (with 4 for velocities). The precision is inferred here from the decimal
 * fractions found on the first atom line. Columns, from left to right:
 *   residue number (5 positions, integer)
 *   residue name (5 characters)
 *   atom name (5 characters)
 *   atom number (5 positions, integer)
 *   position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places)
 *   velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places)
 *
 * Side effects: sets `hasVelocities` and `precision` on `state.header`. For a
 * structure without atoms these are set to `false` / 0 instead of being
 * inferred from unrelated text.
 *
 * @returns the atom table; velocity columns are undefined columns when the
 *          first atom line does not carry exactly six decimal numbers.
 * @throws Error when atoms are expected but the first atom line contains no
 *         decimal number, so the column widths cannot be determined.
 */
async function handleAtoms(state: State): Promise<Schema.GroAtoms> {
    const { tokenizer, numberOfAtoms } = state;
    const lines = await Tokenizer.readLinesAsync(tokenizer, numberOfAtoms, state.runtimeCtx, 100000);

    // Offset of the first position column: four 5-character columns precede it.
    const pO = 20;

    let hasVelocities = false;
    let positionPrecision = 0;
    let velocityPrecision = 0;
    if (numberOfAtoms > 0) {
        // Sample the first atom line, skipping the residue/atom identifier columns.
        const positionSample = tokenizer.data.substring(lines.indices[0], lines.indices[1]).substring(pO);
        const precisions = positionSample.match(/\.\d+/g);
        if (precisions === null) {
            throw new Error('Unable to infer GRO coordinate precision: the first atom line contains no decimal numbers.');
        }
        // Three fractions: positions only. Six fractions: positions and velocities.
        hasVelocities = precisions.length === 6;
        // Each match includes the leading '.', hence the -1.
        positionPrecision = precisions[0].length - 1;
        velocityPrecision = hasVelocities ? precisions[3].length - 1 : 0;
    }
    state.header.hasVelocities = hasVelocities;
    state.header.precision.position = positionPrecision;
    state.header.precision.velocity = velocityPrecision;

    // Widths follow the n+5 rule (velocities have one more decimal place than positions).
    const pW = positionPrecision + 5;
    const vO = pO + 3 * pW;
    const vW = velocityPrecision + 4;

    const col = FixedColumn(lines);
    const undef = Column.Undefined(numberOfAtoms, Column.Schema.float);
    const ret = {
        count: numberOfAtoms,
        residueNumber: col(0, 5, Column.Schema.int),
        residueName: col(5, 5, Column.Schema.str),
        atomName: col(10, 5, Column.Schema.str),
        atomNumber: col(15, 5, Column.Schema.int),
        x: col(pO, pW, Column.Schema.float),
        y: col(pO + pW, pW, Column.Schema.float),
        z: col(pO + 2 * pW, pW, Column.Schema.float),
        vx: hasVelocities ? col(vO, vW, Column.Schema.float) : undef,
        vy: hasVelocities ? col(vO + vW, vW, Column.Schema.float) : undef,
        vz: hasVelocities ? col(vO + 2 * vW, vW, Column.Schema.float) : undef,
    };
    return ret;
}
/**
 * Reads the box-vector line into `state.header.box`.
 *
 * The line holds free-format, whitespace-separated reals in the order
 *   v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y)
 * where, per the format description, the last six values may be omitted and
 * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0. Only the first three
 * values are stored; any further values on the line are ignored.
 */
function handleBoxVectors(state: State) {
    const boxLine = Tokenizer.readLine(state.tokenizer);
    const [x, y, z] = boxLine.trim().split(/\s+/g);
    state.header.box = [Number(x), Number(y), Number(z)];
}
/**
 * Parses every structure contained in a GRO text.
 *
 * Each structure is read as: title line, atom-count line, atom lines, box
 * line. Parsing continues while non-whitespace content remains, so trailing
 * blank lines or spaces after the last structure do not produce an extra,
 * empty structure.
 *
 * @param data complete GRO file content
 * @param ctx task context; receives an initial progress update and is handed
 *            to the atom reader
 * @returns a success result wrapping all parsed structures (none for empty or
 *          whitespace-only input)
 */
async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result<Schema.GroFile>> {
    const tokenizer = Tokenizer(data);
    await ctx.update({ message: 'Parsing...', current: 0, max: data.length });

    // True when anything other than tab, LF, CR or space is found at or after `from`.
    // The scan stops at the first such character, so it only ever walks over blanks.
    const hasContentFrom = (from: number): boolean => {
        for (let i = from; i < data.length; i++) {
            const code = data.charCodeAt(i);
            if (code !== 9 && code !== 10 && code !== 13 && code !== 32) return true;
        }
        return false;
    };

    const structures: Schema.GroStructure[] = [];
    while (hasContentFrom(tokenizer.position)) {
        // Every structure gets its own state (and header); the tokenizer is shared.
        const state = State(tokenizer, ctx);
        handleTitleString(state);
        handleNumberOfAtoms(state);
        const atoms = await handleAtoms(state);
        handleBoxVectors(state);
        structures.push({ header: state.header, atoms });
    }
    const result: Schema.GroFile = { structures };
    return Result.success(result);
}
/**
 * Creates the task that parses a GRO file.
 *
 * @param data complete GRO file content
 * @returns a task named 'Parse GRO' whose computation delegates to `parseInternal`
 */
export function parseGRO(data: string) {
    const taskName = 'Parse GRO';
    return Task.create<Result<Schema.GroFile>>(taskName, async ctx => parseInternal(data, ctx));
}