From 87dbbe3323bcb920f1089a7fcb1fba09346d3022 Mon Sep 17 00:00:00 2001 From: David Sehnal <david.sehnal@gmail.com> Date: Fri, 22 Sep 2017 16:05:44 +0200 Subject: [PATCH] Updated to new data model --- src/data/data.ts | 3 +- src/data/schema.ts | 16 +- src/data/spec/schema.spec.ts | 8 +- src/index.d.ts | 7 +- src/index.ts | 5 +- src/reader/common/data.ts | 0 src/reader/common/text/data.ts | 115 ++++++++ .../common/text}/number-parser.ts | 0 src/reader/common/text/tokenizer.ts | 173 ++++++++++++ src/reader/gro.ts | 266 ------------------ .../file.ts => reader/gro/index.ts} | 10 +- src/reader/gro/parser.ts | 163 +++++++++++ src/reader/gro/schema.ts | 40 +++ src/{parser.ts => reader/result.ts} | 20 +- src/reader/spec/gro.spec.ts | 71 +++-- src/relational/block.ts | 24 -- src/relational/category.ts | 61 ---- src/relational/column.ts | 38 --- src/relational/constants.ts | 12 - src/relational/text-block.ts | 39 --- src/relational/text-category.ts | 99 ------- src/relational/text-column.ts | 120 -------- src/relational/text-file.ts | 17 -- src/script.ts | 76 ++--- src/utils/chunked-array.ts | 28 -- src/utils/helper.ts | 88 ------ src/utils/short-string-pool.ts | 6 +- src/utils/tokenizer-state.ts | 20 -- src/utils/tokens.ts | 47 ---- 29 files changed, 603 insertions(+), 969 deletions(-) delete mode 100644 src/reader/common/data.ts create mode 100644 src/reader/common/text/data.ts rename src/{utils => reader/common/text}/number-parser.ts (100%) create mode 100644 src/reader/common/text/tokenizer.ts delete mode 100644 src/reader/gro.ts rename src/{relational/file.ts => reader/gro/index.ts} (52%) rename src/{parser.ts => reader/result.ts} (65%) delete mode 100644 src/relational/block.ts delete mode 100644 src/relational/category.ts delete mode 100644 src/relational/column.ts delete mode 100644 src/relational/constants.ts delete mode 100644 src/relational/text-block.ts delete mode 100644 src/relational/text-category.ts delete mode 100644 src/relational/text-column.ts delete mode 
100644 src/relational/text-file.ts delete mode 100644 src/utils/helper.ts delete mode 100644 src/utils/tokenizer-state.ts delete mode 100644 src/utils/tokens.ts diff --git a/src/data/data.ts b/src/data/data.ts index 7277567e6..849a0954e 100644 --- a/src/data/data.ts +++ b/src/data/data.ts @@ -65,5 +65,6 @@ export interface Field { stringEquals(row: number, value: string | null): boolean, toStringArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<string>, - toNumberArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<number> + toIntArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<number>, + toFloatArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<number> } \ No newline at end of file diff --git a/src/data/schema.ts b/src/data/schema.ts index e0fb2203b..c9f1a1a86 100644 --- a/src/data/schema.ts +++ b/src/data/schema.ts @@ -40,6 +40,7 @@ export namespace Block { export type Category<Fields> = Fields & { readonly _rowCount: number, + readonly _isDefined: boolean, /** For accessing 'non-standard' fields */ _getField(name: string): Data.Field | undefined } @@ -66,14 +67,16 @@ export namespace Field { export function str(spec?: Spec) { return createSchema(spec, Str); } export function int(spec?: Spec) { return createSchema(spec, Int); } export function float(spec?: Spec) { return createSchema(spec, Float); } + export function value<T>(spec?: Spec): Schema<T> { return createSchema(spec, Value); } function create<T>(field: Data.Field, value: (row: number) => T, toArray: Field<T>['toArray']): Field<T> { return { isDefined: field.isDefined, value, presence: field.presence, areValuesEqual: field.areValuesEqual, stringEquals: field.stringEquals, toArray }; } function Str(field: Data.Field) { return create(field, field.str, field.toStringArray); } - function Int(field: Data.Field) { 
return create(field, field.int, field.toNumberArray); } - function Float(field: Data.Field) { return create(field, field.float, field.toNumberArray); } + function Int(field: Data.Field) { return create(field, field.int, field.toIntArray); } + function Float(field: Data.Field) { return create(field, field.float, field.toFloatArray); } + function Value(field: Data.Field) { return create(field, field.value, () => { throw Error('not supported'); }); } const DefaultUndefined: Data.Field = { isDefined: false, @@ -92,7 +95,8 @@ export namespace Field { for (let i = 0; i < count; i++) { ret[i] = null; } return ret; }, - toNumberArray: (startRow, endRowExclusive, ctor) => new Uint8Array(endRowExclusive - startRow) as any + toIntArray: (startRow, endRowExclusive, ctor) => new Uint8Array(endRowExclusive - startRow) as any, + toFloatArray: (startRow, endRowExclusive, ctor) => new Float32Array(endRowExclusive - startRow) as any }; function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field) => Field<T>): Schema<T> { @@ -113,7 +117,7 @@ class _Block implements Block<any> { // tslint:disable-line:class-name class _Category implements Category<any> { // tslint:disable-line:class-name _rowCount = this._category.rowCount; _getField(name: string) { return this._category.getField(name); } - constructor(private _category: Data.Category, schema: Category.Schema) { + constructor(private _category: Data.Category, schema: Category.Schema, public _isDefined: boolean) { const fieldKeys = Object.keys(schema).filter(k => k !== '@alias'); const cache = Object.create(null); for (const k of fieldKeys) { @@ -137,6 +141,6 @@ function createBlock(schema: Block.Schema, block: Data.Block): any { } function createCategory(key: string, schema: Category.Schema, block: Data.Block) { - const cat = block.categories[schema['@alias'] || key] || Data.Category.Empty; - return new _Category(cat, schema); + const cat = block.categories[schema['@alias'] || key]; + return new _Category(cat || 
Data.Category.Empty, schema, !!cat); } \ No newline at end of file diff --git a/src/data/spec/schema.spec.ts b/src/data/spec/schema.spec.ts index bc8788b8d..131468eb1 100644 --- a/src/data/spec/schema.spec.ts +++ b/src/data/spec/schema.spec.ts @@ -25,7 +25,13 @@ function Field(values: any[]): Data.Field { for (let i = 0; i < count; i++) { ret[i] = values[startRow + i]; } return ret; }, - toNumberArray: (startRow, endRowExclusive, ctor) => { + toIntArray: (startRow, endRowExclusive, ctor) => { + const count = endRowExclusive - startRow; + const ret = ctor(count) as any; + for (let i = 0; i < count; i++) { ret[i] = +values[startRow + i]; } + return ret; + }, + toFloatArray: (startRow, endRowExclusive, ctor) => { const count = endRowExclusive - startRow; const ret = ctor(count) as any; for (let i = 0; i < count; i++) { ret[i] = +values[startRow + i]; } diff --git a/src/index.d.ts b/src/index.d.ts index 8e857ee33..8b4489030 100644 --- a/src/index.d.ts +++ b/src/index.d.ts @@ -4,9 +4,4 @@ * @author Alexander Rose <alexander.rose@weirdbyte.de> */ -export { ParserResult, ParserError, ParserSuccess } from './parser' - -export { Category } from './relational/category' -export { Column } from './relational/column' - -export { parse as groReader } from './reader/gro' +// TODO: fix me \ No newline at end of file diff --git a/src/index.ts b/src/index.ts index 3a7b1225e..e16ea3dd0 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,7 +4,4 @@ * @author Alexander Rose <alexander.rose@weirdbyte.de> */ -export { Category } from './relational/category' -export { Column } from './relational/column' - -export { parse as groReader } from './reader/gro' +// TODO: fix me diff --git a/src/reader/common/data.ts b/src/reader/common/data.ts deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/reader/common/text/data.ts b/src/reader/common/text/data.ts new file mode 100644 index 000000000..595496de3 --- /dev/null +++ b/src/reader/common/text/data.ts @@ -0,0 +1,115 @@ +/* + * 
Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import * as Data from '../../../data/data' +import { parseInt as fastParseInt, parseFloat as fastParseFloat } from './number-parser' +import { Tokens } from './tokenizer' +import ShortStringPool from '../../../utils/short-string-pool' + +export function createCategory(data: string, fields: string[], tokens: Tokens, rowCount: number) { + const fi: TokenFieldInfo = { data, fieldCount: fields.length, tokens: tokens.indices }; + + const categoryFields = Object.create(null); + for (let i = 0; i < fi.fieldCount; ++i) { + categoryFields[fields[i]] = TokenField(fi, i); + } + return Data.Category(rowCount, categoryFields); +} + +export interface TokenFieldInfo { + data: string, + tokens: ArrayLike<number>, + fieldCount: number, + isCif?: boolean +} + +export function TokenField(info: TokenFieldInfo, index: number): Data.Field { + const { data, tokens, fieldCount, isCif = false } = info; + const stringPool = ShortStringPool.create(); + + const str: Data.Field['str'] = isCif ? row => { + const i = (row * fieldCount + index) * 2; + const ret = ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1])); + if (ret === '.' || ret === '?') return null; + return ret; + } : row => { + const i = (row * fieldCount + index) * 2; + return ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1])); + }; + + const int: Data.Field['int'] = row => { + const i = (row * fieldCount + index) * 2; + return fastParseInt(data, tokens[i], tokens[i + 1]) || 0; + }; + + const float: Data.Field['float'] = row => { + const i = (row * fieldCount + index) * 2; + return fastParseFloat(data, tokens[i], tokens[i + 1]) || 0; + }; + + const presence: Data.Field['presence'] = isCif ? 
row => { + const i = 2 * (row * fieldCount + index); + const s = tokens[i]; + if (tokens[i + 1] - s !== 1) return Data.ValuePresence.Present; + const v = data.charCodeAt(s); + if (v === 46 /* . */) return Data.ValuePresence.NotSpecified; + if (v === 63 /* ? */) return Data.ValuePresence.Unknown; + return Data.ValuePresence.Present; + } : row => { + const i = 2 * (row * fieldCount + index); + return tokens[i] === tokens[i + 1] ? Data.ValuePresence.NotSpecified : Data.ValuePresence.Present + }; + + return { + isDefined: true, + str, + int, + float, + value: str, + presence, + areValuesEqual: (rowA, rowB) => { + const aI = (rowA * fieldCount + index) * 2, aS = tokens[aI]; + const bI = (rowB * fieldCount + index) * 2, bS = tokens[bI]; + const len = tokens[aI + 1] - aS; + if (len !== tokens[bI + 1] - bS) return false; + for (let i = 0; i < len; i++) { + if (data.charCodeAt(i + aS) !== data.charCodeAt(i + bS)) { + return false; + } + } + return true; + }, + stringEquals: (row, value) => { + const aI = (row * fieldCount + index) * 2; + const s = tokens[aI]; + if (!value) return presence(row) !== Data.ValuePresence.Present; + const len = value.length; + if (len !== tokens[aI + 1] - s) return false; + for (let i = 0; i < len; i++) { + if (data.charCodeAt(i + s) !== value.charCodeAt(i)) return false; + } + return true; + }, + toStringArray: (startRow, endRowExclusive, ctor) => { + const count = endRowExclusive - startRow; + const ret = ctor(count) as any; + for (let i = 0; i < count; i++) { ret[i] = str(startRow + i); } + return ret; + }, + toIntArray: (startRow, endRowExclusive, ctor) => { + const count = endRowExclusive - startRow; + const ret = ctor(count) as any; + for (let i = 0; i < count; i++) { ret[i] = int(startRow + i); } + return ret; + }, + toFloatArray: (startRow, endRowExclusive, ctor) => { + const count = endRowExclusive - startRow; + const ret = ctor(count) as any; + for (let i = 0; i < count; i++) { ret[i] = float(startRow + i); } + return ret; + } + } +} \ 
No newline at end of file diff --git a/src/utils/number-parser.ts b/src/reader/common/text/number-parser.ts similarity index 100% rename from src/utils/number-parser.ts rename to src/reader/common/text/number-parser.ts diff --git a/src/reader/common/text/tokenizer.ts b/src/reader/common/text/tokenizer.ts new file mode 100644 index 000000000..a4bbf121f --- /dev/null +++ b/src/reader/common/text/tokenizer.ts @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. + * + * mostly from https://github.com/dsehnal/CIFTools.js + * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +export interface State<Info = any, TokenType = any> { + data: string + + position: number + length: number + + currentLineNumber: number + currentTokenStart: number + currentTokenEnd: number + + currentTokenType: TokenType, + + info: Info +} + +export function State<Info, TokenType>(data: string, info?: Info, initialTokenType?: TokenType): State<Info, TokenType> { + return { + data, + position: 0, + length: data.length, + currentLineNumber: 1, + currentTokenStart: 0, + currentTokenEnd: 0, + currentTokenType: initialTokenType!, + info: info! + }; +} + +/** + * Eat everything until a newline occurs. + */ +export function eatLine(state: State) { + while (state.position < state.length) { + switch (state.data.charCodeAt(state.position)) { + case 10: // \n + state.currentTokenEnd = state.position + ++state.position + ++state.currentLineNumber + return + case 13: // \r + state.currentTokenEnd = state.position + ++state.position + ++state.currentLineNumber + if (state.data.charCodeAt(state.position) === 10) { + ++state.position + } + return + default: + ++state.position + } + } + state.currentTokenEnd = state.position; +} + +/** + * Eat everything until a whitespace/newline occurs. 
+ */ +export function eatValue(state: State) { + while (state.position < state.length) { + switch (state.data.charCodeAt(state.position)) { + case 9: // \t + case 10: // \n + case 13: // \r + case 32: // ' ' + state.currentTokenEnd = state.position; + return; + default: + ++state.position; + break; + } + } + state.currentTokenEnd = state.position; +} + +/** + * Skips all the whitespace - space, tab, newline, CR + * Handles incrementing line count. + */ +export function skipWhitespace(state: State): number { + let prev = 10; + while (state.position < state.length) { + let c = state.data.charCodeAt(state.position); + switch (c) { + case 9: // '\t' + case 32: // ' ' + prev = c; + ++state.position; + break; + case 10: // \n + // handle \r\n + if (prev !== 13) { + ++state.currentLineNumber; + } + prev = c; + ++state.position; + break; + case 13: // \r + prev = c; + ++state.position; + ++state.currentLineNumber; + break; + default: + return prev; + } + } + return prev; +} + +/** Trims spaces and tabs */ +export function trim(state: State, start: number, end: number) { + const { data } = state; + let s = start, e = end - 1; + + let c = data.charCodeAt(s); + while (c === 9 || c === 32) c = data.charCodeAt(++s); + c = data.charCodeAt(e); + while (c === 9 || c === 32) c = data.charCodeAt(--e); + + state.currentTokenStart = s; + state.currentTokenEnd = e + 1; + state.position = end; +} + +export interface Tokens { + indicesLenMinus2: number, + count: number, + indices: Uint32Array +} + +export namespace Tokens { + function resize(tokens: Tokens) { + // scale the size using golden ratio, because why not. 
+ const newBuffer = new Uint32Array((1.61 * tokens.indices.length) | 0); + newBuffer.set(tokens.indices); + tokens.indices = newBuffer; + tokens.indicesLenMinus2 = (newBuffer.length - 2) | 0; + } + + export function add(tokens: Tokens, start: number, end: number) { + if (tokens.count > tokens.indicesLenMinus2) { + resize(tokens); + } + tokens.indices[tokens.count++] = start; + tokens.indices[tokens.count++] = end; + } + + export function addUnchecked(tokens: Tokens, start: number, end: number) { + tokens.indices[tokens.count++] = start; + tokens.indices[tokens.count++] = end; + } + + export function create(size: number): Tokens { + return { + indicesLenMinus2: (size - 2) | 0, + count: 0, + indices: new Uint32Array(size) + } + } +} + + +/** + * A helper for building a typed array of token indices. + */ +export default Tokens \ No newline at end of file diff --git a/src/reader/gro.ts b/src/reader/gro.ts deleted file mode 100644 index d148f3ebc..000000000 --- a/src/reader/gro.ts +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
- * - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -import { parseInt } from '../utils/number-parser' -import { eatLine, eatValue, skipWhitespace } from '../utils/helper' -import { Tokens } from '../utils/tokens' -import { TokenizerState } from '../utils/tokenizer-state' - -import { TextFile } from '../relational/text-file' -import { TextBlock } from '../relational/text-block' -import { TextCategory } from '../relational/text-category' - -import { ParserResult } from '../parser' - -/** - * http://manual.gromacs.org/current/online/gro.html - */ - -export const GroCategories = { - 'header': '', - 'atoms': '' -} - -// type GroCategories = keyof typeof GroCategories - -export const GroAtomBasicColumns = { - 'residueNumber': '', - 'residueName': '', - 'atomName': '', - 'atomNumber': '', - 'x': '', - 'y': '', - 'z': '' -} -export type GroAtomBasicColumns = keyof typeof GroAtomBasicColumns - -export const GroAtomVelocityColumns = Object.assign({ - 'vx': '', - 'vy': '', - 'vz': '' -}, GroAtomBasicColumns) -export type GroAtomVelocityColumns = keyof typeof GroAtomVelocityColumns - -export const GroHeaderColumns = { - 'title': '', - 'timeInPs': '', - 'numberOfAtoms': '', - 'boxX': '', - 'boxY': '', - 'boxZ': '' -} -export type GroHeaderColumns = keyof typeof GroHeaderColumns - -export interface GroState extends TokenizerState { - numberOfAtoms: number - hasVelocities: boolean - numberOfDecimalPlaces: number -} - -export function createTokenizer(data: string): GroState { - return { - data, - - position: 0, - length: data.length, - - currentLineNumber: 1, - currentTokenStart: 0, - currentTokenEnd: 0, - - numberOfAtoms: 0, - hasVelocities: false, - numberOfDecimalPlaces: 3 - }; -} - -/** - * title string (free format string, optional time in ps after 't=') - */ -function handleTitleString (state: GroState, tokens: Tokens) { - eatLine(state) - // console.log('title', state.data.substring(state.currentTokenStart, state.currentTokenEnd)) - let start = 
state.currentTokenStart - let end = state.currentTokenEnd - let valueStart = state.currentTokenStart - let valueEnd = start - - while (valueEnd < end && !isTime(state.data, valueEnd)) ++valueEnd; - - if (isTime(state.data, valueEnd)) { - let timeStart = valueEnd + 2 - - while (valueEnd > start && isSpaceOrComma(state.data, valueEnd - 1)) --valueEnd; - Tokens.add(tokens, valueStart, valueEnd) // title - - while (timeStart < end && state.data.charCodeAt(timeStart) === 32) ++timeStart; - while (valueEnd > timeStart && state.data.charCodeAt(valueEnd - 1) === 32) --valueEnd; - Tokens.add(tokens, timeStart, end) // time - } else { - Tokens.add(tokens, valueStart, valueEnd) // title - Tokens.add(tokens, valueEnd, valueEnd) // empty token for time - } -} - -function isSpaceOrComma(data: string, position: number): boolean { - const c = data.charCodeAt(position); - return c === 32 || c === 44 -} - -function isTime(data: string, position: number): boolean { - // T/t - const c = data.charCodeAt(position); - if (c !== 84 && c !== 116) return false; - // = - if (data.charCodeAt(position + 1) !== 61) return false; - - return true; -} - -// function isDot(state: TokenizerState): boolean { -// // . -// if (state.data.charCodeAt(state.currentTokenStart) !== 46) return false; - -// return true; -// } - -// function numberOfDecimalPlaces (state: TokenizerState) { -// // var ndec = firstLines[ 2 ].length - firstLines[ 2 ].lastIndexOf('.') - 1 -// const start = state.currentTokenStart -// const end = state.currentTokenEnd -// for (let i = end; start < i; --i) { -// // . 
-// if (state.data.charCodeAt(i) === 46) return end - start - i -// } -// throw new Error('Could not determine number of decimal places') -// } - -/** - * number of atoms (free format integer) - */ -function handleNumberOfAtoms (state: GroState, tokens: Tokens) { - skipWhitespace(state) - state.currentTokenStart = state.position - eatValue(state) - state.numberOfAtoms = parseInt(state.data, state.currentTokenStart, state.currentTokenEnd) - Tokens.add(tokens, state.currentTokenStart, state.currentTokenEnd) - eatLine(state) -} - -// function checkForVelocities (state: GroState) { - -// } - -/** - * This format is fixed, ie. all columns are in a fixed position. - * Optionally (for now only yet with trjconv) you can write gro files - * with any number of decimal places, the format will then be n+5 - * positions with n decimal places (n+1 for velocities) in stead - * of 8 with 3 (with 4 for velocities). Upon reading, the precision - * will be inferred from the distance between the decimal points - * (which will be n+5). 
Columns contain the following information - * (from left to right): - * residue number (5 positions, integer) - * residue name (5 characters) - * atom name (5 characters) - * atom number (5 positions, integer) - * position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places) - * velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places) - */ -function handleAtoms (state: GroState, block: TextBlock) { - console.log('MOINMOIN') - const name = 'atoms' - - const columns = [ 'residueNumber', 'residueName', 'atomName', 'atomNumber', 'x', 'y', 'z' ] - if (state.hasVelocities) { - columns.push('vx', 'vy', 'vz') - } - const fieldSizes = [ 5, 5, 5, 5, 8, 8, 8, 8, 8, 8 ] - - const columnCount = columns.length - const tokens = Tokens.create(state.numberOfAtoms * 2 * columnCount) - - let start: number - let end: number - let valueStart: number - let valueEnd: number = state.position - - for (let i = 0; i < state.numberOfAtoms; ++i) { - state.currentTokenStart = state.position - end = state.currentTokenStart - for (let j = 0; j < columnCount; ++j) { - start = end - end = start + fieldSizes[j] - - // trim - valueStart = start - valueEnd = end - while (valueStart < valueEnd && state.data.charCodeAt(valueStart) === 32) ++valueStart; - while (valueEnd > valueStart && state.data.charCodeAt(valueEnd - 1) === 32) --valueEnd; - - Tokens.addUnchecked(tokens, valueStart, valueEnd) - } - state.position = valueEnd - eatLine(state) - } - - block.addCategory(new TextCategory(state.data, name, columns, tokens)); -} - -/** - * box vectors (free format, space separated reals), values: - * v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y), - * the last 6 values may be omitted (they will be set to zero). - * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0. 
- */ -function handleBoxVectors (state: GroState, tokens: Tokens) { - // just read the first three values, ignore any remaining - for (let i = 0; i < 3; ++i) { - skipWhitespace(state) - state.currentTokenStart = state.position - eatValue(state) - Tokens.add(tokens, state.currentTokenStart, state.currentTokenEnd) - } -} - -/** - * Creates an error result. - */ -// function error(line: number, message: string) { -// return ParserResult.error<TextFile>(message, line); -// } - -/** - * Creates a data result. - */ -function result(data: TextFile) { - return ParserResult.success(data); -} - -function parseInternal(data: string): ParserResult<TextFile> { - const state = createTokenizer(data) - const file = new TextFile(data) - file.blocks - - let block = new TextBlock(data) - file.blocks.push(block) - - const headerColumns = ['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ'] - const headerTokens = Tokens.create(2 * headerColumns.length) - let header = new TextCategory(state.data, 'header', headerColumns, headerTokens) - block.addCategory(header) - - handleTitleString(state, headerTokens) - handleNumberOfAtoms(state, headerTokens) - handleAtoms(state, block) - handleBoxVectors(state, headerTokens) - - return result(file); -} - -export function parse(data: string) { - return parseInternal(data); -} diff --git a/src/relational/file.ts b/src/reader/gro/index.ts similarity index 52% rename from src/relational/file.ts rename to src/reader/gro/index.ts index 80bf53205..ce24a7978 100644 --- a/src/relational/file.ts +++ b/src/reader/gro/index.ts @@ -2,9 +2,13 @@ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
* * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author David Sehnal <david.sehnal@gmail.com> */ -export interface File<T> { - blocks: T[]; -} +import schema from './schema' +import parse from './parser' +export default { + parse, + schema +}; \ No newline at end of file diff --git a/src/reader/gro/parser.ts b/src/reader/gro/parser.ts index e69de29bb..1e56166e8 100644 --- a/src/reader/gro/parser.ts +++ b/src/reader/gro/parser.ts @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { State as TokenizerState, Tokens, eatLine, skipWhitespace, eatValue, trim } from '../common/text/tokenizer' +import { parseInt } from '../common/text/number-parser' +import { createCategory } from '../common/text/data' +import * as Data from '../../data/data' +import Result from '../result' + +interface StateInfo { + numberOfAtoms: number + hasVelocities: boolean + numberOfDecimalPlaces: number +} + +type State = TokenizerState<StateInfo> + +function createState(data: string): State { + return TokenizerState(data, { numberOfAtoms: 0, hasVelocities: false, numberOfDecimalPlaces: 3 }); +} + +/** + * title string (free format string, optional time in ps after 't=') + */ +function handleTitleString(state: State, tokens: Tokens) { + eatLine(state) + // console.log('title', state.data.substring(state.currentTokenStart, state.currentTokenEnd)) + let start = state.currentTokenStart + let end = state.currentTokenEnd + let valueStart = state.currentTokenStart + let valueEnd = start + + while (valueEnd < end && !isTime(state.data, valueEnd)) ++valueEnd; + + if (isTime(state.data, valueEnd)) { + let timeStart = valueEnd + 2 + + while (valueEnd > start && isSpaceOrComma(state.data, valueEnd - 1)) --valueEnd; + Tokens.add(tokens, valueStart, valueEnd) // title + + while (timeStart < end && 
state.data.charCodeAt(timeStart) === 32) ++timeStart; + while (valueEnd > timeStart && state.data.charCodeAt(valueEnd - 1) === 32) --valueEnd; + Tokens.add(tokens, timeStart, end) // time + } else { + Tokens.add(tokens, valueStart, valueEnd) // title + Tokens.add(tokens, valueEnd, valueEnd) // empty token for time + } +} + +function isSpaceOrComma(data: string, position: number): boolean { + const c = data.charCodeAt(position); + return c === 32 || c === 44 +} + +function isTime(data: string, position: number): boolean { + // T/t + const c = data.charCodeAt(position); + if (c !== 84 && c !== 116) return false; + // = + if (data.charCodeAt(position + 1) !== 61) return false; + + return true; +} + +/** + * number of atoms (free format integer) + */ +function handleNumberOfAtoms(state: State, tokens: Tokens) { + skipWhitespace(state) + state.currentTokenStart = state.position + eatValue(state) + state.info.numberOfAtoms = parseInt(state.data, state.currentTokenStart, state.currentTokenEnd) + Tokens.add(tokens, state.currentTokenStart, state.currentTokenEnd) + eatLine(state) +} + +/** + * This format is fixed, ie. all columns are in a fixed position. + * Optionally (for now only yet with trjconv) you can write gro files + * with any number of decimal places, the format will then be n+5 + * positions with n decimal places (n+1 for velocities) in stead + * of 8 with 3 (with 4 for velocities). Upon reading, the precision + * will be inferred from the distance between the decimal points + * (which will be n+5). 
Columns contain the following information + * (from left to right): + * residue number (5 positions, integer) + * residue name (5 characters) + * atom name (5 characters) + * atom number (5 positions, integer) + * position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places) + * velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places) + */ +function handleAtoms(state: State) { + const fieldSizes = [ 5, 5, 5, 5, 8, 8, 8, 8, 8, 8 ]; + const fields = [ 'residueNumber', 'residueName', 'atomName', 'atomNumber', 'x', 'y', 'z' ] + if (state.info.hasVelocities) { + fields.push('vx', 'vy', 'vz') + } + + const fieldCount = fields.length + const tokens = Tokens.create(state.info.numberOfAtoms * 2 * fieldCount) + + let start: number; + let end: number; + + for (let i = 0, _i = state.info.numberOfAtoms; i < _i; ++i) { + state.currentTokenStart = state.position; + end = state.currentTokenStart; + for (let j = 0; j < fieldCount; ++j) { + start = end; + end = start + fieldSizes[j]; + + trim(state, start, end); + Tokens.addUnchecked(tokens, state.currentTokenStart, state.currentTokenEnd); + } + eatLine(state) + } + + return createCategory(state.data, fields, tokens, state.info.numberOfAtoms); +} + +/** + * box vectors (free format, space separated reals), values: + * v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y), + * the last 6 values may be omitted (they will be set to zero). + * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0. 
+ */ +function handleBoxVectors(state: State, tokens: Tokens) { + // just read the first three values, ignore any remaining + for (let i = 0; i < 3; ++i) { + skipWhitespace(state); + state.currentTokenStart = state.position; + eatValue(state); + Tokens.add(tokens, state.currentTokenStart, state.currentTokenEnd); + } +} + +function parseInternal(data: string): Result<Data.File> { + const state = createState(data); + + const headerFields = ['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ']; + const headerTokens = Tokens.create(2 * headerFields.length); + + handleTitleString(state, headerTokens); + handleNumberOfAtoms(state, headerTokens); + const atoms = handleAtoms(state); + handleBoxVectors(state, headerTokens); + + const block = Data.Block({ + header: createCategory(data, headerFields, headerTokens, 1), + atoms + }); + + return Result.success(Data.File([block])); +} + +export default function parse(data: string) { + return parseInternal(data); +} \ No newline at end of file diff --git a/src/reader/gro/schema.ts b/src/reader/gro/schema.ts index e69de29bb..d8e9c576a 100644 --- a/src/reader/gro/schema.ts +++ b/src/reader/gro/schema.ts @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import * as Schema from '../../data/schema' +import * as Data from '../../data/data' + +const str = Schema.Field.str() +const int = Schema.Field.int() +const float = Schema.Field.float() + +const header = { + 'title': str, + 'timeInPs': float, + 'numberOfAtoms': int, + 'boxX': float, + 'boxY': float, + 'boxZ': float +} + +const atoms = { + 'residueNumber': int, + 'residueName': str, + 'atomName': str, + 'atomNumber': int, + 'x': float, + 'y': float, + 'z': float, + 'vx': float, + 'vy': float, + 'vz': float +} + +const schema = { header, atoms }; +export default function (block: Data.Block) { + return Schema.apply(schema, block); +} \ No newline at end of file diff --git a/src/parser.ts b/src/reader/result.ts similarity index 65% rename from src/parser.ts rename to src/reader/result.ts index 72ab0e42a..a6b29f020 100644 --- a/src/parser.ts +++ b/src/reader/result.ts @@ -5,19 +5,19 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -export type ParserResult<T> = ParserSuccess<T> | ParserError +type ReaderResult<T> = Success<T> | Error -export namespace ParserResult { - export function error<T>(message: string, line = -1): ParserResult<T> { - return new ParserError(message, line); +namespace ReaderResult { + export function error<T>(message: string, line = -1): ReaderResult<T> { + return new Error(message, line); } - export function success<T>(result: T, warnings: string[] = []): ParserResult<T> { - return new ParserSuccess<T>(result, warnings); + export function success<T>(result: T, warnings: string[] = []): ReaderResult<T> { + return new Success<T>(result, warnings); } } -export class ParserError { +export class Error { isError: true = true; toString() { @@ -33,8 +33,10 @@ export class ParserError { } } -export class ParserSuccess<T> { +export class Success<T> { isError: false = false; constructor(public result: T, public warnings: string[]) { } 
-} \ No newline at end of file +} + +export default ReaderResult \ No newline at end of file diff --git a/src/reader/spec/gro.spec.ts b/src/reader/spec/gro.spec.ts index 0071a447c..f140b1a4f 100644 --- a/src/reader/spec/gro.spec.ts +++ b/src/reader/spec/gro.spec.ts @@ -2,10 +2,10 @@ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. * * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author David Sehnal <david.sehnal@gmail.com> */ -import { parse } from '../gro' -// import { Category } from '../../relational/category' +import Gro from '../gro/index' const groString = `MD of 2 waters, t= 4.2 6 @@ -26,52 +26,71 @@ const groStringHighPrecision = `Generated by trjconv : 2168 system t= 15.00000 describe('gro reader', () => { it('basic', () => { - const parsed = parse(groString) + const parsed = Gro.parse(groString) if (parsed.isError) { console.log(parsed) } else { - const groFile = parsed.result + const groFile = parsed.result; + const data = Gro.schema(groFile.blocks[0]); - const header = groFile.blocks[0].getCategory('header') - if (header) { - expect(header.columnNames).toEqual(['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ']) + const { header, atoms } = data; + if (header._isDefined) { + expect(header.title.value(0)).toBe('MD of 2 waters') + expect(header.timeInPs.value(0)).toBeCloseTo(4.2) + expect(header.numberOfAtoms.value(0)).toBe(6) - expect(header.getColumn('title').getString(0)).toBe('MD of 2 waters') - expect(header.getColumn('timeInPs').getFloat(0)).toBeCloseTo(4.2) - expect(header.getColumn('numberOfAtoms').getInteger(0)).toBe(6) - - expect(header.getColumn('boxX').getFloat(0)).toBeCloseTo(1.82060) - expect(header.getColumn('boxY').getFloat(0)).toBeCloseTo(1.82060) - expect(header.getColumn('boxZ').getFloat(0)).toBeCloseTo(1.82060) + expect(header.boxX.value(0)).toBeCloseTo(1.82060) + expect(header.boxY.value(0)).toBeCloseTo(1.82060) + expect(header.boxZ.value(0)).toBeCloseTo(1.82060) } 
else { console.error('no header') } + + if (atoms._rowCount === 6) { + expect(atoms.x.value(0)).toBeCloseTo(0.126); + expect(atoms.y.value(0)).toBeCloseTo(1.624); + expect(atoms.z.value(0)).toBeCloseTo(1.679); + + // TODO: check velocities when they are parsed. + } else { + console.error('no atoms'); + } } }) it('high precision', () => { - const parsed = parse(groStringHighPrecision) + const parsed = Gro.parse(groStringHighPrecision) if (parsed.isError) { console.log(parsed) } else { - const groFile = parsed.result + const groFile = parsed.result; + const data = Gro.schema(groFile.blocks[0]); - const header = groFile.blocks[0].getCategory('header') - if (header) { - expect(header.columnNames).toEqual(['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ']) + const { header, atoms } = data; + if (header._isDefined) { + expect(header.title.value(0)).toBe('Generated by trjconv : 2168 system') + expect(header.timeInPs.value(0)).toBeCloseTo(15) + expect(header.numberOfAtoms.value(0)).toBe(3) - expect(header.getColumn('title').getString(0)).toBe('Generated by trjconv : 2168 system') - expect(header.getColumn('timeInPs').getFloat(0)).toBeCloseTo(15) - expect(header.getColumn('numberOfAtoms').getInteger(0)).toBe(3) - - expect(header.getColumn('boxX').getFloat(0)).toBeCloseTo(1.82060) - expect(header.getColumn('boxY').getFloat(0)).toBeCloseTo(1.82060) - expect(header.getColumn('boxZ').getFloat(0)).toBeCloseTo(1.82060) + expect(header.boxX.value(0)).toBeCloseTo(1.82060) + expect(header.boxY.value(0)).toBeCloseTo(1.82060) + expect(header.boxZ.value(0)).toBeCloseTo(1.82060) } else { console.error('no header') } + + if (atoms._rowCount === 3) { + // TODO: test when high-prec parser is available + // expect(atoms.x.value(1)).toBeCloseTo(0.015804, 0.00001); + // expect(atoms.y.value(1)).toBeCloseTo(2.716597, 0.00001); + // expect(atoms.z.value(1)).toBeCloseTo(1.460588, 0.00001); + + // TODO: check velocities when they are parsed. 
+ } else { + console.error('no atoms'); + } } }) }); diff --git a/src/relational/block.ts b/src/relational/block.ts deleted file mode 100644 index d8c212bff..000000000 --- a/src/relational/block.ts +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. - * - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -import { Category, UndefinedCategory } from './category' - -export abstract class Block<T> { - abstract getCategory(name: string): T|undefined - abstract addCategory(category: T): void - - getCategoriesFromSchema<T extends object> (schema: T) { - return BlockCategories(this, schema) - } -} - -export type BlockCategories<Categories extends string> = { readonly [name in Categories]: Category } -export function BlockCategories<T extends object>(block: Block<any> | undefined, categories: T): BlockCategories<keyof T> { - const ret = Object.create(null); - if (!block) for (const c of Object.keys(categories)) ret[c] = UndefinedCategory; - else for (const c of Object.keys(categories)) ret[c] = block.getCategory(c); - return ret; -} diff --git a/src/relational/category.ts b/src/relational/category.ts deleted file mode 100644 index 0d387b49d..000000000 --- a/src/relational/category.ts +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. - * - * from https://github.com/dsehnal/CIFTools.js - * @author David Sehnal <david.sehnal@gmail.com> - */ - -import { Column, UndefinedColumn } from './column' - -/** - * Represents a tabular category with multiple fields represented as columns. - * - * Example: - * _category.field1 - * _category.field2 - * ... - */ -export abstract class Category { - name: string; - rowCount: number; - columnCount: number; - columnNames: string[]; - - /** - * If a field with the given name is not present, returns UndefinedColumn. - * - * Columns are accessed by their field name only, i.e. 
- * _category.field is accessed by - * category.getColumn('field') - * - * Note that columns are created on demand and there is some computational - * cost when creating a new column. Therefore, if you need to reuse a column, - * it is a good idea to cache it. - */ - abstract getColumn(name: string): Column; - - getColumnsFromSchema<T extends object> (schema: T) { - return CategoryColumns(this, schema) - } -} - -/** - * Represents a category that is not present. - */ -class _UndefinedCategory extends Category { // tslint:disable-line:class-name - name: '' - rowCount = 0 - columnCount = 0 - columnNames = [] - getColumn(name: string) { return UndefinedColumn } -} -export const UndefinedCategory = new _UndefinedCategory() as Category; - - -export type CategoryColumns<Columns extends string> = { readonly [name in Columns]: Column } -export function CategoryColumns<T extends object>(category: Category | undefined, columns: T): CategoryColumns<keyof T> { - const ret = Object.create(null); - if (!category) for (const c of Object.keys(columns)) ret[c] = UndefinedColumn; - else for (const c of Object.keys(columns)) ret[c] = category.getColumn(c); - return ret; -} diff --git a/src/relational/column.ts b/src/relational/column.ts deleted file mode 100644 index ee9615dc8..000000000 --- a/src/relational/column.ts +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. - * - * from https://github.com/dsehnal/CIFTools.js - * @author David Sehnal <david.sehnal@gmail.com> - */ - -import { ValuePresence } from './constants' - -/** - * A columns represents a single field of a CIF category. 
- */ -export interface Column { - isDefined: boolean; - - getString(row: number): string | null; - getInteger(row: number): number; - getFloat(row: number): number; - - getValuePresence(row: number): ValuePresence; - - areValuesEqual(rowA: number, rowB: number): boolean; - stringEquals(row: number, value: string): boolean; -} - -/** - * Represents a column that is not present. - */ -class _UndefinedColumn implements Column { // tslint:disable-line:class-name - isDefined = false; - getString(row: number): string | null { return null; }; - getInteger(row: number): number { return 0; } - getFloat(row: number): number { return 0.0; } - getValuePresence(row: number): ValuePresence { return ValuePresence.NotSpecified; } - areValuesEqual(rowA: number, rowB: number): boolean { return true; } - stringEquals(row: number, value: string): boolean { return value === null; } -} -export const UndefinedColumn = new _UndefinedColumn() as Column; diff --git a/src/relational/constants.ts b/src/relational/constants.ts deleted file mode 100644 index 34394c55e..000000000 --- a/src/relational/constants.ts +++ /dev/null @@ -1,12 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. - * - * from https://github.com/dsehnal/CIFTools.js - * @author David Sehnal <david.sehnal@gmail.com> - */ - -export const enum ValuePresence { - Present = 0, - NotSpecified = 1, - Unknown = 2 -} diff --git a/src/relational/text-block.ts b/src/relational/text-block.ts deleted file mode 100644 index c4ef35b76..000000000 --- a/src/relational/text-block.ts +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
- * - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -import { Block } from './block' -import { TextCategory } from './text-category' - -export class TextBlock extends Block<TextCategory> { - private categoryMap: Map<string, TextCategory>; - private categoryList: TextCategory[]; - - data: string; - - /** - * Gets a category by its name. - */ - getCategory(name: string) { - return this.categoryMap.get(name); - } - - /** - * Adds a category. - */ - addCategory(category: TextCategory) { - this.categoryList[this.categoryList.length] = category; - this.categoryMap.set(category.name, category); - } - - constructor(data: string) { - super() - - this.data = data; - - this.categoryMap = new Map() - this.categoryList = [] - } -} \ No newline at end of file diff --git a/src/relational/text-category.ts b/src/relational/text-category.ts deleted file mode 100644 index f550213df..000000000 --- a/src/relational/text-category.ts +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. - * - * from https://github.com/dsehnal/CIFTools.js - * @author David Sehnal <david.sehnal@gmail.com> - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -import { Category } from './category' -import { UndefinedColumn } from './column' -import { TextColumn, CifColumn } from './text-column' - -import { Tokens } from '../utils/tokens' - -/** - * Represents a category backed by a string. - */ -export class TextCategory extends Category { - protected data: string; - protected columnNameList: string[]; - protected columnIndices: Map<string, number>; - - /** - * Name of the category. - */ - name: string; - - /** - * The array of columns. - */ - get columnNames() { - return this.columnNameList; - } - - /** - * Number of columns in the category. - */ - columnCount: number; - - /** - * Number of rows in the category. 
- */ - rowCount: number; - - /** - * Pairs of (start at index 2 * i, end at index 2 * i + 1) indices to the data string. - * The "end" character is not included (for it's iterated as for (i = start; i < end; i++)). - */ - indices: Int32Array; - - /** - * Get a column object that makes accessing data easier. - */ - getColumn(name: string): TextColumn { - let i = this.columnIndices.get(name); - if (i !== void 0) return new TextColumn(this, this.data, name, i); - return UndefinedColumn as TextColumn; - } - - initColumns(columns: string[]): void { - this.columnIndices = new Map<string, number>(); - this.columnNameList = []; - for (let i = 0; i < columns.length; i++) { - this.columnIndices.set(columns[i], i); - this.columnNameList.push(columns[i]); - } - } - - constructor(data: string, name: string, columns: string[], tokens: Tokens) { - super() - - this.name = name; - this.indices = tokens.indices; - this.data = data; - - this.columnCount = columns.length; - this.rowCount = (tokens.count / 2 / columns.length) | 0; - - this.initColumns(columns) - } -} - -export class CifCategory extends TextCategory { - getColumn(name: string): CifColumn { - let i = this.columnIndices.get(name); - if (i !== void 0) return new CifColumn(this, this.data, name, i); - return UndefinedColumn as CifColumn; - } - - initColumns(columns: string[]): void { - this.columnIndices = new Map<string, number>(); - this.columnNameList = []; - for (let i = 0; i < columns.length; i++) { - let colName = columns[i].substr(this.name.length + 1); - this.columnIndices.set(colName, i); - this.columnNameList.push(colName); - } - } -} diff --git a/src/relational/text-column.ts b/src/relational/text-column.ts deleted file mode 100644 index 7c04c09d6..000000000 --- a/src/relational/text-column.ts +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
- * - * from https://github.com/dsehnal/CIFTools.js - * @author David Sehnal <david.sehnal@gmail.com> - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -import { Column } from './column' -import { ValuePresence } from './constants' -import { TextCategory } from './text-category' - -import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../utils/number-parser' -import { ShortStringPool } from '../utils/short-string-pool' - -/** - * Represents a single column. - */ -export class TextColumn implements Column { - - protected indices: Int32Array; - protected columnCount: number; - protected rowCount: number; - protected stringPool = ShortStringPool.create(); - - isDefined = true; - - /** - * Returns the string value at given row. - */ - getString(row: number): string | null { - let i = (row * this.columnCount + this.index) * 2; - return ShortStringPool.get(this.stringPool, this.data.substring(this.indices[i], this.indices[i + 1])); - } - - /** - * Returns the integer value at given row. - */ - getInteger(row: number): number { - let i = (row * this.columnCount + this.index) * 2; - return fastParseInt(this.data, this.indices[i], this.indices[i + 1]); - } - - /** - * Returns the float value at given row. - */ - getFloat(row: number): number { - let i = (row * this.columnCount + this.index) * 2; - return fastParseFloat(this.data, this.indices[i], this.indices[i + 1]); - } - - /** - * Returns true if the token has the specified string value. - */ - stringEquals(row: number, value: string) { - let aIndex = (row * this.columnCount + this.index) * 2, - s = this.indices[aIndex], - len = value.length; - if (len !== this.indices[aIndex + 1] - s) return false; - for (let i = 0; i < len; i++) { - if (this.data.charCodeAt(i + s) !== value.charCodeAt(i)) return false; - } - return true; - } - - /** - * Determines if values at the given rows are equal. 
- */ - areValuesEqual(rowA: number, rowB: number): boolean { - const aIndex = (rowA * this.columnCount + this.index) * 2 - const bIndex = (rowB * this.columnCount + this.index) * 2 - const aS = this.indices[aIndex] - const bS = this.indices[bIndex] - const len = this.indices[aIndex + 1] - aS - if (len !== this.indices[bIndex + 1] - bS) return false; - for (let i = 0; i < len; i++) { - if (this.data.charCodeAt(i + aS) !== this.data.charCodeAt(i + bS)) { - return false; - } - } - return true; - } - - getValuePresence(row: number): ValuePresence { - let index = 2 * (row * this.columnCount + this.index); - if (this.indices[index] === this.indices[index + 1]) { - return ValuePresence.NotSpecified - } - return ValuePresence.Present - } - - constructor(table: TextCategory, protected data: string, public name: string, public index: number) { - this.indices = table.indices; - this.columnCount = table.columnCount; - } -} - -export class CifColumn extends TextColumn { - /** - * Returns the string value at given row. - */ - getString(row: number): string | null { - let ret = super.getString(row) - if (ret === '.' || ret === '?') return null; - return ret; - } - - /** - * Returns true if the value is not defined (. or ? token). - */ - getValuePresence(row: number): ValuePresence { - let index = 2 * (row * this.columnCount + this.index); - let s = this.indices[index]; - if (this.indices[index + 1] - s !== 1) return ValuePresence.Present; - let v = this.data.charCodeAt(s); - if (v === 46 /* . */) return ValuePresence.NotSpecified; - if (v === 63 /* ? */) return ValuePresence.Unknown; - return ValuePresence.Present; - } -} diff --git a/src/relational/text-file.ts b/src/relational/text-file.ts deleted file mode 100644 index 6058531cc..000000000 --- a/src/relational/text-file.ts +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
- * - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -import { File } from './file' -import { TextBlock } from './text-block' - -export class TextFile implements File<TextBlock> { - data: string; - blocks: TextBlock[] = []; - - constructor(data: string) { - this.data = data; - } -} diff --git a/src/script.ts b/src/script.ts index 013e07921..d62c6f66f 100644 --- a/src/script.ts +++ b/src/script.ts @@ -7,33 +7,12 @@ // import * as util from 'util' import * as fs from 'fs' -import { parse, GroCategories, GroAtomBasicColumns } from './reader/gro' -import { Category } from './relational/category' +import Gro from './reader/gro/index' -const file = '1crn.gro' +//const file = '1crn.gro' // const file = 'water.gro' // const file = 'test.gro' -// const file = 'md_1u19_trj.gro' - -function getFloatArray(category: Category, name: string) { - const column = category.getColumn(name) - const n = category.rowCount - const array = new Float32Array(n) - for (let i = 0; i < n; ++i) { - array[i] = column.getFloat(i) - } - return array -} - -function getIntArray(category: Category, name: string) { - const column = category.getColumn(name) - const n = category.rowCount - const array = new Int32Array(n) - for (let i = 0; i < n; ++i) { - array[i] = column.getInteger(i) - } - return array -} +const file = 'md_1u19_trj.gro' fs.readFile(`./examples/${file}`, 'utf8', function (err,data) { if (err) { @@ -42,61 +21,54 @@ fs.readFile(`./examples/${file}`, 'utf8', function (err,data) { // console.log(data); console.time('parse') - const parsed = parse(data) + const parsed = Gro.parse(data) console.timeEnd('parse') if (parsed.isError) { console.log(parsed) } else { const groFile = parsed.result - const categories = groFile.blocks[0].getCategoriesFromSchema(GroCategories) + const data = Gro.schema(groFile.blocks[0]) // const header = groFile.blocks[0].getCategory('header') - const header = categories.header - if (header) { - console.log(header.columnNames) - - console.log('title', 
header.getColumn('title').getString(0)) - console.log('timeInPs', header.getColumn('timeInPs').getFloat(0)) - console.log('numberOfAtoms', header.getColumn('numberOfAtoms').getInteger(0)) - console.log('boxX', header.getColumn('boxX').getFloat(0)) - console.log('boxY', header.getColumn('boxY').getFloat(0)) - console.log('boxZ', header.getColumn('boxZ').getFloat(0)) + const { header, atoms } = data; + if (header._rowCount === 1) { + console.log('title', header.title.value(0)) + console.log('timeInPs', header.timeInPs.value(0)) + console.log('numberOfAtoms', header.numberOfAtoms.value(0)) + console.log('boxX', header.boxX.value(0)) + console.log('boxY', header.boxY.value(0)) + console.log('boxZ', header.boxZ.value(0)) } else { console.error('no header') } - const atoms = categories.atoms - if (atoms) { - console.log(atoms.columnNames) - - const columns = atoms.getColumnsFromSchema(GroAtomBasicColumns) - - console.log(`'${columns.residueNumber.getString(1)}'`) - console.log(`'${columns.residueName.getString(1)}'`) - console.log(`'${columns.atomName.getString(1)}'`) - console.log(columns.z.getFloat(1)) - console.log(`'${columns.z.getString(1)}'`) + if (atoms._rowCount > 0) { + console.log(`'${atoms.residueNumber.value(1)}'`) + console.log(`'${atoms.residueName.value(1)}'`) + console.log(`'${atoms.atomName.value(1)}'`) + console.log(atoms.z.value(1)) + console.log(`'${atoms.z.value(1)}'`) - const n = atoms.rowCount + const n = atoms._rowCount console.log('rowCount', n) console.time('getFloatArray x') - const x = getFloatArray(atoms, 'x') + const x = atoms.x.toArray(0, n, x => new Float32Array(x))! console.timeEnd('getFloatArray x') console.log(x.length, x[0], x[x.length-1]) console.time('getFloatArray y') - const y = getFloatArray(atoms, 'y') + const y = atoms.y.toArray(0, n, x => new Float32Array(x))!
console.timeEnd('getFloatArray y') console.log(y.length, y[0], y[y.length-1]) console.time('getFloatArray z') - const z = getFloatArray(atoms, 'z') + const z = atoms.z.toArray(0, n, x => new Float32Array(x))! console.timeEnd('getFloatArray z') console.log(z.length, z[0], z[z.length-1]) console.time('getIntArray residueNumber') - const residueNumber = getIntArray(atoms, 'residueNumber') + const residueNumber = atoms.residueNumber.toArray(0, n, x => new Int32Array(x))! console.timeEnd('getIntArray residueNumber') console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length-1]) } else { diff --git a/src/utils/chunked-array.ts b/src/utils/chunked-array.ts index 82d2004bc..54391c303 100644 --- a/src/utils/chunked-array.ts +++ b/src/utils/chunked-array.ts @@ -113,34 +113,6 @@ export namespace ChunkedArray { return ret as any; } - export function forVertex3D(chunkVertexCount: number = 262144): ChunkedArray<number> { - return create<number>(size => new Float32Array(size) as any, chunkVertexCount, 3) - } - - export function forIndexBuffer(chunkIndexCount: number = 262144): ChunkedArray<number> { - return create<number>(size => new Uint32Array(size) as any, chunkIndexCount, 3) - } - - export function forTokenIndices(chunkTokenCount: number = 131072): ChunkedArray<number> { - return create<number>(size => new Int32Array(size) as any, chunkTokenCount, 2) - } - - export function forIndices(chunkTokenCount: number = 131072): ChunkedArray<number> { - return create<number>(size => new Int32Array(size) as any, chunkTokenCount, 1) - } - - export function forInt32(chunkSize: number = 131072): ChunkedArray<number> { - return create<number>(size => new Int32Array(size) as any, chunkSize, 1) - } - - export function forFloat32(chunkSize: number = 131072): ChunkedArray<number> { - return create<number>(size => new Float32Array(size) as any, chunkSize, 1) - } - - export function forArray<T>(chunkSize: number = 131072): ChunkedArray<T> { - return create<T>(size => 
[] as any, chunkSize, 1) - } - export function create<T>(creator: (size: number) => any, chunkElementCount: number, elementSize: number): ChunkedArray<T> { chunkElementCount = chunkElementCount | 0; if (chunkElementCount <= 0) chunkElementCount = 1; diff --git a/src/utils/helper.ts b/src/utils/helper.ts deleted file mode 100644 index 65b4220aa..000000000 --- a/src/utils/helper.ts +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. - * - * from https://github.com/dsehnal/CIFTools.js - * @author David Sehnal <david.sehnal@gmail.com> - */ - -import { TokenizerState } from './tokenizer-state' - -/** - * Eat everything until a newline occurs. - */ -export function eatLine(state: TokenizerState) { - while (state.position < state.length) { - switch (state.data.charCodeAt(state.position)) { - case 10: // \n - state.currentTokenEnd = state.position - ++state.position - ++state.currentLineNumber - return - case 13: // \r - state.currentTokenEnd = state.position - ++state.position - ++state.currentLineNumber - if (state.data.charCodeAt(state.position) === 10) { - ++state.position - } - return - default: - ++state.position - } - } - state.currentTokenEnd = state.position; -} - -/** - * Eat everything until a whitespace/newline occurs. - */ -export function eatValue(state: TokenizerState) { - while (state.position < state.length) { - switch (state.data.charCodeAt(state.position)) { - case 9: // \t - case 10: // \n - case 13: // \r - case 32: // ' ' - state.currentTokenEnd = state.position; - return; - default: - ++state.position; - break; - } - } - state.currentTokenEnd = state.position; -} - -/** - * Skips all the whitespace - space, tab, newline, CR - * Handles incrementing line count. 
- */ -export function skipWhitespace(state: TokenizerState): number { - let prev = 10; - while (state.position < state.length) { - let c = state.data.charCodeAt(state.position); - switch (c) { - case 9: // '\t' - case 32: // ' ' - prev = c; - ++state.position; - break; - case 10: // \n - // handle \r\n - if (prev !== 13) { - ++state.currentLineNumber; - } - prev = c; - ++state.position; - break; - case 13: // \r - prev = c; - ++state.position; - ++state.currentLineNumber; - break; - default: - return prev; - } - } - return prev; -} diff --git a/src/utils/short-string-pool.ts b/src/utils/short-string-pool.ts index 747bcf85b..54d673b77 100644 --- a/src/utils/short-string-pool.ts +++ b/src/utils/short-string-pool.ts @@ -9,8 +9,8 @@ * This ensures there is only 1 instance of a short string. * Also known as string interning, see https://en.wikipedia.org/wiki/String_interning */ -export type ShortStringPool = { [key: string]: string } -export namespace ShortStringPool { +interface ShortStringPool { [key: string]: string } +namespace ShortStringPool { export function create(): ShortStringPool { return Object.create(null); } export function get(pool: ShortStringPool, str: string) { if (str.length > 6) return str; @@ -20,3 +20,5 @@ export namespace ShortStringPool { return str; } } + +export default ShortStringPool; diff --git a/src/utils/tokenizer-state.ts b/src/utils/tokenizer-state.ts deleted file mode 100644 index c6cdc44b8..000000000 --- a/src/utils/tokenizer-state.ts +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
- * - * from https://github.com/dsehnal/CIFTools.js - * @author David Sehnal <david.sehnal@gmail.com> - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -export interface TokenizerState { - data: string - - position: number - length: number - - currentLineNumber: number - currentTokenStart: number - currentTokenEnd: number - - currentTokenType?: number -} diff --git a/src/utils/tokens.ts b/src/utils/tokens.ts deleted file mode 100644 index 9657837c5..000000000 --- a/src/utils/tokens.ts +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. - * - * from https://github.com/dsehnal/CIFTools.js - * @author David Sehnal <david.sehnal@gmail.com> - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -/** - * A helper for building a typed array of token indices. - */ -export interface Tokens { - indicesLenMinus2: number, - count: number, - indices: Int32Array -} - -export namespace Tokens { - function resize(tokens: Tokens) { - // scale the size using golden ratio, because why not. - const newBuffer = new Int32Array((1.61 * tokens.indices.length) | 0); - newBuffer.set(tokens.indices); - tokens.indices = newBuffer; - tokens.indicesLenMinus2 = (newBuffer.length - 2) | 0; - } - - export function add(tokens: Tokens, start: number, end: number) { - if (tokens.count > tokens.indicesLenMinus2) { - resize(tokens); - } - tokens.indices[tokens.count++] = start; - tokens.indices[tokens.count++] = end; - } - - export function addUnchecked(tokens: Tokens, start: number, end: number) { - tokens.indices[tokens.count++] = start; - tokens.indices[tokens.count++] = end; - } - - export function create(size: number): Tokens { - return { - indicesLenMinus2: (size - 2) | 0, - count: 0, - indices: new Int32Array(size) - } - } -} -- GitLab