diff --git a/README.md b/README.md index 5b2a0416a987bbe16f5e261acf701f2c6333bd21..30294cdc052b6b23891dfe55d9c0666bf3b541d4 100644 --- a/README.md +++ b/README.md @@ -43,8 +43,4 @@ TODO ---- -- write about unittest (AR) -- gro reader - - read more than one block - - read velocities - - detect number of decimal places +- write about unittest (AR) \ No newline at end of file diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000000000000000000000000000000000000..e211546c2e17aecf72e79eabc20961af48563ceb Binary files /dev/null and b/package-lock.json differ diff --git a/package.json b/package.json index c1de25f15bdd3122a3d4556674ac56b9df1b4278..ddb3982637a9e78f7c1bffccd5a2126c8237f476 100644 --- a/package.json +++ b/package.json @@ -15,8 +15,13 @@ "script": "./node_modules/.bin/rollup build/js/src/script.js -e fs -f cjs -o build/js/script.js" }, "jest": { - "moduleFileExtensions": [ "ts", "js" ], - "transform": { "\\.ts$": "<rootDir>/node_modules/ts-jest/preprocessor.js" }, + "moduleFileExtensions": [ + "ts", + "js" + ], + "transform": { + "\\.ts$": "<rootDir>/node_modules/ts-jest/preprocessor.js" + }, "testRegex": "\\.spec\\.ts$" }, "author": "", diff --git a/src/data/data.ts b/src/reader/cif/data.ts similarity index 95% rename from src/data/data.ts rename to src/reader/cif/data.ts index 849a0954ebaaba7d14b0fd2caccf9354623b0873..6d85c6a83957e51bf40b5b3854ac8e77538e28a6 100644 --- a/src/data/data.ts +++ b/src/reader/cif/data.ts @@ -56,9 +56,6 @@ export interface Field { int(row: number): number, float(row: number): number, - /** The 'intrinsic value' of the field, e.g., array, binary data, ... 
*/ - value(row: number): any, - presence(row: number): ValuePresence, areValuesEqual(rowA: number, rowB: number): boolean, diff --git a/src/data/schema.ts b/src/reader/cif/schema.ts similarity index 96% rename from src/data/schema.ts rename to src/reader/cif/schema.ts index c9f1a1a860c34ce5490ddb450c2646c62ca2038a..9db6cb7978d4a0954dc16939b665322a35d4431f 100644 --- a/src/data/schema.ts +++ b/src/reader/cif/schema.ts @@ -67,7 +67,6 @@ export namespace Field { export function str(spec?: Spec) { return createSchema(spec, Str); } export function int(spec?: Spec) { return createSchema(spec, Int); } export function float(spec?: Spec) { return createSchema(spec, Float); } - export function value<T>(spec?: Spec): Schema<T> { return createSchema(spec, Value); } function create<T>(field: Data.Field, value: (row: number) => T, toArray: Field<T>['toArray']): Field<T> { return { isDefined: field.isDefined, value, presence: field.presence, areValuesEqual: field.areValuesEqual, stringEquals: field.stringEquals, toArray }; @@ -76,14 +75,12 @@ export namespace Field { function Str(field: Data.Field) { return create(field, field.str, field.toStringArray); } function Int(field: Data.Field) { return create(field, field.int, field.toIntArray); } function Float(field: Data.Field) { return create(field, field.float, field.toFloatArray); } - function Value(field: Data.Field) { return create(field, field.value, () => { throw Error('not supported'); }); } const DefaultUndefined: Data.Field = { isDefined: false, str: row => null, int: row => 0, float: row => 0, - value: row => null, presence: row => Data.ValuePresence.NotSpecified, areValuesEqual: (rowA, rowB) => true, diff --git a/src/reader/common/column.ts b/src/reader/common/column.ts new file mode 100644 index 0000000000000000000000000000000000000000..6a2cd768b04788833cceb24ac48de8370acace30 --- /dev/null +++ b/src/reader/common/column.ts @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE 
file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +export type ArrayType = string[] | number[] | Float32Array | Float64Array | Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array +export type ColumnType = typeof ColumnType.str | typeof ColumnType.int | typeof ColumnType.float + +export namespace ColumnType { + export const str = { '@type': '' as string, kind: 'str' as 'str' }; + export const int = { '@type': 0 as number, kind: 'int' as 'int' }; + export const float = { '@type': 0 as number, kind: 'float' as 'float' }; +} + +export interface Column<T> { + readonly isColumnDefined: boolean, + readonly rowCount: number, + value(row: number): T, + toArray(ctor?: (size: number) => ArrayType, startRow?: number, endRowExclusive?: number): ReadonlyArray<T> +} + +export function UndefinedColumn<T extends ColumnType>(rowCount: number, type: T): Column<T['@type']> { + const value: Column<T['@type']>['value'] = type.kind === 'str' ? row => '' : row => 0; + return { + isColumnDefined: false, + rowCount, + value, + toArray(ctor, s, e) { + const { array } = createArray(rowCount, ctor, s, e); + for (let i = 0, _i = array.length; i < _i; i++) array[i] = value(0) + return array; + } + } +} + +/** A helper function for Column.toArray */ +export function createArray(rowCount: number, ctor?: (size: number) => ArrayType, start?: number, end?: number) { + const c = typeof ctor !== 'undefined' ? ctor : (s: number) => new Array(s); + const s = typeof start !== 'undefined' ? Math.max(Math.min(start, rowCount - 1), 0) : 0; + const e = typeof end !== 'undefined' ? 
Math.min(end, rowCount) : rowCount; + return { array: c(e - s) as any[], start: s, end: e }; +} \ No newline at end of file diff --git a/src/reader/common/spec/fixed-column.spec.ts b/src/reader/common/spec/fixed-column.spec.ts new file mode 100644 index 0000000000000000000000000000000000000000..4aa5e3f24657ee744c7a54d9bdc4c32956e3807d --- /dev/null +++ b/src/reader/common/spec/fixed-column.spec.ts @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import FixedColumn from '../text/column/fixed' +import { ColumnType } from '../../common/column' + +const lines = [ + '1.123 abc', + '1.00 a', + '1.1 bcd ', + '', + ' 5' +] + +const data = lines.join('\n'); + +const linesTokens = (function () { + const tokens: number[] = []; + let last = 0; + for (const l of lines) { + tokens.push(last, last + l.length); + last += l.length + 1; + } + if (tokens[tokens.length - 1] > data.length) tokens[tokens.length - 1] = data.length; + return tokens; +}()); + +describe('fixed text column', () => { + const col = FixedColumn({ data, lines: linesTokens, rowCount: lines.length }); + const col1 = col(0, 5, ColumnType.float); + const col2 = col(5, 4, ColumnType.str); + it('number', () => { + expect(col1.value(0)).toBe(1.123); + expect(col1.value(1)).toBe(1.0); + expect(col1.value(2)).toBe(1.1); + expect(col1.value(3)).toBe(0); + expect(col1.value(4)).toBe(5); + }) + it('str', () => { + expect(col2.value(0)).toBe('abc'); + expect(col2.value(1)).toBe('a'); + expect(col2.value(2)).toBe('bc'); + expect(col2.value(3)).toBe(''); + expect(col2.value(4)).toBe(''); + }) +}); diff --git a/src/reader/common/text/column/__token.ts b/src/reader/common/text/column/__token.ts new file mode 100644 index 0000000000000000000000000000000000000000..87326c26d1cff04f62335d3d0802093c9dfcd9ee --- /dev/null +++ 
b/src/reader/common/text/column/__token.ts @@ -0,0 +1,114 @@ +// /* +// * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. +// * +// * @author David Sehnal <david.sehnal@gmail.com> +// */ + +// import * as Data from '../../../../data/data' +// import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../number-parser' +// import { Tokens } from '../tokenizer' +// import ShortStringPool from '../../../../utils/short-string-pool' + +// export function createTokenFields(data: string, fields: string[], tokens: Tokens): { [name: string]: Data.Field } { +// const fi: TokenFieldInfo = { data, fieldCount: fields.length, tokens: tokens.indices }; +// const categoryFields = Object.create(null); +// for (let i = 0; i < fi.fieldCount; ++i) { +// categoryFields[fields[i]] = TokenField(fi, i); +// } +// return categoryFields; +// } + +// export interface TokenFieldInfo { +// data: string, +// tokens: ArrayLike<number>, +// fieldCount: number, +// isCif?: boolean +// } + +// export function TokenField(info: TokenFieldInfo, index: number): Data.Field { +// const { data, tokens, fieldCount, isCif = false } = info; +// const stringPool = ShortStringPool.create(); + +// const str: Data.Field['str'] = isCif ? row => { +// const i = (row * fieldCount + index) * 2; +// const ret = ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1])); +// if (ret === '.' 
|| ret === '?') return null; +// return ret; +// } : row => { +// const i = (row * fieldCount + index) * 2; +// return ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1])); +// }; + +// const int: Data.Field['int'] = row => { +// const i = (row * fieldCount + index) * 2; +// return fastParseInt(data, tokens[i], tokens[i + 1]) || 0; +// }; + +// const float: Data.Field['float'] = row => { +// const i = (row * fieldCount + index) * 2; +// return fastParseFloat(data, tokens[i], tokens[i + 1]) || 0; +// }; + +// const presence: Data.Field['presence'] = isCif ? row => { +// const i = 2 * (row * fieldCount + index); +// const s = tokens[i]; +// if (tokens[i + 1] - s !== 1) return Data.ValuePresence.Present; +// const v = data.charCodeAt(s); +// if (v === 46 /* . */) return Data.ValuePresence.NotSpecified; +// if (v === 63 /* ? */) return Data.ValuePresence.Unknown; +// return Data.ValuePresence.Present; +// } : row => { +// const i = 2 * (row * fieldCount + index); +// return tokens[i] === tokens[i + 1] ? 
Data.ValuePresence.NotSpecified : Data.ValuePresence.Present +// }; + +// return { +// isDefined: true, +// str, +// int, +// float, +// value: str, +// presence, +// areValuesEqual: (rowA, rowB) => { +// const aI = (rowA * fieldCount + index) * 2, aS = tokens[aI]; +// const bI = (rowB * fieldCount + index) * 2, bS = tokens[bI]; +// const len = tokens[aI + 1] - aS; +// if (len !== tokens[bI + 1] - bS) return false; +// for (let i = 0; i < len; i++) { +// if (data.charCodeAt(i + aS) !== data.charCodeAt(i + bS)) { +// return false; +// } +// } +// return true; +// }, +// stringEquals: (row, value) => { +// const aI = (row * fieldCount + index) * 2; +// const s = tokens[aI]; +// if (!value) return presence(row) !== Data.ValuePresence.Present; +// const len = value.length; +// if (len !== tokens[aI + 1] - s) return false; +// for (let i = 0; i < len; i++) { +// if (data.charCodeAt(i + s) !== value.charCodeAt(i)) return false; +// } +// return true; +// }, +// toStringArray: (startRow, endRowExclusive, ctor) => { +// const count = endRowExclusive - startRow; +// const ret = ctor(count) as any; +// for (let i = 0; i < count; i++) { ret[i] = str(startRow + i); } +// return ret; +// }, +// toIntArray: (startRow, endRowExclusive, ctor) => { +// const count = endRowExclusive - startRow; +// const ret = ctor(count) as any; +// for (let i = 0; i < count; i++) { ret[i] = int(startRow + i); } +// return ret; +// }, +// toFloatArray: (startRow, endRowExclusive, ctor) => { +// const count = endRowExclusive - startRow; +// const ret = ctor(count) as any; +// for (let i = 0; i < count; i++) { ret[i] = float(startRow + i); } +// return ret; +// } +// } +// } \ No newline at end of file diff --git a/src/reader/common/text/column/fixed.ts b/src/reader/common/text/column/fixed.ts new file mode 100644 index 0000000000000000000000000000000000000000..e7df1e0746104f1dd69c99cdb9df55b645f82398 --- /dev/null +++ b/src/reader/common/text/column/fixed.ts @@ -0,0 +1,53 @@ +/* + * Copyright (c) 
2017 molio contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { Column, ColumnType, createArray } from '../../column' +import { trimStr } from '../tokenizer' +import { parseIntSkipLeadingWhitespace, parseFloatSkipLeadingWhitespace } from '../number-parser' + +export interface FixedColumnInfo { + data: string, + lines: ArrayLike<number>, + rowCount: number +} + +export default function FixedColumnProvider(info: FixedColumnInfo) { + return function<T extends ColumnType>(offset: number, width: number, type: T) { + return FixedColumn(info, offset, width, type); + } +} + +function getArrayValues(value: (row: number) => any, target: any[], start: number) { + for (let i = 0, _e = target.length; i < _e; i++) target[i] = value(start + i); + return target; +} + +export function FixedColumn<T extends ColumnType>(info: FixedColumnInfo, offset: number, width: number, type: T): Column<T['@type']> { + const { data, lines, rowCount } = info; + const { kind } = type; + + const value: Column<T['@type']>['value'] = kind === 'str' ? row => { + let s = lines[2 * row] + offset, e = s + width, le = lines[2 * row + 1]; + if (s >= le) return ''; + if (e > le) e = le; + return trimStr(data, s, e); + } : kind === 'int' ? 
row => { + const s = lines[2 * row] + offset, e = s + width; + return parseIntSkipLeadingWhitespace(data, s, e); + } : row => { + const s = lines[2 * row] + offset, e = s + width; + return parseFloatSkipLeadingWhitespace(data, s, e); + } + return { + isColumnDefined: true, + rowCount, + value, + toArray(ctor, s, e) { + const { array, start } = createArray(rowCount, ctor, s, e); + return getArrayValues(value, array, start); + } + }; +} \ No newline at end of file diff --git a/src/reader/common/text/token-field.ts b/src/reader/common/text/token-field.ts deleted file mode 100644 index 849d7cb843651dd9065892a6c165983a5b0d9d05..0000000000000000000000000000000000000000 --- a/src/reader/common/text/token-field.ts +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. - * - * @author David Sehnal <david.sehnal@gmail.com> - */ - -import * as Data from '../../../data/data' -import { parseInt as fastParseInt, parseFloat as fastParseFloat } from './number-parser' -import { Tokens } from './tokenizer' -import ShortStringPool from '../../../utils/short-string-pool' - -export function createTokenFields(data: string, fields: string[], tokens: Tokens): { [name: string]: Data.Field } { - const fi: TokenFieldInfo = { data, fieldCount: fields.length, tokens: tokens.indices }; - const categoryFields = Object.create(null); - for (let i = 0; i < fi.fieldCount; ++i) { - categoryFields[fields[i]] = TokenField(fi, i); - } - return categoryFields; -} - -export interface TokenFieldInfo { - data: string, - tokens: ArrayLike<number>, - fieldCount: number, - isCif?: boolean -} - -export function TokenField(info: TokenFieldInfo, index: number): Data.Field { - const { data, tokens, fieldCount, isCif = false } = info; - const stringPool = ShortStringPool.create(); - - const str: Data.Field['str'] = isCif ? 
row => { - const i = (row * fieldCount + index) * 2; - const ret = ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1])); - if (ret === '.' || ret === '?') return null; - return ret; - } : row => { - const i = (row * fieldCount + index) * 2; - return ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1])); - }; - - const int: Data.Field['int'] = row => { - const i = (row * fieldCount + index) * 2; - return fastParseInt(data, tokens[i], tokens[i + 1]) || 0; - }; - - const float: Data.Field['float'] = row => { - const i = (row * fieldCount + index) * 2; - return fastParseFloat(data, tokens[i], tokens[i + 1]) || 0; - }; - - const presence: Data.Field['presence'] = isCif ? row => { - const i = 2 * (row * fieldCount + index); - const s = tokens[i]; - if (tokens[i + 1] - s !== 1) return Data.ValuePresence.Present; - const v = data.charCodeAt(s); - if (v === 46 /* . */) return Data.ValuePresence.NotSpecified; - if (v === 63 /* ? */) return Data.ValuePresence.Unknown; - return Data.ValuePresence.Present; - } : row => { - const i = 2 * (row * fieldCount + index); - return tokens[i] === tokens[i + 1] ? 
Data.ValuePresence.NotSpecified : Data.ValuePresence.Present - }; - - return { - isDefined: true, - str, - int, - float, - value: str, - presence, - areValuesEqual: (rowA, rowB) => { - const aI = (rowA * fieldCount + index) * 2, aS = tokens[aI]; - const bI = (rowB * fieldCount + index) * 2, bS = tokens[bI]; - const len = tokens[aI + 1] - aS; - if (len !== tokens[bI + 1] - bS) return false; - for (let i = 0; i < len; i++) { - if (data.charCodeAt(i + aS) !== data.charCodeAt(i + bS)) { - return false; - } - } - return true; - }, - stringEquals: (row, value) => { - const aI = (row * fieldCount + index) * 2; - const s = tokens[aI]; - if (!value) return presence(row) !== Data.ValuePresence.Present; - const len = value.length; - if (len !== tokens[aI + 1] - s) return false; - for (let i = 0; i < len; i++) { - if (data.charCodeAt(i + s) !== value.charCodeAt(i)) return false; - } - return true; - }, - toStringArray: (startRow, endRowExclusive, ctor) => { - const count = endRowExclusive - startRow; - const ret = ctor(count) as any; - for (let i = 0; i < count; i++) { ret[i] = str(startRow + i); } - return ret; - }, - toIntArray: (startRow, endRowExclusive, ctor) => { - const count = endRowExclusive - startRow; - const ret = ctor(count) as any; - for (let i = 0; i < count; i++) { ret[i] = int(startRow + i); } - return ret; - }, - toFloatArray: (startRow, endRowExclusive, ctor) => { - const count = endRowExclusive - startRow; - const ret = ctor(count) as any; - for (let i = 0; i < count; i++) { ret[i] = float(startRow + i); } - return ret; - } - } -} \ No newline at end of file diff --git a/src/reader/common/text/tokenizer.ts b/src/reader/common/text/tokenizer.ts index 79cabe1005658450320c7ecb8285edef229c9fe7..b957a35b1c2f22a4054845ebd27eb57429c29a9b 100644 --- a/src/reader/common/text/tokenizer.ts +++ b/src/reader/common/text/tokenizer.ts @@ -6,7 +6,7 @@ * @author Alexander Rose <alexander.rose@weirdbyte.de> */ -export interface State<Info = any, TokenType = any> { +export 
interface State<TokenType = any> { data: string position: number @@ -16,12 +16,10 @@ export interface State<Info = any, TokenType = any> { currentTokenStart: number currentTokenEnd: number - currentTokenType: TokenType, - - info: Info + currentTokenType: TokenType } -export function State<Info, TokenType>(data: string, info?: Info, initialTokenType?: TokenType): State<Info, TokenType> { +export function State<TokenType>(data: string, initialTokenType?: TokenType): State<TokenType> { return { data, position: 0, @@ -29,37 +27,53 @@ export function State<Info, TokenType>(data: string, info?: Info, initialTokenTy currentLineNumber: 1, currentTokenStart: 0, currentTokenEnd: 0, - currentTokenType: initialTokenType!, - info: info! + currentTokenType: initialTokenType! }; } +export function getTokenString(state: State) { + return state.data.substring(state.currentTokenStart, state.currentTokenEnd); +} + /** * Eat everything until a newline occurs. */ export function eatLine(state: State) { + const { data } = state; while (state.position < state.length) { - switch (state.data.charCodeAt(state.position)) { + switch (data.charCodeAt(state.position)) { case 10: // \n - state.currentTokenEnd = state.position - ++state.position - ++state.currentLineNumber - return + state.currentTokenEnd = state.position; + ++state.position; + ++state.currentLineNumber; + return; case 13: // \r - state.currentTokenEnd = state.position - ++state.position - ++state.currentLineNumber - if (state.data.charCodeAt(state.position) === 10) { - ++state.position + state.currentTokenEnd = state.position; + ++state.position; + ++state.currentLineNumber; + if (data.charCodeAt(state.position) === 10) { + ++state.position; } - return + return; default: - ++state.position + ++state.position; + break; } } state.currentTokenEnd = state.position; } +/** Sets the current token start to the current position */ +export function markStart(state: State) { + state.currentTokenStart = state.position; +} + +/** Sets the 
current token start to current position and moves to the next line. */ +export function markLine(state: State) { + state.currentTokenStart = state.position; + eatLine(state); +} + /** * Eat everything until a whitespace/newline occurs. */ @@ -129,6 +143,15 @@ export function trim(state: State, start: number, end: number) { state.position = end; } +export function trimStr(data: string, start: number, end: number) { + let s = start, e = end - 1; + let c = data.charCodeAt(s); + while ((c === 9 || c === 32) && s <= e) c = data.charCodeAt(++s); + c = data.charCodeAt(e); + while ((c === 9 || c === 32) && e >= s) c = data.charCodeAt(--e); + return data.substring(s, e + 1); +} + export interface Tokens { indicesLenMinus2: number, count: number, diff --git a/src/reader/gro/format.ts b/src/reader/gro/format.ts deleted file mode 100644 index ce24a7978d120ba5f0ed796ae4eb0f0ca6f57139..0000000000000000000000000000000000000000 --- a/src/reader/gro/format.ts +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
- * - * @author Alexander Rose <alexander.rose@weirdbyte.de> - * @author David Sehnal <david.sehnal@gmail.com> - */ - -import schema from './schema' -import parse from './parser' - -export default { - parse, - schema -}; \ No newline at end of file diff --git a/src/reader/gro/parser.ts b/src/reader/gro/parser.ts index 7c41e81bbb49d50ea5e5a2bc82bd94815eba9d9c..f4560630e454fc1cf24f30557a9c90e918cd2062 100644 --- a/src/reader/gro/parser.ts +++ b/src/reader/gro/parser.ts @@ -5,77 +5,71 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -import { State as TokenizerState, Tokens, eatLine, skipWhitespace, eatValue, trim } from '../common/text/tokenizer' -import { parseInt } from '../common/text/number-parser' -import { createTokenFields } from '../common/text/token-field' -import * as Data from '../../data/data' +import { State as TokenizerState, Tokens, markLine, getTokenString } from '../common/text/tokenizer' +import FixedColumn from '../common/text/column/fixed' +import { ColumnType, UndefinedColumn } from '../common/column' +import * as Schema from './schema' import Result from '../result' -interface StateInfo { - numberOfAtoms: number - hasVelocities: boolean - numberOfDecimalPlaces: number +interface State { + tokenizer: TokenizerState, + header: Schema.Header, + numberOfAtoms: number, } -type State = TokenizerState<StateInfo> +function createEmptyHeader(): Schema.Header { + return { + title: '', + timeInPs: 0, + hasVelocities: false, + precision: { position: 0, velocity: 0 }, + box: [0, 0, 0] + }; +} -function createState(data: string): State { - return TokenizerState(data, { numberOfAtoms: 0, hasVelocities: false, numberOfDecimalPlaces: 3 }); +function createState(tokenizer: TokenizerState): State { + return { + tokenizer, + header: createEmptyHeader(), + numberOfAtoms: 0 + }; } /** * title string (free format string, optional time in ps after 't=') */ -function handleTitleString(state: State, tokens: Tokens) { - eatLine(state) - // console.log('title', 
state.data.substring(state.currentTokenStart, state.currentTokenEnd)) - let start = state.currentTokenStart - let end = state.currentTokenEnd - let valueStart = state.currentTokenStart - let valueEnd = start - - while (valueEnd < end && !isTime(state.data, valueEnd)) ++valueEnd; +function handleTitleString(state: State) { + const { tokenizer, header } = state; + markLine(tokenizer); - if (isTime(state.data, valueEnd)) { - let timeStart = valueEnd + 2 + let line = getTokenString(tokenizer); - while (valueEnd > start && isSpaceOrComma(state.data, valueEnd - 1)) --valueEnd; - Tokens.add(tokens, valueStart, valueEnd) // title + // skip potential empty lines... + if (line.trim().length === 0) { + markLine(tokenizer); + line = getTokenString(tokenizer); + } - while (timeStart < end && state.data.charCodeAt(timeStart) === 32) ++timeStart; - while (valueEnd > timeStart && state.data.charCodeAt(valueEnd - 1) === 32) --valueEnd; - Tokens.add(tokens, timeStart, end) // time + const timeOffset = line.lastIndexOf('t='); + if (timeOffset >= 0) { + header.timeInPs = parseFloat(line.substring(timeOffset + 2)); + header.title = line.substring(0, timeOffset).trim(); + if (header.title && header.title[header.title.length - 1] === ',') { + header.title = header.title.substring(0, header.title.length - 1); + } } else { - Tokens.add(tokens, valueStart, valueEnd) // title - Tokens.add(tokens, valueEnd, valueEnd) // empty token for time + header.title = line; } } -function isSpaceOrComma(data: string, position: number): boolean { - const c = data.charCodeAt(position); - return c === 32 || c === 44 -} - -function isTime(data: string, position: number): boolean { - // T/t - const c = data.charCodeAt(position); - if (c !== 84 && c !== 116) return false; - // = - if (data.charCodeAt(position + 1) !== 61) return false; - - return true; -} - /** * number of atoms (free format integer) */ -function handleNumberOfAtoms(state: State, tokens: Tokens) { - skipWhitespace(state) - 
state.currentTokenStart = state.position - eatValue(state) - state.info.numberOfAtoms = parseInt(state.data, state.currentTokenStart, state.currentTokenEnd) - Tokens.add(tokens, state.currentTokenStart, state.currentTokenEnd) - eatLine(state) +function handleNumberOfAtoms(state: State) { + const { tokenizer } = state; + markLine(tokenizer); + const line = getTokenString(tokenizer); + state.numberOfAtoms = parseInt(line); } /** @@ -94,33 +88,46 @@ function handleNumberOfAtoms(state: State, tokens: Tokens) { * position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places) * velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places) */ -function handleAtoms(state: State) { - const fieldSizes = [ 5, 5, 5, 5, 8, 8, 8, 8, 8, 8 ]; - const fields = [ 'residueNumber', 'residueName', 'atomName', 'atomNumber', 'x', 'y', 'z' ] - if (state.info.hasVelocities) { - fields.push('vx', 'vy', 'vz') - } - - const fieldCount = fields.length - const tokens = Tokens.create(state.info.numberOfAtoms * 2 * fieldCount) +function handleAtoms(state: State): Schema.Atoms { + const { tokenizer, numberOfAtoms } = state; + const lineTokens = Tokens.create(numberOfAtoms * 2); - let start: number; - let end: number; - - for (let i = 0, _i = state.info.numberOfAtoms; i < _i; ++i) { - state.currentTokenStart = state.position; - end = state.currentTokenStart; - for (let j = 0; j < fieldCount; ++j) { - start = end; - end = start + fieldSizes[j]; - - trim(state, start, end); - Tokens.addUnchecked(tokens, state.currentTokenStart, state.currentTokenEnd); - } - eatLine(state) + for (let i = 0; i < numberOfAtoms; i++) { + markLine(tokenizer); + Tokens.addUnchecked(lineTokens, tokenizer.currentTokenStart, tokenizer.currentTokenEnd); } - return Data.Category(state.info.numberOfAtoms, createTokenFields(state.data, fields, tokens)); + const lines = lineTokens.indices; + const positionSample = tokenizer.data.substring(lines[0], lines[1]).substring(20); + const 
precisions = positionSample.match(/\.\d+/g)!; + const hasVelocities = precisions.length === 6; + state.header.hasVelocities = hasVelocities; + state.header.precision.position = precisions[0].length - 1; + state.header.precision.velocity = hasVelocities ? precisions[3].length - 1 : 0; + + const pO = 20; + const pW = state.header.precision.position + 5; + const vO = pO + 3 * pW; + const vW = state.header.precision.velocity + 4; + + const col = FixedColumn({ data: tokenizer.data, lines, rowCount: state.numberOfAtoms }); + const undef = UndefinedColumn(state.numberOfAtoms, ColumnType.float); + + const ret = { + count: state.numberOfAtoms, + residueNumber: col(0, 5, ColumnType.int), + residueName: col(5, 5, ColumnType.str), + atomName: col(10, 5, ColumnType.str), + atomNumber: col(15, 5, ColumnType.int), + x: col(pO, pW, ColumnType.float), + y: col(pO + pW, pW, ColumnType.float), + z: col(pO + 2 * pW, pW, ColumnType.float), + vx: hasVelocities ? col(vO, vW, ColumnType.float) : undef, + vy: hasVelocities ? col(vO + vW, vW, ColumnType.float) : undef, + vz: hasVelocities ? col(vO + 2 * vW, vW, ColumnType.float) : undef, + }; + + return ret; } /** @@ -129,35 +136,32 @@ function handleAtoms(state: State) { * the last 6 values may be omitted (they will be set to zero). * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0. 
*/ -function handleBoxVectors(state: State, tokens: Tokens) { - // just read the first three values, ignore any remaining - for (let i = 0; i < 3; ++i) { - skipWhitespace(state); - state.currentTokenStart = state.position; - eatValue(state); - Tokens.add(tokens, state.currentTokenStart, state.currentTokenEnd); - } +function handleBoxVectors(state: State) { + const { tokenizer } = state; + markLine(tokenizer); + const values = getTokenString(tokenizer).trim().split(/\s+/g); + state.header.box = [+values[0], +values[1], +values[2]]; } -function parseInternal(data: string): Result<Data.File> { - const state = createState(data); - - const headerFields = ['title', 'timeInPs', 'numberOfAtoms', 'boxX', 'boxY', 'boxZ']; - const headerTokens = Tokens.create(2 * headerFields.length); - - handleTitleString(state, headerTokens); - handleNumberOfAtoms(state, headerTokens); - const atoms = handleAtoms(state); - handleBoxVectors(state, headerTokens); - - const block = Data.Block({ - header: Data.Category(1, createTokenFields(data, headerFields, headerTokens)), - atoms - }); +function parseInternal(data: string): Result<Schema.File> { + const tokenizer = TokenizerState(data); + + const structures: Schema.Structure[] = []; + while (tokenizer.position < data.length) { + const state = createState(tokenizer); + handleTitleString(state); + handleNumberOfAtoms(state); + const atoms = handleAtoms(state); + handleBoxVectors(state); + structures.push({ header: state.header, atoms }); + } - return Result.success(Data.File([block])); + const result: Schema.File = { structures }; + return Result.success(result); } -export default function parse(data: string) { +export function parse(data: string) { return parseInternal(data); -} \ No newline at end of file +} + +export default parse; \ No newline at end of file diff --git a/src/reader/gro/schema.d.ts b/src/reader/gro/schema.d.ts new file mode 100644 index 0000000000000000000000000000000000000000..1d49d9641970598475fe2efcee25c231fca973c4 --- 
/dev/null +++ b/src/reader/gro/schema.d.ts @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { Column } from '../common/column' + +export interface Header { + title: string, + timeInPs: number, + /** number of decimal places */ + precision: { position: number, velocity: number }, + hasVelocities: boolean, + box: [number, number, number] +} + +export interface Atoms { + count: number, + residueNumber: Column<number>, + residueName: Column<string>, + atomName: Column<string>, + atomNumber: Column<number>, + x: Column<number>, + y: Column<number>, + z: Column<number>, + vx: Column<number>, + vy: Column<number>, + vz: Column<number> +} + +export interface Structure { + header: Readonly<Header>, + atoms: Readonly<Atoms> +} + +export interface File { + structures: Structure[] +} \ No newline at end of file diff --git a/src/reader/gro/schema.ts b/src/reader/gro/schema.ts deleted file mode 100644 index d8e9c576a995f107de5f1563d7af0a96054b0424..0000000000000000000000000000000000000000 --- a/src/reader/gro/schema.ts +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
- * - * @author Alexander Rose <alexander.rose@weirdbyte.de> - * @author David Sehnal <david.sehnal@gmail.com> - */ - -import * as Schema from '../../data/schema' -import * as Data from '../../data/data' - -const str = Schema.Field.str() -const int = Schema.Field.int() -const float = Schema.Field.float() - -const header = { - 'title': str, - 'timeInPs': float, - 'numberOfAtoms': int, - 'boxX': float, - 'boxY': float, - 'boxZ': float -} - -const atoms = { - 'residueNumber': int, - 'residueName': str, - 'atomName': str, - 'atomNumber': int, - 'x': float, - 'y': float, - 'z': float, - 'vx': float, - 'vy': float, - 'vz': float -} - -const schema = { header, atoms }; -export default function (block: Data.Block) { - return Schema.apply(schema, block); -} \ No newline at end of file diff --git a/src/data/spec/schema.spec.ts b/src/reader/spec/cif.spec.ts similarity index 95% rename from src/data/spec/schema.spec.ts rename to src/reader/spec/cif.spec.ts index 131468eb14a8c00b6017ef5cb64c1e07c6775b3a..bbd81a32180ee367c92f6adb9067aa744c310a88 100644 --- a/src/data/spec/schema.spec.ts +++ b/src/reader/spec/cif.spec.ts @@ -4,8 +4,8 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -import * as Data from '../data' -import * as Schema from '../schema' +import * as Data from '../cif/data' +import * as Schema from '../cif/schema' function Field(values: any[]): Data.Field { return { @@ -13,7 +13,6 @@ function Field(values: any[]): Data.Field { str: row => '' + values[row], int: row => +values[row] || 0, float: row => +values[row] || 0, - value: row => values[row], presence: row => Data.ValuePresence.Present, areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB], diff --git a/src/reader/spec/gro.spec.ts b/src/reader/spec/gro.spec.ts index dc2423731f95954408cc14b33f33f41fe2e01b0e..67dd4b21d87597f5f19716b88061769ff04cadbe 100644 --- a/src/reader/spec/gro.spec.ts +++ b/src/reader/spec/gro.spec.ts @@ -5,7 +5,7 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -import Gro 
from '../gro/format' +import Gro from '../gro/parser' const groString = `MD of 2 waters, t= 4.2 6 @@ -15,82 +15,77 @@ const groString = `MD of 2 waters, t= 4.2 2WATER OW1 4 1.275 0.053 0.622 0.2519 0.3140 -0.1734 2WATER HW2 5 1.337 0.002 0.680 -1.0641 -1.1349 0.0257 2WATER HW3 6 1.326 0.120 0.568 1.9427 -0.8216 -0.0244 - 1.82060 1.82060 1.82060` + 1.82060 2.82060 3.82060` const groStringHighPrecision = `Generated by trjconv : 2168 system t= 15.00000 3 1ETH C1 1 2.735383 2.672010 1.450194 0.2345 -0.1622 0.2097 1ETH H11 2 0.015804 2.716597 1.460588 0.8528 -0.7984 0.6605 1ETH H12 3 2.744822 2.565544 1.409227 -2.3812 2.8618 1.8101 - 1.82060 1.82060 1.82060` + 1.82060 2.82060 3.82060` describe('gro reader', () => { it('basic', () => { - const parsed = Gro.parse(groString) + const parsed = Gro(groString) if (parsed.isError) { console.log(parsed) - } else { - const groFile = parsed.result; - const data = Gro.schema(groFile.blocks[0]); - - const { header, atoms } = data; - if (header._isDefined) { - expect(header.title.value(0)).toBe('MD of 2 waters') - expect(header.timeInPs.value(0)).toBeCloseTo(4.2) - expect(header.numberOfAtoms.value(0)).toBe(6) - - expect(header.boxX.value(0)).toBeCloseTo(1.82060) - expect(header.boxY.value(0)).toBeCloseTo(1.82060) - expect(header.boxZ.value(0)).toBeCloseTo(1.82060) - } else { - console.error('no header') - } - - if (atoms._rowCount === 6) { - expect(atoms.x.value(0)).toBeCloseTo(0.126); - expect(atoms.y.value(0)).toBeCloseTo(1.624); - expect(atoms.z.value(0)).toBeCloseTo(1.679); - - // TODO: check velocities when they are parsed. 
- } else { - console.error('no atoms'); - } + return; } - }) + + const groFile = parsed.result; + const data = groFile.structures[0]; + + const { header, atoms } = data; + expect(header.title).toBe('MD of 2 waters') + expect(header.timeInPs).toBeCloseTo(4.2) + expect(header.hasVelocities).toBe(true); + expect(header.precision.position).toBe(3); + expect(header.precision.velocity).toBe(4); + expect(header.box[0]).toBeCloseTo(1.82060, 0.00001) + expect(header.box[1]).toBeCloseTo(2.82060, 0.00001) + expect(header.box[2]).toBeCloseTo(3.82060, 0.00001) + + expect(atoms.count).toBe(6); + + expect(atoms.x.value(0)).toBeCloseTo(0.126, 0.001); + expect(atoms.y.value(0)).toBeCloseTo(1.624, 0.001); + expect(atoms.z.value(0)).toBeCloseTo(1.679, 0.001); + + expect(atoms.vx.value(5)).toBeCloseTo(1.9427, 0.0001); + expect(atoms.vy.value(5)).toBeCloseTo(-0.8216, 0.0001); + expect(atoms.vz.value(5)).toBeCloseTo(-0.0244, 0.0001); + }); it('high precision', () => { - const parsed = Gro.parse(groStringHighPrecision) + const parsed = Gro(groStringHighPrecision); if (parsed.isError) { console.log(parsed) - } else { - const groFile = parsed.result; - const data = Gro.schema(groFile.blocks[0]); - - const { header, atoms } = data; - if (header._isDefined) { - expect(header.title.value(0)).toBe('Generated by trjconv : 2168 system') - expect(header.timeInPs.value(0)).toBeCloseTo(15) - expect(header.numberOfAtoms.value(0)).toBe(3) - - expect(header.boxX.value(0)).toBeCloseTo(1.82060) - expect(header.boxY.value(0)).toBeCloseTo(1.82060) - expect(header.boxZ.value(0)).toBeCloseTo(1.82060) - } else { - console.error('no header') - } - - if (atoms._rowCount === 3) { - // TODO: test when high-prec parser is available - // expect(atoms.x.value(1)).toBeCloseTo(0.015804, 0.00001); - // expect(atoms.y.value(1)).toBeCloseTo(2.716597, 0.00001); - // expect(atoms.z.value(1)).toBeCloseTo(1.460588, 0.00001); - - // TODO: check velocities when they are parsed. 
- } else { - console.error('no atoms'); - } + return; } - }) + + const groFile = parsed.result; + const data = groFile.structures[0]; + + const { header, atoms } = data; + expect(header.title).toBe('Generated by trjconv : 2168 system') + expect(header.timeInPs).toBeCloseTo(15) + expect(header.hasVelocities).toBe(true); + expect(header.precision.position).toBe(6); + expect(header.precision.velocity).toBe(4); + expect(header.box[0]).toBeCloseTo(1.82060, 0.00001) + expect(header.box[1]).toBeCloseTo(2.82060, 0.00001) + expect(header.box[2]).toBeCloseTo(3.82060, 0.00001) + + expect(atoms.count).toBe(3); + + expect(atoms.x.value(1)).toBeCloseTo(0.015804, 0.000001); + expect(atoms.y.value(1)).toBeCloseTo(2.716597, 0.000001); + expect(atoms.z.value(1)).toBeCloseTo(1.460588, 0.000001); + + expect(atoms.vx.value(0)).toBeCloseTo(0.2345, 0.0001); + expect(atoms.vy.value(0)).toBeCloseTo(-0.1622, 0.0001); + expect(atoms.vz.value(0)).toBeCloseTo(0.2097, 0.0001); + }); }); diff --git a/src/script.ts b/src/script.ts index 2febe153c9bc24ab424e5e8dd99aa0371b52febc..92b27d329dab62e02807d0ef5af5854ba7d276b0 100644 --- a/src/script.ts +++ b/src/script.ts @@ -7,72 +7,64 @@ // import * as util from 'util' import * as fs from 'fs' -import Gro from './reader/gro/format' +import Gro from './reader/gro/parser' //const file = '1crn.gro' // const file = 'water.gro' // const file = 'test.gro' const file = 'md_1u19_trj.gro' -fs.readFile(`./examples/${file}`, 'utf8', function (err,data) { +fs.readFile(`./examples/${file}`, 'utf8', function (err,input) { if (err) { return console.log(err); } // console.log(data); console.time('parse') - const parsed = Gro.parse(data) + const parsed = Gro(input) console.timeEnd('parse') if (parsed.isError) { console.log(parsed) - } else { - const groFile = parsed.result - const data = Gro.schema(groFile.blocks[0]) + return; + } - // const header = groFile.blocks[0].getCategory('header') - const { header, atoms } = data; - if (header._rowCount !== 1) { - 
console.log('title', header.title.value(0)) - console.log('timeInPs', header.timeInPs.value(0)) - console.log('numberOfAtoms', header.numberOfAtoms.value(0)) - console.log('boxX', header.boxX.value(0)) - console.log('boxY', header.boxY.value(0)) - console.log('boxZ', header.boxZ.value(0)) - } else { - console.error('no header') - } + const groFile = parsed.result - if (atoms._rowCount > 0) { - console.log(`'${atoms.residueNumber.value(1)}'`) - console.log(`'${atoms.residueName.value(1)}'`) - console.log(`'${atoms.atomName.value(1)}'`) - console.log(atoms.z.value(1)) - console.log(`'${atoms.z.value(1)}'`) + console.log('structure count: ', groFile.structures.length); - const n = atoms._rowCount - console.log('rowCount', n) + const data = groFile.structures[0]; - console.time('getFloatArray x') - const x = atoms.x.toArray(0, n, x => new Float32Array(x))! - console.timeEnd('getFloatArray x') - console.log(x.length, x[0], x[x.length-1]) + // const header = groFile.blocks[0].getCategory('header') + const { header, atoms } = data; + console.log(JSON.stringify(header, null, 2)); + console.log('number of atoms:', atoms.count); - console.time('getFloatArray y') - const y = atoms.y.toArray(0, n, x => new Float32Array(x))! - console.timeEnd('getFloatArray y') - console.log(y.length, y[0], y[y.length-1]) + console.log(`'${atoms.residueNumber.value(1)}'`) + console.log(`'${atoms.residueName.value(1)}'`) + console.log(`'${atoms.atomName.value(1)}'`) + console.log(atoms.z.value(1)) + console.log(`'${atoms.z.value(1)}'`) - console.time('getFloatArray z') - const z = atoms.z.toArray(0, n, x => new Float32Array(x))! - console.timeEnd('getFloatArray z') - console.log(z.length, z[0], z[z.length-1]) + const n = atoms.count; + console.log('rowCount', n) - console.time('getIntArray residueNumber') - const residueNumber = atoms.residueNumber.toArray(0, n, x => new Int32Array(x))! 
- console.timeEnd('getIntArray residueNumber') - console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length-1]) - } else { - console.error('no atoms') - } - } + console.time('getFloatArray x') + const x = atoms.x.toArray(x => new Float32Array(x))! + console.timeEnd('getFloatArray x') + console.log(x.length, x[0], x[x.length-1]) + + console.time('getFloatArray y') + const y = atoms.y.toArray(x => new Float32Array(x))! + console.timeEnd('getFloatArray y') + console.log(y.length, y[0], y[y.length-1]) + + console.time('getFloatArray z') + const z = atoms.z.toArray(x => new Float32Array(x))! + console.timeEnd('getFloatArray z') + console.log(z.length, z[0], z[z.length-1]) + + console.time('getIntArray residueNumber') + const residueNumber = atoms.residueNumber.toArray(x => new Int32Array(x))! + console.timeEnd('getIntArray residueNumber') + console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length-1]) });