Skip to content
Snippets Groups Projects
Commit 6979ae82 authored by David Sehnal's avatar David Sehnal
Browse files

mmCIF schema

parent c5b1b84e
No related branches found
No related tags found
No related merge requests found
Showing with 487 additions and 237 deletions
...@@ -83,4 +83,26 @@ export function DefaultUndefinedField(rowCount: number): Field { ...@@ -83,4 +83,26 @@ export function DefaultUndefinedField(rowCount: number): Field {
toIntArray: (p) => Column.createArray(rowCount, p).array, toIntArray: (p) => Column.createArray(rowCount, p).array,
toFloatArray: (p) => Column.createArray(rowCount, p).array toFloatArray: (p) => Column.createArray(rowCount, p).array
}; };
}
/**
 * Assembles a `rows` x `cols` matrix for one row of a category.
 *
 * The component at (i, j) is read from the field named `<field>[i][j]`
 * (indices are 1-based in the field name). A component whose field cannot
 * be found is reported as 0.
 *
 * @param category category to read the component fields from
 * @param field base name shared by all component fields
 * @param rows number of matrix rows
 * @param cols number of matrix columns
 * @param row category row whose values are read
 * @returns the matrix as an array of row arrays
 */
export function getMatrix(category: Category, field: string, rows: number, cols: number, row: number) {
    const matrix: number[][] = [];
    for (let r = 0; r < rows; r++) {
        const line: number[] = [];
        for (let c = 0; c < cols; c++) {
            // Field names carry 1-based indices.
            const name = `${field}[${r + 1}][${c + 1}]`;
            const component = category.getField(name);
            line.push(component ? component.float(row) : 0.0);
        }
        matrix.push(line);
    }
    return matrix;
}
/**
 * Reads a vector of `rows` components for one row of a category.
 *
 * Component `i` is looked up as the field named `<field>[i]` (1-based in the
 * field name); a component whose field cannot be found is returned as 0.
 */
export function getVector(category: Category, field: string, rows: number, row: number) {
const ret: number[] = [];
for (let i = 0; i < rows; i++) {
// Field names carry 1-based indices, hence `i + 1`.
const f = category.getField(`${field}[${i + 1}]`);
// A falsy result from getField is treated as a zero component.
ret[i] = f ? f.float(row) : 0.0;
}
return ret;
} }
\ No newline at end of file
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import parseText from './text-parser'
import { apply as applySchema } from './schema'
import mmCIF from './schema/mmcif'
/**
 * Default export of this module: bundles the text parser, the schema
 * application function and the available schemas (currently only mmCIF).
 */
const api = {
    parseText,
    applySchema,
    schema: { mmCIF }
};

export default api;
\ No newline at end of file
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
import * as Data from './data-model' import * as Data from './data-model'
import * as Column from '../common/column' import * as Column from '../common/column'
import StringPool from '../../utils/short-string-pool'
/** /**
* A schema defines the shape of categories and fields. * A schema defines the shape of categories and fields.
...@@ -47,27 +48,21 @@ export type Category<Fields> = Fields & { ...@@ -47,27 +48,21 @@ export type Category<Fields> = Fields & {
} }
export namespace Category { export namespace Category {
export type Schema = { '@alias'?: string } & { [field: string]: Field.Schema<any> } export type Schema = { [field: string]: Field.Schema<any> }
export type Instance<T extends Schema> = Category<{ [F in keyof T]: Column.Column<T[F]['type']> }> export type Instance<T extends Schema> = Category<{ [F in keyof T]: Column.Column<T[F]['type']> }>
} }
// export interface Field<T> {
// readonly isDefined: boolean,
// value(row: number): T,
// presence(row: number): Data.ValuePresence,
// areValuesEqual(rowA: number, rowB: number): boolean,
// stringEquals(row: number, value: string | null): boolean,
// /** Converts the selected row range to an array. ctor might or might not be called depedning on the source data format. */
// toArray(params?: Column.ToArrayParams): ReadonlyArray<T>
// }
export namespace Field { export namespace Field {
export interface Schema<T> { type: T, ctor: (field: Data.Field) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string }; export interface Schema<T> { type: T, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string };
export interface Spec { undefinedField?: (c: number) => Data.Field, alias?: string } export interface Spec { undefinedField?: (c: number) => Data.Field, alias?: string }
export function alias(name: string): Schema<any> { return { alias: name } as any; }
export function pooledStr(spec?: Spec) { return createSchema(spec, PooledStr); }
export function str(spec?: Spec) { return createSchema(spec, Str); } export function str(spec?: Spec) { return createSchema(spec, Str); }
export function int(spec?: Spec) { return createSchema(spec, Int); } export function int(spec?: Spec) { return createSchema(spec, Int); }
export function float(spec?: Spec) { return createSchema(spec, Float); } export function float(spec?: Spec) { return createSchema(spec, Float); }
export function vector(rows: number, spec?: Spec) { return createSchema(spec, Vector(rows)); }
export function matrix(rows: number, cols: number, spec?: Spec) { return createSchema(spec, Matrix(rows, cols)); }
function create<T>(field: Data.Field, value: (row: number) => T, toArray: Column.Column<T>['toArray']): Column.Column<T> { function create<T>(field: Data.Field, value: (row: number) => T, toArray: Column.Column<T>['toArray']): Column.Column<T> {
const presence = field.presence; const presence = field.presence;
...@@ -81,11 +76,31 @@ export namespace Field { ...@@ -81,11 +76,31 @@ export namespace Field {
}; };
} }
/**
 * Column constructor for string fields whose values are routed through a
 * StringPool created for this column.
 * NOTE(review): presumably the pool returns a shared instance for equal
 * strings; confirm against the StringPool implementation.
 */
function PooledStr(field: Data.Field) {
    const pool = StringPool.create();

    function value(row: number) {
        return StringPool.get(pool, field.str(row));
    }

    function toArray(params?: Column.ToArrayParams) {
        return Column.createAndFillArray(field.rowCount, value, params);
    }

    return create<string>(field, value, toArray);
}
function Str(field: Data.Field) { return create(field, field.str, field.toStringArray); } function Str(field: Data.Field) { return create(field, field.str, field.toStringArray); }
function Int(field: Data.Field) { return create(field, field.int, field.toIntArray); } function Int(field: Data.Field) { return create(field, field.int, field.toIntArray); }
function Float(field: Data.Field) { return create(field, field.float, field.toFloatArray); } function Float(field: Data.Field) { return create(field, field.float, field.toFloatArray); }
function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field) => Column.Column<T>): Schema<T> { function Vector(rows: number) {
return function(field: Data.Field, category: Data.Category, key: string) {
const value = (row: number) => Data.getVector(category, key, rows, row);
return create(field, value, params => Column.createAndFillArray(field.rowCount, value, params));
}
}
/**
 * Creates a column constructor for matrix-valued fields of the given shape.
 * Each value is assembled by Data.getMatrix from the category using `key`
 * as the base field name.
 * NOTE(review): presence information still comes from `field`, which the
 * caller resolves under the base key; verify that this is intended when only
 * the indexed component fields exist in the data.
 */
function Matrix(rows: number, cols: number) {
    return (field: Data.Field, category: Data.Category, key: string) => {
        const readMatrix = (row: number) => Data.getMatrix(category, key, rows, cols, row);
        const toArray = (params?: Column.ToArrayParams) =>
            Column.createAndFillArray(field.rowCount, readMatrix, params);
        return create(field, readMatrix, toArray);
    };
}
function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>): Schema<T> {
return { type: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || Data.DefaultUndefinedField, alias: spec && spec.alias }; return { type: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || Data.DefaultUndefinedField, alias: spec && spec.alias };
} }
} }
...@@ -111,8 +126,9 @@ class _Category implements Category<any> { // tslint:disable-line:class-name ...@@ -111,8 +126,9 @@ class _Category implements Category<any> { // tslint:disable-line:class-name
Object.defineProperty(this, k, { Object.defineProperty(this, k, {
get: function() { get: function() {
if (cache[k]) return cache[k]; if (cache[k]) return cache[k];
const field = _category.getField(s.alias || k) || s.undefinedField(_category.rowCount); const name = s.alias || k;
cache[k] = s.ctor(field); const field = _category.getField(name) || s.undefinedField(_category.rowCount);
cache[k] = s.ctor(field, _category, name);
return cache[k]; return cache[k];
}, },
enumerable: true, enumerable: true,
...@@ -127,6 +143,8 @@ function createBlock(schema: Block.Schema, block: Data.Block): any { ...@@ -127,6 +143,8 @@ function createBlock(schema: Block.Schema, block: Data.Block): any {
} }
function createCategory(key: string, schema: Category.Schema, block: Data.Block) { function createCategory(key: string, schema: Category.Schema, block: Data.Block) {
const cat = block.categories[schema['@alias'] || key]; const alias = (schema['@alias'] && schema['@alias'].alias) || key;
const name = alias[0] === '_' ? alias : '_' + alias;
const cat = block.categories[name];
return new _Category(cat || Data.Category.Empty, schema, !!cat); return new _Category(cat || Data.Category.Empty, schema, !!cat);
} }
\ No newline at end of file
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { Field } from '../schema'
// Field schema instances with default options, created once and shared by the
// category definitions in this file.
const pooledStr = Field.pooledStr();
const str = Field.str();
const int = Field.int();
const float = Field.float();
/** Field schema of the `entry` category. */
const entry = {
id: str
}
/** Field schema of the `entity` category. */
const entity = {
id: str,
// The cast only narrows the compile-time value type; nothing validates the data at runtime.
type: str as Field.Schema<'polymer' | 'non-polymer' | 'water'>,
src_method: str,
pdbx_description: str,
formula_weight: float,
pdbx_number_of_molecules: int,
details: str,
pdbx_mutation: str,
pdbx_fragment: str,
pdbx_ec: str
}
/** Field schema of the `exptl` category. */
const exptl = {
entry_id: str,
method: str
}
/** Field schema of the `cell` category. */
const cell = {
entry_id: str,
length_a: float,
length_b: float,
length_c: float,
angle_alpha: float,
angle_beta: float,
angle_gamma: float,
Z_PDB: int,
pdbx_unique_axis: str
}
/** Field schema of the `symmetry` category. */
const symmetry = {
entry_id: str,
// The `alias` option supplies the field name used in the data, which here
// differs from the property key (the data names contain `-`).
space_group_name_HM: Field.str({ alias: 'space_group_name_H-M' }),
pdbx_full_space_group_name_HM: Field.str({ alias: 'pdbx_full_space_group_name_H-M' }),
cell_setting: str,
Int_Tables_number: int,
space_group_name_Hall: str
}
/** Field schema of the `struct_conf` category. */
const struct_conf = {
conf_type_id: str,
id: str,
// NOTE(review): declared as int; the mmCIF dictionary appears to type this
// item as a code (string) - confirm that non-numeric helix ids cannot occur.
pdbx_PDB_helix_id: int,
beg_label_comp_id: pooledStr,
beg_label_asym_id: pooledStr,
beg_label_seq_id: int,
pdbx_beg_PDB_ins_code: pooledStr,
end_label_comp_id: pooledStr,
end_label_asym_id: pooledStr,
end_label_seq_id: int,
pdbx_end_PDB_ins_code: pooledStr,
beg_auth_comp_id: pooledStr,
beg_auth_asym_id: pooledStr,
beg_auth_seq_id: int,
end_auth_comp_id: pooledStr,
end_auth_asym_id: pooledStr,
end_auth_seq_id: int,
pdbx_PDB_helix_class: int,
details: str,
pdbx_PDB_helix_length: int
}
/** Field schema of the `struct_sheet_range` category. */
const struct_sheet_range = {
sheet_id: pooledStr,
id: int,
beg_label_comp_id: pooledStr,
beg_label_asym_id: pooledStr,
beg_label_seq_id: int,
pdbx_beg_PDB_ins_code: pooledStr,
end_label_comp_id: pooledStr,
end_label_asym_id: pooledStr,
end_label_seq_id: int,
pdbx_end_PDB_ins_code: pooledStr,
beg_auth_comp_id: pooledStr,
beg_auth_asym_id: pooledStr,
beg_auth_seq_id: int,
end_auth_comp_id: pooledStr,
end_auth_asym_id: pooledStr,
end_auth_seq_id: int
}
/** String values used to type `struct_conn.conn_type_id` and `struct_conn_type.id`. */
type StructConnTypeId =
| 'covale'
| 'covale_base'
| 'covale_phosphate'
| 'covale_sugar'
| 'disulf'
| 'hydrog'
| 'metalc'
| 'mismat'
| 'modres'
| 'saltbr'
/** String values used to type the bond order fields of `struct_conn` and `chem_comp_bond`. */
type BondValueOrder =
| 'SING'
| 'DOUB'
| 'TRIP'
| 'QUAD'
/** Field schema of the `struct_conn` category. */
const struct_conn = {
id: str,
// Casts below narrow the compile-time value type only; values are not validated at runtime.
conn_type_id: pooledStr as Field.Schema<StructConnTypeId>,
pdbx_PDB_id: str,
ptnr1_label_asym_id: pooledStr,
ptnr1_label_comp_id: pooledStr,
ptnr1_label_seq_id: int,
ptnr1_label_atom_id: pooledStr,
pdbx_ptnr1_label_alt_id: pooledStr,
pdbx_ptnr1_PDB_ins_code: pooledStr,
pdbx_ptnr1_standard_comp_id: pooledStr,
ptnr1_symmetry: pooledStr,
ptnr2_label_asym_id: pooledStr,
ptnr2_label_comp_id: pooledStr,
ptnr2_label_seq_id: int,
ptnr2_label_atom_id: pooledStr,
pdbx_ptnr2_label_alt_id: pooledStr,
pdbx_ptnr2_PDB_ins_code: pooledStr,
ptnr1_auth_asym_id: pooledStr,
ptnr1_auth_comp_id: pooledStr,
ptnr1_auth_seq_id: int,
ptnr2_auth_asym_id: pooledStr,
ptnr2_auth_comp_id: pooledStr,
ptnr2_auth_seq_id: int,
ptnr2_symmetry: pooledStr,
pdbx_ptnr3_label_atom_id: pooledStr,
pdbx_ptnr3_label_seq_id: int,
pdbx_ptnr3_label_comp_id: pooledStr,
pdbx_ptnr3_label_asym_id: pooledStr,
pdbx_ptnr3_label_alt_id: pooledStr,
pdbx_ptnr3_PDB_ins_code: pooledStr,
details: pooledStr,
pdbx_dist_value: float,
pdbx_value_order: pooledStr as Field.Schema<BondValueOrder>
}
/** Field schema of the `struct_conn_type` category. */
const struct_conn_type = {
id: str as Field.Schema<StructConnTypeId>,
criteria: str,
reference: str
}
/** Field schema of the `chem_comp_bond` category. */
const chem_comp_bond = {
comp_id: pooledStr,
pdbx_stereo_config: pooledStr,
pdbx_ordinal: int,
// Casts narrow the compile-time value type only.
pdbx_aromatic_flag: pooledStr as Field.Schema<'Y' | 'N'>,
atom_id_1: pooledStr,
atom_id_2: pooledStr,
value_order: pooledStr as Field.Schema<BondValueOrder>
}
/** Field schema of the `pdbx_struct_assembly` category. */
const pdbx_struct_assembly = {
id: str,
details: str,
method_details: str,
oligomeric_details: str,
oligomeric_count: int
}
/** Field schema of the `pdbx_struct_assembly_gen` category. */
const pdbx_struct_assembly_gen = {
assembly_id: str,
oper_expression: str,
asym_id_list: str
}
/** Field schema of the `pdbx_struct_oper_list` category. */
const pdbx_struct_oper_list = {
id: str,
type: str,
name: str,
symmetry_operation: str,
// Composite fields: a 3x3 matrix and a 3-component vector.
// NOTE(review): presumably assembled from indexed data fields such as
// `matrix[i][j]` and `vector[i]` - confirm against Field.matrix / Field.vector.
matrix: Field.matrix(3, 3),
vector: Field.vector(3)
}
/** Field schema of the `pdbx_struct_mod_residue` category. */
const pdbx_struct_mod_residue = {
id: int,
label_asym_id: pooledStr,
label_seq_id: int,
label_comp_id: pooledStr,
auth_asym_id: pooledStr,
auth_seq_id: int,
auth_comp_id: pooledStr,
PDB_ins_code: pooledStr,
parent_comp_id: pooledStr,
details: str
}
/** Field schema of the `atom_site` category. */
const atom_site = {
group_PDB: pooledStr,
id: int,
type_symbol: pooledStr,
label_atom_id: pooledStr,
label_alt_id: pooledStr,
label_comp_id: pooledStr,
label_asym_id: pooledStr,
label_entity_id: pooledStr,
label_seq_id: int,
pdbx_PDB_ins_code: pooledStr,
// NOTE(review): declared as a pooled string; the mmCIF dictionary appears to
// define the formal charge as an integer - confirm whether `int` was intended.
pdbx_formal_charge: pooledStr,
Cartn_x: float,
Cartn_y: float,
Cartn_z: float,
occupancy: float,
B_iso_or_equiv: float,
auth_atom_id: pooledStr,
auth_comp_id: pooledStr,
auth_asym_id: pooledStr,
auth_seq_id: int,
pdbx_PDB_model_num: int
}
/**
 * All category schemas defined in this file, keyed by category name.
 * This object is the module's default export.
 */
const schema = {
entry,
entity,
exptl,
cell,
symmetry,
struct_conf,
struct_sheet_range,
struct_conn,
struct_conn_type,
chem_comp_bond,
pdbx_struct_assembly,
pdbx_struct_assembly_gen,
pdbx_struct_oper_list,
pdbx_struct_mod_residue,
atom_site
};
export default schema;
\ No newline at end of file
...@@ -5,30 +5,31 @@ ...@@ -5,30 +5,31 @@
*/ */
import * as Column from '../common/column' import * as Column from '../common/column'
import * as TokenColumn from '../common/text/column/token'
import { Tokens } from '../common/text/tokenizer'
import * as Data from './data-model' import * as Data from './data-model'
import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser' import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser'
import StringPool from '../../utils/short-string-pool'
export default function CifTextField(data: string, tokens: ArrayLike<number>, rowCount: number): Data.Field { export default function CifTextField(tokens: Tokens, rowCount: number): Data.Field {
const stringPool = StringPool.create(); const { data, indices } = tokens;
const str: Data.Field['str'] = row => { const str: Data.Field['str'] = row => {
const ret = StringPool.get(stringPool, data.substring(tokens[2 * row], tokens[2 * row + 1])); const ret = data.substring(indices[2 * row], indices[2 * row + 1]);
if (ret === '.' || ret === '?') return ''; if (ret === '.' || ret === '?') return '';
return ret; return ret;
}; };
const int: Data.Field['int'] = row => { const int: Data.Field['int'] = row => {
return fastParseInt(data, tokens[2 * row], tokens[2 * row + 1]) || 0; return fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0;
}; };
const float: Data.Field['float'] = row => { const float: Data.Field['float'] = row => {
return fastParseFloat(data, tokens[2 * row], tokens[2 * row + 1]) || 0; return fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0;
}; };
const presence: Data.Field['presence'] = row => { const presence: Data.Field['presence'] = row => {
const s = tokens[2 * row]; const s = indices[2 * row];
if (tokens[2 * row + 1] - s !== 1) return Data.ValuePresence.Present; if (indices[2 * row + 1] - s !== 1) return Data.ValuePresence.Present;
const v = data.charCodeAt(s); const v = data.charCodeAt(s);
if (v === 46 /* . */) return Data.ValuePresence.NotSpecified; if (v === 46 /* . */) return Data.ValuePresence.NotSpecified;
if (v === 63 /* ? */) return Data.ValuePresence.Unknown; if (v === 63 /* ? */) return Data.ValuePresence.Unknown;
...@@ -42,43 +43,20 @@ export default function CifTextField(data: string, tokens: ArrayLike<number>, ro ...@@ -42,43 +43,20 @@ export default function CifTextField(data: string, tokens: ArrayLike<number>, ro
int, int,
float, float,
presence, presence,
areValuesEqual(rowA, rowB) { areValuesEqual: TokenColumn.areValuesEqualProvider(tokens),
const aS = tokens[2 * rowA], bS = tokens[2 * rowB]; stringEquals(row, v) {
const len = tokens[2 * rowA + 1] - aS; const s = indices[2 * row];
if (len !== tokens[2 * rowB + 1] - bS) return false; const value = v || '';
for (let i = 0; i < len; i++) { if (!value && presence(row) !== Data.ValuePresence.Present) return true;
if (data.charCodeAt(i + aS) !== data.charCodeAt(i + bS)) {
return false;
}
}
return true;
},
stringEquals(row, value) {
const s = tokens[2 * row];
if (!value) return presence(row) !== Data.ValuePresence.Present;
const len = value.length; const len = value.length;
if (len !== tokens[2 * row + 1] - s) return false; if (len !== indices[2 * row + 1] - s) return false;
for (let i = 0; i < len; i++) { for (let i = 0; i < len; i++) {
if (data.charCodeAt(i + s) !== value.charCodeAt(i)) return false; if (data.charCodeAt(i + s) !== value.charCodeAt(i)) return false;
} }
return true; return true;
}, },
toStringArray(params) { toStringArray(params) { return Column.createAndFillArray(rowCount, str, params); },
const { array, start } = Column.createArray(rowCount, params); toIntArray(params) { return Column.createAndFillArray(rowCount, int, params); },
return fillArrayValues(str, array, start); toFloatArray(params) { return Column.createAndFillArray(rowCount, float, params); }
},
toIntArray(params) {
const { array, start } = Column.createArray(rowCount, params);
return fillArrayValues(int, array, start);
},
toFloatArray(params) {
const { array, start } = Column.createArray(rowCount, params);
return fillArrayValues(float, array, start);
}
} }
}
function fillArrayValues(value: (row: number) => any, target: any[], start: number) {
for (let i = 0, _e = target.length; i < _e; i++) target[i] = value(start + i);
return target;
} }
\ No newline at end of file
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
import * as Data from './data-model' import * as Data from './data-model'
import Field from './text-field' import Field from './text-field'
import { Tokens } from '../common/text/tokenizer' import { Tokens, TokenBuilder } from '../common/text/tokenizer'
import Result from '../result' import Result from '../result'
/** /**
...@@ -430,7 +430,7 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D ...@@ -430,7 +430,7 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D
errorMessage: 'Expected value.' errorMessage: 'Expected value.'
} }
} }
fields[fieldName] = Field(tokenizer.data, [tokenizer.currentTokenStart, tokenizer.currentTokenEnd], 1); fields[fieldName] = Field({ data: tokenizer.data, indices: [tokenizer.currentTokenStart, tokenizer.currentTokenEnd], count: 1 }, 1);
moveNext(tokenizer); moveNext(tokenizer);
} }
...@@ -461,11 +461,11 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat ...@@ -461,11 +461,11 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat
const rowCountEstimate = name === '_atom_site' ? (tokenizer.data.length / 100) | 0 : 32; const rowCountEstimate = name === '_atom_site' ? (tokenizer.data.length / 100) | 0 : 32;
const tokens: Tokens[] = []; const tokens: Tokens[] = [];
const fieldCount = fieldNames.length; const fieldCount = fieldNames.length;
for (let i = 0; i < fieldCount; i++) tokens[i] = Tokens.create(rowCountEstimate); for (let i = 0; i < fieldCount; i++) tokens[i] = TokenBuilder.create(tokenizer, rowCountEstimate);
let tokenCount = 0; let tokenCount = 0;
while (tokenizer.currentTokenType === CifTokenType.Value) { while (tokenizer.currentTokenType === CifTokenType.Value) {
Tokens.add(tokens[(tokenCount++) % fieldCount], tokenizer.currentTokenStart, tokenizer.currentTokenEnd); TokenBuilder.add(tokens[(tokenCount++) % fieldCount], tokenizer.currentTokenStart, tokenizer.currentTokenEnd);
moveNext(tokenizer); moveNext(tokenizer);
} }
...@@ -480,7 +480,7 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat ...@@ -480,7 +480,7 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat
const rowCount = (tokenCount / fieldCount) | 0; const rowCount = (tokenCount / fieldCount) | 0;
const fields = Object.create(null); const fields = Object.create(null);
for (let i = 0; i < fieldCount; i++) { for (let i = 0; i < fieldCount; i++) {
fields[fieldNames[i]] = Field(tokenizer.data, tokens[i].indices, rowCount); fields[fieldNames[i]] = Field(tokens[i], rowCount);
} }
categories[name] = Data.Category(rowCount, fields); categories[name] = Data.Category(rowCount, fields);
......
...@@ -53,4 +53,18 @@ export function createArray(rowCount: number, params?: ToArrayParams) { ...@@ -53,4 +53,18 @@ export function createArray(rowCount: number, params?: ToArrayParams) {
const s = typeof start !== 'undefined' ? Math.max(Math.min(start, rowCount - 1), 0) : 0; const s = typeof start !== 'undefined' ? Math.max(Math.min(start, rowCount - 1), 0) : 0;
const e = typeof end !== 'undefined' ? Math.min(end, rowCount) : rowCount; const e = typeof end !== 'undefined' ? Math.min(end, rowCount) : rowCount;
return { array: new c(e - s) as any[], start: s, end: e }; return { array: new c(e - s) as any[], start: s, end: e };
} }
\ No newline at end of file
/**
 * A helper function for Column.toArray.
 *
 * Overwrites every slot of `target` in place: slot `i` receives
 * `value(start + i)`. The same `target` instance is returned.
 */
export function fillArrayValues(value: (row: number) => any, target: any[], start: number) {
    const size = target.length;
    let slot = 0;
    while (slot < size) {
        target[slot] = value(start + slot);
        slot++;
    }
    return target;
}
/**
 * A helper function for Column.toArray.
 *
 * Allocates the result array with createArray(rowCount, params) and fills it
 * with `value(row)`, starting at the start row reported by createArray.
 */
export function createAndFillArray(rowCount: number, value: (row: number) => any, params?: ToArrayParams) {
    const allocation = createArray(rowCount, params);
    return fillArrayValues(value, allocation.array, allocation.start);
}
// /*
// * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
// *
// * @author David Sehnal <david.sehnal@gmail.com>
// */
// import * as Data from '../../../../data/data'
// import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../number-parser'
// import { Tokens } from '../tokenizer'
// import ShortStringPool from '../../../../utils/short-string-pool'
// export function createTokenFields(data: string, fields: string[], tokens: Tokens): { [name: string]: Data.Field } {
// const fi: TokenFieldInfo = { data, fieldCount: fields.length, tokens: tokens.indices };
// const categoryFields = Object.create(null);
// for (let i = 0; i < fi.fieldCount; ++i) {
// categoryFields[fields[i]] = TokenField(fi, i);
// }
// return categoryFields;
// }
// export interface TokenFieldInfo {
// data: string,
// tokens: ArrayLike<number>,
// fieldCount: number,
// isCif?: boolean
// }
// export function TokenField(info: TokenFieldInfo, index: number): Data.Field {
// const { data, tokens, fieldCount, isCif = false } = info;
// const stringPool = ShortStringPool.create();
// const str: Data.Field['str'] = isCif ? row => {
// const i = (row * fieldCount + index) * 2;
// const ret = ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1]));
// if (ret === '.' || ret === '?') return null;
// return ret;
// } : row => {
// const i = (row * fieldCount + index) * 2;
// return ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1]));
// };
// const int: Data.Field['int'] = row => {
// const i = (row * fieldCount + index) * 2;
// return fastParseInt(data, tokens[i], tokens[i + 1]) || 0;
// };
// const float: Data.Field['float'] = row => {
// const i = (row * fieldCount + index) * 2;
// return fastParseFloat(data, tokens[i], tokens[i + 1]) || 0;
// };
// const presence: Data.Field['presence'] = isCif ? row => {
// const i = 2 * (row * fieldCount + index);
// const s = tokens[i];
// if (tokens[i + 1] - s !== 1) return Data.ValuePresence.Present;
// const v = data.charCodeAt(s);
// if (v === 46 /* . */) return Data.ValuePresence.NotSpecified;
// if (v === 63 /* ? */) return Data.ValuePresence.Unknown;
// return Data.ValuePresence.Present;
// } : row => {
// const i = 2 * (row * fieldCount + index);
// return tokens[i] === tokens[i + 1] ? Data.ValuePresence.NotSpecified : Data.ValuePresence.Present
// };
// return {
// isDefined: true,
// str,
// int,
// float,
// value: str,
// presence,
// areValuesEqual: (rowA, rowB) => {
// const aI = (rowA * fieldCount + index) * 2, aS = tokens[aI];
// const bI = (rowB * fieldCount + index) * 2, bS = tokens[bI];
// const len = tokens[aI + 1] - aS;
// if (len !== tokens[bI + 1] - bS) return false;
// for (let i = 0; i < len; i++) {
// if (data.charCodeAt(i + aS) !== data.charCodeAt(i + bS)) {
// return false;
// }
// }
// return true;
// },
// stringEquals: (row, value) => {
// const aI = (row * fieldCount + index) * 2;
// const s = tokens[aI];
// if (!value) return presence(row) !== Data.ValuePresence.Present;
// const len = value.length;
// if (len !== tokens[aI + 1] - s) return false;
// for (let i = 0; i < len; i++) {
// if (data.charCodeAt(i + s) !== value.charCodeAt(i)) return false;
// }
// return true;
// },
// toStringArray: (startRow, endRowExclusive, ctor) => {
// const count = endRowExclusive - startRow;
// const ret = ctor(count) as any;
// for (let i = 0; i < count; i++) { ret[i] = str(startRow + i); }
// return ret;
// },
// toIntArray: (startRow, endRowExclusive, ctor) => {
// const count = endRowExclusive - startRow;
// const ret = ctor(count) as any;
// for (let i = 0; i < count; i++) { ret[i] = int(startRow + i); }
// return ret;
// },
// toFloatArray: (startRow, endRowExclusive, ctor) => {
// const count = endRowExclusive - startRow;
// const ret = ctor(count) as any;
// for (let i = 0; i < count; i++) { ret[i] = float(startRow + i); }
// return ret;
// }
// }
// }
\ No newline at end of file
...@@ -4,46 +4,41 @@ ...@@ -4,46 +4,41 @@
* @author David Sehnal <david.sehnal@gmail.com> * @author David Sehnal <david.sehnal@gmail.com>
*/ */
import { Column, ColumnType, createArray } from '../../column' import { Column, ColumnType, createAndFillArray } from '../../column'
import { trimStr, Lines } from '../tokenizer' import { trimStr, Tokens } from '../tokenizer'
import { parseIntSkipLeadingWhitespace, parseFloatSkipLeadingWhitespace } from '../number-parser' import { parseIntSkipLeadingWhitespace, parseFloatSkipLeadingWhitespace } from '../number-parser'
import StringPool from '../../../../utils/short-string-pool' import StringPool from '../../../../utils/short-string-pool'
export default function FixedColumnProvider(lines: Lines) { export default function FixedColumnProvider(lines: Tokens) {
return function<T extends ColumnType>(offset: number, width: number, type: T) { return function<T extends ColumnType>(offset: number, width: number, type: T) {
return FixedColumn(lines, offset, width, type); return FixedColumn(lines, offset, width, type);
} }
} }
function fillArrayValues(value: (row: number) => any, target: any[], start: number) { export function FixedColumn<T extends ColumnType>(lines: Tokens, offset: number, width: number, type: T): Column<T['@type']> {
for (let i = 0, _e = target.length; i < _e; i++) target[i] = value(start + i); const { data, indices, count: rowCount } = lines;
return target;
}
export function FixedColumn<T extends ColumnType>(lines: Lines, offset: number, width: number, type: T): Column<T['@type']> {
const { data, tokens, count: rowCount } = lines;
const { kind } = type; const { kind } = type;
const pool = kind === 'pooled-str' ? StringPool.create() : void 0; const pool = kind === 'pooled-str' ? StringPool.create() : void 0;
const value: Column<T['@type']>['value'] = kind === 'str' ? row => { const value: Column<T['@type']>['value'] = kind === 'str' ? row => {
let s = tokens[2 * row] + offset, le = tokens[2 * row + 1]; let s = indices[2 * row] + offset, le = indices[2 * row + 1];
if (s >= le) return ''; if (s >= le) return '';
let e = s + width; let e = s + width;
if (e > le) e = le; if (e > le) e = le;
return trimStr(data, s, e); return trimStr(data, s, e);
} : kind === 'pooled-str' ? row => { } : kind === 'pooled-str' ? row => {
let s = tokens[2 * row] + offset, le = tokens[2 * row + 1]; let s = indices[2 * row] + offset, le = indices[2 * row + 1];
if (s >= le) return ''; if (s >= le) return '';
let e = s + width; let e = s + width;
if (e > le) e = le; if (e > le) e = le;
return StringPool.get(pool!, trimStr(data, s, e)); return StringPool.get(pool!, trimStr(data, s, e));
} : kind === 'int' ? row => { } : kind === 'int' ? row => {
const s = tokens[2 * row] + offset; const s = indices[2 * row] + offset;
if (s > tokens[2 * row + 1]) return 0; if (s > indices[2 * row + 1]) return 0;
return parseIntSkipLeadingWhitespace(data, s, s + width); return parseIntSkipLeadingWhitespace(data, s, s + width);
} : row => { } : row => {
const s = tokens[2 * row] + offset; const s = indices[2 * row] + offset;
if (s > tokens[2 * row + 1]) return 0; if (s > indices[2 * row + 1]) return 0;
return parseFloatSkipLeadingWhitespace(data, s, s + width); return parseFloatSkipLeadingWhitespace(data, s, s + width);
}; };
return { return {
...@@ -51,10 +46,7 @@ export function FixedColumn<T extends ColumnType>(lines: Lines, offset: number, ...@@ -51,10 +46,7 @@ export function FixedColumn<T extends ColumnType>(lines: Lines, offset: number,
rowCount, rowCount,
value, value,
isValueDefined(row) { return true; }, isValueDefined(row) { return true; },
toArray(params) { toArray(params) { return createAndFillArray(rowCount, value, params); },
const { array, start } = createArray(rowCount, params);
return fillArrayValues(value, array, start);
},
areValuesEqual(rowA, rowB) { areValuesEqual(rowA, rowB) {
return value(rowA) === value(rowB); return value(rowA) === value(rowB);
} }
......
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { Column, ColumnType, createAndFillArray } from '../../column'
import { Tokens } from '../tokenizer'
import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../number-parser'
import StringPool from '../../../../utils/short-string-pool'
/**
 * Binds a token list and returns a factory that creates a TokenColumn of the
 * requested column type over those tokens.
 */
export default function TokenColumnProvider(tokens: Tokens) {
    return <T extends ColumnType>(type: T) => TokenColumn(tokens, type);
}
/**
 * Creates a column over a token list, with one token per row.
 *
 * Row `r` covers `data[indices[2r] .. indices[2r + 1])`. How that text is
 * turned into a value depends on `type.kind`:
 *  - 'str': the substring itself
 *  - 'pooled-str': the substring passed through a StringPool owned by the column
 *  - 'int': parsed with the fast integer parser (a falsy result becomes 0)
 *  - anything else: parsed with the fast float parser (a falsy result becomes 0)
 *
 * Every row is reported as defined.
 */
export function TokenColumn<T extends ColumnType>(tokens: Tokens, type: T): Column<T['@type']> {
    const { data, indices, count: rowCount } = tokens;

    let value: Column<T['@type']>['value'];
    switch (type.kind) {
        case 'str':
            value = row => data.substring(indices[2 * row], indices[2 * row + 1]);
            break;
        case 'pooled-str': {
            // The pool is only created for pooled columns.
            const pool = StringPool.create();
            value = row => StringPool.get(pool, data.substring(indices[2 * row], indices[2 * row + 1]));
            break;
        }
        case 'int':
            value = row => fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0;
            break;
        default:
            value = row => fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0;
            break;
    }

    return {
        isDefined: true,
        rowCount,
        value,
        isValueDefined(row) { return true; },
        toArray(params) { return createAndFillArray(rowCount, value, params); },
        areValuesEqual: areValuesEqualProvider(tokens)
    };
}
/**
 * Returns a comparer that reports whether the raw text of two rows' tokens is
 * identical. Lengths are compared first, then the characters are compared in
 * place without creating substrings.
 */
export function areValuesEqualProvider(tokens: Tokens) {
    const source = tokens.data;
    const bounds = tokens.indices;

    return (rowA: number, rowB: number) => {
        const startA = bounds[2 * rowA];
        const startB = bounds[2 * rowB];
        const length = bounds[2 * rowA + 1] - startA;
        if (bounds[2 * rowB + 1] - startB !== length) return false;

        let offset = 0;
        while (offset < length && source.charCodeAt(startA + offset) === source.charCodeAt(startB + offset)) {
            offset++;
        }
        // Equal only if the scan was not stopped early by a mismatch.
        return offset >= length;
    };
}
\ No newline at end of file
...@@ -17,10 +17,10 @@ export interface Tokenizer { ...@@ -17,10 +17,10 @@ export interface Tokenizer {
currentTokenEnd: number currentTokenEnd: number
} }
export interface Lines { export interface Tokens {
data: string, data: string,
count: number, count: number,
tokens: ArrayLike<number> indices: ArrayLike<number>
} }
export function Tokenizer(data: string): Tokenizer { export function Tokenizer(data: string): Tokenizer {
...@@ -80,15 +80,21 @@ export namespace Tokenizer { ...@@ -80,15 +80,21 @@ export namespace Tokenizer {
} }
/** Advance the state by the given number of lines and return line starts/ends as tokens. */ /** Advance the state by the given number of lines and return line starts/ends as tokens. */
export function readLines(state: Tokenizer, count: number): Lines { export function readLine(state: Tokenizer): string {
const lineTokens = Tokens.create(count * 2); markLine(state);
return getTokenString(state);
}
/** Advance the state by the given number of lines and return line starts/ends as tokens. */
export function readLines(state: Tokenizer, count: number): Tokens {
const lineTokens = TokenBuilder.create(state, count * 2);
for (let i = 0; i < count; i++) { for (let i = 0; i < count; i++) {
markLine(state); markLine(state);
Tokens.addUnchecked(lineTokens, state.currentTokenStart, state.currentTokenEnd); TokenBuilder.addUnchecked(lineTokens, state.currentTokenStart, state.currentTokenEnd);
} }
return { data: state.data, count, tokens: lineTokens.indices }; return { data: state.data, count, indices: lineTokens.indices };
} }
/** /**
...@@ -170,38 +176,43 @@ export function trimStr(data: string, start: number, end: number) { ...@@ -170,38 +176,43 @@ export function trimStr(data: string, start: number, end: number) {
return data.substring(s, e + 1); return data.substring(s, e + 1);
} }
export interface Tokens { export namespace TokenBuilder {
indicesLenMinus2: number, interface Builder extends Tokens {
count: number, offset: number,
indices: Uint32Array indices: Uint32Array,
} indicesLenMinus2: number
}
export namespace Tokens { function resize(builder: Builder) {
function resize(tokens: Tokens) {
// scale the size using golden ratio, because why not. // scale the size using golden ratio, because why not.
const newBuffer = new Uint32Array((1.61 * tokens.indices.length) | 0); const newBuffer = new Uint32Array((1.61 * builder.indices.length) | 0);
newBuffer.set(tokens.indices); newBuffer.set(builder.indices);
tokens.indices = newBuffer; builder.indices = newBuffer;
tokens.indicesLenMinus2 = (newBuffer.length - 2) | 0; builder.indicesLenMinus2 = (newBuffer.length - 2) | 0;
} }
export function add(tokens: Tokens, start: number, end: number) { export function add(tokens: Tokens, start: number, end: number) {
if (tokens.count > tokens.indicesLenMinus2) { const builder = tokens as Builder;
resize(tokens); if (builder.offset > builder.indicesLenMinus2) {
resize(builder);
} }
tokens.indices[tokens.count++] = start; builder.indices[builder.offset++] = start;
tokens.indices[tokens.count++] = end; builder.indices[builder.offset++] = end;
tokens.count++;
} }
export function addUnchecked(tokens: Tokens, start: number, end: number) { export function addUnchecked(tokens: Tokens, start: number, end: number) {
tokens.indices[tokens.count++] = start; (tokens as Builder).indices[(tokens as Builder).offset++] = start;
tokens.indices[tokens.count++] = end; (tokens as Builder).indices[(tokens as Builder).offset++] = end;
tokens.count++;
} }
export function create(size: number): Tokens { export function create(tokenizer: Tokenizer, size: number): Tokens {
return { return <Builder>{
data: tokenizer.data,
indicesLenMinus2: (size - 2) | 0, indicesLenMinus2: (size - 2) | 0,
count: 0, count: 0,
offset: 0,
indices: new Uint32Array(size) indices: new Uint32Array(size)
} }
} }
......
...@@ -40,14 +40,11 @@ function State(tokenizer: Tokenizer): State { ...@@ -40,14 +40,11 @@ function State(tokenizer: Tokenizer): State {
*/ */
function handleTitleString(state: State) { function handleTitleString(state: State) {
const { tokenizer, header } = state; const { tokenizer, header } = state;
Tokenizer.markLine(tokenizer); let line = Tokenizer.readLine(tokenizer);
let line = Tokenizer.getTokenString(tokenizer);
// skip potential empty lines... // skip potential empty lines...
if (line.trim().length === 0) { if (line.trim().length === 0) {
Tokenizer.markLine(tokenizer); line = Tokenizer.readLine(tokenizer);
line = Tokenizer.getTokenString(tokenizer);
} }
const timeOffset = line.lastIndexOf('t='); const timeOffset = line.lastIndexOf('t=');
...@@ -92,7 +89,7 @@ function handleAtoms(state: State): Schema.Atoms { ...@@ -92,7 +89,7 @@ function handleAtoms(state: State): Schema.Atoms {
const { tokenizer, numberOfAtoms } = state; const { tokenizer, numberOfAtoms } = state;
const lines = Tokenizer.readLines(tokenizer, numberOfAtoms); const lines = Tokenizer.readLines(tokenizer, numberOfAtoms);
const positionSample = tokenizer.data.substring(lines.tokens[0], lines.tokens[1]).substring(20); const positionSample = tokenizer.data.substring(lines.indices[0], lines.indices[1]).substring(20);
const precisions = positionSample.match(/\.\d+/g)!; const precisions = positionSample.match(/\.\d+/g)!;
const hasVelocities = precisions.length === 6; const hasVelocities = precisions.length === 6;
...@@ -133,8 +130,7 @@ function handleAtoms(state: State): Schema.Atoms { ...@@ -133,8 +130,7 @@ function handleAtoms(state: State): Schema.Atoms {
*/ */
function handleBoxVectors(state: State) { function handleBoxVectors(state: State) {
const { tokenizer } = state; const { tokenizer } = state;
Tokenizer.markLine(tokenizer); const values = Tokenizer.readLine(tokenizer).trim().split(/\s+/g);
const values = Tokenizer.getTokenString(tokenizer).trim().split(/\s+/g);
state.header.box = [+values[0], +values[1], +values[2]]; state.header.box = [+values[0], +values[1], +values[2]];
} }
......
...@@ -10,8 +10,8 @@ import * as Schema from '../cif/schema' ...@@ -10,8 +10,8 @@ import * as Schema from '../cif/schema'
const columnData = `123abc`; const columnData = `123abc`;
const intField = TextField(columnData, [0, 1, 1, 2, 2, 3], 3); const intField = TextField({ data: columnData, indices: [0, 1, 1, 2, 2, 3], count: 3 }, 3);
const strField = TextField(columnData, [3, 4, 4, 5, 5, 6], 3); const strField = TextField({ data: columnData, indices: [3, 4, 4, 5, 5, 6], count: 3 }, 3);
const testBlock = Data.Block({ const testBlock = Data.Block({
'atoms': Data.Category(3, { 'atoms': Data.Category(3, {
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
*/ */
import FixedColumn from '../common/text/column/fixed' import FixedColumn from '../common/text/column/fixed'
import TokenColumn from '../common/text/column/token'
import { ColumnType } from '../common/column' import { ColumnType } from '../common/column'
const lines = [ const lines = [
...@@ -16,7 +17,7 @@ const lines = [ ...@@ -16,7 +17,7 @@ const lines = [
' 5' ' 5'
] ]
const data = lines.join('\n'); const linesData = lines.join('\n');
const linesTokens = (function () { const linesTokens = (function () {
const tokens: number[] = []; const tokens: number[] = [];
...@@ -25,12 +26,12 @@ const linesTokens = (function () { ...@@ -25,12 +26,12 @@ const linesTokens = (function () {
tokens.push(last, last + l.length); tokens.push(last, last + l.length);
last += l.length + 1; last += l.length + 1;
} }
if (tokens[tokens.length - 1] > data.length) tokens[tokens.length - 1] = data.length; if (tokens[tokens.length - 1] > linesData.length) tokens[tokens.length - 1] = linesData.length;
return tokens; return tokens;
}()); }());
describe('fixed text column', () => { describe('fixed text column', () => {
const col = FixedColumn({ data, tokens: linesTokens, count: lines.length }); const col = FixedColumn({ data: linesData, indices: linesTokens, count: lines.length });
const col1 = col(0, 5, ColumnType.float); const col1 = col(0, 5, ColumnType.float);
const col2 = col(5, 4, ColumnType.str); const col2 = col(5, 4, ColumnType.str);
it('number', () => { it('number', () => {
...@@ -48,3 +49,14 @@ describe('fixed text column', () => { ...@@ -48,3 +49,14 @@ describe('fixed text column', () => {
expect(col2.value(4)).toBe(''); expect(col2.value(4)).toBe('');
}) })
}); });
describe('token text column', () => {
const tokensData = '321';
const col = TokenColumn({ data: tokensData, indices: [0, 1, 1, 2, 2, 3], count: 3 });
const col1 = col(ColumnType.int);
it('number', () => {
expect(col1.value(0)).toBe(3);
expect(col1.value(1)).toBe(2);
expect(col1.value(2)).toBe(1);
})
});
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
import * as fs from 'fs' import * as fs from 'fs'
import Gro from './reader/gro/parser' import Gro from './reader/gro/parser'
import CIF from './reader/cif/text-parser' import CIF from './reader/cif/index'
// const file = '1crn.gro' // const file = '1crn.gro'
// const file = 'water.gro' // const file = 'water.gro'
...@@ -81,7 +81,7 @@ export function _cif() { ...@@ -81,7 +81,7 @@ export function _cif() {
} }
console.time('parseCIF'); console.time('parseCIF');
const parsed = CIF(input); const parsed = CIF.parseText(input);
console.timeEnd('parseCIF'); console.timeEnd('parseCIF');
if (parsed.isError) { if (parsed.isError) {
console.log(parsed); console.log(parsed);
...@@ -92,7 +92,12 @@ export function _cif() { ...@@ -92,7 +92,12 @@ export function _cif() {
const atom_site = data.categories._atom_site; const atom_site = data.categories._atom_site;
console.log(atom_site.getField('Cartn_x')!.float(0)); console.log(atom_site.getField('Cartn_x')!.float(0));
console.log(atom_site.getField('label_atom_id')!.toStringArray()); //console.log(atom_site.getField('label_atom_id')!.toStringArray());
const mmcif = CIF.applySchema(CIF.schema.mmCIF, data);
console.log(mmcif.atom_site.Cartn_x.value(0));
console.log(mmcif.entity.type.toArray());
console.log(mmcif.pdbx_struct_oper_list.matrix.value(0));
}); });
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment