From 9cf93363ebda7af86a192bfa0edde920ab230b31 Mon Sep 17 00:00:00 2001 From: David Sehnal <david.sehnal@gmail.com> Date: Thu, 21 Sep 2017 16:22:08 +0200 Subject: [PATCH] Data model for tables and schema --- .vscode/settings.json | 3 + src/data/data.ts | 61 ++++++++++++++++ src/data/schema.ts | 136 +++++++++++++++++++++++++++++++++++ src/data/spec/schema.spec.ts | 72 +++++++++++++++++++ 4 files changed, 272 insertions(+) create mode 100644 .vscode/settings.json create mode 100644 src/data/data.ts create mode 100644 src/data/schema.ts create mode 100644 src/data/spec/schema.spec.ts diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..00ad71fba --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "typescript.tsdk": "node_modules\\typescript\\lib" +} \ No newline at end of file diff --git a/src/data/data.ts b/src/data/data.ts new file mode 100644 index 000000000..ae0e170b1 --- /dev/null +++ b/src/data/data.ts @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+export interface File { // top-level container: an optional name plus a list of blocks
+    readonly name?: string,
+    readonly blocks: ReadonlyArray<Block>
+}
+
+export interface Block { // an optional header plus categories keyed by name
+    readonly header?: string,
+    readonly categories: { readonly [name: string]: Category }
+}
+
+export interface Category { // a row count plus by-name field lookup
+    readonly rowCount: number,
+    getField(name: string): Field | undefined // may be undefined; Category.Empty always returns undefined
+}
+
+export namespace Category {
+    export const Empty: Category = { rowCount: 0, getField(name: string) { return void 0; } }; // zero rows, no fields
+}
+
+export const enum ValuePresence { // result type of Field.presence(row)
+    Present = 0,
+    NotSpecified = 1,
+    Unknown = 2
+}
+
+export const enum ArrayKind { // NOTE(review): not referenced elsewhere in this patch
+    String,
+    Float32,
+    Float64
+}
+
+export type FieldArray = number[] | Float32Array | Float64Array | Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array // what a caller-supplied `ctor(size)` may return
+
+/**
+ * Implementation note:
+ * Always implement this as a "plain" object so that the functions are "closures"
+ * by default. This is to ensure that the schema access works without defining
+ * additional closures (schema.ts hands out `field.str`, `field.presence`, ... detached from the object).
+ */
+export interface Field {
+    readonly isDefined: boolean,
+
+    str(row: number): string | null,
+    int(row: number): number,
+    float(row: number): number,
+    bin(row: number): Uint8Array | null,
+
+    presence(row: number): ValuePresence,
+
+    areValuesEqual(rowA: number, rowB: number): boolean,
+    stringEquals(row: number, value: string | null): boolean,
+
+    toStringArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<string>, // rows [startRow, endRowExclusive)
+    toNumberArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<number> // rows [startRow, endRowExclusive)
+}
\ No newline at end of file
diff --git a/src/data/schema.ts b/src/data/schema.ts
new file mode 100644
index 000000000..69184e0b5
--- /dev/null
+++ b/src/data/schema.ts
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import * as Data from './data'
+
+/**
+ * A schema defines the shape of categories and fields. `'@alias'` (category) and `alias` (field spec) name the source entry when it differs from the key.
+ *
+ * @example:
+ * const atom_site = {
+ *   '@alias': '_atom_site',
+ *   label_atom_id: Field.str(),
+ *   Cartn_x: Field.float(),
+ *   Cartn_y: Field.float(),
+ *   Cartn_z: Field.float(),
+ * }
+ *
+ * const mmCIF = { atom_site };
+ */
+export type BlockDefinition = { [category: string]: CategoryDefinition }
+export type CategoryDefinition = { '@alias'?: string } & { [field: string]: Field.Schema<any> }
+
+export type Schema<Definition extends BlockDefinition> = Block<{ [C in keyof Definition]: Category<{ [F in keyof Definition[C]]: Field<Definition[C][F]['type']> }> }>
+
+export function apply<T extends BlockDefinition>(schema: T, block: Data.Block): Schema<T> { // wraps `block` in the typed accessors described by `schema`
+    return createBlock(schema, block) as Schema<T>;
+}
+
+export type Block<Categories> = Categories & {
+    readonly _header?: string,
+    /** For accessing 'non-standard' categories, i.e. ones not listed in the schema; looked up by their name in the source block. */
+    _getCategory(name: string): Data.Category | undefined
+}
+
+export type Category<Fields> = Fields & {
+    readonly _rowCount: number,
+    /** For accessing 'non-standard' fields, i.e. ones not listed in the schema; looked up by their name in the source category. */
+    _getField(name: string): Data.Field | undefined
+}
+
+export interface Field<T> {
+    readonly isDefined: boolean,
+    value(row: number): T,
+    presence(row: number): Data.ValuePresence,
+    areValuesEqual(rowA: number, rowB: number): boolean,
+    stringEquals(row: number, value: string | null): boolean,
+    /** Converts the selected row range to an array. ctor might or might not be called depending on the source data format. Binary fields return undefined. */
+    toArray(startRow: number, endRowExclusive: number, ctor: (size: number) => Data.FieldArray): ReadonlyArray<T> | undefined
+}
+
+export namespace Field {
+    function create<T>(field: Data.Field, value: (row: number) => T, toArray: Field<T>['toArray']): Field<T> { // methods are copied off `field` unbound, so `field` must not rely on `this`
+        return { isDefined: field.isDefined, value, presence: field.presence, areValuesEqual: field.areValuesEqual, stringEquals: field.stringEquals, toArray };
+    }
+
+    function Str(field: Data.Field) { return create(field, field.str, field.toStringArray); }
+    function Int(field: Data.Field) { return create(field, field.int, field.toNumberArray); }
+    function Float(field: Data.Field) { return create(field, field.float, field.toNumberArray); }
+    function Bin(field: Data.Field) { return create(field, field.bin, (s, e, ctor) => void 0); } // no array form for binary values: toArray yields undefined
+
+    const DefaultUndefined: Data.Field = { // stand-in used when the source category lacks a field and the spec gives no `undefinedField`
+        isDefined: false,
+        str: row => null,
+        int: row => 0,
+        float: row => 0,
+        bin: row => null,
+
+        presence: row => Data.ValuePresence.NotSpecified,
+        areValuesEqual: (rowA, rowB) => true,
+        stringEquals: (row, value) => value === null,
+
+        toStringArray: (startRow, endRowExclusive, ctor) => {
+            const count = endRowExclusive - startRow;
+            const ret = ctor(count) as any;
+            for (let i = 0; i < count; i++) { ret[i] = null; }
+            return ret;
+        },
+        toNumberArray: (startRow, endRowExclusive, ctor) => new Uint8Array(endRowExclusive - startRow) as any // zero-filled; `ctor` is not called here
+    };
+
+    export interface Schema<T> { type: T, ctor: (field: Data.Field) => Field<T>, undefinedField: Data.Field, alias?: string };
+    export interface Spec { undefinedField?: Data.Field, alias?: string }
+
+    function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field) => Field<T>): Schema<T> {
+        return { type: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || DefaultUndefined, alias: spec && spec.alias }; // `type` only carries T for the type system; its runtime value (0) is a placeholder
+    }
+
+    export function str(spec?: Spec) { return createSchema(spec, Str); }
+    export function int(spec?: Spec) { return createSchema(spec, Int); }
+    export function float(spec?: Spec) { return createSchema(spec, Float); }
+    export function bin(spec?: Spec) { return createSchema(spec, Bin); }
+}
+
+class _Block implements Block<any> { // tslint:disable-line:class-name
+    _header = this._block.header; // must be `_header` to match the exported Block<T> shape
+    _getCategory(name: string) { return this._block.categories[name]; } // must be `_getCategory` to match Block<T>; `Block<any>` resolves to `any`, so `implements` does not check member names
+    constructor(private _block: Data.Block, schema: BlockDefinition) {
+        for (const k of Object.keys(schema)) {
+            Object.defineProperty(this, k, { value: createCategory(k, schema[k], _block), enumerable: true, writable: false, configurable: false });
+        }
+    }
+}
+
+class _Category implements Category<any> { // tslint:disable-line:class-name
+    _rowCount = this._category.rowCount;
+    _getField(name: string) { return this._category.getField(name); }
+    constructor(private _category: Data.Category, schema: CategoryDefinition) {
+        const fieldKeys = Object.keys(schema).filter(k => k !== '@alias'); // '@alias' is category metadata, not a field
+        const cache = Object.create(null); // typed field wrappers, created on first access
+        for (const k of fieldKeys) {
+            const s = schema[k];
+            Object.defineProperty(this, k, {
+                get: function() {
+                    if (cache[k]) return cache[k];
+                    const field = _category.getField(s.alias || k) || s.undefinedField; // fall back to the schema's stand-in when the field is absent
+                    cache[k] = s.ctor(field);
+                    return cache[k];
+                },
+                enumerable: true,
+                configurable: false
+            });
+        }
+    }
+}
+
+function createBlock(schema: BlockDefinition, block: Data.Block): any {
+    return new _Block(block, schema);
+}
+
+function createCategory(key: string, schema: CategoryDefinition, block: Data.Block) {
+    const cat = block.categories[schema['@alias'] || key] || Data.Category.Empty; // a missing category behaves as an empty one
+    return new _Category(cat, schema);
+}
\ No newline at end of file
diff --git a/src/data/spec/schema.spec.ts b/src/data/spec/schema.spec.ts
new file mode 100644
index 000000000..8ca0e7c8a
--- /dev/null
+++ b/src/data/spec/schema.spec.ts
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import * as Data from '../data'
+import * as Schema from '../schema'
+
+function Field(values: any[]): Data.Field { // in-memory Data.Field over `values`, built as a plain object of closures
+    return {
+        isDefined: true,
+        str: row => '' + values[row],
+        int: row => +values[row] || 0,
+        float: row => +values[row] || 0,
+        bin: row => null,
+
+        presence: row => Data.ValuePresence.Present,
+        areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
+        stringEquals: (row, value) => '' + values[row] === value,
+
+        toStringArray: (startRow, endRowExclusive, ctor) => {
+            const count = endRowExclusive - startRow;
+            const ret = ctor(count) as any;
+            for (let i = 0; i < count; i++) { ret[i] = '' + values[startRow + i]; } // stringify the same way `str` does, so the result holds strings as declared
+            return ret;
+        },
+        toNumberArray: (startRow, endRowExclusive, ctor) => {
+            const count = endRowExclusive - startRow;
+            const ret = ctor(count) as any;
+            for (let i = 0; i < count; i++) { ret[i] = +values[startRow + i]; }
+            return ret;
+        }
+    }
+}
+
+class Category implements Data.Category { // test double: fields come from a plain name -> Data.Field map
+    getField(name: string) { return this.fields[name]; }
+    constructor(public rowCount: number, private fields: any) { }
+}
+
+class Block implements Data.Block { // test double: categories are supplied directly
+    constructor(public categories: { readonly [name: string]: Data.Category }, public header?: string) { }
+}
+
+const testBlock = new Block({
+    'atoms': new Category(2, {
+        x: Field([1, 2]),
+        name: Field(['C', 'O'])
+    })
+});
+
+namespace TestSchema {
+    export const atoms = { x: Schema.Field.float(), name: Schema.Field.str() }
+    export const schema = { atoms }
+}
+
+describe('schema', () => {
+    const data = Schema.apply(TestSchema.schema, testBlock);
+    it('property access', () => {
+        const { x, name } = data.atoms;
+        expect(x.value(0)).toBe(1);
+        expect(name.value(1)).toBe('O');
+    });
+
+    it('toArray', () => {
+        const ret = data.atoms.x.toArray(0, 2, (s) => new Int32Array(s))!;
+        expect(ret.length).toBe(2);
+        expect(ret[0]).toBe(1);
+        expect(ret[1]).toBe(2);
+    })
+});
\ No newline at end of file
-- 
GitLab