diff --git a/docs/cif-schemas.md b/docs/cif-schemas.md new file mode 100644 index 0000000000000000000000000000000000000000..7ff4633237a43c2ce33dae0bc299dc92a7a8958f --- /dev/null +++ b/docs/cif-schemas.md @@ -0,0 +1,85 @@ +How CIF schemas work +======== + +CIF representation (simplified): + +```ts +type Frame = (name: string) => Category | undefined // Frame is either a data block or a save frame +type Category = (name: string) => Field | undefined +type Field = { rowCount: number, getNumber: (row) => number, getString: (row) => string } +``` + +This is obviously not strongly typed and the "fields" don't know what type they are. To solve this, we create a type to describe what a field contains and how to map it to a "typed column": + +```ts +type FieldSchema<T> = { T: T /* remember the type */, createColumn: (field: Field) => Column<T> } +``` + +where column is just a simple interface that returns a value of ``T`` for a given row: + +```ts +type Column<T> = { rowCount: number, get: (row: number) => T } +``` + +Category schema is just an object whose properties are all instances of "field schemas", its "shape" has the type: + +```ts +type CategorySchema = { [fieldName: string]: FieldSchema<any> } +``` + +We can declare our first category "schema": + +```ts +const my_category = { + num_field: { T: 0 as number, createColumn: f => ({ rowCount: f.rowCount, get: f.getNumber }) }, + str_field: { T: '' as string, createColumn: f => ({ rowCount: f.rowCount, get: f.getString }) } +} +``` + +Notice that the type of ``my_category`` is not specified. Assigning it explicitly would hide the actual property names which we do not want. Moreover, the names of the properties must match the names of the fields in the actual category (optionally, a field ``alias`` can be added to the field schema). 
+ +Given a category schema, we need to construct a type that defines the typed category itself: + +```ts +type TypedCategory<Schema extends CategorySchema> = { [F in keyof Schema]: Column<Schema[F]['T']> } +``` + +In other words, the type ``TypedCategory`` has a property of type ``Column<_>`` for each property of the schema. ``Schema[F]['T']`` just says: extract the type of property called ``T`` from property ``F`` in ``Schema`` (see [mapped types in TypeScript](https://www.typescriptlang.org/docs/handbook/advanced-types.html)). ``Schema extends CategorySchema`` says that all properties of ``Schema`` must be of type ``FieldSchema<any>``. + +Finally, we just define a mapping, ``toTypedCategory``: + +```ts +function toTypedCategory<Schema extends CategorySchema>(schema: Schema, category: Category): TypedCategory<Schema> { + const typedCategory: any = {}; + for (const key of Object.keys(schema)) { + // remember a category is just a function that assigns a Field to a name + const field = category(key); + typedCategory[key] = field + ? schema[key].createColumn(field) + : UndefinedColumn(schema[key].T); // a column that always returns 0 or empty string depending on type + } + return typedCategory; +} +``` + +This transforms the "untyped" ``Category`` to some typed category and gives us code-completion for CIF files: + +```ts +const typed = toTypedCategory(my_category, ...); +typed.n /* shows code completion for num_field */ +const num = typed.num_field.get(0); /* num has type number */ +``` + +And that's all there is to it. Extending the types to the "frame" level is left as an exercise for the reader. + +The advantage of this approach is that the types are generated directly from the data. 
This means we only need to define them once (as opposed to defining the data interfaces separately) and on top of that, the "schemas" also serve as a template for how to actually perform the transformation to the typed version of CIF (again without the need to do this "manually" except for the one-time definition of the schema). + +---------------- + + +**Note:** To create a type alias for a category defined this way we can do: + +```ts +type MyCategory = TypedCategory<typeof my_category> +function makeMyTypedCategory(c: Category): MyCategory { return toTypedCategory(my_category, c); } +``` diff --git a/package.json b/package.json index 31ad572ad994ae22ff0b852798a91c6d15b35d76..ae74e31e7843cfcee96df6862a932c6d13e567e9 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,8 @@ "test": "./node_modules/.bin/jest", "dist": "./node_modules/.bin/uglifyjs build/js/molio.dev.js -cm > dist/molio.js && cp build/js/molio.esm.js dist/molio.esm.js", "script": "./node_modules/.bin/rollup build/js/src/script.js -e fs -f cjs -o build/js/script.js", - "runscript": "npm run script && node build/js/script.js" + "runscript": "npm run script && node build/js/script.js", + "download-dics": "./node_modules/.bin/download -o build/dics http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic && ./node_modules/.bin/download -o build/dics http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ddl.dic" }, "jest": { "moduleFileExtensions": [ @@ -29,7 +30,8 @@ "license": "MIT", "devDependencies": { "@types/jest": "^21.1.2", - "@types/node": "^8.0.32", + "@types/node": "^8.0.34", + "download-cli": "^1.0.5", "jest": "^21.2.1", "rollup": "^0.50.0", "rollup-plugin-buble": "^0.16.0", @@ -37,10 +39,11 @@ "rollup-plugin-json": "^2.3.0", "rollup-plugin-node-resolve": "^3.0.0", "rollup-watch": "^4.3.1", - "ts-jest": "^21.0.1", + "ts-jest": "^21.1.2", "tslint": "^5.7.0", "typescript": "^2.5.3", - "uglify-js": "^3.1.3" + "uglify-js": "^3.1.3", + "util.promisify": "^1.0.0" }, "dependencies": {} } diff --git 
a/src/reader/cif/binary/field.ts b/src/reader/cif/binary/field.ts index 4d668b5817e24d1fa776467bb58844f9912183d1..2c8821e685a49c205f23fbe7cd4f7be86b3040e1 100644 --- a/src/reader/cif/binary/field.ts +++ b/src/reader/cif/binary/field.ts @@ -13,7 +13,7 @@ import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../../co export default function Field(column: EncodedColumn): Data.Field { const mask = column.mask ? decode(column.mask) as number[] : void 0; const data = decode(column.data); - const isNumeric = (data as any).buffer && (data as any).byteLength && (data as any).BYTES_PER_ELEMENT; + const isNumeric = Column.isTypedArray(data); const str: Data.Field['str'] = isNumeric ? mask @@ -45,9 +45,13 @@ export default function Field(column: EncodedColumn): Data.Field { float, presence, areValuesEqual: (rowA, rowB) => data[rowA] === data[rowB], - stringEquals(row, v) { return str(row) === v; }, - toStringArray(params) { return Column.createAndFillArray(rowCount, str, params); }, - toIntArray(params) { return Column.createAndFillArray(rowCount, int, params); }, - toFloatArray(params) { return Column.createAndFillArray(rowCount, float, params); } + stringEquals: (row, v) => str(row) === v, + toStringArray: params => Column.createAndFillArray(rowCount, str, params), + toIntArray: isNumeric + ? params => Column.typedArrayWindow(data, params) + : params => Column.createAndFillArray(rowCount, int, params), + toFloatArray: isNumeric + ? 
params => Column.typedArrayWindow(data, params) + : params => Column.createAndFillArray(rowCount, float, params) }; } \ No newline at end of file diff --git a/src/reader/cif/data-model.ts b/src/reader/cif/data-model.ts index 666bdf88c473a02fb9cdf117b5f99360dceec11d..aa8a4d89caa11ff39cf73f926485f2c0cc94b05a 100644 --- a/src/reader/cif/data-model.ts +++ b/src/reader/cif/data-model.ts @@ -15,25 +15,23 @@ export function File(blocks: ArrayLike<Block>, name?: string): File { return { name, blocks: blocks as any }; } -export interface Block { +export interface Frame { readonly header: string, readonly categories: Categories - readonly saveFrames: SafeFrame[] } -export function Block(categories: Categories, header: string, saveFrames: SafeFrame[] = []): Block { +export interface Block extends Frame { + readonly saveFrames: Frame[] +} + +export function Block(categories: Categories, header: string, saveFrames: Frame[] = []): Block { if (Object.keys(categories).some(k => k[0] !== '_')) { throw new Error(`Category names must start with '_'.`); } return { header, categories, saveFrames }; } -export interface SafeFrame { - readonly header: string, - readonly categories: Categories -} - -export function SafeFrame(categories: Categories, header: string): SafeFrame { +export function SafeFrame(categories: Categories, header: string): Frame { return { header, categories }; } diff --git a/src/reader/cif/index.ts b/src/reader/cif/index.ts index fad12ae1b6d19dbe8c6d0698c94530937846de7e..aa10d1ffa6063cd5338d1b0a01d038501937bc39 100644 --- a/src/reader/cif/index.ts +++ b/src/reader/cif/index.ts @@ -7,7 +7,7 @@ import parseText from './text/parser' import parseBinary from './binary/parser' import { Block } from './data-model' -import { apply as applySchema } from './schema' +import { toTypedFrame as applySchema } from './schema' import mmCIF from './schema/mmcif' export default { diff --git a/src/reader/cif/schema.ts b/src/reader/cif/schema.ts index 
28f2afca2d78d3956ce2ac46fd1d8bfa11f0585d..4723e404eb62cfcb544a97fcb3ece5f621e4fa4f 100644 --- a/src/reader/cif/schema.ts +++ b/src/reader/cif/schema.ts @@ -25,35 +25,30 @@ import StringPool from '../../utils/short-string-pool' ////////////////////////////////////////////// -export function apply<Schema extends Block.Schema>(schema: Schema, block: Data.Block): Block.Instance<Schema> { - return createBlock(schema, block) as Block.Instance<Schema>; +export function toTypedFrame<Schema extends FrameSchema>(schema: Schema, frame: Data.Frame): TypedFrame<Schema> { + return createTypedFrame(schema, frame) as TypedFrame<Schema>; } -export type Block<Categories> = Categories & { - readonly _header?: string, - /** For accessing 'non-standard' categories */ - _getCategory(name: string): Data.Category | undefined +export function toTypedCategory<Schema extends CategorySchema>(schema: Schema, category: Data.Category): TypedCategory<Schema> { + return new _TypedCategory(category, schema, true) as TypedCategory<any>; } -export namespace Block { - export type Schema = { [category: string]: Category.Schema } - export type Instance<T extends Schema> = Block<{ [C in keyof T]: Category.Instance<T[C]> }> -} +export type FrameSchema = { [category: string]: CategorySchema } +export type TypedFrame<Schema extends FrameSchema> = { + readonly _header?: string, + readonly _frame: Data.Frame +} & { [C in keyof Schema]: TypedCategory<Schema[C]> } + -export type Category<Fields> = Fields & { +export type CategorySchema = { [field: string]: Field.Schema<any> } +export type TypedCategory<Schema extends CategorySchema> = { readonly _rowCount: number, readonly _isDefined: boolean, - /** For accessing 'non-standard' fields */ - _getField(name: string): Data.Field | undefined -} - -export namespace Category { - export type Schema = { [field: string]: Field.Schema<any> } - export type Instance<T extends Schema> = Category<{ [F in keyof T]: Column.Column<T[F]['type']> }> -} + readonly _category: 
Data.Category +} & { [F in keyof Schema]: Column.Column<Schema[F]['T']> } export namespace Field { - export interface Schema<T> { type: T, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string }; + export interface Schema<T> { T: T, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string }; export interface Spec { undefinedField?: (c: number) => Data.Field, alias?: string } export function alias(name: string): Schema<any> { return { alias: name } as any; } @@ -101,25 +96,24 @@ export namespace Field { } } + // spec argument is to allow for specialised implementation for undefined fields function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>): Schema<T> { - return { type: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || Data.DefaultUndefinedField, alias: spec && spec.alias }; + return { T: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || Data.DefaultUndefinedField, alias: spec && spec.alias }; } } -class _Block implements Block<any> { // tslint:disable-line:class-name - header = this._block.header; - getCategory(name: string) { return this._block.categories[name]; } - constructor(private _block: Data.Block, schema: Block.Schema) { +class _TypedFrame implements TypedFrame<any> { // tslint:disable-line:class-name + header = this._frame.header; + constructor(public _frame: Data.Frame, schema: FrameSchema) { for (const k of Object.keys(schema)) { - Object.defineProperty(this, k, { value: createCategory(k, schema[k], _block), enumerable: true, writable: false, configurable: false }); + Object.defineProperty(this, k, { value: createTypedCategory(k, schema[k], _frame), enumerable: true, writable: false, configurable: false }); } } } -class _Category implements Category<any> { // 
tslint:disable-line:class-name +class _TypedCategory implements TypedCategory<any> { // tslint:disable-line:class-name _rowCount = this._category.rowCount; - _getField(name: string) { return this._category.getField(name); } - constructor(private _category: Data.Category, schema: Category.Schema, public _isDefined: boolean) { + constructor(public _category: Data.Category, schema: CategorySchema, public _isDefined: boolean) { const fieldKeys = Object.keys(schema).filter(k => k !== '@alias'); const cache = Object.create(null); for (const k of fieldKeys) { @@ -139,13 +133,13 @@ class _Category implements Category<any> { // tslint:disable-line:class-name } } -function createBlock(schema: Block.Schema, block: Data.Block): any { - return new _Block(block, schema); +function createTypedFrame(schema: FrameSchema, frame: Data.Frame): any { + return new _TypedFrame(frame, schema); } -function createCategory(key: string, schema: Category.Schema, block: Data.Block) { +function createTypedCategory(key: string, schema: CategorySchema, frame: Data.Frame) { const alias = (schema['@alias'] && schema['@alias'].alias) || key; const name = alias[0] === '_' ? 
alias : '_' + alias; - const cat = block.categories[name]; - return new _Category(cat || Data.Category.Empty, schema, !!cat); + const cat = frame.categories[name]; + return new _TypedCategory(cat || Data.Category.Empty, schema, !!cat); } \ No newline at end of file diff --git a/src/reader/cif/schema/ddl.ts b/src/reader/cif/schema/ddl.ts new file mode 100644 index 0000000000000000000000000000000000000000..562133592a0ecb811f45af4ff26eccd6896a9c62 --- /dev/null +++ b/src/reader/cif/schema/ddl.ts @@ -0,0 +1,2 @@ + +// TODO save frame schema for ddl http://mmcif.wwpdb.org/dictionaries/mmcif_ddl.dic/Index/ diff --git a/src/reader/cif/schema/dic.ts b/src/reader/cif/schema/dic.ts new file mode 100644 index 0000000000000000000000000000000000000000..77e4dac7e9c74c0e26ce0cbe242dae88a538c11f --- /dev/null +++ b/src/reader/cif/schema/dic.ts @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { Field, TypedFrame } from '../schema' + +const str = Field.str() +const float = Field.float() + +const datablock = { + id: str, + description: str +} + +const dictionary = { + title: str, + datablock_id: str, + version: str +} + +const dictionary_history = { + version: str, + update: str, + revision: str +} + +const sub_category = { + id: str, + description: str +} + +const category_group_list = { + id: str, + parent_id: str, + description: str +} + +const item_type_list = { + code: str, + primitive_code: str, + construct: str, + detail: str +} + +const item_units_list = { + code: str, + detail: str +} + +const item_units_conversion = { + from_code: str, + to_code: str, + operator: str, + factor: float +} + +// TODO save frame dic schema + +const dic = { + datablock, + dictionary, + dictionary_history, + sub_category, + category_group_list, + item_type_list, + item_units_list, + item_units_conversion +} + +type dic = TypedFrame<typeof dic> +export 
default dic diff --git a/src/reader/cif/schema/mmcif.ts b/src/reader/cif/schema/mmcif.ts index 8425485209b0fbd596eca82d3e3555ad053d82dc..45f31a003d75608660acc7a369a83496caca9605 100644 --- a/src/reader/cif/schema/mmcif.ts +++ b/src/reader/cif/schema/mmcif.ts @@ -4,7 +4,7 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -import { Field, Block } from '../schema' +import { Field, TypedFrame } from '../schema' const pooledStr = Field.pooledStr(); const str = Field.str(); @@ -243,5 +243,5 @@ const mmCIF = { pdbx_struct_mod_residue, atom_site }; -type mmCIF = Block.Instance<typeof mmCIF> +type mmCIF = TypedFrame<typeof mmCIF> export default mmCIF; \ No newline at end of file diff --git a/src/reader/cif/schema/utils.ts b/src/reader/cif/schema/utils.ts new file mode 100644 index 0000000000000000000000000000000000000000..072257addba3f27eae13deb2b03b4042de50684b --- /dev/null +++ b/src/reader/cif/schema/utils.ts @@ -0,0 +1,203 @@ + +// import dic from './dic' +import * as Data from '../data-model' + +export function getFieldType (type: string, values?: string[]) { + switch (type) { + case 'code': + case 'ucode': + if (values && values.length) { + return `str as Field.Schema<'${values.join("'|'")}'>` + } else { + return 'str' + } + case 'line': + case 'uline': + case 'text': + case 'name': + case 'idname': + case 'any': + case 'atcode': + case 'fax': + case 'phone': + case 'email': + case 'code30': + case 'ec-type': + case 'seq-one-letter-code': + case 'author': + case 'orcid_id': + case 'sequence_dep': + case 'pdb_id': + case 'emd_id': + // todo, consider adding specialised fields + case 'yyyy-mm-dd': + case 'yyyy-mm-dd:hh:mm': + case 'yyyy-mm-dd:hh:mm-flex': + case 'int-range': + case 'float-range': + case 'binary': + case 'operation_expression': + case 'ucode-alphanum-csv': + case 'point_symmetry': + case 'id_list': + case '4x3_matrix': + case 'point_group': + case 'point_group_helical': + case 'boolean': + case 'symmetry_operation': + case 'date_dep': + return 'str' 
+ case 'uchar3': + case 'uchar1': + case 'symop': + return 'pooledStr' + case 'int': + case 'non_negative_int': + case 'positive_int': + return 'int' + case 'float': + return 'float' + } + console.log(`unknown type '${type}'`) + return 'str' +} + +type FrameCategories = { [category: string]: Data.Frame } +type FrameLinks = { [k: string]: string } + +interface FrameData { + categories: FrameCategories + links: FrameLinks +} + +// get field from given or linked category +function getField ( category: string, field: string, d: Data.Frame, ctx: FrameData): Data.Field|undefined { + const { categories, links } = ctx + + const cat = d.categories[category] + if (cat) { + return cat.getField(field) + } else { + if (d.header in links) { + return getField(category, field, categories[links[d.header]], ctx) + } else { + // console.log(`no links found for '${d.header}'`) + } + } +} + +function getEnums (d: Data.Frame, ctx: FrameData): string[]|undefined { + const value = getField('_item_enumeration', 'value', d, ctx) + if (value) { + const enums: string[] = [] + for (let i = 0; i < value.rowCount; ++i) { + enums.push(value.str(i)) + // console.log(value.str(i)) + } + return enums + } else { + // console.log(`item_enumeration.value not found for '${d.header}'`) + } +} + +function getCode (d: Data.Frame, ctx: FrameData): [string, string[]]|undefined { + const code = getField('_item_type', 'code', d, ctx) + if (code) { + let c = code.str(0) + let e = [] + if (c === 'ucode') { + const enums = getEnums(d, ctx) + if (enums) e.push(...enums) + } + return [c, e] + } else { + console.log(`item_type.code not found for '${d.header}'`) + } +} + +const header = `/** + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author Your friendly code generator + */ + +import { Field, TypedFrame } from '../schema' + +const pooledStr = Field.pooledStr(); +const str = Field.str(); +const int = Field.int(); +const float = Field.float();` + +const footer = ` +type mmCIF = TypedFrame<typeof mmCIF> +export default mmCIF;` + +export function generateSchema (dic: Data.Block) { // todo Block needs to be specialized with safe frames as well + // const schema: FrameSchema = {} // { [category: string]: Category.Schema } = {} + const schema: { [category: string]: { [field: string]: string } } = {} + + const codeLines: string[] = [] + + // TODO: for fields with finite allowed values, generate: + // type FieldValue = 'a' | 'b' | 'c' + // const catetegory = { field: <type> as Field.Schema<FieldValue> } + + const categories: FrameCategories = {} + const links: FrameLinks = {} + dic.saveFrames.forEach(d => { + if (d.header[0] !== '_') return + categories[d.header] = d + const item_linked = d.categories['_item_linked'] + if (item_linked) { + const child_name = item_linked.getField('child_name') + const parent_name = item_linked.getField('parent_name') + if (child_name && parent_name) { + for (let i = 0; i < item_linked.rowCount; ++i) { + const childName = child_name.str(i) + const parentName = parent_name.str(i) + if (childName in links && links[childName] !== parentName) { + console.log(`${childName} linked to ${links[childName]}, ignoring link to ${parentName}`) + } + links[childName] = parentName + } + } + } + }) + + Object.keys(categories).forEach(fullName => { + const d = categories[fullName] + const categoryName = d.header.substring(1, d.header.indexOf('.')) + const itemName = d.header.substring(d.header.indexOf('.') + 1) + let fields + if (categoryName in schema) { + fields = schema[categoryName] + } else { + fields = {} + schema[categoryName] = fields + } + + const code = getCode(d, { categories, links }) + if (code) { + fields[itemName] = getFieldType(code[0], code[1]) + } else { + 
console.log(`could not determine code for '${d.header}'`) + } + }) + + schema.entry = { id: 'str' } + + codeLines.push(`const mmCIF = {`) + Object.keys(schema).forEach(category => { + codeLines.push(`\t${category}: {`) + const fields = schema[category] + Object.keys(fields).forEach(field => { + const type = fields[field] + // TODO: check if quoting is required + codeLines.push(`\t\t'${field}': ${type},`) + }) + codeLines.push('\t},') + }) + codeLines.push('}') + + return `${header}\n\n${codeLines.join('\n')}\n${footer}` +} diff --git a/src/reader/cif/text/field.ts b/src/reader/cif/text/field.ts index ecf14c3618d7a49c493c34bbcc89a7054c13ada6..2e09cd1b0cf369ac0d4cef3346d7d2fb88aad4e9 100644 --- a/src/reader/cif/text/field.ts +++ b/src/reader/cif/text/field.ts @@ -44,7 +44,7 @@ export default function CifTextField(tokens: Tokens, rowCount: number): Data.Fie float, presence, areValuesEqual: TokenColumn.areValuesEqualProvider(tokens), - stringEquals(row, v) { + stringEquals: (row, v) => { const s = indices[2 * row]; const value = v || ''; if (!value && presence(row) !== Data.ValuePresence.Present) return true; @@ -55,8 +55,8 @@ export default function CifTextField(tokens: Tokens, rowCount: number): Data.Fie } return true; }, - toStringArray(params) { return Column.createAndFillArray(rowCount, str, params); }, - toIntArray(params) { return Column.createAndFillArray(rowCount, int, params); }, - toFloatArray(params) { return Column.createAndFillArray(rowCount, float, params); } + toStringArray: params => Column.createAndFillArray(rowCount, str, params), + toIntArray: params => Column.createAndFillArray(rowCount, int, params), + toFloatArray: params => Column.createAndFillArray(rowCount, float, params) } } \ No newline at end of file diff --git a/src/reader/cif/text/parser.ts b/src/reader/cif/text/parser.ts index 3b02bd78d300203babd27d9f75598ae0b0f208ce..4b86e99a1ab1bda07751913005ac2bcc26a3dafa 100644 --- a/src/reader/cif/text/parser.ts +++ b/src/reader/cif/text/parser.ts 
@@ -554,9 +554,9 @@ async function parseInternal(data: string, ctx: Computation.Context) { let inSaveFrame = false // the next three initial values are never used in valid files - let saveFrames: Data.SafeFrame[] = []; + let saveFrames: Data.Frame[] = []; let saveCategories = Object.create(null); - let saveFrame: Data.SafeFrame = Data.SafeFrame(saveCategories, ''); + let saveFrame: Data.Frame = Data.SafeFrame(saveCategories, ''); ctx.update({ message: 'Parsing...', current: 0, max: data.length }); diff --git a/src/reader/common/binary/column.ts b/src/reader/common/binary/column.ts new file mode 100644 index 0000000000000000000000000000000000000000..28ce9a00172e698d50f8e908a3908248acf2ccb9 --- /dev/null +++ b/src/reader/common/binary/column.ts @@ -0,0 +1,5 @@ +/** + * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ \ No newline at end of file diff --git a/src/reader/common/column.ts b/src/reader/common/column.ts index 209d47961bea83d612566d7f001a8356d5c18ff3..13dc3f2bafb0ea0ce3576ab829cec9bb2cf257a2 100644 --- a/src/reader/common/column.ts +++ b/src/reader/common/column.ts @@ -37,24 +37,57 @@ export function UndefinedColumn<T extends ColumnType>(rowCount: number, type: T) isDefined: false, rowCount, value, - isValueDefined(row) { return false; }, - toArray(params) { + isValueDefined: row => false, + toArray: params => { const { array } = createArray(rowCount, params); for (let i = 0, _i = array.length; i < _i; i++) array[i] = value(0) return array; }, - stringEquals(row, value) { return !value; }, - areValuesEqual(rowA, rowB) { return true; } + stringEquals: (row, value) => !value, + areValuesEqual: (rowA, rowB) => true + } +} + +export function ArrayColumn<T>(array: ArrayLike<T>): Column<T> { + const rowCount = array.length; + const value: Column<T>['value'] = row => array[row]; + const isTyped = isTypedArray(array); + return { + isDefined: false, + rowCount, + 
value, + isValueDefined: row => true, + toArray: isTyped + ? params => typedArrayWindow(array, params) as any as ReadonlyArray<T> + : params => { + const { start, end } = getArrayBounds(rowCount, params); + const ret = new Array(end - start); + for (let i = 0, _i = end - start; i < _i; i++) ret[i] = array[start + i]; + return ret; + }, + stringEquals: isTyped + ? (row, value) => (array as any)[row] === +value + : (row, value) => { + const v = array[row]; + if (typeof v !== 'string') return '' + v === value; + return v === value; + }, + areValuesEqual: (rowA, rowB) => array[rowA] === array[rowB] } } +/** A helped function for Column.toArray */ +export function getArrayBounds(rowCount: number, params?: ToArrayParams) { + const start = params && typeof params.start !== 'undefined' ? Math.max(Math.min(params.start, rowCount - 1), 0) : 0; + const end = params && typeof params.end !== 'undefined' ? Math.min(params.end, rowCount) : rowCount; + return { start, end }; +} + /** A helped function for Column.toArray */ export function createArray(rowCount: number, params?: ToArrayParams) { - const { array, start, end } = params || ({} as ToArrayParams); - const c = typeof array !== 'undefined' ? array : Array; - const s = typeof start !== 'undefined' ? Math.max(Math.min(start, rowCount - 1), 0) : 0; - const e = typeof end !== 'undefined' ? Math.min(end, rowCount) : rowCount; - return { array: new c(e - s) as any[], start: s, end: e }; + const c = params && typeof params.array !== 'undefined' ? 
params.array : Array; + const { start, end } = getArrayBounds(rowCount, params); + return { array: new c(end - start) as any[], start, end }; } /** A helped function for Column.toArray */ @@ -69,4 +102,13 @@ export function createAndFillArray(rowCount: number, value: (row: number) => any return fillArrayValues(value, array, start); } +export function isTypedArray(data: any) { + return data.buffer && typeof data.byteLength === 'number' && data.BYTES_PER_ELEMENT; +} +export function typedArrayWindow(data: any, params?: ToArrayParams): ReadonlyArray<number> { + const { constructor, buffer, length, byteOffset, BYTES_PER_ELEMENT } = data; + const { start, end } = getArrayBounds(length, params); + if (start === 0 && end === length) return data; + return new constructor(buffer, byteOffset + BYTES_PER_ELEMENT * start, Math.min(length, end - start)); +} \ No newline at end of file diff --git a/src/reader/common/text/column/fixed.ts b/src/reader/common/text/column/fixed.ts index ac411242ab8ebde05d71a72e81d8c5e4586a0844..fb7534e85c8736929bbd740e8fb7a0fb76475f8e 100644 --- a/src/reader/common/text/column/fixed.ts +++ b/src/reader/common/text/column/fixed.ts @@ -45,11 +45,9 @@ export function FixedColumn<T extends ColumnType>(lines: Tokens, offset: number, isDefined: true, rowCount, value, - isValueDefined(row) { return true; }, - toArray(params) { return createAndFillArray(rowCount, value, params); }, - stringEquals(row, v) { return value(row) === v; }, - areValuesEqual(rowA, rowB) { - return value(rowA) === value(rowB); - } + isValueDefined: row => true, + toArray: params => createAndFillArray(rowCount, value, params), + stringEquals: (row, v) => value(row) === v, + areValuesEqual: (rowA, rowB) => value(rowA) === value(rowB) }; } \ No newline at end of file diff --git a/src/reader/common/text/column/token.ts b/src/reader/common/text/column/token.ts index 4d30f821118b279df63b88f0fd005bd3d2221837..adfc613d074c1e030fa212ed0fee0c4b61935c74 100644 --- 
a/src/reader/common/text/column/token.ts +++ b/src/reader/common/text/column/token.ts @@ -33,9 +33,9 @@ export function TokenColumn<T extends ColumnType>(tokens: Tokens, type: T): Colu isDefined: true, rowCount, value, - isValueDefined(row) { return true; }, - toArray(params) { return createAndFillArray(rowCount, value, params); }, - stringEquals(row, v) { + isValueDefined: row => true, + toArray: params => createAndFillArray(rowCount, value, params), + stringEquals: (row, v) => { const s = indices[2 * row]; const value = v || ''; const len = value.length; diff --git a/src/reader/spec/cif.spec.ts b/src/reader/spec/cif.spec.ts index c13ab97885321f534e833b8d9e364ef8bf958a59..accb0ab6abf1d8df80344aed155d013653f9435a 100644 --- a/src/reader/spec/cif.spec.ts +++ b/src/reader/spec/cif.spec.ts @@ -26,7 +26,7 @@ namespace TestSchema { } describe('schema', () => { - const data = Schema.apply(TestSchema.schema, testBlock); + const data = Schema.toTypedFrame(TestSchema.schema, testBlock); it('property access', () => { const { x, name } = data.atoms; expect(x.value(0)).toBe(1); diff --git a/src/reader/spec/text-column.spec.ts b/src/reader/spec/column.spec.ts similarity index 77% rename from src/reader/spec/text-column.spec.ts rename to src/reader/spec/column.spec.ts index 3d3f15edb39f30b188ad3fa27f2093a734993afd..1bd08dfdb5f7e537ef173b34c5d982310b2e3265 100644 --- a/src/reader/spec/text-column.spec.ts +++ b/src/reader/spec/column.spec.ts @@ -7,7 +7,7 @@ import FixedColumn from '../common/text/column/fixed' import TokenColumn from '../common/text/column/token' -import { ColumnType } from '../common/column' +import { ColumnType, typedArrayWindow } from '../common/column' const lines = [ '1.123 abc', @@ -60,3 +60,17 @@ describe('token text column', () => { expect(col1.value(2)).toBe(1); }) }); + +describe('binary column', () => { + it('window works', () => { + const xs = new Float64Array([1, 2, 3, 4]); + const w1 = typedArrayWindow(xs, { start: 1 }); + const w2 = 
typedArrayWindow(xs, { start: 2, end: 4 }); + + expect(w1.length).toBe(3); + for (let i = 0; i < w1.length; i++) expect(w1[i]).toBe(xs[i + 1]); + + expect(w2.length).toBe(2); + for (let i = 0; i < w2.length; i++) expect(w2[i]).toBe(xs[i + 2]); + }); +}) diff --git a/src/script.ts b/src/script.ts index 724767d15e7d62088e67cd2cd27ef6ec260647e0..c10a3e1bd579544c51b1f0a0915e20b76315a95f 100644 --- a/src/script.ts +++ b/src/script.ts @@ -8,9 +8,16 @@ import * as util from 'util' import * as fs from 'fs' +require('util.promisify').shim(); +const readFileAsync = util.promisify(fs.readFile); +const writeFileAsync = util.promisify(fs.writeFile); + import Gro from './reader/gro/parser' import CIF from './reader/cif/index' +// import { toTypedFrame as applySchema } from './reader/cif/schema' +import { generateSchema } from './reader/cif/schema/utils' + const file = '1crn.gro' // const file = 'water.gro' // const file = 'test.gro' @@ -74,13 +81,9 @@ async function runGro(input: string) { console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1]) } -export function _gro() { - fs.readFile(`./examples/${file}`, 'utf8', function (err, input) { - if (err) { - return console.log(err); - } - runGro(input) - }); +export async function _gro() { + const input = await readFileAsync(`./examples/${file}`, 'utf8') + runGro(input) } // _gro() @@ -106,35 +109,38 @@ async function runCIF(input: string | Uint8Array) { console.log(mmcif.atom_site.Cartn_x.value(0)); console.log(mmcif.entity.type.toArray()); console.log(mmcif.pdbx_struct_oper_list.matrix.value(0)); + + // const schema = await _dic() + // if (schema) { + // const mmcif2 = applySchema(schema, data) + // // console.log(util.inspect(mmcif2.atom_site, {showHidden: false, depth: 3})) + // console.log(mmcif2.atom_site.Cartn_x.value(0)); + // console.log(mmcif2.entity.type.toArray()); + // // console.log(mmcif2.pdbx_struct_oper_list.matrix.value(0)); // TODO + // } else { + // console.log('error getting 
mmcif schema from dic') + // } } -export function _cif() { +export async function _cif() { let path = `./examples/1cbs_updated.cif`; - path = '../test/3j3q.cif' // lets have a relative path for big test files - fs.readFile(path, 'utf8', function (err, input) { - if (err) { - return console.log(err); - } - console.log('------------------'); - console.log('Text CIF:'); - runCIF(input); - }); + // path = '../test/3j3q.cif' // lets have a relative path for big test files + const input = await readFileAsync(path, 'utf8') + console.log('------------------'); + console.log('Text CIF:'); + runCIF(input); path = `./examples/1cbs_full.bcif`; // const path = 'c:/test/quick/3j3q.cif'; - fs.readFile(path, function (err, input) { - if (err) { - return console.log(err); - } - console.log('------------------'); - console.log('BinaryCIF:'); - const data = new Uint8Array(input.byteLength); - for (let i = 0; i < input.byteLength; i++) data[i] = input[i]; - runCIF(input); - }); + const input2 = await readFileAsync(path) + console.log('------------------'); + console.log('BinaryCIF:'); + const data = new Uint8Array(input2.byteLength); + for (let i = 0; i < input2.byteLength; i++) data[i] = input2[i]; + runCIF(input2); } -// _cif(); +_cif(); async function runDic(input: string | Uint8Array) { console.time('parseDic'); @@ -148,20 +154,21 @@ async function runDic(input: string | Uint8Array) { return; } - const data = parsed.result.blocks[0]; - console.log(util.inspect(data.saveFrames, {showHidden: false, depth: 3})) + const schema = generateSchema(parsed.result.blocks[0]) + // console.log(schema) + // console.log(util.inspect(Object.keys(schema).length, {showHidden: false, depth: 1})) + + await writeFileAsync('./src/reader/cif/schema/mmcif-gen.ts', schema, 'utf8') + + return schema } -export function _dic() { - let path = '../test/mmcif_pdbx_v50.dic' - fs.readFile(path, 'utf8', function (err, input) { - if (err) { - return console.log(err); - } - console.log('------------------'); - 
console.log('Text DIC:'); - runDic(input); - }); +export async function _dic() { + let path = './build/dics/mmcif_pdbx_v50.dic' + const input = await readFileAsync(path, 'utf8') + console.log('------------------'); + console.log('Text DIC:'); + return runDic(input); } _dic();