diff --git a/src/apps/cif2bcif.ts b/src/apps/cif2bcif.ts
new file mode 100644
index 0000000000000000000000000000000000000000..26d583aae3d312a07976d30533e8de6a11aff24d
--- /dev/null
+++ b/src/apps/cif2bcif.ts
@@ -0,0 +1,16 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import * as fs from 'fs'
+import convert from './cif2bcif/converter'
+
+(async function () {
+    const src = process.argv[2];
+    const out = process.argv[3];
+
+    const res = await convert(src);
+    fs.writeFileSync(out, res);
+}());
\ No newline at end of file
diff --git a/src/apps/cif2bcif/converter.ts b/src/apps/cif2bcif/converter.ts
new file mode 100644
index 0000000000000000000000000000000000000000..c8f049846ad293d862fffc95cf764573880344af
--- /dev/null
+++ b/src/apps/cif2bcif/converter.ts
@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import Iterator from 'mol-base/collections/iterator'
+import CIF, { Category } from 'mol-io/reader/cif'
+import TextCIFEncoder from 'mol-io/writer/cif/encoder/text'
+import BinaryCIFEncoder from 'mol-io/writer/cif/encoder/binary'
+import * as Encoder from 'mol-io/writer/cif/encoder'
+import * as fs from 'fs'
+import classify from './field-classifier'
+
+async function getCIF(path: string) {
+    const str = fs.readFileSync(path, 'utf8');
+    const parsed = await CIF.parseText(str)();
+    if (parsed.isError) {
+        throw new Error(parsed.toString());
+    }
+    return parsed.result;
+}
+
+function createDefinition(cat: Category): Encoder.CategoryDefinition {
+    return {
+        name: cat.name,
+        fields: cat.fieldNames.map(n => classify(n, cat.getField(n)!))
+    }
+}
+
+function getCategoryInstanceProvider(cat: Category): Encoder.CategoryProvider {
+    return function (ctx: any) {
+        return {
+            data: cat,
+            definition: createDefinition(cat),
+            keys: () => Iterator.Range(0, cat.rowCount - 1),
+            rowCount: cat.rowCount
+        };
+    }
+}
+
+export default async function convert(path: string, asText = false) {
+    const cif = await getCIF(path);
+
+    const encoder = asText ? new TextCIFEncoder() : new BinaryCIFEncoder('mol* cif2bcif');
+    for (const b of cif.blocks) {
+        encoder.startDataBlock(b.header);
+        for (const _c of Object.keys(b.categories)) {
+            encoder.writeCategory(getCategoryInstanceProvider(b.categories[_c]));
+        }
+    }
+    return encoder.getData();
+}
+
diff --git a/src/apps/cif2bcif/field-classifier.ts b/src/apps/cif2bcif/field-classifier.ts
new file mode 100644
index 0000000000000000000000000000000000000000..0fe447382a76a41b5ede7eaa1a766998fad65b3c
--- /dev/null
+++ b/src/apps/cif2bcif/field-classifier.ts
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Column } from 'mol-base/collections/database'
+import { Field } from 'mol-io/reader/cif/data-model'
+import { FieldDefinition, FieldType } from 'mol-io/writer/cif/encoder'
+
+const intRegex = /^-?\d+$/
+const floatRegex = /^-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)?/
+
+function classify(name: string, field: Field): FieldDefinition {
+    let floatCount = 0, hasString = false;
+    for (let i = 0, _i = field.rowCount; i < _i; i++) {
+        const k = field.valueKind(i);
+        if (k !== Column.ValueKind.Present) continue;
+        const v = field.str(i);
+        if (intRegex.test(v)) continue;
+        else if (floatRegex.test(v)) floatCount++;
+        else { hasString = true; break; }
+    }
+
+    if (hasString) return { name, type: FieldType.Str, value: field.str, valueKind: field.valueKind };
+    if (floatCount > 0) return { name, type: FieldType.Float, value: field.float, valueKind: field.valueKind };
+    return { name, type: FieldType.Int, value: field.int, valueKind: field.valueKind };
+}
+
+export default classify;
\ No newline at end of file
diff --git a/src/mol-data/structure/export/mmcif.ts b/src/mol-data/structure/export/mmcif.ts
index 087aa16a1619db8308ed5c1224b3665c169a1948..8a78bc1cb9320903071d6b552eb4a57709b8c21d 100644
--- a/src/mol-data/structure/export/mmcif.ts
+++ b/src/mol-data/structure/export/mmcif.ts
@@ -8,7 +8,8 @@ import { Column, Table } from 'mol-base/collections/database'
 import Iterator from 'mol-base/collections/iterator'
 import * as Encoder from 'mol-io/writer/cif/encoder'
 //import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'
-import CIFEncoder from 'mol-io/writer/cif/encoder/text'
+import TextCIFEncoder from 'mol-io/writer/cif/encoder/text'
+import BinaryCIFEncoder from 'mol-io/writer/cif/encoder/binary'
 import { Structure, Atom, AtomSet } from '../structure'
 import { Model } from '../model'
 import P from '../query/properties'
@@ -142,13 +143,13 @@ function atomSiteProvider({ structure }: Context): Encoder.CategoryInstance {
     }
 }
 
-function getCifString(name: string, structure: Structure) {
+function to_mmCIF(name: string, structure: Structure, asBinary = false) {
     const models = Structure.getModels(structure);
     if (models.length !== 1) throw 'cant export stucture composed from multiple models.';
     const model = models[0];
 
     const ctx: Context = { structure, model };
-    const w = new CIFEncoder();
+    const w = asBinary ? new BinaryCIFEncoder('mol*') : new TextCIFEncoder();
 
     w.startDataBlock(name);
     w.writeCategory(entityProvider, [ctx]);
@@ -156,4 +157,4 @@ function getCifString(name: string, structure: Structure) {
     return w.getData();
 }
 
-export default getCifString
\ No newline at end of file
+export default to_mmCIF
\ No newline at end of file
diff --git a/src/mol-io/common/binary-cif.ts b/src/mol-io/common/binary-cif.ts
new file mode 100644
index 0000000000000000000000000000000000000000..f63c05d25981e1eb1abf07ec27ce0c67696cccd9
--- /dev/null
+++ b/src/mol-io/common/binary-cif.ts
@@ -0,0 +1,11 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import decode from './binary-cif/decoder'
+
+export * from './binary-cif/encoding'
+export * from './binary-cif/array-encoder'
+export { decode }
\ No newline at end of file
diff --git a/src/mol-io/common/binary-cif/array-encoder.ts b/src/mol-io/common/binary-cif/array-encoder.ts
new file mode 100644
index 0000000000000000000000000000000000000000..13f70f180b3de128739286113cfe8f73bd2ccbec
--- /dev/null
+++ b/src/mol-io/common/binary-cif/array-encoder.ts
@@ -0,0 +1,396 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js; MIT) and MMTF (https://github.com/rcsb/mmtf-javascript/; MIT)
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import ChunkedArray from 'mol-base/collections/chunked-array'
+import { Encoding, EncodedData } from './encoding'
+
+export interface ArrayEncoder {
+    and(f: ArrayEncoding.Provider): ArrayEncoder,
+    encode(data: ArrayLike<any>): EncodedData
+}
+
+export class ArrayEncoderImpl implements ArrayEncoder {
+    and(f: ArrayEncoding.Provider) {
+        return new ArrayEncoderImpl(this.providers.concat([f]));
+    }
+
+    encode(data: ArrayLike<any>): EncodedData {
+        let encoding: Encoding[] = [];
+        for (let p of this.providers) {
+            let t = p(data);
+
+            if (!t.encodings.length) {
+                throw new Error('Encodings must be non-empty.');
+            }
+
+            data = t.data;
+            for (let e of t.encodings) {
+                encoding.push(e);
+            }
+        }
+        if (!(data instanceof Uint8Array)) {
+            throw new Error('The encoding must result in a Uint8Array. Fix your encoding chain.');
+        }
+        return {
+            encoding,
+            data
+        }
+    }
+
+    constructor(private providers: ArrayEncoding.Provider[]) {
+
+    }
+}
+
+export namespace ArrayEncoder {
+    export function by(f: ArrayEncoding.Provider): ArrayEncoder {
+        return new ArrayEncoderImpl([f]);
+    }
+}
+
+export namespace ArrayEncoding {
+    export type TypedArrayCtor = { new(size: number): ArrayLike<number> & { buffer: ArrayBuffer, byteLength: number, byteOffset: number, BYTES_PER_ELEMENT: number } }
+
+    export interface Result {
+        encodings: Encoding[],
+        data: any
+    }
+
+    export type Provider = (data: any) => Result
+
+    export function by(f: Provider): ArrayEncoder {
+        return new ArrayEncoderImpl([f]);
+    }
+
+    function uint8(data: Uint8Array): Result {
+        return {
+            encodings: [{ kind: 'ByteArray', type: Encoding.IntDataType.Uint8 }],
+            data
+        };
+    }
+
+    function int8(data: Int8Array): Result {
+        return {
+            encodings: [{ kind: 'ByteArray', type: Encoding.IntDataType.Int8 }],
+            data: new Uint8Array(data.buffer, data.byteOffset)
+        };
+    }
+
+    const writers = {
+        [Encoding.IntDataType.Int16]: function (v: DataView, i: number, a: number) { v.setInt16(2 * i, a, true) },
+        [Encoding.IntDataType.Uint16]: function (v: DataView, i: number, a: number) { v.setUint16(2 * i, a, true) },
+        [Encoding.IntDataType.Int32]: function (v: DataView, i: number, a: number) { v.setInt32(4 * i, a, true) },
+        [Encoding.IntDataType.Uint32]: function (v: DataView, i: number, a: number) { v.setUint32(4 * i, a, true) },
+        [Encoding.FloatDataType.Float32]: function (v: DataView, i: number, a: number) { v.setFloat32(4 * i, a, true) },
+        [Encoding.FloatDataType.Float64]: function (v: DataView, i: number, a: number) { v.setFloat64(8 * i, a, true) }
+    }
+
+    const byteSizes = {
+        [Encoding.IntDataType.Int16]: 2,
+        [Encoding.IntDataType.Uint16]: 2,
+        [Encoding.IntDataType.Int32]: 4,
+        [Encoding.IntDataType.Uint32]: 4,
+        [Encoding.FloatDataType.Float32]: 4,
+        [Encoding.FloatDataType.Float64]: 8
+    }
+
+    export function byteArray(data: Encoding.FloatArray | Encoding.IntArray) {
+        let type = Encoding.getDataType(data);
+
+        if (type === Encoding.IntDataType.Int8) return int8(data as Int8Array);
+        else if (type === Encoding.IntDataType.Uint8) return uint8(data as Uint8Array);
+
+        let result = new Uint8Array(data.length * byteSizes[type]);
+        let w = writers[type];
+        let view = new DataView(result.buffer);
+        for (let i = 0, n = data.length; i < n; i++) {
+            w(view, i, data[i]);
+        }
+        return {
+            encodings: [<Encoding.ByteArray>{ kind: 'ByteArray', type }],
+            data: result
+        };
+    }
+
+    function _fixedPoint(data: Encoding.FloatArray, factor: number): Result {
+        let srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
+        let result = new Int32Array(data.length);
+        for (let i = 0, n = data.length; i < n; i++) {
+            result[i] = Math.round(data[i] * factor);
+        }
+        return {
+            encodings: [{ kind: 'FixedPoint', factor, srcType }],
+            data: result
+        };
+    }
+    export function fixedPoint(factor: number): Provider { return data => _fixedPoint(data as Encoding.FloatArray, factor); }
+
+    function _intervalQuantizaiton(data: Encoding.FloatArray, min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray): Result {
+        let srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
+        if (!data.length) {
+            return {
+                encodings: [{ kind: 'IntervalQuantization', min, max, numSteps, srcType }],
+                data: new Int32Array(0)
+            };
+        }
+
+        if (max < min) {
+            let t = min;
+            min = max;
+            max = t;
+        }
+
+        let delta = (max - min) / (numSteps - 1);
+
+        let output = new arrayType(data.length);
+        for (let i = 0, n = data.length; i < n; i++) {
+            let v = data[i];
+            if (v <= min) output[i] = 0;
+            else if (v >= max) output[i] = numSteps;
+            else output[i] = (Math.round((v - min) / delta)) | 0;
+        }
+
+        return {
+            encodings: [{ kind: 'IntervalQuantization', min, max, numSteps, srcType }],
+            data: output
+        };
+    }
+    export function intervalQuantizaiton(min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray = Int32Array): Provider {
+        return data => _intervalQuantizaiton(data as Encoding.FloatArray, min, max, numSteps, arrayType);
+    }
+
+    export function runLength(data: Encoding.IntArray): Result {
+        let srcType = Encoding.getDataType(data) as Encoding.IntDataType;
+        if (srcType === void 0) {
+            data = new Int32Array(data);
+            srcType = Encoding.IntDataType.Int32;
+        }
+
+        if (!data.length) {
+            return {
+                encodings: [{ kind: 'RunLength', srcType, srcSize: 0 }],
+                data: new Int32Array(0)
+            };
+        }
+
+        // calculate output size
+        let fullLength = 2;
+        for (let i = 1, il = data.length; i < il; i++) {
+            if (data[i - 1] !== data[i]) {
+                fullLength += 2;
+            }
+        }
+        let output = new Int32Array(fullLength);
+        let offset = 0;
+        let runLength = 1;
+        for (let i = 1, il = data.length; i < il; i++) {
+            if (data[i - 1] !== data[i]) {
+                output[offset] = data[i - 1];
+                output[offset + 1] = runLength;
+                runLength = 1;
+                offset += 2;
+            } else {
+                ++runLength;
+            }
+        }
+        output[offset] = data[data.length - 1];
+        output[offset + 1] = runLength;
+        return {
+            encodings: [{ kind: 'RunLength', srcType, srcSize: data.length }],
+            data: output
+        };
+    }
+
+    export function delta(data: Int8Array | Int16Array | Int32Array): Result {
+        if (!Encoding.isSignedIntegerDataType(data)) {
+            throw new Error('Only signed integer types can be encoded using delta encoding.');
+        }
+
+        let srcType = Encoding.getDataType(data) as Encoding.IntDataType;
+        if (srcType === void 0) {
+            data = new Int32Array(data);
+            srcType = Encoding.IntDataType.Int32;
+        }
+        if (!data.length) {
+            return {
+                encodings: [{ kind: 'Delta', origin: 0, srcType }],
+                data: new (data as any).constructor(0)
+            };
+        }
+
+        let output = new (data as any).constructor(data.length);
+        let origin = data[0];
+        output[0] = data[0];
+        for (let i = 1, n = data.length; i < n; i++) {
+            output[i] = data[i] - data[i - 1];
+        }
+        output[0] = 0;
+        return {
+            encodings: [{ kind: 'Delta', origin, srcType }],
+            data: output
+        };
+    }
+
+    function isSigned(data: Int32Array) {
+        for (let i = 0, n = data.length; i < n; i++) {
+            if (data[i] < 0) return true;
+        }
+        return false;
+    }
+
+    function packingSize(data: Int32Array, upperLimit: number) {
+        let lowerLimit = -upperLimit - 1;
+        let size = 0;
+        for (let i = 0, n = data.length; i < n; i++) {
+            let value = data[i];
+            if (value === 0) {
+                size += 1;
+            } else if (value > 0) {
+                size += Math.ceil(value / upperLimit);
+                if (value % upperLimit === 0) size += 1;
+            } else {
+                size += Math.ceil(value / lowerLimit);
+                if (value % lowerLimit === 0) size += 1;
+            }
+        }
+        return size;
+    }
+
+    function determinePacking(data: Int32Array): { isSigned: boolean, size: number, bytesPerElement: number } {
+        let signed = isSigned(data);
+        let size8 = signed ? packingSize(data, 0x7F) : packingSize(data, 0xFF);
+        let size16 = signed ? packingSize(data, 0x7FFF) : packingSize(data, 0xFFFF);
+
+        if (data.length * 4 < size16 * 2) {
+            // 4 byte packing is the most effective
+            return {
+                isSigned: signed,
+                size: data.length,
+                bytesPerElement: 4
+            };
+        } else if (size16 * 2 < size8) {
+            // 2 byte packing is the most effective
+            return {
+                isSigned: signed,
+                size: size16,
+                bytesPerElement: 2
+            }
+        } else {
+            // 1 byte packing is the most effective
+            return {
+                isSigned: signed,
+                size: size8,
+                bytesPerElement: 1
+            }
+        };
+    }
+
+    function _integerPacking(data: Int32Array, packing: { isSigned: boolean, size: number, bytesPerElement: number }): Result {
+        let upperLimit = packing.isSigned
+            ? (packing.bytesPerElement === 1 ? 0x7F : 0x7FFF)
+            : (packing.bytesPerElement === 1 ? 0xFF : 0xFFFF);
+
+        let lowerLimit = -upperLimit - 1;
+        let n = data.length;
+        let packed = packing.isSigned
+            ? packing.bytesPerElement === 1 ? new Int8Array(packing.size) : new Int16Array(packing.size)
+            : packing.bytesPerElement === 1 ? new Uint8Array(packing.size) : new Uint16Array(packing.size);
+        let j = 0;
+        for (let i = 0; i < n; i++) {
+            let value = data[i];
+            if (value >= 0) {
+                while (value >= upperLimit) {
+                    packed[j] = upperLimit;
+                    ++j;
+                    value -= upperLimit;
+                }
+            } else {
+                while (value <= lowerLimit) {
+                    packed[j] = lowerLimit;
+                    ++j;
+                    value -= lowerLimit;
+                }
+            }
+            packed[j] = value;
+            ++j;
+        }
+
+        let result = byteArray(packed);
+        return {
+            encodings: [{
+                kind: 'IntegerPacking',
+                byteCount: packing.bytesPerElement,
+                isUnsigned: !packing.isSigned,
+                srcSize: n
+            },
+                result.encodings[0]
+            ],
+            data: result.data
+        };
+    }
+
+    /**
+     * Packs Int32 array. The packing level is determined automatically to either 1-, 2-, or 4-byte words.
+     */
+    export function integerPacking(data: Int32Array): Result {
+        if (!(data instanceof Int32Array)) {
+            throw new Error('Integer packing can only be applied to Int32 data.');
+        }
+
+        let packing = determinePacking(data);
+
+        if (packing.bytesPerElement === 4) {
+            // no packing done, Int32 encoding will be used
+            return byteArray(data);
+        }
+
+        return _integerPacking(data, packing);
+    }
+
+    export function stringArray(data: string[]): Result {
+        let map: any = Object.create(null);
+        let strings: string[] = [];
+        let accLength = 0;
+        let offsets = ChunkedArray.create<number>(s => new Int32Array(s), 1, 1024, true)
+        let output = new Int32Array(data.length);
+
+        ChunkedArray.add(offsets, 0);
+        let i = 0;
+        for (let s of data) {
+            // handle null strings.
+            if (s === null || s === void 0) {
+                output[i++] = -1;
+                continue;
+            }
+
+            let index = map[s];
+            if (index === void 0) {
+                // increment the length
+                accLength += s.length;
+
+                // store the string and index
+                index = strings.length;
+                strings[index] = s;
+                map[s] = index;
+
+                // write the offset
+                ChunkedArray.add(offsets, accLength);
+            }
+            output[i++] = index;
+        }
+
+        let encOffsets = ArrayEncoder.by(delta).and(integerPacking).encode(ChunkedArray.compact(offsets));
+        let encOutput = ArrayEncoder.by(delta).and(runLength).and(integerPacking).encode(output);
+
+        return {
+            encodings: [{ kind: 'StringArray', dataEncoding: encOutput.encoding, stringData: strings.join(''), offsetEncoding: encOffsets.encoding, offsets: encOffsets.data }],
+            data: encOutput.data
+        };
+    }
+}
\ No newline at end of file
diff --git a/src/mol-io/reader/cif/binary/decoder.ts b/src/mol-io/common/binary-cif/decoder.ts
similarity index 100%
rename from src/mol-io/reader/cif/binary/decoder.ts
rename to src/mol-io/common/binary-cif/decoder.ts
diff --git a/src/mol-io/reader/cif/binary/encoding.ts b/src/mol-io/common/binary-cif/encoding.ts
similarity index 100%
rename from src/mol-io/reader/cif/binary/encoding.ts
rename to src/mol-io/common/binary-cif/encoding.ts
diff --git a/src/mol-io/utils/msgpack/decode.ts b/src/mol-io/common/msgpack/decode.ts
similarity index 100%
rename from src/mol-io/utils/msgpack/decode.ts
rename to src/mol-io/common/msgpack/decode.ts
diff --git a/src/mol-io/utils/msgpack/encode.ts b/src/mol-io/common/msgpack/encode.ts
similarity index 100%
rename from src/mol-io/utils/msgpack/encode.ts
rename to src/mol-io/common/msgpack/encode.ts
diff --git a/src/mol-io/utils/utf8.ts b/src/mol-io/common/utf8.ts
similarity index 100%
rename from src/mol-io/utils/utf8.ts
rename to src/mol-io/common/utf8.ts
diff --git a/src/mol-io/reader/cif/binary/field.ts b/src/mol-io/reader/cif/binary/field.ts
index c11f1909c565f9807a11d53ab239716f8441a276..5847e0ab3aabe2621abe65fb7d8f5116767545f6 100644
--- a/src/mol-io/reader/cif/binary/field.ts
+++ b/src/mol-io/reader/cif/binary/field.ts
@@ -6,8 +6,7 @@
 
 import { Column, ColumnHelpers } from 'mol-base/collections/database'
 import * as Data from '../data-model'
-import { EncodedColumn } from './encoding'
-import decode from './decoder'
+import { EncodedColumn, decode } from '../../../common/binary-cif'
 import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../../common/text/number-parser'
 
 function wrap(o: Data.Field) {
@@ -58,22 +57,4 @@ export default function Field(column: EncodedColumn): Data.Field {
             ? params => ColumnHelpers.typedArrayWindow(data, params)
             : params => ColumnHelpers.createAndFillArray(rowCount, float, params)
     });
-}
-
-// return wrap({
-//     '@array': data,
-//     isDefined: true,
-//     rowCount,
-//     str: str as any,
-//     int,
-//     float,
-//     valueKind,
-//     areValuesEqual: (rowA, rowB) => data[rowA] === data[rowB],
-//     toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
-//     toIntArray: isNumeric
-//         ? params => ColumnHelpers.typedArrayWindow(data, params)
-//         : params => ColumnHelpers.createAndFillArray(rowCount, int, params),
-//     toFloatArray: isNumeric
-//         ? params => ColumnHelpers.typedArrayWindow(data, params)
-//         : params => ColumnHelpers.createAndFillArray(rowCount, float, params)
-//     });
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/src/mol-io/reader/cif/binary/parser.ts b/src/mol-io/reader/cif/binary/parser.ts
index 0d0614a7692608c39befd53af1e1c0471a4b1fb2..03b9e71c7548e6c578f233ebb0ac94ed1486d591 100644
--- a/src/mol-io/reader/cif/binary/parser.ts
+++ b/src/mol-io/reader/cif/binary/parser.ts
@@ -5,10 +5,10 @@
  */
 
 import * as Data from '../data-model'
-import * as Encoding from './encoding'
+import { EncodedCategory, EncodedFile } from '../../../common/binary-cif'
 import Field from './field'
 import Result from '../../result'
-import decodeMsgPack from '../../../utils/msgpack/decode'
+import decodeMsgPack from '../../../common/msgpack/decode'
 import Computation from 'mol-base/computation'
 
 function checkVersions(min: number[], current: number[]) {
@@ -18,12 +18,14 @@ function checkVersions(min: number[], current: number[]) {
     return true;
 }
 
-function Category(data: Encoding.EncodedCategory): Data.Category {
+function Category(data: EncodedCategory): Data.Category {
     const map = Object.create(null);
     const cache = Object.create(null);
     for (const col of data.columns) map[col.name] = col;
     return {
         rowCount: data.rowCount,
+        name: data.name,
+        fieldNames: data.columns.map(c => c.name),
         getField(name) {
             const col = map[name];
             if (!col) return void 0;
@@ -39,7 +41,7 @@ export default function parse(data: Uint8Array) {
     const minVersion = [0, 3];
 
     try {
-        const unpacked = decodeMsgPack(data) as Encoding.EncodedFile;
+        const unpacked = decodeMsgPack(data) as EncodedFile;
         if (!checkVersions(minVersion, unpacked.version.match(/(\d)\.(\d)\.\d/)!.slice(1).map(v => +v))) {
             return Result.error<Data.File>(`Unsupported format version. Current ${unpacked.version}, required ${minVersion.join('.')}.`);
         }
diff --git a/src/mol-io/reader/cif/data-model.ts b/src/mol-io/reader/cif/data-model.ts
index 2f72463e96da85d12d9534c973ea77d88d09e96e..4d2ffdf7fb628584722603b8bbeeca4b50c08193 100644
--- a/src/mol-io/reader/cif/data-model.ts
+++ b/src/mol-io/reader/cif/data-model.ts
@@ -39,15 +39,19 @@ export type Categories = { readonly [name: string]: Category }
 
 export interface Category {
     readonly rowCount: number,
+    readonly name: string,
+    readonly fieldNames: ReadonlyArray<string>,
     getField(name: string): Field | undefined
 }
 
-export function Category(rowCount: number, fields: { [name: string]: Field }): Category {
-    return { rowCount, getField(name) { return fields[name]; } };
+export function Category(name: string, rowCount: number, fieldNames: string[], fields: { [name: string]: Field }): Category {
+    return { rowCount, name, fieldNames: [...fieldNames], getField(name) { return fields[name]; } };
 }
 
 export namespace Category {
-    export const Empty: Category = { rowCount: 0, getField(name: string) { return void 0; } };
+    export function empty(name: string): Category {
+        return { rowCount: 0, name, fieldNames: [], getField(name: string) { return void 0; } };
+    };
 }
 
 /**
diff --git a/src/mol-io/reader/cif/schema.ts b/src/mol-io/reader/cif/schema.ts
index c4b0dc333dcb3b48e2212b6cf5f980164462ce04..ef1e95ca8a500991530db6e80b88f0c49e089f26 100644
--- a/src/mol-io/reader/cif/schema.ts
+++ b/src/mol-io/reader/cif/schema.ts
@@ -88,5 +88,5 @@ function createDatabase(schema: Database.Schema, frame: Data.Frame): Database<an
 
 function createTable(key: string, schema: Table.Schema, frame: Data.Frame) {
     const cat = frame.categories[key[0] === '_' ? key : '_' + key];
-    return new CategoryTable(cat || Data.Category.Empty, schema, !!cat);
+    return new CategoryTable(cat || Data.Category.empty(key), schema, !!cat);
 }
\ No newline at end of file
diff --git a/src/mol-io/reader/cif/text/parser.ts b/src/mol-io/reader/cif/text/parser.ts
index cf705c7aefc6f0eccf8a2d167564bb23b3de47f5..71643b34435014f8d3e2974ba7c482360c5625c1 100644
--- a/src/mol-io/reader/cif/text/parser.ts
+++ b/src/mol-io/reader/cif/text/parser.ts
@@ -418,6 +418,7 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D
     const nsStart = tokenizer.tokenStart, nsEnd = getNamespaceEnd(tokenizer);
     const name = getNamespace(tokenizer, nsEnd);
     const fields = Object.create(null);
+    const fieldNames: string[] = [];
 
     let readingNames = true;
     while (readingNames) {
@@ -436,10 +437,11 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D
             }
         }
         fields[fieldName] = Field({ data: tokenizer.data, indices: [tokenizer.tokenStart, tokenizer.tokenEnd], count: 1 }, 1);
+        fieldNames[fieldNames.length] = fieldName;
         moveNext(tokenizer);
     }
 
-    categories[name] = Data.Category(1, fields);
+    categories[name] = Data.Category(name.substr(1), 1, fieldNames, fields);
 
     return {
         hasError: false,
@@ -517,7 +519,7 @@ async function handleLoop(tokenizer: TokenizerState, categories: { [name: string
         fields[fieldNames[i]] = Field(tokens[i], rowCount);
     }
 
-    categories[name] = Data.Category(rowCount, fields);
+    categories[name] = Data.Category(name.substr(1), rowCount, fieldNames, fields);
 
     return {
         hasError: false,
diff --git a/src/mol-io/writer/cif/encoder.ts b/src/mol-io/writer/cif/encoder.ts
index c1219065ea07b06f292c16a27b6dd26b1af7a9da..64a18622ff509c86fe1ae394c239b9bf7915f898 100644
--- a/src/mol-io/writer/cif/encoder.ts
+++ b/src/mol-io/writer/cif/encoder.ts
@@ -7,6 +7,7 @@
 import Iterator from 'mol-base/collections/iterator'
 import { Column } from 'mol-base/collections/database'
 import Encoder from '../encoder'
+//import { ArrayEncoder, ArrayEncoding as E } from '../../common/binary-cif'
 
 export const enum FieldType {
     Str, Int, Float
@@ -19,15 +20,24 @@ export interface FieldDefinition<Key = any, Data = any> {
     valueKind?: (key: Key, data: Data) => Column.ValueKind
 
     /** determine whether to include this field base on the context */
-    shouldInclude?: (data: Data) => boolean
+    // TODO:
+    // shouldInclude?: (data: Data) => boolean
 }
 
 export interface FieldFormat {
-    decimalPlaces: number
+    // TODO
+    // textDecimalPlaces: number,
+    // stringEncoder: ArrayEncoder,
+    // numericEncoder: ArrayEncoder,
+    // typedArray?: E.TypedArrayCtor
 }
 
 export namespace FieldFormat {
-    export const Default: FieldFormat = { decimalPlaces: 3 };
+    export const Default: FieldFormat = {
+        // textDecimalPlaces: 3,
+        // stringEncoder: ArrayEncoder.by(E.stringArray),
+        // numericEncoder: ArrayEncoder.by(E.byteArray)
+    };
 }
 
 export interface CategoryDefinition<Key = any, Data = any> {
diff --git a/src/mol-io/writer/cif/encoder/binary.ts b/src/mol-io/writer/cif/encoder/binary.ts
new file mode 100644
index 0000000000000000000000000000000000000000..54968e5fb06c18a28e3c496f5f03b4846ce8c4ab
--- /dev/null
+++ b/src/mol-io/writer/cif/encoder/binary.ts
@@ -0,0 +1,135 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js)
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import Iterator from 'mol-base/collections/iterator'
+import { Column } from 'mol-base/collections/database'
+import encodeMsgPack from '../../../common/msgpack/encode'
+import {
+    EncodedColumn, EncodedData, EncodedFile, EncodedDataBlock, EncodedCategory, ArrayEncoder, ArrayEncoding as E, VERSION
+} from '../../../common/binary-cif'
+import { FieldDefinition, FieldFormat, FieldType, CategoryProvider, CIFEncoder } from '../encoder'
+import Writer from '../../writer'
+
+export default class BinaryCIFWriter<Context> implements CIFEncoder<Uint8Array, Context> {
+    private data: EncodedFile;
+    private dataBlocks: EncodedDataBlock[] = [];
+    private encodedData: Uint8Array;
+
+    startDataBlock(header: string) {
+        this.dataBlocks.push({
+            header: (header || '').replace(/[ \n\t]/g, '').toUpperCase(),
+            categories: []
+        });
+    }
+
+    writeCategory(category: CategoryProvider, contexts?: Context[]) {
+        if (!this.data) {
+            throw new Error('The writer contents have already been encoded, no more writing.');
+        }
+
+        if (!this.dataBlocks.length) {
+            throw new Error('No data block created.');
+        }
+
+        const src = !contexts || !contexts.length ? [category(<any>void 0)] : contexts.map(c => category(c));
+        const categories = src.filter(c => c && c.rowCount > 0);
+        if (!categories.length) return;
+
+        const count = categories.reduce((a, c) => a + c.rowCount, 0);
+        if (!count) return;
+
+        const first = categories[0]!;
+        const cat: EncodedCategory = { name: '_' + first.definition.name, columns: [], rowCount: count };
+        const data = categories.map(c => ({ data: c.data, keys: () => c.keys() }));
+        for (const f of first.definition.fields) {
+            cat.columns.push(encodeField(f, data, count, FieldFormat.Default));
+        }
+        this.dataBlocks[this.dataBlocks.length - 1].categories.push(cat);
+    }
+
+    encode() {
+        if (this.encodedData) return;
+        this.encodedData = encodeMsgPack(this.data);
+        this.data = <any>null;
+        this.dataBlocks = <any>null;
+    }
+
+    writeTo(writer: Writer<Uint8Array>) {
+        writer.write(this.encodedData);
+    }
+
+    getData() {
+        this.encode();
+        return this.encodedData;
+    }
+
+    constructor(encoder: string) {
+        this.data = {
+            encoder,
+            version: VERSION,
+            dataBlocks: this.dataBlocks
+        };
+    }
+}
+
+function encodeField(field: FieldDefinition, data: { data: any, keys: () => Iterator<any> }[], totalCount: number, format: FieldFormat): EncodedColumn {
+    const isStr = field.type === FieldType.Str
+    let array: any[], encoder: ArrayEncoder;
+
+    if (isStr) {
+        array = new Array(totalCount);
+        encoder = ArrayEncoder.by(E.stringArray); //format.stringEncoder;
+    } else {
+        //array = format.typedArray ? new format.typedArray(totalCount) as any : field.type === FieldType.Int ? new Int32Array(totalCount) : new Float32Array(totalCount);
+        array = (field.type === FieldType.Int ? new Int32Array(totalCount) : new Float32Array(totalCount)) as any;
+        encoder = ArrayEncoder.by(E.byteArray);
+    }
+
+    const mask = new Uint8Array(totalCount);
+    const valueKind = field.valueKind;
+    const getter = field.value;
+    let allPresent = true;
+
+    let offset = 0;
+    for (let _d = 0; _d < data.length; _d++) {
+        const d = data[_d].data;
+        const keys = data[_d].keys();
+        while (keys.hasNext) {
+            const key = keys.move();
+            const p = valueKind ? valueKind(key, d) : Column.ValueKind.Present;
+            if (p !== Column.ValueKind.Present) {
+                mask[offset] = p;
+                if (isStr) array[offset] = '';
+                allPresent = false;
+            } else {
+                mask[offset] = Column.ValueKind.Present;
+                array[offset] = getter(key, d);
+            }
+            offset++;
+        }
+    }
+
+    const encoded = encoder.encode(array);
+
+    let maskData: EncodedData | undefined = void 0;
+
+    if (!allPresent) {
+        const maskRLE = ArrayEncoder.by(E.runLength).and(E.byteArray).encode(mask);
+        if (maskRLE.data.length < mask.length) {
+            maskData = maskRLE;
+        } else {
+            maskData = ArrayEncoder.by(E.byteArray).encode(mask);
+        }
+    }
+
+    return {
+        name: field.name,
+        data: encoded,
+        mask: maskData
+    };
+}
\ No newline at end of file
diff --git a/src/perf-tests/structure.ts b/src/perf-tests/structure.ts
index af1fa2f27e50f8b991b5c2cb7d86bcc2aceaef89..60d27832cb793fcc86fafeb166f5729735964e23 100644
--- a/src/perf-tests/structure.ts
+++ b/src/perf-tests/structure.ts
@@ -13,7 +13,7 @@ import CIF from 'mol-io/reader/cif'
 
 import { Structure, Model, Queries as Q, Atom, AtomSet, Selection } from 'mol-data/structure'
 import { OrderedSet as OrdSet, Segmentation } from 'mol-base/collections/integer'
-import toMmCIFString from 'mol-data/structure/export/mmcif'
+import to_mmCIF from 'mol-data/structure/export/mmcif'
 
 require('util.promisify').shim();
 const readFileAsync = util.promisify(fs.readFile);
@@ -237,10 +237,15 @@ export namespace PropertyAccess {
     // }
 
     export async function run() {
-        //const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
-        const { structures, models } = await readCIF('e:/test/quick/3j3q_full.bcif');
+        const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
+        //const { structures, models } = await readCIF('e:/test/quick/3j3q_full.bcif');
         //const { structures, models } = await readCIF('e:/test/quick/3j3q_updated.cif');
 
+        //const { structures, models } = await readCIF('e:/test/molstar/3j3q.bcif');
+
+        // fs.writeFileSync('e:/test/molstar/3j3q.bcif', to_mmCIF('test', structures[0], true));
+        // return;
+
         // console.log(toMmCIFString('test', structures[0]));
         // return;
 
@@ -280,8 +285,8 @@ export namespace PropertyAccess {
         chainTest: Q.pred.inSet(P.chain.auth_asym_id, ['A', 'B', 'C', 'D']),
         residueTest: Q.pred.eq(P.residue.auth_comp_id, 'ALA')
     });
-        // const q0r = q(structures[0]);
-        // console.log(toMmCIFString('test', Selection.union(q0r)));
+        const q0r = q(structures[0]);
+        console.log(to_mmCIF('test', Selection.union(q0r)));
 
         console.time('q1')
         q1(structures[0]);
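For reference, a minimal usage sketch of the new converter entry point introduced by this patch (not part of the diff; the file paths and the asText flag values shown here are illustrative only):

```ts
// Sketch only: drives the new cif2bcif converter from Node, mirroring src/apps/cif2bcif.ts above.
// Input/output paths are hypothetical; `convert` is the default export of src/apps/cif2bcif/converter.ts.
import * as fs from 'fs'
import convert from './cif2bcif/converter'

async function example() {
    // Default (asText = false): BinaryCIFEncoder is used and binary data is returned.
    const bcif = await convert('input.cif');
    fs.writeFileSync('output.bcif', bcif);

    // With asText = true, TextCIFEncoder is used instead and text CIF is produced.
    const text = await convert('input.cif', true);
    fs.writeFileSync('roundtrip.cif', text);
}

example();
```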