// Patch note: this change also adds the binary example file examples/1cbs_full.bcif
// (binary content, not representable as text).

// ---- src/reader/cif/binary/decoder.ts (new file) ----

/*
 * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
 *
 * From CIFTools.js
 * @author David Sehnal <david.sehnal@gmail.com>
 */

import { Encoding, EncodedData } from './encoding'

/**
 * Fixed point, delta, RLE, integer packing adopted from https://github.com/rcsb/mmtf-javascript/
 * by Alexander Rose <alexander.rose@weirdbyte.de>, MIT License, Copyright (c) 2016
 */

/**
 * Decodes `data.data` by undoing every entry of `data.encoding`,
 * walking the list from its last entry back to its first.
 *
 * @param data Encoded bytes together with the list of encodings applied to them.
 * @returns The decoded values (a typed array, or a plain array for string data).
 * @throws Error if an encoding step or a byte-array element type is not supported.
 */
export default function decode(data: EncodedData): any[] {
    let current: any = data.data;
    for (let i = data.encoding.length - 1; i >= 0; i--) {
        current = decodeStep(current, data.encoding[i]);
    }
    return current as any[];
}

/**
 * Undoes a single encoding step.
 *
 * @param data Output of the previously undone step (or the raw bytes for the first one).
 * @param encoding Description of the step to undo.
 * @throws Error for an unknown encoding kind or ByteArray element type.
 */
function decodeStep(data: any, encoding: Encoding): any {
    switch (encoding.kind) {
        case 'ByteArray': {
            switch (encoding.type) {
                // Uint8 data is already in its final representation.
                case Encoding.IntDataType.Uint8: return data;
                case Encoding.IntDataType.Int8: return int8(data);
                case Encoding.IntDataType.Int16: return int16(data);
                case Encoding.IntDataType.Uint16: return uint16(data);
                case Encoding.IntDataType.Int32: return int32(data);
                case Encoding.IntDataType.Uint32: return uint32(data);
                case Encoding.FloatDataType.Float32: return float32(data);
                case Encoding.FloatDataType.Float64: return float64(data);
                default: throw new Error('Unsupported ByteArray type.')
            }
        }
        case 'FixedPoint': return fixedPoint(data, encoding);
        case 'IntervalQuantization': return intervalQuantization(data, encoding);
        case 'RunLength': return runLength(data, encoding);
        case 'Delta': return delta(data, encoding);
        case 'IntegerPacking': return integerPacking(data, encoding);
        case 'StringArray': return stringArray(data, encoding);
        // The encoding list comes from file content, so an unknown kind must fail
        // loudly instead of silently turning the decoded value into `undefined`.
        default: throw new Error(`Unsupported encoding kind '${(encoding as any).kind}'.`);
    }
}

/**
 * Allocates a zero-filled integer typed array of the requested element type.
 *
 * @throws Error if `type` is not one of the integer data types.
 */
function getIntArray(type: Encoding.IntDataType, size: number) {
    switch (type) {
        case Encoding.IntDataType.Int8: return new Int8Array(size);
        case Encoding.IntDataType.Int16: return new Int16Array(size);
        case Encoding.IntDataType.Int32: return new Int32Array(size);
        case Encoding.IntDataType.Uint8: return new Uint8Array(size);
        case Encoding.IntDataType.Uint16: return new Uint16Array(size);
        case Encoding.IntDataType.Uint32: return new Uint32Array(size);
        default: throw new Error('Unsupported integer data type.');
    }
}

/**
 * Allocates a zero-filled floating point typed array of the requested element type.
 *
 * @throws Error if `type` is not one of the float data types.
 */
function getFloatArray(type: Encoding.FloatDataType, size: number) {
    switch (type) {
        case Encoding.FloatDataType.Float32: return new Float32Array(size);
        case Encoding.FloatDataType.Float64: return new Float64Array(size);
        default: throw new Error('Unsupported floating data type.');
    }
}

/* http://stackoverflow.com/questions/7869752/javascript-typed-arrays-and-endianness */
/**
 * True when the platform stores multi-byte typed array elements little-endian:
 * the bytes [0xAA, 0xBB] then read back as the 16-bit value 0xBBAA.
 */
const isLittleEndian = (function () {
    const arrayBuffer = new ArrayBuffer(2);
    const uint8Array = new Uint8Array(arrayBuffer);
    const uint16array = new Uint16Array(arrayBuffer);
    uint8Array[0] = 0xAA;
    uint8Array[1] = 0xBB;
    return uint16array[0] === 0xBBAA;
})();

/**
 * Reinterprets the bytes as signed 8-bit integers without copying.
 * The view is limited to the window `data` covers, not the whole underlying buffer.
 */
function int8(data: Uint8Array) { return new Int8Array(data.buffer, data.byteOffset, data.byteLength); }

/**
 * Returns a new buffer in which every group of `bytes` consecutive bytes of `data`
 * is stored in reverse order.
 */
function flipByteOrder(data: Uint8Array, bytes: number) {
    let buffer = new ArrayBuffer(data.length);
    let ret = new Uint8Array(buffer);
    for (let i = 0, n = data.length; i < n; i += bytes) {
        for (let j = 0; j < bytes; j++) {
            ret[i + bytes - j - 1] = data[i + j];
        }
    }
    return buffer;
}

/**
 * Reinterprets the bytes of `data` as elements of the typed array class `c`.
 *
 * @param data Source bytes; may be a window onto a larger buffer.
 * @param byteSize Size of one element of `c` in bytes.
 * @param c Typed array constructor to instantiate.
 */
function view<T>(data: Uint8Array, byteSize: number, c: new (buffer: ArrayBufferLike, byteOffset?: number, length?: number) => T) {
    // On a big-endian platform the bytes of every element are swapped into a fresh buffer.
    if (!isLittleEndian) return new c(flipByteOrder(data, byteSize));
    // Zero-copy view restricted to the bytes that belong to `data`. A typed array view
    // requires its byte offset to be a multiple of the element size.
    if (data.byteOffset % byteSize === 0 && data.byteLength % byteSize === 0) {
        return new c(data.buffer, data.byteOffset, data.byteLength / byteSize);
    }
    // Unaligned window: copy the bytes into a standalone buffer first.
    // `new Uint8Array(data)` always copies, whereas `slice` on a Node Buffer returns a view.
    return new c(new Uint8Array(data).buffer);
}

// ---- src/reader/cif/binary/decoder.ts (continued) ----

/** Reinterprets the bytes as 16-bit signed integers. */
function int16(data: Uint8Array) { return view(data, 2, Int16Array); }
/** Reinterprets the bytes as 16-bit unsigned integers. */
function uint16(data: Uint8Array) { return view(data, 2, Uint16Array); }
/** Reinterprets the bytes as 32-bit signed integers. */
function int32(data: Uint8Array) { return view(data, 4, Int32Array); }
/** Reinterprets the bytes as 32-bit unsigned integers. */
function uint32(data: Uint8Array) { return view(data, 4, Uint32Array); }
/** Reinterprets the bytes as 32-bit floats. */
function float32(data: Uint8Array) { return view(data, 4, Float32Array); }
/** Reinterprets the bytes as 64-bit floats. */
function float64(data: Uint8Array) { return view(data, 8, Float64Array); }

/**
 * Undoes fixed point encoding: every integer is multiplied by `1 / encoding.factor`.
 *
 * @returns A float array of type `encoding.srcType` with the same length as `data`.
 */
function fixedPoint(data: Int32Array, encoding: Encoding.FixedPoint) {
    let n = data.length;
    let output = getFloatArray(encoding.srcType, n);
    let f = 1 / encoding.factor;
    for (let i = 0; i < n; i++) {
        output[i] = f * data[i];
    }
    return output;
}

/**
 * Undoes interval quantization: index `k` maps to `min + k * (max - min) / (numSteps - 1)`.
 *
 * @returns A float array of type `encoding.srcType` with the same length as `data`.
 */
function intervalQuantization(data: Int32Array, encoding: Encoding.IntervalQuantization) {
    let n = data.length;
    let output = getFloatArray(encoding.srcType, n);
    let delta = (encoding.max - encoding.min) / (encoding.numSteps - 1)
    let min = encoding.min;
    for (let i = 0; i < n; i++) {
        output[i] = min + delta * data[i];
    }
    return output;
}

/**
 * Undoes run-length encoding. `data` is read as consecutive (value, repeat count) pairs.
 *
 * @returns An integer array of type `encoding.srcType` and length `encoding.srcSize`.
 */
function runLength(data: Int32Array, encoding: Encoding.RunLength) {
    let output = getIntArray(encoding.srcType, encoding.srcSize);
    let dataOffset = 0;
    for (let i = 0, il = data.length; i < il; i += 2) {
        let value = data[i];  // value to be repeated
        let length = data[i + 1];  // number of repeats
        for (let j = 0; j < length; ++j) {
            output[dataOffset++] = value;
        }
    }
    return output;
}

/**
 * Undoes delta encoding: the first output is `data[0] + origin`, every following
 * output is the current input plus the previous output.
 *
 * @returns An integer array of type `encoding.srcType` with the same length as `data`.
 */
function delta(data: (Int8Array | Int16Array | Int32Array), encoding: Encoding.Delta) {
    let n = data.length;
    let output = getIntArray(encoding.srcType, n);
    if (!n) return output;
    // `| 0` turns a missing origin into 0.
    output[0] = data[0] + (encoding.origin | 0);
    for (let i = 1; i < n; ++i) {
        output[i] = data[i] + output[i - 1];
    }
    return output;
}

/** Arrays that can hold integer-packed data (1 or 2 bytes per element, signed or unsigned). */
type PackedIntArray = Int8Array | Int16Array | Uint8Array | Uint16Array;

/**
 * Unpacks signed integer-packed data. An element equal to the upper or lower limit of the
 * packed type is added to the running value and the value continues in the next element.
 *
 * @returns An Int32Array of length `encoding.srcSize`.
 */
function integerPackingSigned(data: PackedIntArray, encoding: Encoding.IntegerPacking) {
    let upperLimit = encoding.byteCount === 1 ? 0x7F : 0x7FFF;
    let lowerLimit = -upperLimit - 1;
    let n = data.length;
    let output = new Int32Array(encoding.srcSize);
    let i = 0;
    let j = 0;
    while (i < n) {
        let value = 0, t = data[i];
        while (t === upperLimit || t === lowerLimit) {
            value += t;
            i++;
            t = data[i];
        }
        value += t;
        output[j] = value;
        i++;
        j++;
    }
    return output;
}

/**
 * Unpacks unsigned integer-packed data. An element equal to the upper limit of the
 * packed type is added to the running value and the value continues in the next element.
 *
 * @returns An Int32Array of length `encoding.srcSize`.
 */
function integerPackingUnsigned(data: PackedIntArray, encoding: Encoding.IntegerPacking) {
    let upperLimit = encoding.byteCount === 1 ? 0xFF : 0xFFFF;
    let n = data.length;
    let output = new Int32Array(encoding.srcSize);
    let i = 0;
    let j = 0;
    while (i < n) {
        let value = 0, t = data[i];
        while (t === upperLimit) {
            value += t;
            i++;
            t = data[i];
        }
        value += t;
        output[j] = value;
        i++;
        j++;
    }
    return output;
}

/** Undoes integer packing, choosing the signed or unsigned variant from `encoding.isUnsigned`. */
function integerPacking(data: PackedIntArray, encoding: Encoding.IntegerPacking) {
    return encoding.isUnsigned ? integerPackingUnsigned(data, encoding) : integerPackingSigned(data, encoding);
}

/**
 * Decodes a string column. The offsets and the per-row indices are decoded first;
 * row `r` with index `i` then yields `stringData.substring(offsets[i], offsets[i + 1])`.
 * A negative index yields `null`. Substrings are created once per distinct index.
 *
 * @returns A plain array with one entry (string or null) per decoded index.
 */
function stringArray(data: Uint8Array, encoding: Encoding.StringArray) {
    let str = encoding.stringData;
    let offsets = decode({ encoding: encoding.offsetEncoding, data: encoding.offsets });
    let indices = decode({ encoding: encoding.dataEncoding, data });
    let cache: any = Object.create(null);
    let result = new Array(indices.length);
    let offset = 0;
    for (let i of indices) {
        if (i < 0) {
            result[offset++] = null;
            continue;
        }
        let v = cache[i];
        if (v === void 0) {
            v = str.substring(offsets[i], offsets[i + 1]);
            cache[i] = v;
        }
        result[offset++] = v;
    }
    return result;
}

// ---- src/reader/cif/binary/encoding.ts (new file) ----

/*
 * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
 *
 * From CIFTools.js
 * @author David Sehnal <david.sehnal@gmail.com>
 */

export const VERSION = '0.3.0';

/** Any single encoding step that can appear in an `EncodedData.encoding` list. */
export type Encoding =
    | Encoding.ByteArray
    | Encoding.FixedPoint
    | Encoding.RunLength
    | Encoding.Delta
    | Encoding.IntervalQuantization
    | Encoding.IntegerPacking
    | Encoding.StringArray;

export interface EncodedFile {
    version: string,
    encoder: string,
    dataBlocks: EncodedDataBlock[]
}

export interface EncodedDataBlock {
    header: string,
    categories: EncodedCategory[],
}

export interface EncodedCategory {
    name: string,
    rowCount: number,
    columns: EncodedColumn[],
}

export interface EncodedColumn {
    name: string,
    data: EncodedData,

    /**
     * The mask represents the presence or absence of a particular "CIF value".
     * If the mask is not set, every value is present.
     *
     * 0 = Value is present
     * 1 = . = value not specified
     * 2 = ? = value unknown
     */
    mask?: EncodedData
}

/** Raw bytes plus the list of encoding steps that were applied to produce them. */
export interface EncodedData {
    encoding: Encoding[],
    data: Uint8Array
}

export namespace Encoding {

    export const enum IntDataType {
        Int8 = 1,
        Int16 = 2,
        Int32 = 3,
        Uint8 = 4,
        Uint16 = 5,
        Uint32 = 6,
    }

    export const enum FloatDataType {
        Float32 = 32,
        Float64 = 33
    }

    export type DataType = IntDataType | FloatDataType

    export type IntArray = Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array
    export type FloatArray = Float32Array | Float64Array

    /**
     * Maps a typed array instance to the matching data type tag.
     *
     * @throws Error if `data` is not one of the supported integer or float typed arrays.
     */
    export function getDataType(data: IntArray | FloatArray): DataType {
        if (data instanceof Int8Array) return Encoding.IntDataType.Int8;
        else if (data instanceof Int16Array) return Encoding.IntDataType.Int16;
        else if (data instanceof Int32Array) return Encoding.IntDataType.Int32;
        else if (data instanceof Uint8Array) return Encoding.IntDataType.Uint8;
        else if (data instanceof Uint16Array) return Encoding.IntDataType.Uint16;
        else if (data instanceof Uint32Array) return Encoding.IntDataType.Uint32;
        else if (data instanceof Float32Array) return Encoding.FloatDataType.Float32;
        else if (data instanceof Float64Array) return Encoding.FloatDataType.Float64;
        // This function covers float arrays as well, so the message must not say "integer".
        throw new Error('Unsupported data type.');
    }

    /** True for Int8Array, Int16Array and Int32Array; false for the unsigned integer arrays. */
    export function isSignedIntegerDataType(data: IntArray) {
        return data instanceof Int8Array || data instanceof Int16Array || data instanceof Int32Array;
    }

    // type[] -> Uint8[]
    export interface ByteArray {
        kind: 'ByteArray',
        type: DataType
    }

    // (Float32 | Float64)[] -> Int32[]
    export interface FixedPoint {
        kind: 'FixedPoint',
        factor: number,
        srcType: FloatDataType
    }

    // (Float32 | Float64)[] -> Int32[]
    export interface IntervalQuantization {
        kind: 'IntervalQuantization',
        min: number,
        max: number,
        numSteps: number,
        srcType: FloatDataType
    }

    // (Uint8 | Int8 | Int16 | Int32)[] -> Int32[]
    export interface RunLength {
        kind: 'RunLength',
        srcType: IntDataType,
        srcSize: number
    }

    // T=(Int8Array | Int16Array | Int32Array)[] -> T[]
    export interface Delta {
        kind: 'Delta',
        origin: number,
        srcType: IntDataType
    }

    // Int32[] -> (Int8 | Int16 | Uint8 | Uint16)[]
    export interface IntegerPacking {
        kind: 'IntegerPacking',
        byteCount: number,
        isUnsigned: boolean,
        srcSize: number
    }

    // string[] -> Uint8[]
    // stores 0 and indices of ends of strings:
    // stringData = '123456'
    // offsets = [0,2,5,6]
    // encodes ['12','345','6']
    export interface StringArray {
        kind: 'StringArray',
        dataEncoding: Encoding[],
        stringData: string,
        offsetEncoding: Encoding[],
        offsets: Uint8Array
    }

}

// ---- src/reader/cif/binary/field.ts (replaces the previous "// TODO" placeholder) ----

/*
 * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

import * as Column from '../../common/column'
import * as Data from '../data-model'
import { EncodedColumn } from './encoding'
import decode from './decoder'
import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../../common/text/number-parser'

/**
 * Builds a `Data.Field` accessor over an encoded column.
 * The column data (and its mask, when present) are decoded once, up front.
 *
 * @param column Encoded column as read from the file.
 */
export default function Field(column: EncodedColumn): Data.Field {
    const mask = column.mask ? decode(column.mask) as number[] : void 0;
    const data = decode(column.data);
    // Numeric columns decode to typed arrays, string columns to plain arrays.
    // `ArrayBuffer.isView` also classifies zero-length typed arrays correctly.
    const isNumeric = ArrayBuffer.isView(data);

    // Rows whose mask entry is not "present" read as the empty string.
    const str: Data.Field['str'] = isNumeric
        ? mask
            ? row => mask[row] === Data.ValuePresence.Present ? '' + data[row] : ''
            : row => '' + data[row]
        : mask
            ? row => mask[row] === Data.ValuePresence.Present ? data[row] : ''
            : row => data[row];

    // String columns are parsed on demand; numeric columns are returned as stored.
    const int: Data.Field['int'] = isNumeric
        ? row => data[row]
        : row => { const v = data[row]; return fastParseInt(v, 0, v.length); };

    const float: Data.Field['float'] = isNumeric
        ? row => data[row]
        : row => { const v = data[row]; return fastParseFloat(v, 0, v.length); };

    const presence: Data.Field['presence'] = mask
        ? row => mask[row]
        : row => Data.ValuePresence.Present;

    const rowCount = data.length;

    return {
        isDefined: true,
        rowCount,
        str,
        int,
        float,
        presence,
        areValuesEqual: (rowA, rowB) => data[rowA] === data[rowB],
        stringEquals(row, v) { return str(row) === v; },
        toStringArray(params) { return Column.createAndFillArray(rowCount, str, params); },
        toIntArray(params) { return Column.createAndFillArray(rowCount, int, params); },
        toFloatArray(params) { return Column.createAndFillArray(rowCount, float, params); }
    };
}

// ---- src/reader/cif/binary/parser.ts (replaces the previous "// TODO" placeholder) ----

/*
 * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

import * as Data from '../data-model'
import * as Encoding from './encoding'
import Field from './field'
import Result from '../../result'
import decodeMsgPack from '../../../utils/msgpack/decode'

/**
 * Returns true when `current` (major, minor) is at least `min` (major, minor).
 * The comparison is lexicographic: a higher major version satisfies the minimum
 * regardless of the minor version.
 */
function checkVersions(min: number[], current: number[]) {
    for (let i = 0; i < 2; i++) {
        if (current[i] > min[i]) return true;
        if (current[i] < min[i]) return false;
    }
    return true;
}

/**
 * Wraps an encoded category. Columns are decoded lazily on the first `getField`
 * call for a name and the resulting field is reused by later calls.
 */
function Category(data: Encoding.EncodedCategory): Data.Category {
    const map = Object.create(null);
    const cache = Object.create(null);
    for (const col of data.columns) map[col.name] = col;
    return {
        rowCount: data.rowCount,
        getField(name) {
            const col = map[name];
            if (!col) return Data.DefaultUndefinedField(data.rowCount);
            // Decoding a column is the expensive part; do it once per column.
            return cache[name] || (cache[name] = Field(col));
        }
    }
}

/**
 * Parses a MessagePack-encoded BinaryCIF file.
 *
 * @param data Raw file bytes.
 * @returns A success result with the parsed file, or an error result when the
 *          format version is missing/too old or decoding throws.
 */
export default function parse(data: Uint8Array): Result<Data.File> {
    const minVersion = [0, 3];

    try {
        const unpacked = decodeMsgPack(data) as Encoding.EncodedFile;
        // Multi-digit components (e.g. "0.10.0") must be accepted as well.
        const version = /(\d+)\.(\d+)\.\d+/.exec(unpacked.version);
        if (!version || !checkVersions(minVersion, [+version[1], +version[2]])) {
            return Result.error<Data.File>(`Unsupported format version. Current ${unpacked.version}, required ${minVersion.join('.')}.`);
        }
        const file = Data.File(unpacked.dataBlocks.map(block => {
            const cats = Object.create(null);
            for (const cat of block.categories) cats[cat.name] = Category(cat);
            return Data.Block(cats, block.header);
        }));
        return Result.success(file);
    } catch (e) {
        return Result.error<Data.File>('' + e);
    }
}

// ---- src/reader/cif/index.ts (modified; the file header comment lies outside the visible hunk) ----

import parseText from './text/parser'
import parseBinary from './binary/parser'
import { Block } from './data-model'
import { apply as applySchema } from './schema'
import mmCIF from './schema/mmcif'

export default {
    parseText,
    parseBinary,
    applySchema,
    schema: {
        mmCIF: (block: Block) => applySchema(mmCIF, block)
    }
}

export * from './data-model'

// ---- src/script.ts (modified; the hunk starts right after the end of _gro()) ----

/**
 * Parses the input as text CIF (string) or BinaryCIF (bytes), logs the parse time,
 * and prints a few sample values from the first data block.
 */
function runCIF(input: string | Uint8Array) {
    console.time('parseCIF');
    const parsed = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input);
    console.timeEnd('parseCIF');
    if (parsed.isError) {
        console.log(parsed);
        return;
    }

    const data = parsed.result.blocks[0];
    const atom_site = data.categories._atom_site;
    console.log(atom_site.getField('Cartn_x')!.float(0));
    //console.log(atom_site.getField('label_atom_id')!.toStringArray());

    const mmcif = CIF.schema.mmCIF(data);
    console.log(mmcif.atom_site.Cartn_x.value(0));
    console.log(mmcif.entity.type.toArray());
    console.log(mmcif.pdbx_struct_oper_list.matrix.value(0));
}

/** Runs the CIF demo on the text example and on the BinaryCIF example. */
export function _cif() {
    let path = `./examples/1cbs_updated.cif`;
    //path = 'c:/test/quick/3j3q.cif';
    fs.readFile(path, 'utf8', function (err, input) {
        if (err) {
            return console.log(err);
        }
        console.log('------------------');
        console.log('Text CIF:');
        runCIF(input);
    });

    path = `./examples/1cbs_full.bcif`;
    //path = 'c:/test/quick/3j3q.cif';
    fs.readFile(path, function (err, input) {
        if (err) {
            return console.log(err);
        }
        console.log('------------------');
        console.log('BinaryCIF:');
        // Copy the bytes into a standalone Uint8Array and parse that copy,
        // not the original Node buffer.
        const data = new Uint8Array(input);
        runCIF(data);
    });
}