Skip to content
Snippets Groups Projects
Commit e987d73d authored by David Sehnal's avatar David Sehnal
Browse files

BinaryCIF support

parent 8b1552d3
No related branches found
No related tags found
No related merge requests found
File added
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* From CIFTools.js
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { Encoding, EncodedData } from './encoding'
/**
* Fixed point, delta, RLE, integer packing adopted from https://github.com/rcsb/mmtf-javascript/
* by Alexander Rose <alexander.rose@weirdbyte.de>, MIT License, Copyright (c) 2016
*/
/**
 * Decodes an encoded data payload.
 *
 * The encodings listed in `data.encoding` are undone from the last entry to
 * the first, starting with the raw `data.data` bytes; the output of each step
 * is the input of the next one.
 *
 * @param data Raw bytes together with the chain of encodings describing them.
 * @returns Whatever the final decoding step produces.
 */
export default function decode(data: EncodedData): any[] {
    const steps = data.encoding;
    let decoded: any = data.data;
    let index = steps.length;
    while (index-- > 0) {
        decoded = decodeStep(decoded, steps[index]);
    }
    return decoded as any[];
}
/**
 * Undoes a single encoding step by dispatching on `encoding.kind`.
 *
 * @param data Output of the previously undone step (or the raw bytes for the first one).
 * @param encoding Descriptor of the encoding to undo.
 * @returns The decoded data for this step.
 * @throws Error when the ByteArray element type or the encoding kind is not recognized.
 */
function decodeStep(data: any, encoding: Encoding): any {
    switch (encoding.kind) {
        case 'ByteArray': {
            switch (encoding.type) {
                case Encoding.IntDataType.Uint8: return data;
                case Encoding.IntDataType.Int8: return int8(data);
                case Encoding.IntDataType.Int16: return int16(data);
                case Encoding.IntDataType.Uint16: return uint16(data);
                case Encoding.IntDataType.Int32: return int32(data);
                case Encoding.IntDataType.Uint32: return uint32(data);
                case Encoding.FloatDataType.Float32: return float32(data);
                case Encoding.FloatDataType.Float64: return float64(data);
                default: throw new Error('Unsupported ByteArray type.')
            }
        }
        case 'FixedPoint': return fixedPoint(data, encoding);
        case 'IntervalQuantization': return intervalQuantization(data, encoding);
        case 'RunLength': return runLength(data, encoding);
        case 'Delta': return delta(data, encoding);
        case 'IntegerPacking': return integerPacking(data, encoding);
        case 'StringArray': return stringArray(data, encoding);
        default:
            // The descriptor comes from a decoded file, so at runtime `kind` may be
            // anything. Fail loudly instead of returning `undefined`, which would
            // only surface later as an unrelated TypeError.
            throw new Error('Unsupported encoding kind.');
    }
}
/**
 * Allocates a zero-filled integer typed array matching the given data type.
 *
 * @param type Integer data type tag.
 * @param size Number of elements to allocate.
 * @throws Error when `type` is not one of the known integer data types.
 */
function getIntArray(type: Encoding.IntDataType, size: number) {
    if (type === Encoding.IntDataType.Int8) return new Int8Array(size);
    if (type === Encoding.IntDataType.Int16) return new Int16Array(size);
    if (type === Encoding.IntDataType.Int32) return new Int32Array(size);
    if (type === Encoding.IntDataType.Uint8) return new Uint8Array(size);
    if (type === Encoding.IntDataType.Uint16) return new Uint16Array(size);
    if (type === Encoding.IntDataType.Uint32) return new Uint32Array(size);
    throw new Error('Unsupported integer data type.');
}
/**
 * Allocates a zero-filled floating point typed array matching the given data type.
 *
 * @param type Floating point data type tag.
 * @param size Number of elements to allocate.
 * @throws Error when `type` is not one of the known floating point data types.
 */
function getFloatArray(type: Encoding.FloatDataType, size: number) {
    if (type === Encoding.FloatDataType.Float32) return new Float32Array(size);
    if (type === Encoding.FloatDataType.Float64) return new Float64Array(size);
    throw new Error('Unsupported floating data type.');
}
/*
 * True when the platform stores multi-byte numbers least significant byte first.
 * See http://stackoverflow.com/questions/7869752/javascript-typed-arrays-and-endianness
 */
const isLittleEndian = (() => {
    // Store a 16-bit word and check which of its two bytes ends up first in memory.
    const word = new Uint16Array([0xBBAA]);
    const bytes = new Uint8Array(word.buffer);
    return bytes[0] === 0xAA;
})();
/**
 * Reinterprets the given bytes as signed 8-bit integers without copying.
 *
 * The view is limited to exactly the bytes covered by `data`; without the
 * explicit length a subarray input would expose everything up to the end of
 * its backing buffer.
 */
function int8(data: Uint8Array) { return new Int8Array(data.buffer, data.byteOffset, data.byteLength); }
/**
 * Copies `data` into a fresh buffer while reversing the byte order inside
 * every consecutive group of `bytes` bytes.
 *
 * @param data Source bytes; left untouched.
 * @param bytes Size of one group (element) in bytes.
 * @returns A new ArrayBuffer of `data.length` bytes holding the flipped groups.
 */
function flipByteOrder(data: Uint8Array, bytes: number) {
    const total = data.length;
    const flippedBuffer = new ArrayBuffer(total);
    const flipped = new Uint8Array(flippedBuffer);
    for (let start = 0; start < total; start += bytes) {
        const last = start + bytes - 1;
        for (let k = 0; k < bytes; k++) {
            flipped[last - k] = data[start + k];
        }
    }
    return flippedBuffer;
}
/**
 * Reinterprets little-endian bytes as a typed array of type `c`.
 *
 * @param data Bytes to reinterpret; may be a view into a larger buffer.
 * @param byteSize Size of one element of the target array in bytes.
 * @param c Typed array constructor taking an ArrayBuffer.
 * @returns A typed array covering exactly the bytes of `data`.
 */
function view<T>(data: Uint8Array, byteSize: number, c: new (buffer: ArrayBuffer) => T) {
    // On big-endian platforms the bytes of every element have to be swapped;
    // flipByteOrder already produces a tightly sized copy.
    if (!isLittleEndian) return new c(flipByteOrder(data, byteSize));

    const buffer = data.buffer as ArrayBuffer;
    // Fast path: the bytes span the whole backing buffer, so it can be shared as is.
    if (data.byteOffset === 0 && data.byteLength === buffer.byteLength) return new c(buffer);

    // `data` is a window into a larger buffer. Viewing the whole buffer would
    // expose unrelated bytes, and an in-place view could be misaligned for
    // multi-byte elements, so copy just the window into its own buffer.
    return new c(buffer.slice(data.byteOffset, data.byteOffset + data.byteLength));
}
/** Reinterprets raw bytes as 16-bit signed integers. */
function int16(data: Uint8Array) {
    return view(data, Int16Array.BYTES_PER_ELEMENT, Int16Array);
}
/** Reinterprets raw bytes as 16-bit unsigned integers. */
function uint16(data: Uint8Array) {
    return view(data, Uint16Array.BYTES_PER_ELEMENT, Uint16Array);
}
/** Reinterprets raw bytes as 32-bit signed integers. */
function int32(data: Uint8Array) {
    return view(data, Int32Array.BYTES_PER_ELEMENT, Int32Array);
}
/** Reinterprets raw bytes as 32-bit unsigned integers. */
function uint32(data: Uint8Array) {
    return view(data, Uint32Array.BYTES_PER_ELEMENT, Uint32Array);
}
/** Reinterprets raw bytes as 32-bit floats. */
function float32(data: Uint8Array) {
    return view(data, Float32Array.BYTES_PER_ELEMENT, Float32Array);
}
/** Reinterprets raw bytes as 64-bit floats. */
function float64(data: Uint8Array) {
    return view(data, Float64Array.BYTES_PER_ELEMENT, Float64Array);
}
/**
 * Undoes fixed point encoding: every integer is multiplied by `1 / encoding.factor`.
 *
 * @param data Fixed point integers.
 * @param encoding Descriptor providing the scale factor and the float type of the result.
 * @returns A float array (type chosen by `encoding.srcType`) with one value per input element.
 */
function fixedPoint(data: Int32Array, encoding: Encoding.FixedPoint) {
    const count = data.length;
    const decoded = getFloatArray(encoding.srcType, count);
    const scale = 1 / encoding.factor;
    let index = 0;
    while (index < count) {
        decoded[index] = scale * data[index];
        index++;
    }
    return decoded;
}
/**
 * Undoes interval quantization: step index `k` is mapped to
 * `min + k * (max - min) / (numSteps - 1)`.
 *
 * @param data Step indices.
 * @param encoding Descriptor providing the interval, the step count and the float type of the result.
 * @returns A float array (type chosen by `encoding.srcType`) with one value per input element.
 */
function intervalQuantization(data: Int32Array, encoding: Encoding.IntervalQuantization) {
    const count = data.length;
    const decoded = getFloatArray(encoding.srcType, count);
    const lowerBound = encoding.min;
    const stepSize = (encoding.max - lowerBound) / (encoding.numSteps - 1);
    let index = 0;
    while (index < count) {
        decoded[index] = lowerBound + stepSize * data[index];
        index++;
    }
    return decoded;
}
/**
 * Expands run-length encoded data given as consecutive (value, repeat count) pairs.
 *
 * @param data Flattened pairs: even positions hold values, odd positions hold repeat counts.
 * @param encoding Descriptor providing the element type and the length of the expanded array.
 * @returns An integer array of `encoding.srcSize` elements.
 */
function runLength(data: Int32Array, encoding: Encoding.RunLength) {
    const expanded = getIntArray(encoding.srcType, encoding.srcSize);
    const pairedLength = data.length;
    let writeAt = 0;
    for (let pair = 0; pair < pairedLength; pair += 2) {
        const value = data[pair];
        let remaining = data[pair + 1];
        while (remaining-- > 0) {
            expanded[writeAt++] = value;
        }
    }
    return expanded;
}
/**
 * Undoes delta encoding by accumulating the differences, starting from `encoding.origin`.
 *
 * @param data Differences between consecutive values.
 * @param encoding Descriptor providing the origin and the element type of the result.
 * @returns An integer array with the same number of elements as `data`.
 */
function delta(data: (Int8Array | Int16Array | Int32Array), encoding: Encoding.Delta) {
    const count = data.length;
    const restored = getIntArray(encoding.srcType, count);
    if (count === 0) return restored;

    restored[0] = data[0] + (encoding.origin | 0);
    // Read the previous value back from the typed array so that it has the
    // target element type's representation before it is added to the next delta.
    let previous = restored[0];
    for (let index = 1; index < count; index++) {
        restored[index] = data[index] + previous;
        previous = restored[index];
    }
    return restored;
}
/**
 * Unpacks signed integer-packed data.
 *
 * An element equal to the upper or lower limit of the packed type means
 * "keep adding": such elements are summed together with the first following
 * element that is not at a limit, and the sum becomes one output value.
 *
 * @param data Packed elements.
 * @param encoding Descriptor; `byteCount === 1` selects 8-bit limits, anything else 16-bit limits.
 * @returns An Int32Array of `encoding.srcSize` elements.
 */
function integerPackingSigned(data: (Int8Array | Int16Array), encoding: Encoding.IntegerPacking) {
    const max = encoding.byteCount === 1 ? 0x7F : 0x7FFF;
    const min = -max - 1;
    const packedLength = data.length;
    const unpacked = new Int32Array(encoding.srcSize);
    let writeAt = 0;
    for (let readAt = 0; readAt < packedLength; readAt++) {
        let sum = 0;
        let part = data[readAt];
        while (part === max || part === min) {
            sum += part;
            part = data[++readAt];
        }
        unpacked[writeAt++] = sum + part;
    }
    return unpacked;
}
/**
 * Unpacks unsigned integer-packed data.
 *
 * An element equal to the upper limit of the packed type means "keep adding":
 * such elements are summed together with the first following element that is
 * below the limit, and the sum becomes one output value.
 *
 * @param data Packed elements.
 * @param encoding Descriptor; `byteCount === 1` selects the 8-bit limit, anything else the 16-bit limit.
 * @returns An Int32Array of `encoding.srcSize` elements.
 */
function integerPackingUnsigned(data: (Int8Array | Int16Array), encoding: Encoding.IntegerPacking) {
    const max = encoding.byteCount === 1 ? 0xFF : 0xFFFF;
    const packedLength = data.length;
    const unpacked = new Int32Array(encoding.srcSize);
    let writeAt = 0;
    for (let readAt = 0; readAt < packedLength; readAt++) {
        let sum = 0;
        let part = data[readAt];
        while (part === max) {
            sum += part;
            part = data[++readAt];
        }
        unpacked[writeAt++] = sum + part;
    }
    return unpacked;
}
/**
 * Unpacks integer-packed data, choosing the unsigned or signed routine
 * according to `encoding.isUnsigned`.
 */
function integerPacking(data: (Int8Array | Int16Array), encoding: Encoding.IntegerPacking) {
    if (encoding.isUnsigned) {
        return integerPackingUnsigned(data, encoding);
    }
    return integerPackingSigned(data, encoding);
}
/**
 * Decodes a string column.
 *
 * All distinct strings are stored concatenated in `encoding.stringData`;
 * string `k` spans `offsets[k]` to `offsets[k + 1]`. The decoded `data` holds,
 * for every row, the index of its string; a negative index yields `null`.
 *
 * @param data Encoded per-row string indices.
 * @param encoding Descriptor with the string data, the offsets and their encodings.
 * @returns An array with one string (or `null`) per row.
 */
function stringArray(data: Uint8Array, encoding: Encoding.StringArray) {
    const text = encoding.stringData;
    const offsets = decode({ encoding: encoding.offsetEncoding, data: encoding.offsets });
    const indices = decode({ encoding: encoding.dataEncoding, data });
    // Each distinct string is materialized once and shared by all rows using it.
    const seen: any = Object.create(null);
    const rowCount = indices.length;
    const strings = new Array(rowCount);
    for (let row = 0; row < rowCount; row++) {
        const index = indices[row];
        if (index < 0) {
            strings[row] = null;
            continue;
        }
        let value = seen[index];
        if (value === undefined) {
            value = text.substring(offsets[index], offsets[index + 1]);
            seen[index] = value;
        }
        strings[row] = value;
    }
    return strings;
}
\ No newline at end of file
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* From CIFTools.js
* @author David Sehnal <david.sehnal@gmail.com>
*/
/** Version string exported by this module. NOTE(review): presumably the version of the binary format this code targets - confirm against the encoder. */
export const VERSION = '0.3.0';
/**
* Union of all encoding descriptors. Every member carries a distinct string
* literal in its `kind` property, which can be used to tell the members apart.
*/
export type Encoding =
| Encoding.ByteArray
| Encoding.FixedPoint
| Encoding.RunLength
| Encoding.Delta
| Encoding.IntervalQuantization
| Encoding.IntegerPacking
| Encoding.StringArray;
/** Top-level structure of an encoded file. */
export interface EncodedFile {
// Version string of the file.
version: string,
// NOTE(review): presumably identifies the software that produced the file - confirm.
encoder: string,
// Data blocks contained in the file.
dataBlocks: EncodedDataBlock[]
}
/** One data block of an encoded file: a header string plus its categories. */
export interface EncodedDataBlock {
header: string,
categories: EncodedCategory[],
}
/** A named category consisting of encoded columns. */
export interface EncodedCategory {
name: string,
// Row count of the category. NOTE(review): presumably every column decodes to this many values - confirm.
rowCount: number,
columns: EncodedColumn[],
}
/** A named column: its encoded values plus an optional value-presence mask. */
export interface EncodedColumn {
name: string,
data: EncodedData,
/**
* The mask represents the presence or absence of a particular "CIF value".
* If the mask is not set, every value is present.
*
* 0 = Value is present
* 1 = . = value not specified
* 2 = ? = value unknown
*/
mask?: EncodedData
}
/** Raw bytes together with the list of encodings that describes how to interpret them. */
export interface EncodedData {
// Chain of encodings associated with `data`.
encoding: Encoding[],
// The encoded bytes.
data: Uint8Array
}
/** Data type tags, helper functions and descriptors of the individual encodings. */
export namespace Encoding {
    /** Tags identifying the supported integer element types. */
    export const enum IntDataType {
        Int8 = 1,
        Int16 = 2,
        Int32 = 3,
        Uint8 = 4,
        Uint16 = 5,
        Uint32 = 6,
    }

    /** Tags identifying the supported floating point element types. */
    export const enum FloatDataType {
        Float32 = 32,
        Float64 = 33
    }

    export type DataType = IntDataType | FloatDataType
    export type IntArray = Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array
    export type FloatArray = Float32Array | Float64Array

    /**
     * Returns the data type tag matching the concrete typed array class of `data`.
     *
     * @param data Integer or floating point typed array.
     * @returns The corresponding `IntDataType` or `FloatDataType` value.
     * @throws Error when `data` is none of the supported typed arrays.
     */
    export function getDataType(data: IntArray | FloatArray): DataType {
        if (data instanceof Int8Array) return IntDataType.Int8;
        if (data instanceof Int16Array) return IntDataType.Int16;
        if (data instanceof Int32Array) return IntDataType.Int32;
        if (data instanceof Uint8Array) return IntDataType.Uint8;
        if (data instanceof Uint16Array) return IntDataType.Uint16;
        if (data instanceof Uint32Array) return IntDataType.Uint32;
        if (data instanceof Float32Array) return FloatDataType.Float32;
        if (data instanceof Float64Array) return FloatDataType.Float64;
        // Float arrays are accepted as well, so the message must not claim
        // that only integer types are supported.
        throw new Error('Unsupported data type.');
    }

    /** Returns true when `data` is an Int8Array, Int16Array or Int32Array. */
    export function isSignedIntegerDataType(data: IntArray) {
        return data instanceof Int8Array || data instanceof Int16Array || data instanceof Int32Array;
    }

    /** type[] -> Uint8[]; `type` is the element type the bytes represent. */
    export interface ByteArray {
        kind: 'ByteArray',
        type: DataType
    }

    /** (Float32 | Float64)[] -> Int32[]; `srcType` is the float type of the source data. */
    export interface FixedPoint {
        kind: 'FixedPoint',
        factor: number,
        srcType: FloatDataType
    }

    /** (Float32 | Float64)[] -> Int32[]; values are represented by step indices within [min, max]. */
    export interface IntervalQuantization {
        kind: 'IntervalQuantization',
        min: number,
        max: number,
        numSteps: number,
        srcType: FloatDataType
    }

    /** (Uint8 | Int8 | Int16 | Int32)[] -> Int32[]; `srcSize` is the length of the source array. */
    export interface RunLength {
        kind: 'RunLength',
        srcType: IntDataType,
        srcSize: number
    }

    /** T=(Int8Array | Int16Array | Int32Array)[] -> T[] */
    export interface Delta {
        kind: 'Delta',
        origin: number,
        srcType: IntDataType
    }

    /** Int32[] -> (Int8 | Int16 | Uint8 | Uint16)[]; `srcSize` is the length of the source array. */
    export interface IntegerPacking {
        kind: 'IntegerPacking',
        byteCount: number,
        isUnsigned: boolean,
        srcSize: number
    }

    /**
     * string[] -> Uint8[]
     * stores 0 and indices of ends of strings:
     * stringData = '123456'
     * offsets = [0,2,5,6]
     * encodes ['12','345','6']
     */
    export interface StringArray {
        kind: 'StringArray',
        dataEncoding: Encoding[],
        stringData: string,
        offsetEncoding: Encoding[],
        offsets: Uint8Array
    }
}
\ No newline at end of file
// TODO
\ No newline at end of file
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import * as Column from '../../common/column'
import * as Data from '../data-model'
import { EncodedColumn } from './encoding'
import decode from './decoder'
import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../../common/text/number-parser'
/**
 * Builds a field accessor object on top of an encoded column.
 *
 * The column data (and the mask, when present) is decoded once up front;
 * the returned accessors read from the decoded arrays.
 *
 * @param column Encoded column to wrap.
 * @returns Accessors for string/int/float values, value presence and bulk conversion.
 */
export default function Field(column: EncodedColumn): Data.Field {
    const presenceMask = column.mask ? (decode(column.mask) as number[]) : undefined;
    const values = decode(column.data);
    // Typed arrays expose these three properties; plain arrays of strings do not.
    const isNumeric = (values as any).buffer && (values as any).byteLength && (values as any).BYTES_PER_ELEMENT;
    const rowCount = values.length;

    // String access: numbers are stringified, non-present values become ''.
    let str: Data.Field['str'];
    if (isNumeric) {
        str = presenceMask
            ? row => (presenceMask[row] === Data.ValuePresence.Present ? '' + values[row] : '')
            : row => '' + values[row];
    } else {
        str = presenceMask
            ? row => (presenceMask[row] === Data.ValuePresence.Present ? values[row] : '')
            : row => values[row];
    }

    // Numeric access: numbers are returned as is, strings are parsed.
    let int: Data.Field['int'];
    let float: Data.Field['float'];
    if (isNumeric) {
        int = row => values[row];
        float = row => values[row];
    } else {
        int = row => {
            const text = values[row];
            return fastParseInt(text, 0, text.length);
        };
        float = row => {
            const text = values[row];
            return fastParseFloat(text, 0, text.length);
        };
    }

    let presence: Data.Field['presence'];
    if (presenceMask) {
        presence = row => presenceMask[row];
    } else {
        presence = row => Data.ValuePresence.Present;
    }

    return {
        isDefined: true,
        rowCount,
        str,
        int,
        float,
        presence,
        areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
        stringEquals(row, v) { return str(row) === v; },
        toStringArray(params) { return Column.createAndFillArray(rowCount, str, params); },
        toIntArray(params) { return Column.createAndFillArray(rowCount, int, params); },
        toFloatArray(params) { return Column.createAndFillArray(rowCount, float, params); }
    };
}
\ No newline at end of file
// TODO
\ No newline at end of file
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import * as Data from '../data-model'
import * as Encoding from './encoding'
import Field from './field'
import Result from '../../result'
import decodeMsgPack from '../../../utils/msgpack/decode'
/**
 * Checks that a version is at least the required minimum.
 *
 * Only the first two components (major, minor) are considered and they are
 * compared lexicographically: a higher major version satisfies the minimum
 * regardless of the minor version.
 *
 * @param min Minimal supported version as [major, minor].
 * @param current Version to check as [major, minor].
 * @returns True when `current` is greater than or equal to `min`.
 */
function checkVersions(min: number[], current: number[]) {
    for (let i = 0; i < 2; i++) {
        // The first differing component decides; later ones are irrelevant.
        if (current[i] > min[i]) return true;
        if (current[i] < min[i]) return false;
    }
    return true;
}
/**
 * Wraps an encoded category so that its columns can be looked up by name.
 *
 * @param data Encoded category.
 * @returns An object exposing the row count and `getField`, which builds a
 * field from the named column or returns the default undefined field when
 * there is no such column.
 */
function Category(data: Encoding.EncodedCategory): Data.Category {
    // Prototype-less object, so column names cannot collide with Object.prototype members.
    const columnsByName = Object.create(null);
    data.columns.forEach(column => {
        columnsByName[column.name] = column;
    });

    return {
        rowCount: data.rowCount,
        getField(name) {
            const column = columnsByName[name];
            if (!column) return Data.DefaultUndefinedField(data.rowCount);
            return Field(column);
        }
    }
}
/**
 * Parses a MessagePack-encoded binary file.
 *
 * @param data Bytes of the file.
 * @returns A successful result holding the parsed file, or an error result
 * when the format version is missing, malformed or too old, or when anything
 * throws while decoding.
 */
export default function parse(data: Uint8Array): Result<Data.File> {
    const minVersion = [0, 3];
    try {
        const unpacked = decodeMsgPack(data) as Encoding.EncodedFile;
        // Components may have several digits (e.g. "0.10.0"), so match \d+
        // rather than a single digit, and treat a version that does not match
        // as unsupported instead of dereferencing a null match.
        const versionMatch = /(\d+)\.(\d+)\.\d+/.exec('' + unpacked.version);
        const currentVersion = versionMatch ? [+versionMatch[1], +versionMatch[2]] : void 0;
        if (!currentVersion || !checkVersions(minVersion, currentVersion)) {
            return Result.error<Data.File>(`Unsupported format version. Current ${unpacked.version}, required ${minVersion.join('.')}.`);
        }
        const file = Data.File(unpacked.dataBlocks.map(block => {
            const cats = Object.create(null);
            for (const cat of block.categories) cats[cat.name] = Category(cat);
            return Data.Block(cats, block.header);
        }));
        return Result.success(file);
    } catch (e) {
        return Result.error<Data.File>('' + e);
    }
}
\ No newline at end of file
......@@ -5,14 +5,18 @@
*/
import parseText from './text/parser'
import parseBinary from './binary/parser'
import { Block } from './data-model'
import { apply as applySchema } from './schema'
import mmCIF from './schema/mmcif'
/** Entry points of the CIF module: text and binary parsers plus schema helpers. */
export default {
parseText,
parseBinary,
applySchema,
schema: {
// Applies the imported mmCIF schema to a single data block.
mmCIF: (block: Block) => applySchema(mmCIF, block)
}
}
\ No newline at end of file
}
export * from './data-model'
\ No newline at end of file
......@@ -72,32 +72,49 @@ export function _gro() {
});
}
/**
 * Parses CIF input (text when given a string, binary otherwise), logs the
 * parse time and prints a few sample values from the first data block.
 * A failed parse result is logged and nothing else is printed.
 */
function runCIF(input: string | Uint8Array) {
    const timerLabel = 'parseCIF';
    console.time(timerLabel);
    const parsed = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input);
    console.timeEnd(timerLabel);

    if (parsed.isError) {
        console.log(parsed);
        return;
    }

    const firstBlock = parsed.result.blocks[0];

    // Raw, schema-less access to a single field.
    const atomSite = firstBlock.categories._atom_site;
    const cartnX = atomSite.getField('Cartn_x')!;
    console.log(cartnX.float(0));
    //console.log(atomSite.getField('label_atom_id')!.toStringArray());

    // Typed access through the mmCIF schema.
    const typed = CIF.schema.mmCIF(firstBlock);
    console.log(typed.atom_site.Cartn_x.value(0));
    console.log(typed.entity.type.toArray());
    console.log(typed.pdbx_struct_oper_list.matrix.value(0));
}
/**
 * Reads the example text CIF and BinaryCIF files and runs `runCIF` on each.
 * Read errors are logged and the corresponding run is skipped.
 */
export function _cif() {
    let path = `./examples/1cbs_updated.cif`;
    //path = 'c:/test/quick/3j3q.cif';
    fs.readFile(path, 'utf8', function (err, input) {
        if (err) {
            return console.log(err);
        }
        console.log('------------------');
        console.log('Text CIF:');
        runCIF(input);
    });

    path = `./examples/1cbs_full.bcif`;
    fs.readFile(path, function (err, input) {
        if (err) {
            return console.log(err);
        }
        console.log('------------------');
        console.log('BinaryCIF:');
        // Copy the file contents into a standalone Uint8Array with its own
        // buffer and parse that copy (previously the copy was made but the
        // original `input` was passed on).
        const data = new Uint8Array(input.byteLength);
        for (let i = 0; i < input.byteLength; i++) data[i] = input[i];
        runCIF(data);
    });
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment