-
David Sehnal authoredDavid Sehnal authored
array-encoder.ts 13.42 KiB
/**
* Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js; MIT) and MMTF (https://github.com/rcsb/mmtf-javascript/; MIT)
*
* @author David Sehnal <david.sehnal@gmail.com>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { ChunkedArray } from 'mol-data/util'
import { Encoding, EncodedData } from './encoding'
export interface ArrayEncoder {
and(f: ArrayEncoding.Provider): ArrayEncoder,
encode(data: ArrayLike<any>): EncodedData
}
export class ArrayEncoderImpl implements ArrayEncoder {
and(f: ArrayEncoding.Provider) {
return new ArrayEncoderImpl(this.providers.concat([f]));
}
encode(data: ArrayLike<any>): EncodedData {
let encoding: Encoding[] = [];
for (let p of this.providers) {
let t = p(data);
if (!t.encodings.length) {
throw new Error('Encodings must be non-empty.');
}
data = t.data;
for (let e of t.encodings) {
encoding.push(e);
}
}
if (!(data instanceof Uint8Array)) {
throw new Error('The encoding must result in a Uint8Array. Fix your encoding chain.');
}
return {
encoding,
data
}
}
constructor(private providers: ArrayEncoding.Provider[]) {
}
}
export namespace ArrayEncoder {
export function by(f: ArrayEncoding.Provider): ArrayEncoder {
return new ArrayEncoderImpl([f]);
}
}
export namespace ArrayEncoding {
export type TypedArrayCtor = { new(size: number): ArrayLike<number> & { buffer: ArrayBuffer, byteLength: number, byteOffset: number, BYTES_PER_ELEMENT: number } }
export interface Result {
encodings: Encoding[],
data: any
}
export type Provider = (data: any) => Result
export function by(f: Provider): ArrayEncoder {
return new ArrayEncoderImpl([f]);
}
function uint8(data: Uint8Array): Result {
return {
encodings: [{ kind: 'ByteArray', type: Encoding.IntDataType.Uint8 }],
data
};
}
function int8(data: Int8Array): Result {
return {
encodings: [{ kind: 'ByteArray', type: Encoding.IntDataType.Int8 }],
data: new Uint8Array(data.buffer, data.byteOffset)
};
}
const writers = {
[Encoding.IntDataType.Int16]: function (v: DataView, i: number, a: number) { v.setInt16(2 * i, a, true) },
[Encoding.IntDataType.Uint16]: function (v: DataView, i: number, a: number) { v.setUint16(2 * i, a, true) },
[Encoding.IntDataType.Int32]: function (v: DataView, i: number, a: number) { v.setInt32(4 * i, a, true) },
[Encoding.IntDataType.Uint32]: function (v: DataView, i: number, a: number) { v.setUint32(4 * i, a, true) },
[Encoding.FloatDataType.Float32]: function (v: DataView, i: number, a: number) { v.setFloat32(4 * i, a, true) },
[Encoding.FloatDataType.Float64]: function (v: DataView, i: number, a: number) { v.setFloat64(8 * i, a, true) }
}
const byteSizes = {
[Encoding.IntDataType.Int16]: 2,
[Encoding.IntDataType.Uint16]: 2,
[Encoding.IntDataType.Int32]: 4,
[Encoding.IntDataType.Uint32]: 4,
[Encoding.FloatDataType.Float32]: 4,
[Encoding.FloatDataType.Float64]: 8
}
export function byteArray(data: Encoding.FloatArray | Encoding.IntArray) {
let type = Encoding.getDataType(data);
if (type === Encoding.IntDataType.Int8) return int8(data as Int8Array);
else if (type === Encoding.IntDataType.Uint8) return uint8(data as Uint8Array);
let result = new Uint8Array(data.length * byteSizes[type]);
let w = writers[type];
let view = new DataView(result.buffer);
for (let i = 0, n = data.length; i < n; i++) {
w(view, i, data[i]);
}
return {
encodings: [<Encoding.ByteArray>{ kind: 'ByteArray', type }],
data: result
};
}
function _fixedPoint(data: Encoding.FloatArray, factor: number): Result {
let srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
let result = new Int32Array(data.length);
for (let i = 0, n = data.length; i < n; i++) {
result[i] = Math.round(data[i] * factor);
}
return {
encodings: [{ kind: 'FixedPoint', factor, srcType }],
data: result
};
}
export function fixedPoint(factor: number): Provider { return data => _fixedPoint(data as Encoding.FloatArray, factor); }
function _intervalQuantizaiton(data: Encoding.FloatArray, min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray): Result {
let srcType = Encoding.getDataType(data) as Encoding.FloatDataType;
if (!data.length) {
return {
encodings: [{ kind: 'IntervalQuantization', min, max, numSteps, srcType }],
data: new Int32Array(0)
};
}
if (max < min) {
let t = min;
min = max;
max = t;
}
let delta = (max - min) / (numSteps - 1);
let output = new arrayType(data.length);
for (let i = 0, n = data.length; i < n; i++) {
let v = data[i];
if (v <= min) output[i] = 0;
else if (v >= max) output[i] = numSteps;
else output[i] = (Math.round((v - min) / delta)) | 0;
}
return {
encodings: [{ kind: 'IntervalQuantization', min, max, numSteps, srcType }],
data: output
};
}
export function intervalQuantizaiton(min: number, max: number, numSteps: number, arrayType: new (size: number) => Encoding.IntArray = Int32Array): Provider {
return data => _intervalQuantizaiton(data as Encoding.FloatArray, min, max, numSteps, arrayType);
}
export function runLength(data: Encoding.IntArray): Result {
let srcType = Encoding.getDataType(data) as Encoding.IntDataType;
if (srcType === void 0) {
data = new Int32Array(data);
srcType = Encoding.IntDataType.Int32;
}
if (!data.length) {
return {
encodings: [{ kind: 'RunLength', srcType, srcSize: 0 }],
data: new Int32Array(0)
};
}
// calculate output size
let fullLength = 2;
for (let i = 1, il = data.length; i < il; i++) {
if (data[i - 1] !== data[i]) {
fullLength += 2;
}
}
let output = new Int32Array(fullLength);
let offset = 0;
let runLength = 1;
for (let i = 1, il = data.length; i < il; i++) {
if (data[i - 1] !== data[i]) {
output[offset] = data[i - 1];
output[offset + 1] = runLength;
runLength = 1;
offset += 2;
} else {
++runLength;
}
}
output[offset] = data[data.length - 1];
output[offset + 1] = runLength;
return {
encodings: [{ kind: 'RunLength', srcType, srcSize: data.length }],
data: output
};
}
export function delta(data: Int8Array | Int16Array | Int32Array): Result {
if (!Encoding.isSignedIntegerDataType(data)) {
throw new Error('Only signed integer types can be encoded using delta encoding.');
}
let srcType = Encoding.getDataType(data) as Encoding.IntDataType;
if (srcType === void 0) {
data = new Int32Array(data);
srcType = Encoding.IntDataType.Int32;
}
if (!data.length) {
return {
encodings: [{ kind: 'Delta', origin: 0, srcType }],
data: new (data as any).constructor(0)
};
}
let output = new (data as any).constructor(data.length);
let origin = data[0];
output[0] = data[0];
for (let i = 1, n = data.length; i < n; i++) {
output[i] = data[i] - data[i - 1];
}
output[0] = 0;
return {
encodings: [{ kind: 'Delta', origin, srcType }],
data: output
};
}
function isSigned(data: Int32Array) {
for (let i = 0, n = data.length; i < n; i++) {
if (data[i] < 0) return true;
}
return false;
}
function packingSize(data: Int32Array, upperLimit: number) {
let lowerLimit = -upperLimit - 1;
let size = 0;
for (let i = 0, n = data.length; i < n; i++) {
let value = data[i];
if (value === 0) {
size += 1;
} else if (value > 0) {
size += Math.ceil(value / upperLimit);
if (value % upperLimit === 0) size += 1;
} else {
size += Math.ceil(value / lowerLimit);
if (value % lowerLimit === 0) size += 1;
}
}
return size;
}
function determinePacking(data: Int32Array): { isSigned: boolean, size: number, bytesPerElement: number } {
let signed = isSigned(data);
let size8 = signed ? packingSize(data, 0x7F) : packingSize(data, 0xFF);
let size16 = signed ? packingSize(data, 0x7FFF) : packingSize(data, 0xFFFF);
if (data.length * 4 < size16 * 2) {
// 4 byte packing is the most effective
return {
isSigned: signed,
size: data.length,
bytesPerElement: 4
};
} else if (size16 * 2 < size8) {
// 2 byte packing is the most effective
return {
isSigned: signed,
size: size16,
bytesPerElement: 2
}
} else {
// 1 byte packing is the most effective
return {
isSigned: signed,
size: size8,
bytesPerElement: 1
}
};
}
function _integerPacking(data: Int32Array, packing: { isSigned: boolean, size: number, bytesPerElement: number }): Result {
let upperLimit = packing.isSigned
? (packing.bytesPerElement === 1 ? 0x7F : 0x7FFF)
: (packing.bytesPerElement === 1 ? 0xFF : 0xFFFF);
let lowerLimit = -upperLimit - 1;
let n = data.length;
let packed = packing.isSigned
? packing.bytesPerElement === 1 ? new Int8Array(packing.size) : new Int16Array(packing.size)
: packing.bytesPerElement === 1 ? new Uint8Array(packing.size) : new Uint16Array(packing.size);
let j = 0;
for (let i = 0; i < n; i++) {
let value = data[i];
if (value >= 0) {
while (value >= upperLimit) {
packed[j] = upperLimit;
++j;
value -= upperLimit;
}
} else {
while (value <= lowerLimit) {
packed[j] = lowerLimit;
++j;
value -= lowerLimit;
}
}
packed[j] = value;
++j;
}
let result = byteArray(packed);
return {
encodings: [{
kind: 'IntegerPacking',
byteCount: packing.bytesPerElement,
isUnsigned: !packing.isSigned,
srcSize: n
},
result.encodings[0]
],
data: result.data
};
}
/**
* Packs Int32 array. The packing level is determined automatically to either 1-, 2-, or 4-byte words.
*/
export function integerPacking(data: Int32Array): Result {
if (!(data instanceof Int32Array)) {
throw new Error('Integer packing can only be applied to Int32 data.');
}
let packing = determinePacking(data);
if (packing.bytesPerElement === 4) {
// no packing done, Int32 encoding will be used
return byteArray(data);
}
return _integerPacking(data, packing);
}
export function stringArray(data: string[]): Result {
let map: any = Object.create(null);
let strings: string[] = [];
let accLength = 0;
let offsets = ChunkedArray.create<number>(s => new Int32Array(s), 1, 1024)
let output = new Int32Array(data.length);
ChunkedArray.add(offsets, 0);
let i = 0;
for (let s of data) {
// handle null strings.
if (s === null || s === void 0) {
output[i++] = -1;
continue;
}
let index = map[s];
if (index === void 0) {
// increment the length
accLength += s.length;
// store the string and index
index = strings.length;
strings[index] = s;
map[s] = index;
// write the offset
ChunkedArray.add(offsets, accLength);
}
output[i++] = index;
}
let encOffsets = ArrayEncoder.by(delta).and(integerPacking).encode(ChunkedArray.compact(offsets));
let encOutput = ArrayEncoder.by(delta).and(runLength).and(integerPacking).encode(output);
return {
encodings: [{ kind: 'StringArray', dataEncoding: encOutput.encoding, stringData: strings.join(''), offsetEncoding: encOffsets.encoding, offsets: encOffsets.data }],
data: encOutput.data
};
}
}