diff --git a/src/apps/cif2bcif/converter.ts b/src/apps/cif2bcif/converter.ts index d61adcf2f28365db0855e0276a9dddcba6a8436f..50ffc0e6e4b901a7d89d25dc1de00a0b3d8af701 100644 --- a/src/apps/cif2bcif/converter.ts +++ b/src/apps/cif2bcif/converter.ts @@ -8,36 +8,65 @@ import CIF, { CifCategory } from 'mol-io/reader/cif' import { CifWriter } from 'mol-io/writer/cif' import * as fs from 'fs' import classify from './field-classifier' +import { Progress, Task, RuntimeContext } from 'mol-task'; -async function getCIF(path: string) { +function showProgress(p: Progress) { + process.stdout.write(`\r${new Array(80).join(' ')}`); + process.stdout.write(`\r${Progress.format(p)}`); +} + +async function getCIF(ctx: RuntimeContext, path: string) { const str = fs.readFileSync(path, 'utf8'); - const parsed = await CIF.parseText(str).run(); + const parsed = await CIF.parseText(str).runInContext(ctx); if (parsed.isError) { throw new Error(parsed.toString()); } return parsed.result; } -function getCategoryInstanceProvider(cat: CifCategory): CifWriter.Category.Provider { +function getCategoryInstanceProvider(cat: CifCategory, fields: CifWriter.Field[]): CifWriter.Category.Provider { return function (ctx: any) { return { data: cat, name: cat.name, - fields: cat.fieldNames.map(n => classify(n, cat.getField(n)!)), + fields, rowCount: cat.rowCount }; } } -export default async function convert(path: string, asText = false) { - const cif = await getCIF(path); +export default function convert(path: string, asText = false) { + return Task.create<Uint8Array>('BinaryCIF', async ctx => { + const cif = await getCIF(ctx, path); - const encoder = CifWriter.createEncoder({ binary: !asText, encoderName: 'mol* cif2bcif' }); - for (const b of cif.blocks) { - encoder.startDataBlock(b.header); - for (const c of b.categoryNames) { - encoder.writeCategory(getCategoryInstanceProvider(b.categories[c])); + const encoder = CifWriter.createEncoder({ binary: !asText, encoderName: 'mol* cif2bcif' }); + + let maxProgress = 0; + for (const b of cif.blocks) { + maxProgress += b.categoryNames.length; + for (const c of b.categoryNames) maxProgress += b.categories[c].fieldNames.length; } - } - return encoder.getData(); -} + + let current = 0; + for (const b of cif.blocks) { + encoder.startDataBlock(b.header); + for (const c of b.categoryNames) { + const cat = b.categories[c]; + const fields: CifWriter.Field[] = []; + for (const f of cat.fieldNames) { + fields.push(classify(f, cat.getField(f)!)) + current++; + if (ctx.shouldUpdate) await ctx.update({ message: 'Encoding...', current, max: maxProgress }); + } + + encoder.writeCategory(getCategoryInstanceProvider(b.categories[c], fields)); + current++; + if (ctx.shouldUpdate) await ctx.update({ message: 'Encoding...', current, max: maxProgress }); + } + } + await ctx.update('Exporting...'); + const ret = encoder.getData() as Uint8Array; + await ctx.update('Done.'); + return ret; + }).run(showProgress, 250); +} \ No newline at end of file diff --git a/src/apps/cif2bcif/field-classifier.ts b/src/apps/cif2bcif/field-classifier.ts index a6e51b2b265f5c6451e49912250d216327e52dd2..f7caac96ae1c49f58d8f5dd1edb25772d133d52b 100644 --- a/src/apps/cif2bcif/field-classifier.ts +++ b/src/apps/cif2bcif/field-classifier.ts @@ -7,9 +7,172 @@ import { Column } from 'mol-data/db' import { CifField } from 'mol-io/reader/cif/data-model' import { CifWriter } from 'mol-io/writer/cif' +import { ArrayEncoder, ArrayEncoding as E } from 'mol-io/common/binary-cif'; + +namespace IntClassifier { + function packSize(value: number, upperLimit: number) { + return value >= 0 + ? Math.ceil((value + 1) / upperLimit) + : Math.ceil((value + 1) / (-upperLimit - 1)); + } + + type IntColumnInfo = { signed: boolean, limit8: number, limit16: number }; + + function getInfo(data: number[]): IntColumnInfo { + let signed = false; + for (let i = 0, n = data.length; i < n; i++) { + if (data[i] < 0) { + signed = true; + break; + } + } + return signed ? { signed, limit8: 0x7F, limit16: 0x7FFF } : { signed, limit8: 0xFF, limit16: 0xFFFF }; + } + + type SizeInfo = { pack8: number, pack16: number, count: number } + function SizeInfo(): SizeInfo { return { pack8: 0, pack16: 0, count: 0 } }; + + function incSize({ limit8, limit16 }: IntColumnInfo, info: SizeInfo, value: number) { + info.pack8 += packSize(value, limit8); + info.pack16 += packSize(value, limit16); + info.count += 1; + } + + function incSizeSigned(info: SizeInfo, value: number) { + info.pack8 += packSize(value, 0x7F); + info.pack16 += packSize(value, 0x7FFF); + info.count += 1; + } + + function byteSize(info: SizeInfo) { + if (info.count * 4 < info.pack16 * 2) return { length: info.count * 4, elem: 4 }; + if (info.pack16 * 2 < info.pack8) return { length: info.pack16 * 2, elem: 2 }; + return { length: info.pack8, elem: 1 }; + } + + function packingSize(data: number[], info: IntColumnInfo) { + const size = SizeInfo(); + for (let i = 0, n = data.length; i < n; i++) { + incSize(info, size, data[i]); + } + return { ...byteSize(size), kind: 'pack' }; + } + + function deltaSize(data: number[], info: IntColumnInfo) { + const size = SizeInfo(); + let prev = data[0]; + for (let i = 1, n = data.length; i < n; i++) { + incSizeSigned(size, data[i] - prev); + prev = data[i]; + } + return { ...byteSize(size), kind: 'delta' }; + } + + function rleSize(data: number[], info: IntColumnInfo) { + const size = SizeInfo(); + let run = 1; + for (let i = 1, n = data.length; i < n; i++) { + if (data[i - 1] !== data[i]) { + incSize(info, size, data[i - 1]); + incSize(info, size, run); + run = 1; + } else { + run++; + } + } + incSize(info, size, data[data.length - 1]); + incSize(info, size, run); + + return { ...byteSize(size), kind: 'rle' }; + } + + function deltaRleSize(data: number[], info: IntColumnInfo) { + const size = SizeInfo(); + let run = 1, prev = 0, prevValue = 0; + for (let i = 1, n = data.length; i < n; i++) { + const v = data[i] - prev; + if (prevValue !== v) { + incSizeSigned(size, prevValue); + incSizeSigned(size, run); + run = 1; + } else { + run++; + } + prevValue = v; + prev = data[i]; + } + incSizeSigned(size, prevValue); + incSizeSigned(size, run); + + return { ...byteSize(size), kind: 'delta-rle' }; + } + + export function getSize(data: number[]) { + const info = getInfo(data); + const sizes = [packingSize(data, info), rleSize(data, info), deltaSize(data, info), deltaRleSize(data, info)]; + sizes.sort((a, b) => a.length - b.length); + return sizes; + } + + export function classify(data: number[], name: string): ArrayEncoder { + if (data.length < 2) return E.by(E.byteArray); + + const sizes = getSize(data); + const size = sizes[0]; + // console.log(`${name}: ${size.kind} ${size.length}b ${data.length}`); + // console.log(`${name}: ${sizes.map(s => `${s.kind}: ${s.length}b`).join(' | ')}`); + + switch (size.kind) { + case 'pack': return E.by(E.integerPacking); + case 'rle': return E.by(E.runLength).and(E.integerPacking); + case 'delta': return E.by(E.delta).and(E.integerPacking); + case 'delta-rle': return E.by(E.delta).and(E.runLength).and(E.integerPacking); + } + + throw 'bug'; + } +} + +namespace FloatClassifier { + const delta = 1e-6; + function digitCount(v: number) { + let m = 1; + for (let i = 0; i < 5; i++) { + const r = Math.round(m * v) / m; + if (Math.abs(v - r) < delta) return m; + m *= 10; + } + return 10000; + } + + export function classify(data: number[], name: string) { + let dc = 10; + for (let i = 0, n = data.length; i < n; i++) dc = Math.max(dc, digitCount(data[i])); + + if (dc >= 10000) return { encoder: E.by(E.byteArray), typedArray: Float64Array }; + + const intArray = new Int32Array(data.length); + for (let i = 0, n = data.length; i < n; i++) intArray[i] = data[i] * dc; + + const sizes = IntClassifier.getSize(intArray as any); + const size = sizes[0]; + + // console.log(`>> ${name}: ${size.kind} ${size.length}b ${data.length} x${dc}`); + // console.log(` ${name}: ${sizes.map(s => `${s.kind}: ${s.length}b`).join(' | ')}`); + + switch (size.kind) { + case 'pack': return { encoder: E.by(E.fixedPoint(dc)).and(E.integerPacking), typedArray: Float32Array }; + case 'rle': return { encoder: E.by(E.fixedPoint(dc)).and(E.runLength).and(E.integerPacking), typedArray: Float32Array }; + case 'delta': return { encoder: E.by(E.fixedPoint(dc)).and(E.delta).and(E.integerPacking), typedArray: Float32Array }; + case 'delta-rle': return { encoder: E.by(E.fixedPoint(dc)).and(E.delta).and(E.runLength).and(E.integerPacking), typedArray: Float32Array }; + } + + throw 'bug'; + } +} const intRegex = /^-?\d+$/ -const floatRegex = /^-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)?/ +const floatRegex = /^-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)?$/ // Classify a cif field as str, int or float based the data it contains. // To classify a field as int or float all items are checked. @@ -25,8 +188,13 @@ function classify(name: string, field: CifField): CifWriter.Field { } if (hasString) return { name, type: CifWriter.Field.Type.Str, value: field.str, valueKind: field.valueKind }; - if (floatCount > 0) return { name, type: CifWriter.Field.Type.Float, value: field.float, valueKind: field.valueKind }; - return { name, type: CifWriter.Field.Type.Int, value: field.int, valueKind: field.valueKind }; + if (floatCount > 0) { + const { encoder, typedArray } = FloatClassifier.classify(field.toFloatArray({ array: Float64Array }) as number[], name) + return CifWriter.Field.float(name, field.float, { valueKind: field.valueKind, encoder, typedArray }); + } else { + const encoder = IntClassifier.classify(field.toIntArray({ array: Int32Array }) as number[], name); + return CifWriter.Field.int(name, field.int, { valueKind: field.valueKind, encoder, typedArray: Int32Array }); + } } export default classify; \ No newline at end of file diff --git a/src/mol-task/execution/observable.ts b/src/mol-task/execution/observable.ts index 6f2cb67c2f8297362c1ba8c3bbcdd0b920987486..8162bcdf62bc9e7f4e9aade8ac4902ad120a5a9d 100644 --- a/src/mol-task/execution/observable.ts +++ b/src/mol-task/execution/observable.ts @@ -174,6 +174,7 @@ class ObservableRuntimeContext implements RuntimeContext { const progress = this.node.progress; if (typeof update === 'string') { progress.message = update; + progress.isIndeterminate = true; } else { if (typeof update.canAbort !== 'undefined') progress.canAbort = update.canAbort; if (typeof update.message !== 'undefined') progress.message = update.message; @@ -193,7 +194,7 @@ class ObservableRuntimeContext implements RuntimeContext { this.lastUpdatedTime = now(); this.updateProgress(progress); - if (!!dontNotify || !shouldNotify(this.info, this.lastUpdatedTime)) return; + if (!!dontNotify /*|| !shouldNotify(this.info, this.lastUpdatedTime)*/) return; notifyObserver(this.info, this.lastUpdatedTime);