// File: src/mol-base/utils/string-builder.ts
// Members of `namespace StringBuilder` that are fully visible in this fragment.
// `writeSafe` and `__paddingSpaces` are referenced here but defined in parts of
// the namespace that are not shown.

/**
 * Appends `len` spaces taken from the `__paddingSpaces` lookup table.
 * Nothing is written unless `len` is greater than zero.
 */
export function whitespace(builder: StringBuilder, len: number) {
    if (!(len > 0)) return;
    const padding = __paddingSpaces[len];
    writeSafe(builder, padding);
}

/** Appends exactly one space character. */
export function whitespace1(builder: StringBuilder) {
    const singleSpace = ' ';
    writeSafe(builder, singleSpace);
}

/** Appends the string form of `val` followed by a single space, as one write. */
export function writeIntegerAndSpace(builder: StringBuilder, val: number) {
    const text = '' + val;
    writeSafe(builder, text + ' ');
}
/**
 * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

// File: src/mol-io/writer/cif/encoder.ts

import Iterator from 'mol-base/collections/iterator'
import { Column } from 'mol-base/collections/database'
import Encoder from '../encoder'

/** The kind of value a field produces. */
export const enum FieldType {
    Str,
    Int,
    Float
}

/** Describes a single field of a category. */
export interface FieldDefinition<Key = any, Data = any> {
    name: string;
    type: FieldType;

    /** Produces the field value for the given key. */
    value(key: Key, data: Data): string | number;

    /** Optional: reports the `Column.ValueKind` of the value for the given key. */
    valueKind?: (key: Key, data: Data) => Column.ValueKind;

    /** Determine whether to include this field based on the context. */
    shouldInclude?: (data: Data) => boolean;
}

/** Formatting options for a field. */
export interface FieldFormat {
    decimalPlaces: number;
}

export namespace FieldFormat {
    /** Default format: three decimal places. */
    export const Default: FieldFormat = { decimalPlaces: 3 };
}

/** A named category together with the definitions of its fields. */
export interface CategoryDefinition<Key = any, Data = any> {
    name: string;
    fields: FieldDefinition<Key, Data>[];
}

/** A category definition bound to concrete data, with its row count and key iterator. */
export interface CategoryInstance<Key = any, Data = any> {
    data: Data;
    definition: CategoryDefinition<Key, Data>;
    /** Optional partial formats, indexed by name. */
    formats?: { [name: string]: Partial<FieldFormat> };
    rowCount: number;
    keys(): Iterator<Key>;
}

/** Creates a category instance from a context value. */
export interface CategoryProvider {
    (ctx: any): CategoryInstance;
}

/** An encoder that writes CIF data blocks and categories and exposes the encoded data. */
export interface CIFEncoder<T, Context> extends Encoder<T> {
    startDataBlock(header: string): void;
    writeCategory(category: CategoryProvider, contexts?: Context[]): void;
    getData(): T;
}
/**
 * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js)
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

// File: src/mol-io/writer/cif/encoder/text.ts

import { Column } from 'mol-base/collections/database'
import StringBuilder from 'mol-base/utils/string-builder'
import * as Enc from '../encoder'
import Writer from '../../writer'

/**
 * CIF encoder that produces text output.
 *
 * Everything is accumulated in a StringBuilder; the result is obtained either
 * as one string (`getData`) or chunk by chunk (`writeTo`).
 */
export default class TextCIFEncoder<Context> implements Enc.CIFEncoder<string, Context> {
    private builder = StringBuilder.create();
    private encoded = false;
    private dataBlockCreated = false;

    /**
     * Starts a data block by writing `data_<HEADER>` followed by a `#` line.
     * Spaces, tabs and newlines are removed from `header` and it is upper-cased;
     * a missing header is treated as the empty string.
     */
    startDataBlock(header: string) {
        this.dataBlockCreated = true;
        StringBuilder.write(this.builder, `data_${(header || '').replace(/[ \n\t]/g, '').toUpperCase()}\n#\n`);
    }

    /**
     * Writes one category.
     *
     * The provider is called once per context (or once with `undefined` when no
     * contexts are given). If the instances hold exactly one row in total, the
     * category is written as a single record; otherwise it is written as a loop.
     * Nothing is written when there are no rows.
     *
     * @throws Error if `encode()` was already called or no data block was started.
     */
    writeCategory(category: Enc.CategoryProvider, contexts?: Context[]) {
        if (this.encoded) {
            throw new Error('The writer contents have already been encoded, no more writing.');
        }

        if (!this.dataBlockCreated) {
            throw new Error('No data block created.');
        }

        // Always holds at least one instance: either the single context-less
        // instance or one instance per (non-empty) context list entry.
        const categories = !contexts || !contexts.length ? [category(<any>void 0)] : contexts.map(c => category(c));

        const rowCount = categories.reduce((v, c) => v + c.rowCount, 0);
        if (rowCount === 0) return;

        if (rowCount === 1) {
            // The single row may belong to any of the instances, not necessarily
            // the first one, so pick the instance that actually has it.
            writeCifSingleRecord(firstNonEmptyCategory(categories), this.builder);
        } else {
            writeCifLoop(categories, this.builder);
        }
    }

    /** Marks the contents as encoded; later `writeCategory` calls throw. */
    encode() {
        this.encoded = true;
    }

    /** Writes every chunk accumulated by the builder to `stream`, in order. */
    writeTo(stream: Writer<string>) {
        const chunks = StringBuilder.getChunks(this.builder);
        for (let i = 0, _i = chunks.length; i < _i; i++) {
            stream.write(chunks[i]);
        }
    }

    /** Returns the accumulated output as a single string. */
    getData() {
        return StringBuilder.getString(this.builder);
    }
}

/** Returns the first instance with a positive row count (falls back to the first instance). */
function firstNonEmptyCategory(categories: Enc.CategoryInstance[]): Enc.CategoryInstance {
    for (let i = 0; i < categories.length; i++) {
        if (categories[i].rowCount > 0) return categories[i];
    }
    return categories[0];
}

/**
 * Writes the value of field `f` for `key`.
 *
 * Values whose kind is not `Present` are written as `.` (NotPresent) or `?`.
 * Strings go through multi-line detection and quoting; integers and floats are
 * written followed by a space.
 *
 * @returns true if the value was written as a multi-line text field (which
 *          already ends with a line break), false otherwise.
 */
function writeValue(builder: StringBuilder, data: any, key: any, f: Enc.FieldDefinition): boolean {
    const kind = f.valueKind;
    const p = kind ? kind(key, data) : Column.ValueKind.Present;
    if (p !== Column.ValueKind.Present) {
        if (p === Column.ValueKind.NotPresent) writeNotPresent(builder);
        else writeUnknown(builder);
    } else {
        const val = f.value(key, data);
        const t = f.type;
        if (t === Enc.FieldType.Str) {
            if (isMultiline(val as string)) {
                writeMultiline(builder, val as string);
                return true;
            } else {
                return writeChecked(builder, val as string);
            }
        } else if (t === Enc.FieldType.Int) {
            writeInteger(builder, val as number);
        } else {
            // NOTE(review): the precision multiplier is fixed at 1000 here; the
            // instance's `formats` are not consulted in this file — confirm intent.
            writeFloat(builder, val as number, 1000);
        }
    }
    return false;
}

/**
 * Writes a category that has exactly one row as `_category.field value` lines,
 * with the names padded to a common width, followed by a `#` line.
 * The row key is the first key produced by `category.keys()`.
 */
function writeCifSingleRecord(category: Enc.CategoryInstance<any>, builder: StringBuilder) {
    const fields = category.definition.fields;
    const data = category.data;
    const width = fields.reduce((w, s) => Math.max(w, s.name.length), 0) + category.definition.name.length + 6;

    const it = category.keys();
    const key = it.move();

    for (let _f = 0; _f < fields.length; _f++) {
        const f = fields[_f];
        StringBuilder.writePadRight(builder, `_${category.definition.name}.${f.name}`, width);
        const multiline = writeValue(builder, data, key, f);
        // a multi-line value already ends with a line break
        if (!multiline) StringBuilder.newline(builder);
    }
    StringBuilder.write(builder, '#\n');
}

/**
 * Writes the instances as one `loop_`: the header lists the fields of the
 * first instance, then the rows of every non-empty instance follow, one row
 * per key, and a `#` line closes the category.
 */
function writeCifLoop(categories: Enc.CategoryInstance[], builder: StringBuilder) {
    const first = categories[0];
    const fields = first.definition.fields;
    const fieldCount = fields.length;

    writeLine(builder, 'loop_');
    for (let i = 0; i < fieldCount; i++) {
        writeLine(builder, `_${first.definition.name}.${fields[i].name}`);
    }

    for (let _c = 0; _c < categories.length; _c++) {
        const category = categories[_c];
        const data = category.data;

        if (category.rowCount === 0) continue;

        const it = category.keys();
        while (it.hasNext) {
            const key = it.move();

            // Only the last value of the row decides whether a line break is
            // still needed: a multi-line value ends with its own line break.
            let multiline = false;
            for (let _f = 0; _f < fieldCount; _f++) {
                multiline = writeValue(builder, data, key, fields[_f]);
            }
            if (!multiline) StringBuilder.newline(builder);
        }
    }
    StringBuilder.write(builder, '#\n');
}

/** True if `value` is non-empty and contains a newline character. */
function isMultiline(value: string) {
    return !!value && value.indexOf('\n') >= 0;
}

/** Writes `val` followed by a line break. */
function writeLine(builder: StringBuilder, val: string) {
    StringBuilder.write(builder, val);
    StringBuilder.newline(builder);
}

/** Writes an integer followed by a single space. */
function writeInteger(builder: StringBuilder, val: number) {
    StringBuilder.writeInteger(builder, val);
    StringBuilder.whitespace1(builder);
}

/** Writes a float using the given precision multiplier, followed by a single space. */
function writeFloat(builder: StringBuilder, val: number, precisionMultiplier: number) {
    StringBuilder.writeFloat(builder, val, precisionMultiplier);
    StringBuilder.whitespace1(builder);
}

/** Writes the "not present" token `.` followed by a space. */
function writeNotPresent(builder: StringBuilder) {
    StringBuilder.writeSafe(builder, '. ');
}

/** Writes the "unknown" token `?` followed by a space. */
function writeUnknown(builder: StringBuilder) {
    StringBuilder.writeSafe(builder, '? ');
}

/**
 * Writes a string value, quoting it when needed.
 *
 * - An empty value is written as `.`.
 * - A value containing a newline, or both quote kinds, is written as a
 *   multi-line text field.
 * - A value containing `"` is wrapped in single quotes; one containing `'`
 *   is wrapped in double quotes.
 * - A value containing a space or tab, or starting with `#` or `;`, is
 *   wrapped in single quotes.
 * - Anything else is written as is. Every non-multi-line form is followed
 *   by a space.
 *
 * @returns true if the value was written as a multi-line text field.
 */
function writeChecked(builder: StringBuilder, val: string) {
    if (!val) {
        StringBuilder.writeSafe(builder, '. ');
        return false;
    }

    let escape = false, escapeCharStart = '\'', escapeCharEnd = '\' ';
    let hasWhitespace = false;
    let hasSingle = false;
    let hasDouble = false;
    // Every character must be inspected, including the last one: a trailing
    // space or quote needs quoting just like one in the middle of the value.
    for (let i = 0, _l = val.length; i < _l; i++) {
        const c = val.charCodeAt(i);

        switch (c) {
            case 9: hasWhitespace = true; break; // \t
            case 10: // \n
                writeMultiline(builder, val);
                return true;
            case 32: hasWhitespace = true; break; // ' '
            case 34: // "
                if (hasSingle) {
                    writeMultiline(builder, val);
                    return true;
                }

                hasDouble = true;
                escape = true;
                escapeCharStart = '\'';
                escapeCharEnd = '\' ';
                break;
            case 39: // '
                if (hasDouble) {
                    writeMultiline(builder, val);
                    return true;
                }

                escape = true;
                hasSingle = true;
                escapeCharStart = '"';
                escapeCharEnd = '" ';
                break;
        }
    }

    const fst = val.charCodeAt(0);
    if (!escape && (fst === 35 /* # */ || fst === 59 /* ; */ || hasWhitespace)) {
        escapeCharStart = '\'';
        escapeCharEnd = '\' ';
        escape = true;
    }

    if (escape) {
        StringBuilder.writeSafe(builder, escapeCharStart);
        StringBuilder.writeSafe(builder, val);
        StringBuilder.writeSafe(builder, escapeCharEnd);
    } else {
        StringBuilder.writeSafe(builder, val);
        StringBuilder.writeSafe(builder, ' ');
    }

    return false;
}

/** Writes `val` as a text field delimited by `;` lines; the output ends with a line break. */
function writeMultiline(builder: StringBuilder, val: string) {
    StringBuilder.writeSafe(builder, '\n;' + val);
    StringBuilder.writeSafe(builder, '\n;\n');
}
/**
 * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

// File: src/mol-io/writer/encoder.ts

import Writer from './writer'

/** Something that can be encoded and then written to a `Writer` of the same data type. */
export default interface Encoder<T> {
    encode(): void;
    writeTo(writer: Writer<T>): void;
}

/**
 * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

// File: src/mol-io/writer/writer.ts

/** A sink accepting data of type `T`; `write` returns a boolean. */
interface Writer<T> {
    write(data: T): boolean;
}

namespace Writer {

}

export default Writer
/**
 * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

// File: src/perf-tests/cif-encoder.ts

import Iterator from 'mol-base/collections/iterator'
import * as Enc from 'mol-io/writer/cif/encoder'
import CW from 'mol-io/writer/cif/encoder/text'

/**
 * Builds the three test fields `<prefix>1` (Str), `<prefix>2` (Int, key squared)
 * and `<prefix>3` (Float, random), using `label` for the string field.
 */
function makeFields(prefix: string, label: (i: number) => string): Enc.FieldDefinition<number>[] {
    return [
        { name: prefix + '1', type: Enc.FieldType.Str, value: label },
        { name: prefix + '2', type: Enc.FieldType.Int, value: i => i * i },
        { name: prefix + '3', type: Enc.FieldType.Float, value: i => Math.random() }
    ];
}

/** Category with plain single-line string values. */
const category1: Enc.CategoryDefinition<number> = {
    name: 'test',
    fields: makeFields('f', i => 'v' + i)
}

/** Category whose string values contain a newline. */
const category2: Enc.CategoryDefinition<number> = {
    name: 'test2',
    fields: makeFields('e', i => 'v\n' + i)
}

/** Category provider: wraps the definition and row count from `ctx` into an instance without data. */
function getInstace(ctx: { cat: Enc.CategoryDefinition<number>, rowCount: number }): Enc.CategoryInstance {
    const keys = () => Iterator.Range(0, ctx.rowCount - 1);
    return {
        data: void 0,
        definition: ctx.cat,
        keys,
        rowCount: ctx.rowCount
    }
}

const encoder = new CW();

encoder.startDataBlock('test');
encoder.writeCategory(getInstace, [{ rowCount: 5, cat: category1 }]);
encoder.writeCategory(getInstace, [{ rowCount: 1, cat: category2 }]);
console.log(encoder.getData());

// File: src/perf-tests/string-builder.ts
// Members of `namespace Test` that are fully visible in this fragment.
// `SB`, `B`, `createData` and `build` are defined in parts of the file that are
// not shown. The visible tail of `build` writes each `data[i]` with
// `SB.writeSafe` and then calls `SB.whitespace1(sb)` inside its loop.

/** Same as `build`, but appends the space to each item before a single `writeSafe` call. */
function buildWS(data: string[], chunkSize: number): SB {
    const builder = SB.create(chunkSize);
    for (let idx = 0, count = data.length; idx < count; idx++) {
        SB.writeSafe(builder, data[idx] + ' ');
    }
    return builder;
}

// function naive(data: string[]) {
//     let ret = '';
//     for (let i = 0, _i = data.length; i < _i; i++) ret += data[i];
//     return ret;
// }

// function join(data: string[]) {
//     let ret = [];
//     for (let i = 0, _i = data.length; i < _i; i++) ret[i] = data[i];
//     return ret.join('');
// }

/** Benchmarks `build` against `buildWS` with a chunk size of 512 and logs each cycle. */
export function run() {
    const data = createData(26 * 100000);
    const N = 512;

    new B.Suite()
        // .add(`naive`, () => naive(data))
        // .add(`join`, () => join(data))
        //.add(`${N} chunks`, () => SB.getChunks(build(data, N)))
        .add(`${N} str`, () => SB.getString(build(data, N)))
        .add(`${N} str ws`, () => SB.getString(buildWS(data, N)))
        .on('cycle', (e: any) => console.log(String(e.target)))
        .run();
}