import * as Data from '../../reader/cif/data-model'
import { CifWriter } from '../cif';
import decodeMsgPack from '../../common/msgpack/decode'
import { EncodedFile, EncodedCategory } from '../../common/binary-cif';
import Field from '../../reader/cif/binary/field';
import * as C from '../cif/encoder';

// ---------------------------------------------------------------------------
// Fixture data: two small categories that are written by every encoder below.
// ---------------------------------------------------------------------------

const cartn_x = Data.CifField.ofNumbers([1.001, 1.002, 1.003, 1.004, 1.005, 1.006, 1.007, 1.008, 1.009]);
const cartn_y = Data.CifField.ofNumbers([-3.0, -2.666, -2.3333, -2.0, -1.666, -1.333, -1.0, -0.666, -0.333]);
const cartn_z = Data.CifField.ofNumbers([1, 2, 3, 4, 5, 6, 7, 8, 9].map(i => Math.sqrt(i)));
const label_seq_id = Data.CifField.ofNumbers([1, 2, 3, 6, 11, 23, 47, 106, 235]);
const atom_site = Data.CifCategory.ofFields('atom_site', {
    'Cartn_x': cartn_x,
    'Cartn_y': cartn_y,
    'Cartn_z': cartn_z,
    'label_seq_id': label_seq_id
});

const field1 = Data.CifField.ofNumbers([1, 2, 3, 6, 11, 23, 47, 106, 235]);
const field2 = Data.CifField.ofNumbers([-1, -2, -3, -6, -11, -23, -47, -106, -235]);
const other_fields = Data.CifCategory.ofFields('other_fields', { 'field1': field1, 'field2': field2 });

// Binary encoder with explicit per-column hints; Cartn_x has no hint and is left to auto-classification.
const encoding_aware_encoder = CifWriter.createEncoder({
    binary: true,
    binaryAutoClassifyEncoding: true,
    binaryEncodingPovider: CifWriter.createEncodingProviderFromJsonConfig([
        { 'categoryName': 'atom_site', 'columnName': 'Cartn_y', 'encoding': 'rle', 'precision': 0 },
        { 'categoryName': 'atom_site', 'columnName': 'Cartn_z', 'encoding': 'delta', 'precision': 1 },
        { 'categoryName': 'atom_site', 'columnName': 'label_seq_id', 'encoding': 'delta-rle' }
    ])
});

describe('encoding-config', () => {
    const file = roundTrip(encoding_aware_encoder);
    const atomSite = file.blocks[0].categories['atom_site'];

    const decoded_cartn_x = atomSite.getField('Cartn_x')!;
    const decoded_cartn_y = atomSite.getField('Cartn_y')!;
    const decoded_cartn_z = atomSite.getField('Cartn_z')!;
    const decoded_label_seq_id = atomSite.getField('label_seq_id')!;

    const tolerance = 0.001;

    /** Element-wise comparison of two numeric arrays within `tolerance`. */
    function expectClose(actual: ArrayLike<number>, expected: ArrayLike<number>) {
        expect(actual.length).toBe(expected.length);
        for (let idx = 0; idx < actual.length; idx++) {
            const diff = Math.abs(actual[idx] - expected[idx]);
            expect(diff).toBeLessThan(tolerance);
        }
    }

    /** Comma-separated kinds of the encoding chain recorded on a decoded field. */
    function encodingChain(field: Data.CifField) {
        const kinds = field.binaryEncoding!.map(step => step.kind);
        return kinds.join();
    }

    it('strategy', () => {
        expect(encodingChain(decoded_cartn_x)).toBe('FixedPoint,Delta,IntegerPacking,ByteArray');
        expect(encodingChain(decoded_cartn_y)).toBe('FixedPoint,RunLength,IntegerPacking,ByteArray');
        expect(encodingChain(decoded_cartn_z)).toBe('FixedPoint,Delta,IntegerPacking,ByteArray');
        expect(encodingChain(decoded_label_seq_id)).toBe('Delta,RunLength,IntegerPacking,ByteArray');
    });

    it('precision', () => {
        // expected values are the originals rounded to the precision given in the hints
        const roundedY = cartn_y.toFloatArray().map(d => Math.round(d));
        const roundedZ = cartn_z.toFloatArray().map(d => Math.round(d * 10) / 10);

        expectClose(decoded_cartn_x.toFloatArray(), cartn_x.toFloatArray());
        expectClose(decoded_cartn_y.toFloatArray(), roundedY);
        expectClose(decoded_cartn_z.toFloatArray(), roundedZ);
        expectClose(decoded_label_seq_id.toIntArray(), label_seq_id.toIntArray());
    });
});

// Whitelist: keep only atom_site, and of it only Cartn_x / Cartn_y.
const filter_aware_encoder1 = CifWriter.createEncoder({
    binary: true,
    binaryAutoClassifyEncoding: true
});
filter_aware_encoder1.setFilter(C.Category.filterOf('atom_site\n\natom_site.Cartn_x\natom_site.Cartn_y\n'));

// Blacklist: drop atom_site entirely and field2 of other_fields.
const filter_aware_encoder2 = CifWriter.createEncoder({
    binary: true
});
filter_aware_encoder2.setFilter(C.Category.filterOf('!atom_site\n\n!other_fields.field2\n'));

describe('filtering-config', () => {
    const whitelisted = roundTrip(filter_aware_encoder1).blocks[0].categories;

    const atom_site1 = whitelisted['atom_site'];
    const cartn_x1 = atom_site1.getField('Cartn_x');
    const cartn_y1 = atom_site1.getField('Cartn_y');
    const cartn_z1 = atom_site1.getField('Cartn_z');
    const label_seq_id1 = atom_site1.getField('label_seq_id');
    const fields1 = whitelisted['other_fields'];

    it('whitelist-filtering', () => {
        expect(atom_site1).toBeDefined();
        expect(cartn_x1).toBeDefined();
        expect(cartn_y1).toBeDefined();
        expect(cartn_z1).toBeUndefined();
        expect(label_seq_id1).toBeUndefined();
        expect(fields1).toBeUndefined();
    });

    const blacklisted = roundTrip(filter_aware_encoder2).blocks[0].categories;

    const atom_site2 = blacklisted['atom_site'];
    const fields2 = blacklisted['other_fields'];
    const field12 = fields2.getField('field1');
    const field22 = fields2.getField('field2');

    it('blacklist-filtering', () => {
        expect(atom_site2).toBeUndefined();
        expect(fields2).toBeDefined();
        expect(field12).toBeDefined();
        expect(field22).toBeUndefined();
    });
});

/**
 * Writes both fixture categories into a data block named 'test' with the given encoder,
 * then decodes the resulting MessagePack bytes back into a CifFile.
 */
function roundTrip(encoder: C.Encoder) {
    encoder.startDataBlock('test');

    for (const category of [atom_site, other_fields]) {
        const writerFields: CifWriter.Field[] = category.fieldNames.map(
            fieldName => wrap(fieldName, category.getField(fieldName)!)
        );
        encoder.writeCategory(getCategoryInstanceProvider(category, writerFields));
    }

    const bytes = encoder.getData() as Uint8Array;
    const unpacked = decodeMsgPack(bytes) as EncodedFile;

    return Data.CifFile(unpacked.dataBlocks.map(block => {
        // encoded category names carry one leading character that is stripped here
        const names = block.categories.map(c => c.name.slice(1));
        const byName = Object.create(null);
        block.categories.forEach((encodedCategory, idx) => {
            byName[names[idx]] = Category(encodedCategory);
        });
        return Data.CifBlock(names, byName, block.header);
    }));
}

/** Adapts a reader-side category plus its writer fields to the writer's category interface. */
function getCategoryInstanceProvider(cat: Data.CifCategory, fields: CifWriter.Field[]): CifWriter.Category {
    const source = { data: cat, rowCount: cat.rowCount };
    return {
        name: cat.name,
        instance: () => CifWriter.categoryInstance(fields, source)
    };
}

/** Builds a writer field from a reader field, choosing Str / Float / Int from the detected field type. */
function wrap(name: string, field: Data.CifField): CifWriter.Field {
    const detected = Data.getCifFieldType(field)['@type'];
    switch (detected) {
        case 'str':
            return { name, type: CifWriter.Field.Type.Str, value: field.str, valueKind: field.valueKind };
        case 'float':
            return { name, type: CifWriter.Field.Type.Float, value: field.float, valueKind: field.valueKind };
        default:
            return { name, type: CifWriter.Field.Type.Int, value: field.int, valueKind: field.valueKind };
    }
}

/** Wraps an encoded category as a CifCategory; binary fields are created on first access and cached. */
function Category(data: EncodedCategory): Data.CifCategory {
    const columnsByName = Object.create(null);
    const fieldCache = Object.create(null);
    data.columns.forEach(column => { columnsByName[column.name] = column; });

    return {
        rowCount: data.rowCount,
        name: data.name.slice(1),
        fieldNames: data.columns.map(c => c.name),
        getField(name) {
            const column = columnsByName[name];
            if (!column) return void 0;
            if (!fieldCache[name]) fieldCache[name] = Field(column);
            return fieldCache[name];
        }
    }
}
/**
 * Creates an encoding provider from a list of per-column hints (e.g. parsed from a JSON config).
 *
 * The returned provider looks up the first hint whose category and column name both match
 * and resolves it to an encoder chain; columns without a hint yield `undefined`.
 *
 * @param hints encoding hints, matched in order - the first match wins
 * @throws Error (from the provider's `get`) if the matching hint names an unsupported encoding
 */
export function createEncodingProviderFromJsonConfig(hints: EncodingStrategyHint[]): EncodingProvider {
    return {
        get(c, f) {
            const hint = hints.find(h => h.categoryName === c && h.columnName === f);
            return hint ? resolveEncoding(hint) : void 0;
        }
    }
}

/**
 * Translates a single hint into an encoder chain.
 *
 * If a precision is given, the chain starts with a fixed-point step using the multiplier
 * 10^precision; the steps named by `hint.encoding` follow. Every chain ends with integer packing.
 *
 * @throws Error if `hint.encoding` is not one of 'pack', 'rle', 'delta', 'delta-rle'
 */
function resolveEncoding(hint: EncodingStrategyHint): ArrayEncoder {
    const { encoding, precision } = hint;
    // only present for columns that declare a precision
    const fixedPoint = precision !== void 0 ? E.by(E.fixedPoint(Math.pow(10, precision))) : void 0;

    switch (encoding) {
        case 'pack':
            return fixedPoint ? fixedPoint.and(E.integerPacking) : E.by(E.integerPacking);
        case 'rle':
            return (fixedPoint ? fixedPoint.and(E.runLength) : E.by(E.runLength)).and(E.integerPacking);
        case 'delta':
            return (fixedPoint ? fixedPoint.and(E.delta) : E.by(E.delta)).and(E.integerPacking);
        case 'delta-rle':
            return (fixedPoint ? fixedPoint.and(E.delta) : E.by(E.delta)).and(E.runLength).and(E.integerPacking);
        default:
            // hints originate from untyped JSON, so an unknown tag can reach this point at runtime
            throw new Error(`unsupported encoding '${encoding}' for column ${hint.categoryName}.${hint.columnName}`);
    }
}
/**
 * Defines the information needed to encode a certain field: category and column name as well as
 * the encoding tag. The precision is optional and identifies float columns.
 */
export interface EncodingStrategyHint {
    categoryName: string,
    columnName: string,
    // TODO would be nice to infer strategy and precision if needed
    encoding: EncodingType,
    /**
     * number of decimal places to keep - must be specified for float columns
     */
    precision?: number
}

type EncodingType = 'pack' | 'rle' | 'delta' | 'delta-rle'

/**
 * Builds a filter from a newline-separated list of directives.
 *
 * Each non-empty line is either `category_name` or `category_name.column_name`; a leading `!`
 * marks the entry as blacklisted. Rules:
 * - blacklisted categories and fields are always excluded (blacklist has higher priority)
 * - if at least one category is whitelisted, only whitelisted categories are included
 * - whitelisting a field also whitelists its category, and restricts that category
 *   to its explicitly whitelisted fields
 * - blacklisting a field affects only that field
 *
 * @param directives directive lines separated by `\n` and/or `\r`; empty lines are ignored
 * @returns a filter deciding which categories and fields are included
 */
export function filterOf(directives: string): Filter {
    const categoryWhitelist = new Set<string>();
    const categoryBlacklist = new Set<string>();
    const fieldWhitelist = new Set<string>();
    const fieldBlacklist = new Set<string>();
    // categories that have at least one explicitly whitelisted field
    const restrictedCategories = new Set<string>();

    for (const line of directives.split(/[\r\n]+/)) {
        let entry = line.trim();
        // allow for empty lines in config
        if (entry.length === 0) continue;

        // let ! denote blacklisted entries
        const blacklisted = entry.startsWith('!');
        if (blacklisted) entry = entry.slice(1);

        const [category = '', field = ''] = entry.split('.');

        if (!field) {
            (blacklisted ? categoryBlacklist : categoryWhitelist).add(entry);
        } else if (blacklisted) {
            // must NOT touch the category whitelist: excluding one field
            // should not exclude every other category
            fieldBlacklist.add(entry);
        } else {
            fieldWhitelist.add(entry);
            // ensure categories are aware about whitelisted columns
            categoryWhitelist.add(category);
            restrictedCategories.add(category);
        }
    }

    return {
        includeCategory(cat) {
            if (categoryBlacklist.has(cat)) return false;
            // without a whitelist everything passes, otherwise the category must be listed
            return categoryWhitelist.size === 0 || categoryWhitelist.has(cat);
        },
        includeField(cat, field) {
            // column names are assumed to follow the pattern 'category_name.column_name'
            const full = cat + '.' + field;
            if (fieldBlacklist.has(full)) return false;
            // categories without whitelisted fields include all of their fields
            return !restrictedCategories.has(cat) || fieldWhitelist.has(full);
        }
    }
}