Skip to content
Snippets Groups Projects
Unverified Commit ca866cfa authored by Alexander Rose's avatar Alexander Rose Committed by GitHub
Browse files

Merge pull request #17 from JonStargaryen/encoding-config

Encoding and precision config
parents cc344257 115824bb
Branches
Tags
No related merge requests found
import * as Data from '../../reader/cif/data-model'
import { CifWriter } from '../cif';
import decodeMsgPack from '../../common/msgpack/decode'
import { EncodedFile, EncodedCategory } from '../../common/binary-cif';
import Field from '../../reader/cif/binary/field';
import * as C from '../cif/encoder';
// Fixture columns for the 'atom_site' category: three float coordinate columns and one integer column.
const cartn_x = Data.CifField.ofNumbers([
    1.001, 1.002, 1.003, 1.004, 1.005, 1.006, 1.007, 1.008, 1.009
]);
const cartn_y = Data.CifField.ofNumbers([
    -3.0, -2.666, -2.3333, -2.0, -1.666, -1.333, -1.0, -0.666, -0.333
]);
// square roots of 1..9
const cartn_z = Data.CifField.ofNumbers(Array.from({ length: 9 }, (_, i) => Math.sqrt(i + 1)));
const label_seq_id = Data.CifField.ofNumbers([1, 2, 3, 6, 11, 23, 47, 106, 235]);
const atom_site = Data.CifCategory.ofFields('atom_site', {
    Cartn_x: cartn_x,
    Cartn_y: cartn_y,
    Cartn_z: cartn_z,
    label_seq_id
});

// Second category with two integer columns; field2 holds the negated values of field1.
const field1 = Data.CifField.ofNumbers([1, 2, 3, 6, 11, 23, 47, 106, 235]);
const field2 = Data.CifField.ofNumbers([-1, -2, -3, -6, -11, -23, -47, -106, -235]);
const other_fields = Data.CifCategory.ofFields('other_fields', { field1, field2 });
// Binary encoder with explicit per-column encoding hints for three 'atom_site' columns.
// 'Cartn_x' has no hint on purpose.
const encoding_aware_encoder = CifWriter.createEncoder({
    binary: true,
    binaryAutoClassifyEncoding: true,
    binaryEncodingPovider: CifWriter.createEncodingProviderFromJsonConfig([
        // run-length encoding, zero decimal places kept
        { categoryName: 'atom_site', columnName: 'Cartn_y', encoding: 'rle', precision: 0 },
        // delta encoding, one decimal place kept
        { categoryName: 'atom_site', columnName: 'Cartn_z', encoding: 'delta', precision: 1 },
        // no precision given: treated as an integer column
        { categoryName: 'atom_site', columnName: 'label_seq_id', encoding: 'delta-rle' }
    ])
});
describe('encoding-config', () => {
    // encode the fixtures once and read the 'atom_site' columns back
    const atomSite = process(encoding_aware_encoder).blocks[0].categories['atom_site'];
    const decodedX = atomSite.getField('Cartn_x')!;
    const decodedY = atomSite.getField('Cartn_y')!;
    const decodedZ = atomSite.getField('Cartn_z')!;
    const decodedSeqId = atomSite.getField('label_seq_id')!;

    const tolerance = 0.001;

    /** Expects equal lengths and element-wise absolute differences below the tolerance. */
    const expectClose = (e: ArrayLike<number>, a: ArrayLike<number>) => {
        expect(e.length).toBe(a.length);
        Array.from(e).forEach((value, i) => {
            expect(Math.abs(value - a[i])).toBeLessThan(tolerance);
        });
    };

    /** Comma-separated kinds of the encoding steps recorded on a decoded field. */
    const encodingKinds = (field: Data.CifField) => {
        const kinds = field.binaryEncoding!.map(step => step.kind);
        return kinds.join(',');
    };

    it('strategy', () => {
        expect(encodingKinds(decodedX)).toBe('FixedPoint,Delta,IntegerPacking,ByteArray');
        expect(encodingKinds(decodedY)).toBe('FixedPoint,RunLength,IntegerPacking,ByteArray');
        expect(encodingKinds(decodedZ)).toBe('FixedPoint,Delta,IntegerPacking,ByteArray');
        expect(encodingKinds(decodedSeqId)).toBe('Delta,RunLength,IntegerPacking,ByteArray');
    });

    it('precision', () => {
        // Cartn_x has no hint and must round-trip within the tolerance
        expectClose(decodedX.toFloatArray(), cartn_x.toFloatArray());
        // precision 0: values come back rounded to integers
        const roundedY = cartn_y.toFloatArray().map(d => Math.round(d));
        expectClose(decodedY.toFloatArray(), roundedY);
        // precision 1: values come back rounded to one decimal place
        const roundedZ = cartn_z.toFloatArray().map(d => Math.round(d * 10) / 10);
        expectClose(decodedZ.toFloatArray(), roundedZ);
        expectClose(decodedSeqId.toIntArray(), label_seq_id.toIntArray());
    });
});
// Whitelist configuration: the 'atom_site' category plus two of its columns.
const filter_aware_encoder1 = CifWriter.createEncoder({ binary: true, binaryAutoClassifyEncoding: true });
filter_aware_encoder1.setFilter(C.Category.filterOf([
    'atom_site',
    '', // an empty line inside the config
    'atom_site.Cartn_x',
    'atom_site.Cartn_y',
    '' // yields the trailing newline
].join('\n')));

// Blacklist configuration: drop the whole 'atom_site' category and one column of 'other_fields'.
const filter_aware_encoder2 = CifWriter.createEncoder({ binary: true });
filter_aware_encoder2.setFilter(C.Category.filterOf([
    '!atom_site',
    '', // an empty line inside the config
    '!other_fields.field2',
    '' // yields the trailing newline
].join('\n')));
describe('filtering-config', () => {
    // Encoding and lookups happen inside each test so that a missing category
    // fails the test itself instead of throwing while the suite is collected.
    it('whitelist-filtering', () => {
        const categories = process(filter_aware_encoder1).blocks[0].categories;
        const atomSite = categories['atom_site'];

        // assert presence before dereferencing the category
        expect(atomSite).toBeDefined();
        expect(atomSite.getField('Cartn_x')).toBeDefined();
        expect(atomSite.getField('Cartn_y')).toBeDefined();
        // columns that were not whitelisted are dropped
        expect(atomSite.getField('Cartn_z')).toBeUndefined();
        expect(atomSite.getField('label_seq_id')).toBeUndefined();
        // categories that were not whitelisted are dropped
        expect(categories['other_fields']).toBeUndefined();
    });

    it('blacklist-filtering', () => {
        const categories = process(filter_aware_encoder2).blocks[0].categories;
        const otherFields = categories['other_fields'];

        expect(categories['atom_site']).toBeUndefined();
        // assert presence before dereferencing the category
        expect(otherFields).toBeDefined();
        expect(otherFields.getField('field1')).toBeDefined();
        expect(otherFields.getField('field2')).toBeUndefined();
    });
});
/**
 * Writes both fixture categories through the given encoder into a data block
 * named 'test', then decodes the MessagePack output back into a CIF file.
 */
function process(encoder: C.Encoder) {
    encoder.startDataBlock('test');
    [atom_site, other_fields].forEach(category => {
        const fields: CifWriter.Field[] = category.fieldNames.map(
            fieldName => wrap(fieldName, category.getField(fieldName)!)
        );
        encoder.writeCategory(getCategoryInstanceProvider(category, fields));
    });

    const bytes = encoder.getData() as Uint8Array;
    const file = decodeMsgPack(bytes) as EncodedFile;

    const blocks = file.dataBlocks.map(block => {
        const byName = Object.create(null);
        const names: string[] = [];
        for (const encodedCategory of block.categories) {
            // the first character of the encoded name is dropped
            // (presumably a leading '_' - confirm against the encoder)
            const name = encodedCategory.name.slice(1);
            names.push(name);
            byName[name] = Category(encodedCategory);
        }
        return Data.CifBlock(names, byName, block.header);
    });
    return Data.CifFile(blocks);
}
/**
 * Builds the writer-side description of a category: its name and a factory
 * that creates a category instance over the given fields with the category as data.
 */
function getCategoryInstanceProvider(cat: Data.CifCategory, fields: CifWriter.Field[]): CifWriter.Category {
    return {
        name: cat.name,
        instance() {
            return CifWriter.categoryInstance(fields, { data: cat, rowCount: cat.rowCount });
        }
    };
}
/**
 * Adapts a reader-side field to a writer-side field definition, picking the
 * string, float or int accessor according to the detected field type.
 */
function wrap(name: string, field: Data.CifField): CifWriter.Field {
    switch (Data.getCifFieldType(field)['@type']) {
        case 'str':
            return { name, type: CifWriter.Field.Type.Str, value: field.str, valueKind: field.valueKind };
        case 'float':
            return { name, type: CifWriter.Field.Type.Float, value: field.float, valueKind: field.valueKind };
        default:
            // every other detected type is written as an integer column
            return { name, type: CifWriter.Field.Type.Int, value: field.int, valueKind: field.valueKind };
    }
}
/**
 * Wraps an encoded category as a CifCategory whose fields are decoded on
 * first access and cached afterwards.
 */
function Category(data: EncodedCategory): Data.CifCategory {
    const columnsByName = Object.create(null);
    const decodedFields = Object.create(null);
    data.columns.forEach(column => {
        columnsByName[column.name] = column;
    });

    return {
        rowCount: data.rowCount,
        // the first character of the encoded category name is dropped
        name: data.name.slice(1),
        fieldNames: data.columns.map(column => column.name),
        getField(name) {
            const column = columnsByName[name];
            if (!column) return void 0;
            if (!decodedFields[name]) {
                decodedFields[name] = Field(column);
            }
            return decodedFields[name];
        }
    };
}
\ No newline at end of file
......@@ -53,5 +53,64 @@ export namespace CifWriter {
return ff && ff.binaryEncoding ? ArrayEncoder.fromEncoding(ff.binaryEncoding) : void 0;
}
}
};
/**
 * Creates an encoding provider backed by a list of hints.
 *
 * For a given category and column name the first hint matching both is
 * resolved to an encoder; columns without a matching hint yield undefined.
 */
export function createEncodingProviderFromJsonConfig(hints: EncodingStrategyHint[]): EncodingProvider {
    return {
        get(c, f) {
            const match = hints.find(hint => hint.categoryName === c && hint.columnName === f);
            return match ? resolveEncoding(match) : void 0;
        }
    };
}
/**
 * Translates an encoding hint into a concrete encoder chain.
 *
 * When a precision is given, values first go through a fixed-point step with
 * a multiplier of 10^precision; otherwise the chain starts directly with the
 * requested encoding. Every chain ends with integer packing.
 *
 * @param hint category/column hint carrying the encoding tag and optional precision
 * @returns the encoder chain for the hinted column
 * @throws Error if the encoding tag is not one of 'pack', 'rle', 'delta', 'delta-rle'
 *         (possible at runtime, since hints may come from untyped JSON)
 */
function resolveEncoding(hint: EncodingStrategyHint): ArrayEncoder {
    const precision: number | undefined = hint.precision;
    if (precision !== void 0) {
        // float column: keep `precision` decimal places via fixed-point conversion
        const multiplier = Math.pow(10, precision);
        const fixedPoint = E.by(E.fixedPoint(multiplier));
        switch (hint.encoding) {
            case 'pack':
                return fixedPoint.and(E.integerPacking);
            case 'rle':
                return fixedPoint.and(E.runLength).and(E.integerPacking);
            case 'delta':
                return fixedPoint.and(E.delta).and(E.integerPacking);
            case 'delta-rle':
                return fixedPoint.and(E.delta).and(E.runLength).and(E.integerPacking);
        }
    } else {
        // no precision: no fixed-point step is applied
        switch (hint.encoding) {
            case 'pack':
                return E.by(E.integerPacking);
            case 'rle':
                return E.by(E.runLength).and(E.integerPacking);
            case 'delta':
                return E.by(E.delta).and(E.integerPacking);
            case 'delta-rle':
                return E.by(E.delta).and(E.runLength).and(E.integerPacking);
        }
    }
    // reachable when the hint carries a tag outside of the supported set
    throw new Error(`unsupported encoding '${hint.encoding}' for column '${hint.categoryName}.${hint.columnName}'`);
}
}
\ No newline at end of file
}
/**
* Describes how a single column should be encoded: the category and column
* name select the column, the encoding tag selects the strategy. The precision
* is optional and marks the column as a float column.
*/
export interface EncodingStrategyHint {
/** name of the category the column belongs to, e.g. 'atom_site' */
categoryName: string,
/** name of the column within that category, e.g. 'Cartn_x' */
columnName: string,
// TODO would be nice to infer strategy and precision if needed
/** encoding strategy to apply to the column */
encoding: EncodingType,
/**
* number of decimal places to keep - must be specified for float columns;
* when set, values are converted to fixed-point with a multiplier of
* 10^precision before the selected encoding is applied
*/
precision?: number
}
/**
* Supported encoding tags:
* - 'pack': integer packing only
* - 'rle': run-length encoding followed by integer packing
* - 'delta': delta encoding followed by integer packing
* - 'delta-rle': delta, then run-length encoding, followed by integer packing
*/
type EncodingType = 'pack' | 'rle' | 'delta' | 'delta-rle'
\ No newline at end of file
......@@ -132,6 +132,60 @@ export namespace Category {
includeField(categoryName: string, fieldName: string): boolean,
}
/**
 * Creates a filter from a newline-separated list of directives.
 *
 * Each non-empty line names either a category (`atom_site`) or a field
 * (`atom_site.Cartn_x`). A leading `!` blacklists the entry; without it the
 * entry is whitelisted. Empty lines and surrounding whitespace are ignored.
 *
 * Rules:
 * - blacklist entries have priority over whitelist entries
 * - if any category is whitelisted, only whitelisted categories are included
 * - whitelisting a field implicitly whitelists its category
 * - if a category has whitelisted fields, only those fields are included
 * - blacklisting a field does NOT whitelist its category, so a purely
 *   blacklist-based config keeps all categories that are not blacklisted
 *
 * @param directives newline-separated category and field directives
 * @returns a filter answering whether a category or a field should be written
 */
export function filterOf(directives: string): Filter {
    const catWhitelist = new Set<string>();
    const catBlacklist = new Set<string>();
    const fieldWhitelist = new Set<string>();
    const fieldBlacklist = new Set<string>();
    // categories that have at least one explicitly whitelisted field
    const categoriesWithFieldWhitelist = new Set<string>();

    for (const line of directives.split(/[\r\n]+/)) {
        let entry = line.trim();
        // allow for empty lines in config
        if (entry.length === 0) continue;

        // let ! denote blacklisted entries
        const blacklisted = entry.startsWith('!');
        if (blacklisted) entry = entry.slice(1);

        const parts = entry.split('.');
        const category = parts[0];
        const field = parts[1];

        if (!field) {
            (blacklisted ? catBlacklist : catWhitelist).add(entry);
        } else if (blacklisted) {
            fieldBlacklist.add(entry);
        } else {
            fieldWhitelist.add(entry);
            categoriesWithFieldWhitelist.add(category);
            // ensure categories are aware about whitelisted columns
            catWhitelist.add(category);
        }
    }

    return {
        includeCategory(cat) {
            // blacklist has higher priority
            if (catBlacklist.has(cat)) return false;
            // without a whitelist everything is allowed, otherwise the category must be listed
            return catWhitelist.size === 0 || catWhitelist.has(cat);
        },
        includeField(cat, field) {
            // column names are assumed to follow the pattern 'category_name.column_name'
            const full = `${cat}.${field}`;
            if (fieldBlacklist.has(full)) return false;
            // if the category has whitelisted fields, the field must be one of them
            return !categoriesWithFieldWhitelist.has(cat) || fieldWhitelist.has(full);
        }
    };
}
export const DefaultFilter: Filter = {
includeCategory(cat) { return true; },
includeField(cat, field) { return true; }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment