// data-model.ts
/**
 * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

import { Column, ColumnHelpers } from 'mol-data/db'
import { Tensor } from 'mol-math/linear-algebra'
import { getNumberType, NumberType, parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser';
import { Encoding } from '../../common/binary-cif';
import { Tokens } from '../common/text/tokenizer';
import { areValuesEqualProvider } from '../common/text/column/token';

/** A CIF file: an optional name together with the blocks it contains. */
export interface CifFile {
    /** Optional name associated with the file. */
    readonly name?: string,
    /** Blocks contained in the file. */
    readonly blocks: ReadonlyArray<CifBlock>
}

/**
 * Creates a CifFile from a list of blocks.
 *
 * @param blocks The blocks of the file. A real array is stored as-is (same
 *     reference); any other array-like object is copied into a new array so
 *     that `blocks` on the result really is an array.
 * @param name Optional name of the file.
 * @returns An object holding `name` and `blocks`.
 */
export function CifFile(blocks: ArrayLike<CifBlock>, name?: string): CifFile {
    // The result type promises a ReadonlyArray, but the input is only
    // guaranteed to be array-like; convert instead of asserting the type away.
    const list = Array.isArray(blocks) ? blocks : Array.from(blocks);
    return { name, blocks: list };
}

/** A header plus a set of named categories; CifBlock extends this shape. */
export interface CifFrame {
    /** Header string of the frame. */
    readonly header: string,
    // Category names stored separately so that the ordering can be preserved.
    readonly categoryNames: ReadonlyArray<string>,
    /** The categories of the frame, looked up by name. */
    readonly categories: CifCategories
}

/** A frame that additionally carries a list of save frames. */
export interface CifBlock extends CifFrame {
    /** Save frames that belong to this block. */
    readonly saveFrames: CifFrame[]
}

/**
 * Bundles the given parts into a CifBlock.
 *
 * @param categoryNames Names of the categories (stored as given, not copied).
 * @param categories The categories, looked up by name.
 * @param header Header string of the block.
 * @param saveFrames Save frames of the block; a fresh empty array when omitted.
 */
export function CifBlock(categoryNames: string[], categories: CifCategories, header: string, saveFrames: CifFrame[] = []): CifBlock {
    const block = {
        categoryNames: categoryNames,
        header: header,
        categories: categories,
        saveFrames: saveFrames
    };
    return block;
}

/**
 * Bundles the given parts into a CifFrame.
 *
 * @param categoryNames Names of the categories (stored as given, not copied).
 * @param categories The categories, looked up by name.
 * @param header Header string of the frame.
 */
export function CifSafeFrame(categoryNames: string[], categories: CifCategories, header: string): CifFrame {
    const frame = {
        categoryNames: categoryNames,
        header: header,
        categories: categories
    };
    return frame;
}

/** Read-only lookup from a category name to its CifCategory. */
export type CifCategories = { readonly [name: string]: CifCategory }

/** A named category: a row count plus a set of fields that are looked up by name. */
export interface CifCategory {
    /** Number of rows in the category. */
    readonly rowCount: number,
    /** Name of the category. */
    readonly name: string,
    /** Names of the fields of the category. */
    readonly fieldNames: ReadonlyArray<string>,
    /** Returns the field with the given name, or undefined when there is none. */
    getField(name: string): CifField | undefined
}

/**
 * Creates a CifCategory backed by the given field lookup.
 *
 * @param name Name of the category.
 * @param rowCount Number of rows in the category.
 * @param fieldNames Names of the fields; copied, so later changes to the
 *     passed array do not affect the category.
 * @param fields Lookup used by `getField`; it is read on every call, not copied.
 */
export function CifCategory(name: string, rowCount: number, fieldNames: string[], fields: { [name: string]: CifField }): CifCategory {
    const ownFieldNames = [...fieldNames];
    const getField = (fieldName: string) => fields[fieldName];
    return { rowCount, name, fieldNames: ownFieldNames, getField };
}

export namespace CifCategory {
    /**
     * Creates a category with the given name that has no rows and no fields;
     * `getField` returns undefined for every name.
     */
    export function empty(name: string): CifCategory {
        return { rowCount: 0, name, fieldNames: [], getField() { return void 0; } };
    }

    /** Maps every key of S to an optional CifField. */
    export type SomeFields<S> = { [P in keyof S]?: CifField }
    /** Maps every key of S to a CifField. */
    export type Fields<S> = { [P in keyof S]: CifField }

    /**
     * Creates a category from a lookup of fields.
     *
     * `fieldNames` lists every key of `fields`, including keys whose value is
     * undefined. `rowCount` is taken from the first field that is actually
     * defined, and is 0 when there is no defined field.
     *
     * @param name Name of the category.
     * @param fields Lookup used by `getField`; entries may be undefined.
     */
    export function ofFields(name: string, fields: { [name: string]: CifField | undefined }): CifCategory {
        const fieldNames = Object.keys(fields);

        // The parameter type permits undefined entries, so the first key is
        // not guaranteed to hold a field; skip ahead to the first defined one.
        let rowCount = 0;
        for (let i = 0; i < fieldNames.length; i++) {
            const field = fields[fieldNames[i]];
            if (field) {
                rowCount = field.rowCount;
                break;
            }
        }

        return {
            rowCount,
            name,
            fieldNames,
            getField(fieldName) { return fields[fieldName]; }
        };
    }
}

/**
 * Row-based accessors for the values of a single field.
 *
 * Implementation note:
 * Always implement without using "this." in any of the interface functions.
 * This is to ensure that the functions can be invoked without having to "bind" them.
 */
export interface CifField {
    // NOTE(review): every factory in the CifField namespace of this file sets this to
    // undefined; presumably other field sources expose their backing array here - confirm.
    readonly __array: ArrayLike<any> | undefined,
    // NOTE(review): likewise always undefined for the factories in this file;
    // presumably set by fields decoded from binary CIF - confirm.
    readonly binaryEncoding: Encoding[] | undefined,
    // NOTE(review): true for every factory in this file; presumably false marks a
    // placeholder for a field that is absent - confirm.
    readonly isDefined: boolean,
    /** Number of rows of the field. */
    readonly rowCount: number,

    /**
     * Value of the row as a string. The string- and token-backed factories
     * in this file return '' for the values '.' and '?'.
     */
    str(row: number): string,
    /**
     * Value of the row as a number. The string- and token-backed factories
     * in this file return 0 when the parser yields a falsy result.
     */
    int(row: number): number,
    /** Value of the row as a number; see `int` for the fallback used by the factories in this file. */
    float(row: number): number,

    /** Kind of the value in the row, as a Column.ValueKind. */
    valueKind(row: number): Column.ValueKind,

    /** Whether the values of the two rows are equal. */
    areValuesEqual(rowA: number, rowB: number): boolean,

    /** String values of the field as an array; `params` is forwarded to the array creation helper by the factories in this file. */
    toStringArray(params?: Column.ToArrayParams<string>): ReadonlyArray<string>,
    /** Same as `toStringArray`, filled from the `int` accessor. */
    toIntArray(params?: Column.ToArrayParams<number>): ReadonlyArray<number>,
    /** Same as `toStringArray`, filled from the `float` accessor. */
    toFloatArray(params?: Column.ToArrayParams<number>): ReadonlyArray<number>
}

export namespace CifField {
    /** Reads the value of one row. */
    type RowReader<T> = (row: number) => T;

    /**
     * Assembles a field from its row accessors. The three `to*Array` functions
     * fill a new array from the matching accessor; none of the members rely on "this".
     */
    function assemble(
        rowCount: number,
        str: RowReader<string>,
        int: RowReader<number>,
        float: RowReader<number>,
        valueKind: CifField['valueKind'],
        areValuesEqual: CifField['areValuesEqual']
    ): CifField {
        return {
            __array: void 0,
            binaryEncoding: void 0,
            isDefined: true,
            rowCount,
            str,
            int,
            float,
            valueKind,
            areValuesEqual,
            toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
            toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
            toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
        };
    }

    /** Value kind of a value that consists of exactly the character with the given code. */
    function kindOfSingleChar(code: number) {
        switch (code) {
            case 46 /* . */: return Column.ValueKind.NotPresent;
            case 63 /* ? */: return Column.ValueKind.Unknown;
            default: return Column.ValueKind.Present;
        }
    }

    /** Creates a single-row field holding the given string. */
    export function ofString(value: string) {
        return ofStrings([value]);
    }

    /**
     * Creates a field backed by an array of strings.
     * `str` yields '' for empty values, '.' and '?'; `int` and `float` parse
     * the string and yield 0 when the parsed result is falsy.
     */
    export function ofStrings(values: string[]): CifField {
        const str = (row: number) => {
            const text = values[row];
            return (!text || text === '.' || text === '?') ? '' : text;
        };
        const int = (row: number) => {
            const text = values[row];
            return fastParseInt(text, 0, text.length) || 0;
        };
        const float = (row: number) => {
            const text = values[row];
            return fastParseFloat(text, 0, text.length) || 0;
        };
        const valueKind = (row: number) => {
            const text = values[row];
            const length = text.length;
            if (length > 1) return Column.ValueKind.Present;
            if (length === 0) return Column.ValueKind.NotPresent;
            return kindOfSingleChar(text.charCodeAt(0));
        };
        const areValuesEqual = (rowA: number, rowB: number) => values[rowA] === values[rowB];

        return assemble(values.length, str, int, float, valueKind, areValuesEqual);
    }

    /**
     * Creates a field backed by an array-like of numbers. Every row is
     * reported as present, and `int` returns the stored number unchanged
     * (it is the same accessor as `float`).
     */
    export function ofNumbers(values: ArrayLike<number>): CifField {
        const str = (row: number) => '' + values[row];
        const float = (row: number) => values[row];
        const valueKind = (row: number) => Column.ValueKind.Present;
        const areValuesEqual = (rowA: number, rowB: number) => values[rowA] === values[rowB];

        return assemble(values.length, str, float, float, valueKind, areValuesEqual);
    }

    /**
     * Creates a field backed by tokens: row `i` is the part of `tokens.data`
     * between `indices[2 * i]` and `indices[2 * i + 1]`.
     */
    export function ofTokens(tokens: Tokens): CifField {
        const { data, indices, count: rowCount } = tokens;

        const str = (row: number) => {
            const text = data.substring(indices[2 * row], indices[2 * row + 1]);
            return (text === '.' || text === '?') ? '' : text;
        };
        const int = (row: number) => fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0;
        const float = (row: number) => fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0;
        const valueKind = (row: number) => {
            const start = indices[2 * row];
            const length = indices[2 * row + 1] - start;
            if (length > 1) return Column.ValueKind.Present;
            if (length === 0) return Column.ValueKind.NotPresent;
            return kindOfSingleChar(data.charCodeAt(start));
        };

        return assemble(rowCount, str, int, float, valueKind, areValuesEqualProvider(tokens));
    }

    /**
     * Creates a field that reads its values from a column.
     * Supports columns whose schema value type is 'float', 'int' or 'str';
     * `valueKind` and `areValuesEqual` are taken over from the column.
     *
     * @throws Error('unsupported') for any other value type.
     */
    export function ofColumn(column: Column<any>): CifField {
        const { rowCount, valueKind, areValuesEqual } = column;
        const raw = (row: number) => column.value(row);
        const valueType = column.schema.valueType;

        if (valueType === 'float' || valueType === 'int') {
            const str = (row: number) => '' + column.value(row);
            return assemble(rowCount, str, raw, raw, valueKind, areValuesEqual);
        }

        if (valueType === 'str') {
            const int = (row: number) => {
                const text = column.value(row);
                return fastParseInt(text, 0, text.length) || 0;
            };
            const float = (row: number) => {
                const text = column.value(row);
                return fastParseFloat(text, 0, text.length) || 0;
            };
            return assemble(rowCount, raw, int, float, valueKind, areValuesEqual);
        }

        throw new Error('unsupported');
    }
}

/**
 * Reads a tensor of rank 1, 2 or 3 from one row of a category.
 *
 * Each component is looked up as its own field named `field[i]`,
 * `field[i][j]` or `field[i][j][k]`; a component whose field does not exist
 * is stored as 0.
 *
 * @param category Category that provides the component fields.
 * @param field Common prefix of the component field names.
 * @param space Tensor space; supplies rank, dimensions, creation and `set`.
 * @param row Row to read the components from.
 * @param zeroIndexed Whether the indices in the field names start at 0 (otherwise 1).
 * @returns The tensor data created by `space.create()`, filled with the components.
 * @throws Error when the rank of `space` is not 1, 2 or 3.
 */
export function getTensor(category: CifCategory, field: string, space: Tensor.Space, row: number, zeroIndexed: boolean): Tensor.Data {
    const tensor = space.create();
    const base = zeroIndexed ? 0 : 1;

    // Value of the named component field in `row`, or 0 when the field is missing.
    const component = (name: string): number => {
        const source = category.getField(name);
        return source ? source.float(row) : 0.0;
    };

    switch (space.rank) {
        case 1: {
            const length = space.dimensions[0];
            for (let i = 0; i < length; i++) {
                space.set(tensor, i, component(`${field}[${i + base}]`));
            }
            break;
        }
        case 2: {
            const rowDim = space.dimensions[0];
            const colDim = space.dimensions[1];
            for (let i = 0; i < rowDim; i++) {
                for (let j = 0; j < colDim; j++) {
                    space.set(tensor, i, j, component(`${field}[${i + base}][${j + base}]`));
                }
            }
            break;
        }
        case 3: {
            const dim0 = space.dimensions[0];
            const dim1 = space.dimensions[1];
            const dim2 = space.dimensions[2];
            for (let i = 0; i < dim0; i++) {
                for (let j = 0; j < dim1; j++) {
                    for (let k = 0; k < dim2; k++) {
                        space.set(tensor, i, j, k, component(`${field}[${i + base}][${j + base}][${k + base}]`));
                    }
                }
            }
            break;
        }
        default:
            throw new Error('Tensors with rank > 3 or rank 0 are currently not supported.');
    }

    return tensor;
}

/**
 * Determines the column schema that fits the values of a field.
 *
 * Rows whose value kind is not `Present` are ignored for the classification.
 * The result is
 *  - `str` as soon as one present value is neither an int nor a float, or
 *    when no row has a present value at all (this includes an empty field),
 *  - `float` when at least one present value is a float,
 *  - `int` otherwise.
 */
export function getCifFieldType(field: CifField): Column.Schema.Int | Column.Schema.Float | Column.Schema.Str {
    const total = field.rowCount;
    let missing = 0;
    let sawFloat = false;

    for (let row = 0; row < total; row++) {
        if (field.valueKind(row) !== Column.ValueKind.Present) {
            missing++;
            continue;
        }

        switch (getNumberType(field.str(row))) {
            case NumberType.Int:
                break;
            case NumberType.Float:
                sawFloat = true;
                break;
            default:
                // A single non-numeric value decides the type; no need to look further.
                return Column.Schema.str;
        }
    }

    if (missing === total) return Column.Schema.str;
    return sawFloat ? Column.Schema.float : Column.Schema.int;
}