From 6979ae82546ac1030e45f0768651caddd9d755ee Mon Sep 17 00:00:00 2001
From: David Sehnal <david.sehnal@gmail.com>
Date: Tue, 3 Oct 2017 14:20:26 +0200
Subject: [PATCH] mmCIF schema

---
 src/reader/cif/data-model.ts                  |  22 ++
 src/reader/cif/index.ts                       |  15 ++
 src/reader/cif/schema.ts                      |  50 ++--
 src/reader/cif/schema/mmcif.ts                | 246 ++++++++++++++++++
 src/reader/cif/text-field.ts                  |  58 ++---
 src/reader/cif/text-parser.ts                 |  10 +-
 src/reader/common/column.ts                   |  16 +-
 src/reader/common/text/column/__token.ts      | 114 --------
 src/reader/common/text/column/fixed.ts        |  32 +--
 src/reader/common/text/column/token.ts        |  55 ++++
 src/reader/common/text/tokenizer.ts           |  61 +++--
 src/reader/gro/parser.ts                      |  12 +-
 src/reader/spec/cif.spec.ts                   |   4 +-
 ...xed-column.spec.ts => text-column.spec.ts} |  18 +-
 src/script.ts                                 |  11 +-
 15 files changed, 487 insertions(+), 237 deletions(-)
 create mode 100644 src/reader/cif/index.ts
 delete mode 100644 src/reader/common/text/column/__token.ts
 create mode 100644 src/reader/common/text/column/token.ts
 rename src/reader/spec/{fixed-column.spec.ts => text-column.spec.ts} (65%)

diff --git a/src/reader/cif/data-model.ts b/src/reader/cif/data-model.ts
index 642cf5621..4033df03a 100644
--- a/src/reader/cif/data-model.ts
+++ b/src/reader/cif/data-model.ts
@@ -83,4 +83,26 @@ export function DefaultUndefinedField(rowCount: number): Field {
         toIntArray: (p) => Column.createArray(rowCount, p).array,
         toFloatArray: (p) => Column.createArray(rowCount, p).array
     };
+}
+
+export function getMatrix(category: Category, field: string, rows: number, cols: number, row: number) {
+    const ret: number[][] = []; // result: rows x cols numbers gathered from the per-component fields at the given row
+    for (let i = 0; i < rows; i++) {
+        const r: number[] = [];
+        for (let j = 0; j < cols; j++) {
+            const f = category.getField(`${field}[${i + 1}][${j + 1}]`); // component field names use 1-based [i][j] suffixes
+            r[j] = f ? f.float(row) : 0.0; // a component field that is not found reads as 0
+        }
+        ret[i] = r;
+    }
+    return ret;
+}
+
+export function getVector(category: Category, field: string, rows: number, row: number) {
+    const ret: number[] = []; // result: one number per component field, read at the given row
+    for (let i = 0; i < rows; i++) {
+        const f = category.getField(`${field}[${i + 1}]`); // component field names use a 1-based [i] suffix
+        ret[i] = f ? f.float(row) : 0.0; // a component field that is not found reads as 0
+    }
+    return ret;
 }
\ No newline at end of file
diff --git a/src/reader/cif/index.ts b/src/reader/cif/index.ts
new file mode 100644
index 000000000..c7ba6c3d5
--- /dev/null
+++ b/src/reader/cif/index.ts
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import parseText from './text-parser'
+import { apply as applySchema } from './schema'
+import mmCIF from './schema/mmcif'
+
+export default { // bundles the CIF reader entry points into one default export
+    parseText, // default export of ./text-parser
+    applySchema, // the apply function of ./schema, re-exported under this name
+    schema: { mmCIF } // default export of ./schema/mmcif
+}
\ No newline at end of file
diff --git a/src/reader/cif/schema.ts b/src/reader/cif/schema.ts
index 1244827e1..aea1576e9 100644
--- a/src/reader/cif/schema.ts
+++ b/src/reader/cif/schema.ts
@@ -6,6 +6,7 @@
 
 import * as Data from './data-model'
 import * as Column from '../common/column'
+import StringPool from '../../utils/short-string-pool'
 
 /**
  * A schema defines the shape of categories and fields.
@@ -47,27 +48,21 @@ export type Category<Fields> = Fields & {
 }
 
 export namespace Category {
-    export type Schema = { '@alias'?: string } & { [field: string]: Field.Schema<any> }
+    export type Schema = { [field: string]: Field.Schema<any> }
     export type Instance<T extends Schema> = Category<{ [F in keyof T]: Column.Column<T[F]['type']> }>
 }
 
-// export interface Field<T> {
-//     readonly isDefined: boolean,
-//     value(row: number): T,
-//     presence(row: number): Data.ValuePresence,
-//     areValuesEqual(rowA: number, rowB: number): boolean,
-//     stringEquals(row: number, value: string | null): boolean,
-//     /** Converts the selected row range to an array. ctor might or might not be called depedning on the source data format. */
-//     toArray(params?: Column.ToArrayParams): ReadonlyArray<T>
-// }
-
 export namespace Field {
-    export interface Schema<T> { type: T, ctor: (field: Data.Field) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string };
+    export interface Schema<T> { type: T, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>, undefinedField: (c: number) => Data.Field, alias?: string };
     export interface Spec { undefinedField?: (c: number) => Data.Field, alias?: string }
 
+    export function alias(name: string): Schema<any> { return { alias: name } as any; }
+    export function pooledStr(spec?: Spec) { return createSchema(spec, PooledStr); }
     export function str(spec?: Spec) { return createSchema(spec, Str); }
     export function int(spec?: Spec) { return createSchema(spec, Int); }
     export function float(spec?: Spec) { return createSchema(spec, Float); }
+    export function vector(rows: number, spec?: Spec) { return createSchema(spec, Vector(rows)); }
+    export function matrix(rows: number, cols: number, spec?: Spec) { return createSchema(spec, Matrix(rows, cols)); }
 
     function create<T>(field: Data.Field, value: (row: number) => T, toArray: Column.Column<T>['toArray']): Column.Column<T> {
         const presence = field.presence;
@@ -81,11 +76,31 @@ export namespace Field {
         };
     }
 
+    function PooledStr(field: Data.Field) { // column ctor for string fields whose values are routed through a StringPool
+        const pool = StringPool.create(); // a fresh pool per constructed column
+        const value = (row: number) => StringPool.get(pool, field.str(row)); // NOTE(review): presumably returns a shared instance for repeated strings; confirm in short-string-pool
+        const array = (params?: Column.ToArrayParams) => Column.createAndFillArray(field.rowCount, value, params); // toArray goes through the pooled getter rather than field.toStringArray
+        return create<string>(field, value, array);
+    }
     function Str(field: Data.Field) { return create(field, field.str, field.toStringArray); }
     function Int(field: Data.Field) { return create(field, field.int, field.toIntArray); }
     function Float(field: Data.Field) { return create(field, field.float, field.toFloatArray); }
 
-    function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field) => Column.Column<T>): Schema<T> {
+    function Vector(rows: number) { // builds a column ctor for a vector stored as separate component fields
+        return function(field: Data.Field, category: Data.Category, key: string) {
+            const value = (row: number) => Data.getVector(category, key, rows, row); // values come from key[1]..key[rows] in category, not from field
+            return create(field, value, params => Column.createAndFillArray(field.rowCount, value, params)); // field contributes rowCount here plus whatever create reads from it
+        }
+    }
+
+    function Matrix(rows: number, cols: number) { // builds a column ctor for a matrix stored as separate component fields
+        return function(field: Data.Field, category: Data.Category, key: string) {
+            const value = (row: number) => Data.getMatrix(category, key, rows, cols, row); // values come from key[1][1]..key[rows][cols] in category, not from field
+            return create(field, value, params => Column.createAndFillArray(field.rowCount, value, params)); // field contributes rowCount here plus whatever create reads from it
+        }
+    }
+
+    function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field, category: Data.Category, key: string) => Column.Column<T>): Schema<T> {
         return { type: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || Data.DefaultUndefinedField, alias: spec && spec.alias };
     }
 }
@@ -111,8 +126,9 @@ class _Category implements Category<any> { // tslint:disable-line:class-name
             Object.defineProperty(this, k, {
                 get: function() {
                     if (cache[k]) return cache[k];
-                    const field = _category.getField(s.alias || k) || s.undefinedField(_category.rowCount);
-                    cache[k] = s.ctor(field);
+                    const name = s.alias || k;
+                    const field = _category.getField(name) || s.undefinedField(_category.rowCount);
+                    cache[k] = s.ctor(field, _category, name);
                     return cache[k];
                 },
                 enumerable: true,
@@ -127,6 +143,8 @@ function createBlock(schema: Block.Schema, block: Data.Block): any {
 }
 
 function createCategory(key: string, schema: Category.Schema, block: Data.Block) {
-    const cat = block.categories[schema['@alias'] || key];
+    const alias = (schema['@alias'] && schema['@alias'].alias) || key;
+    const name = alias[0] === '_' ? alias : '_' + alias;
+    const cat = block.categories[name];
     return new _Category(cat || Data.Category.Empty, schema, !!cat);
 }
\ No newline at end of file
diff --git a/src/reader/cif/schema/mmcif.ts b/src/reader/cif/schema/mmcif.ts
index e69de29bb..fdca87dc9 100644
--- a/src/reader/cif/schema/mmcif.ts
+++ b/src/reader/cif/schema/mmcif.ts
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Field } from '../schema'
+
+const pooledStr = Field.pooledStr(); // shared schema instances with the default spec, reused by the category definitions below
+const str = Field.str(); // plain string field
+const int = Field.int(); // integer field
+const float = Field.float(); // floating point field
+
+const entry = {
+    id: str
+}
+
+const entity = {
+    id: str,
+    type: str as Field.Schema<'polymer' | 'non-polymer' | 'water'>, // the cast narrows the static type only; the column is still built by the plain str schema
+    src_method: str,
+    pdbx_description: str,
+    formula_weight: float,
+    pdbx_number_of_molecules: int,
+    details: str,
+    pdbx_mutation: str,
+    pdbx_fragment: str,
+    pdbx_ec: str
+}
+
+const exptl = {
+    entry_id: str,
+    method: str
+}
+
+const cell = {
+    entry_id: str,
+    length_a: float,
+    length_b: float,
+    length_c: float,
+    angle_alpha: float,
+    angle_beta: float,
+    angle_gamma: float,
+    Z_PDB: int,
+    pdbx_unique_axis: str
+}
+
+const symmetry = {
+    entry_id: str,
+    space_group_name_HM: Field.str({ alias: 'space_group_name_H-M' }), // looked up under the aliased field name, which contains a hyphen
+    pdbx_full_space_group_name_HM: Field.str({ alias: 'pdbx_full_space_group_name_H-M' }), // same: the alias is the name used for the field lookup
+    cell_setting: str,
+    Int_Tables_number: int,
+    space_group_name_Hall: str
+}
+
+const struct_conf = {
+    conf_type_id: str,
+    id: str,
+    pdbx_PDB_helix_id: int,
+    beg_label_comp_id: pooledStr,
+    beg_label_asym_id: pooledStr,
+    beg_label_seq_id: int,
+    pdbx_beg_PDB_ins_code: pooledStr,
+    end_label_comp_id: pooledStr,
+    end_label_asym_id: pooledStr,
+    end_label_seq_id: int,
+    pdbx_end_PDB_ins_code: pooledStr,
+    beg_auth_comp_id: pooledStr,
+    beg_auth_asym_id: pooledStr,
+    beg_auth_seq_id: int,
+    end_auth_comp_id: pooledStr,
+    end_auth_asym_id: pooledStr,
+    end_auth_seq_id: int,
+    pdbx_PDB_helix_class: int,
+    details: str,
+    pdbx_PDB_helix_length: int
+}
+
+const struct_sheet_range = {
+    sheet_id: pooledStr,
+    id: int,
+    beg_label_comp_id: pooledStr,
+    beg_label_asym_id: pooledStr,
+    beg_label_seq_id: int,
+    pdbx_beg_PDB_ins_code: pooledStr,
+    end_label_comp_id: pooledStr,
+    end_label_asym_id: pooledStr,
+    end_label_seq_id: int,
+    pdbx_end_PDB_ins_code: pooledStr,
+    beg_auth_comp_id: pooledStr,
+    beg_auth_asym_id: pooledStr,
+    beg_auth_seq_id: int,
+    end_auth_comp_id: pooledStr,
+    end_auth_asym_id: pooledStr,
+    end_auth_seq_id: int
+}
+
+type StructConnTypeId =
+    | 'covale'
+    | 'covale_base'
+    | 'covale_phosphate'
+    | 'covale_sugar'
+    | 'disulf'
+    | 'hydrog'
+    | 'metalc'
+    | 'mismat'
+    | 'modres'
+    | 'saltbr'
+
+type BondValueOrder =
+    | 'SING'
+    | 'DOUB'
+    | 'TRIP'
+    | 'QUAD'
+
+const struct_conn = {
+    id: str,
+    conn_type_id: pooledStr as Field.Schema<StructConnTypeId>,
+    pdbx_PDB_id: str,
+    ptnr1_label_asym_id: pooledStr,
+    ptnr1_label_comp_id: pooledStr,
+    ptnr1_label_seq_id: int,
+    ptnr1_label_atom_id: pooledStr,
+    pdbx_ptnr1_label_alt_id: pooledStr,
+    pdbx_ptnr1_PDB_ins_code: pooledStr,
+    pdbx_ptnr1_standard_comp_id: pooledStr,
+    ptnr1_symmetry: pooledStr,
+    ptnr2_label_asym_id: pooledStr,
+    ptnr2_label_comp_id: pooledStr,
+    ptnr2_label_seq_id: int,
+    ptnr2_label_atom_id: pooledStr,
+    pdbx_ptnr2_label_alt_id: pooledStr,
+    pdbx_ptnr2_PDB_ins_code: pooledStr,
+    ptnr1_auth_asym_id: pooledStr,
+    ptnr1_auth_comp_id: pooledStr,
+    ptnr1_auth_seq_id: int,
+    ptnr2_auth_asym_id: pooledStr,
+    ptnr2_auth_comp_id: pooledStr,
+    ptnr2_auth_seq_id: int,
+    ptnr2_symmetry: pooledStr,
+    pdbx_ptnr3_label_atom_id: pooledStr,
+    pdbx_ptnr3_label_seq_id: int,
+    pdbx_ptnr3_label_comp_id: pooledStr,
+    pdbx_ptnr3_label_asym_id: pooledStr,
+    pdbx_ptnr3_label_alt_id: pooledStr,
+    pdbx_ptnr3_PDB_ins_code: pooledStr,
+    details: pooledStr,
+    pdbx_dist_value: float,
+    pdbx_value_order: pooledStr as Field.Schema<BondValueOrder>
+}
+
+const struct_conn_type = {
+    id: str as Field.Schema<StructConnTypeId>,
+    criteria: str,
+    reference: str
+}
+
+const chem_comp_bond = {
+    comp_id: pooledStr,
+    pdbx_stereo_config: pooledStr,
+    pdbx_ordinal: int,
+    pdbx_aromatic_flag: pooledStr as Field.Schema<'Y' | 'N'>,
+    atom_id_1: pooledStr,
+    atom_id_2: pooledStr,
+    value_order: pooledStr as Field.Schema<BondValueOrder>
+}
+
+const pdbx_struct_assembly = {
+    id: str,
+    details: str,
+    method_details: str,
+    oligomeric_details: str,
+    oligomeric_count: int
+}
+
+const pdbx_struct_assembly_gen = {
+    assembly_id: str,
+    oper_expression: str,
+    asym_id_list: str
+}
+
+const pdbx_struct_oper_list = {
+    id: str,
+    type: str,
+    name: str,
+    symmetry_operation: str,
+    matrix: Field.matrix(3, 3), // assembled per row from the component fields matrix[1][1]..matrix[3][3]
+    vector: Field.vector(3) // assembled per row from the component fields vector[1]..vector[3]
+}
+
+const pdbx_struct_mod_residue = {
+    id: int,
+    label_asym_id: pooledStr,
+    label_seq_id: int,
+    label_comp_id: pooledStr,
+    auth_asym_id: pooledStr,
+    auth_seq_id: int,
+    auth_comp_id: pooledStr,
+    PDB_ins_code: pooledStr,
+    parent_comp_id: pooledStr,
+    details: str
+}
+
+const atom_site = {
+    group_PDB: pooledStr,
+    id: int,
+    type_symbol: pooledStr,
+    label_atom_id: pooledStr,
+    label_alt_id: pooledStr,
+    label_comp_id: pooledStr,
+    label_asym_id: pooledStr,
+    label_entity_id: pooledStr,
+    label_seq_id: int,
+    pdbx_PDB_ins_code: pooledStr,
+    pdbx_formal_charge: pooledStr,
+    Cartn_x: float,
+    Cartn_y: float,
+    Cartn_z: float,
+    occupancy: float,
+    B_iso_or_equiv: float,
+    auth_atom_id: pooledStr,
+    auth_comp_id: pooledStr,
+    auth_asym_id: pooledStr,
+    auth_seq_id: int,
+    pdbx_PDB_model_num: int
+}
+
+const schema = {
+    entry,
+    entity,
+    exptl,
+    cell,
+    symmetry,
+    struct_conf,
+    struct_sheet_range,
+    struct_conn,
+    struct_conn_type,
+    chem_comp_bond,
+    pdbx_struct_assembly,
+    pdbx_struct_assembly_gen,
+    pdbx_struct_oper_list,
+    pdbx_struct_mod_residue,
+    atom_site
+};
+export default schema;
\ No newline at end of file
diff --git a/src/reader/cif/text-field.ts b/src/reader/cif/text-field.ts
index f09893691..c67cc8cf7 100644
--- a/src/reader/cif/text-field.ts
+++ b/src/reader/cif/text-field.ts
@@ -5,30 +5,31 @@
  */
 
 import * as Column from '../common/column'
+import * as TokenColumn from '../common/text/column/token'
+import { Tokens } from '../common/text/tokenizer'
 import * as Data from './data-model'
 import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser'
-import StringPool from '../../utils/short-string-pool'
 
-export default function CifTextField(data: string, tokens: ArrayLike<number>, rowCount: number): Data.Field {
-    const stringPool = StringPool.create();
+export default function CifTextField(tokens: Tokens, rowCount: number): Data.Field {
+    const { data, indices } = tokens;
 
     const str: Data.Field['str'] = row => {
-        const ret = StringPool.get(stringPool, data.substring(tokens[2 * row], tokens[2 * row + 1]));
+        const ret = data.substring(indices[2 * row], indices[2 * row + 1]);
         if (ret === '.' || ret === '?') return '';
         return ret;
     };
 
     const int: Data.Field['int'] = row => {
-        return fastParseInt(data, tokens[2 * row], tokens[2 * row + 1]) || 0;
+        return fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0;
     };
 
     const float: Data.Field['float'] = row => {
-        return fastParseFloat(data, tokens[2 * row], tokens[2 * row + 1]) || 0;
+        return fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0;
     };
 
     const presence: Data.Field['presence'] = row => {
-        const s = tokens[2 * row];
-        if (tokens[2 * row + 1] - s !== 1) return Data.ValuePresence.Present;
+        const s = indices[2 * row];
+        if (indices[2 * row + 1] - s !== 1) return Data.ValuePresence.Present;
         const v = data.charCodeAt(s);
         if (v === 46 /* . */) return Data.ValuePresence.NotSpecified;
         if (v === 63 /* ? */) return Data.ValuePresence.Unknown;
@@ -42,43 +43,20 @@ export default function CifTextField(data: string, tokens: ArrayLike<number>, ro
         int,
         float,
         presence,
-        areValuesEqual(rowA, rowB) {
-            const aS = tokens[2 * rowA], bS = tokens[2 * rowB];
-            const len = tokens[2 * rowA + 1] - aS;
-            if (len !== tokens[2 *  rowB + 1] - bS) return false;
-            for (let i = 0; i < len; i++) {
-                if (data.charCodeAt(i + aS) !== data.charCodeAt(i + bS)) {
-                    return false;
-                }
-            }
-            return true;
-        },
-        stringEquals(row, value) {
-            const s = tokens[2 * row];
-            if (!value) return presence(row) !== Data.ValuePresence.Present;
+        areValuesEqual: TokenColumn.areValuesEqualProvider(tokens),
+        stringEquals(row, v) {
+            const s = indices[2 * row];
+            const value = v || '';
+            if (!value && presence(row) !== Data.ValuePresence.Present) return true;
             const len = value.length;
-            if (len !== tokens[2 * row + 1] - s) return false;
+            if (len !== indices[2 * row + 1] - s) return false;
             for (let i = 0; i < len; i++) {
                 if (data.charCodeAt(i + s) !== value.charCodeAt(i)) return false;
             }
             return true;
         },
-        toStringArray(params) {
-            const { array, start } = Column.createArray(rowCount, params);
-            return fillArrayValues(str, array, start);
-        },
-        toIntArray(params) {
-            const { array, start } = Column.createArray(rowCount, params);
-            return fillArrayValues(int, array, start);
-        },
-        toFloatArray(params) {
-            const { array, start } = Column.createArray(rowCount, params);
-            return fillArrayValues(float, array, start);
-        }
+        toStringArray(params) { return Column.createAndFillArray(rowCount, str, params); },
+        toIntArray(params) { return Column.createAndFillArray(rowCount, int, params); },
+        toFloatArray(params)  { return Column.createAndFillArray(rowCount, float, params); }
     }
-}
-
-function fillArrayValues(value: (row: number) => any, target: any[], start: number) {
-    for (let i = 0, _e = target.length; i < _e; i++) target[i] = value(start + i);
-    return target;
 }
\ No newline at end of file
diff --git a/src/reader/cif/text-parser.ts b/src/reader/cif/text-parser.ts
index 7ea6a0d9b..4e37ddb56 100644
--- a/src/reader/cif/text-parser.ts
+++ b/src/reader/cif/text-parser.ts
@@ -24,7 +24,7 @@
 
 import * as Data from './data-model'
 import Field from './text-field'
-import { Tokens } from '../common/text/tokenizer'
+import { Tokens, TokenBuilder } from '../common/text/tokenizer'
 import Result from '../result'
 
 /**
@@ -430,7 +430,7 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D
                 errorMessage: 'Expected value.'
             }
         }
-        fields[fieldName] = Field(tokenizer.data, [tokenizer.currentTokenStart, tokenizer.currentTokenEnd], 1);
+        fields[fieldName] = Field({ data: tokenizer.data, indices: [tokenizer.currentTokenStart, tokenizer.currentTokenEnd], count: 1 }, 1);
         moveNext(tokenizer);
     }
 
@@ -461,11 +461,11 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat
     const rowCountEstimate = name === '_atom_site' ? (tokenizer.data.length / 100) | 0 : 32;
     const tokens: Tokens[] = [];
     const fieldCount = fieldNames.length;
-    for (let i = 0; i < fieldCount; i++) tokens[i] = Tokens.create(rowCountEstimate);
+    for (let i = 0; i < fieldCount; i++) tokens[i] = TokenBuilder.create(tokenizer, rowCountEstimate);
 
     let tokenCount = 0;
     while (tokenizer.currentTokenType === CifTokenType.Value) {
-        Tokens.add(tokens[(tokenCount++) % fieldCount], tokenizer.currentTokenStart, tokenizer.currentTokenEnd);
+        TokenBuilder.add(tokens[(tokenCount++) % fieldCount], tokenizer.currentTokenStart, tokenizer.currentTokenEnd);
         moveNext(tokenizer);
     }
 
@@ -480,7 +480,7 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat
     const rowCount = (tokenCount / fieldCount) | 0;
     const fields = Object.create(null);
     for (let i = 0; i < fieldCount; i++) {
-        fields[fieldNames[i]] = Field(tokenizer.data, tokens[i].indices, rowCount);
+        fields[fieldNames[i]] = Field(tokens[i], rowCount);
     }
 
     categories[name] = Data.Category(rowCount, fields);
diff --git a/src/reader/common/column.ts b/src/reader/common/column.ts
index dab5ac063..1c27a911e 100644
--- a/src/reader/common/column.ts
+++ b/src/reader/common/column.ts
@@ -53,4 +53,18 @@ export function createArray(rowCount: number, params?: ToArrayParams) {
     const s = typeof start !== 'undefined' ? Math.max(Math.min(start, rowCount - 1), 0) : 0;
     const e = typeof end !== 'undefined' ? Math.min(end, rowCount) : rowCount;
     return { array: new c(e - s) as any[], start: s, end: e };
-}
\ No newline at end of file
+}
+
+/** Helper for Column.toArray: sets target[i] = value(start + i) for every index of target and returns target. */
+export function fillArrayValues(value: (row: number) => any, target: any[], start: number) {
+    for (let i = 0, _e = target.length; i < _e; i++) target[i] = value(start + i); // the length of target decides how many rows are read
+    return target;
+}
+
+/** Helper for Column.toArray: allocates the result with createArray(rowCount, params) and fills it from value, beginning at the start row createArray reports. */
+export function createAndFillArray(rowCount: number, value: (row: number) => any, params?: ToArrayParams) {
+    const { array, start } = createArray(rowCount, params); // createArray picks the array type and the row range from params
+    return fillArrayValues(value, array, start);
+}
+
+
diff --git a/src/reader/common/text/column/__token.ts b/src/reader/common/text/column/__token.ts
deleted file mode 100644
index 87326c26d..000000000
--- a/src/reader/common/text/column/__token.ts
+++ /dev/null
@@ -1,114 +0,0 @@
-// /*
-//  * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
-//  *
-//  * @author David Sehnal <david.sehnal@gmail.com>
-//  */
-
-// import * as Data from '../../../../data/data'
-// import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../number-parser'
-// import { Tokens } from '../tokenizer'
-// import ShortStringPool from '../../../../utils/short-string-pool'
-
-// export function createTokenFields(data: string, fields: string[], tokens: Tokens): { [name: string]: Data.Field } {
-//     const fi: TokenFieldInfo = { data, fieldCount: fields.length, tokens: tokens.indices };
-//     const categoryFields = Object.create(null);
-//     for (let i = 0; i < fi.fieldCount; ++i) {
-//         categoryFields[fields[i]] = TokenField(fi, i);
-//     }
-//     return categoryFields;
-// }
-
-// export interface TokenFieldInfo {
-//     data: string,
-//     tokens: ArrayLike<number>,
-//     fieldCount: number,
-//     isCif?: boolean
-// }
-
-// export function TokenField(info: TokenFieldInfo, index: number): Data.Field {
-//     const { data, tokens, fieldCount, isCif = false } = info;
-//     const stringPool = ShortStringPool.create();
-
-//     const str: Data.Field['str'] = isCif ? row => {
-//         const i = (row * fieldCount + index) * 2;
-//         const ret = ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1]));
-//         if (ret === '.' || ret === '?') return null;
-//         return ret;
-//     } : row => {
-//         const i = (row * fieldCount + index) * 2;
-//         return ShortStringPool.get(stringPool, data.substring(tokens[i], tokens[i + 1]));
-//     };
-
-//     const int: Data.Field['int'] = row => {
-//         const i = (row * fieldCount + index) * 2;
-//         return fastParseInt(data, tokens[i], tokens[i + 1]) || 0;
-//     };
-
-//     const float: Data.Field['float'] = row => {
-//         const i = (row * fieldCount + index) * 2;
-//         return fastParseFloat(data, tokens[i], tokens[i + 1]) || 0;
-//     };
-
-//     const presence: Data.Field['presence'] = isCif ? row => {
-//         const i = 2 * (row * fieldCount + index);
-//         const s = tokens[i];
-//         if (tokens[i + 1] - s !== 1) return Data.ValuePresence.Present;
-//         const v = data.charCodeAt(s);
-//         if (v === 46 /* . */) return Data.ValuePresence.NotSpecified;
-//         if (v === 63 /* ? */) return Data.ValuePresence.Unknown;
-//         return Data.ValuePresence.Present;
-//     } : row => {
-//         const i = 2 * (row * fieldCount + index);
-//         return tokens[i] === tokens[i + 1] ? Data.ValuePresence.NotSpecified : Data.ValuePresence.Present
-//     };
-
-//     return {
-//         isDefined: true,
-//         str,
-//         int,
-//         float,
-//         value: str,
-//         presence,
-//         areValuesEqual: (rowA, rowB) => {
-//             const aI = (rowA * fieldCount + index) * 2, aS = tokens[aI];
-//             const bI = (rowB * fieldCount + index) * 2, bS = tokens[bI];
-//             const len = tokens[aI + 1] - aS;
-//             if (len !== tokens[bI + 1] - bS) return false;
-//             for (let i = 0; i < len; i++) {
-//                 if (data.charCodeAt(i + aS) !== data.charCodeAt(i + bS)) {
-//                     return false;
-//                 }
-//             }
-//             return true;
-//         },
-//         stringEquals: (row, value) => {
-//             const aI = (row * fieldCount + index) * 2;
-//             const s = tokens[aI];
-//             if (!value) return presence(row) !== Data.ValuePresence.Present;
-//             const len = value.length;
-//             if (len !== tokens[aI + 1] - s) return false;
-//             for (let i = 0; i < len; i++) {
-//                 if (data.charCodeAt(i + s) !== value.charCodeAt(i)) return false;
-//             }
-//             return true;
-//         },
-//         toStringArray: (startRow, endRowExclusive, ctor) => {
-//             const count = endRowExclusive - startRow;
-//             const ret = ctor(count) as any;
-//             for (let i = 0; i < count; i++) { ret[i] = str(startRow + i); }
-//             return ret;
-//         },
-//         toIntArray: (startRow, endRowExclusive, ctor) => {
-//             const count = endRowExclusive - startRow;
-//             const ret = ctor(count) as any;
-//             for (let i = 0; i < count; i++) { ret[i] = int(startRow + i); }
-//             return ret;
-//         },
-//         toFloatArray: (startRow, endRowExclusive, ctor) => {
-//             const count = endRowExclusive - startRow;
-//             const ret = ctor(count) as any;
-//             for (let i = 0; i < count; i++) { ret[i] = float(startRow + i); }
-//             return ret;
-//         }
-//     }
-// }
\ No newline at end of file
diff --git a/src/reader/common/text/column/fixed.ts b/src/reader/common/text/column/fixed.ts
index bb307f969..7caf97e35 100644
--- a/src/reader/common/text/column/fixed.ts
+++ b/src/reader/common/text/column/fixed.ts
@@ -4,46 +4,41 @@
  * @author David Sehnal <david.sehnal@gmail.com>
  */
 
-import { Column, ColumnType, createArray } from '../../column'
-import { trimStr, Lines } from '../tokenizer'
+import { Column, ColumnType, createAndFillArray } from '../../column'
+import { trimStr, Tokens } from '../tokenizer'
 import { parseIntSkipLeadingWhitespace, parseFloatSkipLeadingWhitespace } from '../number-parser'
 import StringPool from '../../../../utils/short-string-pool'
 
-export default function FixedColumnProvider(lines: Lines) {
+export default function FixedColumnProvider(lines: Tokens) {
     return function<T extends ColumnType>(offset: number, width: number, type: T) {
         return FixedColumn(lines, offset, width, type);
     }
 }
 
-function fillArrayValues(value: (row: number) => any, target: any[], start: number) {
-    for (let i = 0, _e = target.length; i < _e; i++) target[i] = value(start + i);
-    return target;
-}
-
-export function FixedColumn<T extends ColumnType>(lines: Lines, offset: number, width: number, type: T): Column<T['@type']> {
-    const { data, tokens, count: rowCount } = lines;
+export function FixedColumn<T extends ColumnType>(lines: Tokens, offset: number, width: number, type: T): Column<T['@type']> {
+    const { data, indices, count: rowCount } = lines;
     const { kind } = type;
     const pool = kind === 'pooled-str' ? StringPool.create() : void 0;
 
     const value: Column<T['@type']>['value'] = kind === 'str' ? row => {
-        let s = tokens[2 * row] + offset, le = tokens[2 * row + 1];
+        let s = indices[2 * row] + offset, le = indices[2 * row + 1];
         if (s >= le) return '';
         let e = s + width;
         if (e > le) e = le;
         return trimStr(data, s, e);
     } : kind === 'pooled-str' ? row => {
-        let s = tokens[2 * row] + offset, le = tokens[2 * row + 1];
+        let s = indices[2 * row] + offset, le = indices[2 * row + 1];
         if (s >= le) return '';
         let e = s + width;
         if (e > le) e = le;
         return StringPool.get(pool!, trimStr(data, s, e));
     } : kind === 'int' ? row => {
-        const s = tokens[2 * row] + offset;
-        if (s > tokens[2 * row + 1]) return 0;
+        const s = indices[2 * row] + offset;
+        if (s > indices[2 * row + 1]) return 0;
         return parseIntSkipLeadingWhitespace(data, s, s + width);
     } : row => {
-        const s = tokens[2 * row] + offset;
-        if (s > tokens[2 * row + 1]) return 0;
+        const s = indices[2 * row] + offset;
+        if (s > indices[2 * row + 1]) return 0;
         return parseFloatSkipLeadingWhitespace(data, s, s + width);
     };
     return {
@@ -51,10 +46,7 @@ export function FixedColumn<T extends ColumnType>(lines: Lines, offset: number,
         rowCount,
         value,
         isValueDefined(row) { return true; },
-        toArray(params) {
-            const { array, start } = createArray(rowCount, params);
-            return fillArrayValues(value, array, start);
-        },
+        toArray(params) { return createAndFillArray(rowCount, value, params); },
         areValuesEqual(rowA, rowB) {
             return value(rowA) === value(rowB);
         }
diff --git a/src/reader/common/text/column/token.ts b/src/reader/common/text/column/token.ts
new file mode 100644
index 000000000..c78ccc794
--- /dev/null
+++ b/src/reader/common/text/column/token.ts
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Column, ColumnType, createAndFillArray } from '../../column'
+import { Tokens } from '../tokenizer'
+import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../number-parser'
+import StringPool from '../../../../utils/short-string-pool'
+
+/** Binds `tokens` and returns a factory producing a TokenColumn for whichever column type is requested. */
+export default function TokenColumnProvider(tokens: Tokens) {
+    const forType = <T extends ColumnType>(type: T) => TokenColumn(tokens, type);
+    return forType;
+}
+
+/** Column whose row `i` is the token `data.substring(indices[2 * i], indices[2 * i + 1])`, decoded according to `type.kind`. */
+export function TokenColumn<T extends ColumnType>(tokens: Tokens, type: T): Column<T['@type']> {
+    const { data, indices, count: rowCount } = tokens;
+    // Pick the per-row reader once, so no kind checks happen on each access.
+    let value: Column<T['@type']>['value'];
+    if (type.kind === 'str') {
+        value = row => data.substring(indices[2 * row], indices[2 * row + 1]);
+    } else if (type.kind === 'pooled-str') {
+        const pool = StringPool.create(); // a pool is only allocated for pooled columns
+        value = row => StringPool.get(pool, data.substring(indices[2 * row], indices[2 * row + 1]));
+    } else if (type.kind === 'int') {
+        value = row => fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0; // falsy parse result (e.g. NaN) reads as 0
+    } else { // every other kind is parsed as a float, with the same `|| 0` fallback
+        value = row => fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0;
+    }
+    const areValuesEqual = areValuesEqualProvider(tokens); // compares raw token text, not parsed values
+    return {
+        isDefined: true, rowCount, value,
+        isValueDefined(row) { return true; }, // every row is reported as defined
+        toArray(params) { return createAndFillArray(rowCount, value, params); },
+        areValuesEqual
+    };
+}
+
+/** Returns a comparer that tests two rows' raw token text for exact char-code equality without building substrings. */
+export function areValuesEqualProvider(tokens: Tokens) {
+    const { data, indices } = tokens;
+    return (rowA: number, rowB: number) => {
+        let a = indices[2 * rowA], b = indices[2 * rowB];
+        const aEnd = indices[2 * rowA + 1], bEnd = indices[2 * rowB + 1];
+        // tokens of different length can never match
+        if (aEnd - a !== bEnd - b) return false;
+        while (a < aEnd) {
+            if (data.charCodeAt(a++) !== data.charCodeAt(b++)) return false;
+        }
+        return true;
+    };
+}
\ No newline at end of file
diff --git a/src/reader/common/text/tokenizer.ts b/src/reader/common/text/tokenizer.ts
index 97fb00ef7..326497591 100644
--- a/src/reader/common/text/tokenizer.ts
+++ b/src/reader/common/text/tokenizer.ts
@@ -17,10 +17,10 @@ export interface Tokenizer {
     currentTokenEnd: number
 }
 
-export interface Lines {
+export interface Tokens { // token ranges over `data`; `count` is the number of (start, end) pairs
     data: string,
     count: number,
-    tokens: ArrayLike<number>
+    indices: ArrayLike<number> // flat pairs: token i spans data.substring(indices[2 * i], indices[2 * i + 1])
 }
 
 export function Tokenizer(data: string): Tokenizer {
@@ -80,15 +80,21 @@ export namespace Tokenizer {
     }
 
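-    /** Advance the state by the given number of lines and return line starts/ends as tokens. */
+    /** Advance the state by one line (markLine) and return that line as a string (getTokenString). */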
-    export function readLines(state: Tokenizer, count: number): Lines {
-        const lineTokens = Tokens.create(count * 2);
+    export function readLine(state: Tokenizer): string {
+        markLine(state); // same call readLines makes before reading state.currentTokenStart/currentTokenEnd
+        return getTokenString(state); // presumably the text of the token just marked; helper is not shown here
+    }
+
+    /** Advance the state by `count` lines and return each line's start/end offsets as tokens (two indices per line). */
+    export function readLines(state: Tokenizer, count: number): Tokens {
+        const lineTokens = TokenBuilder.create(state, count * 2); // exact capacity: two slots per line, so addUnchecked is safe
 
         for (let i = 0; i < count; i++) {
             markLine(state);
-            Tokens.addUnchecked(lineTokens, state.currentTokenStart, state.currentTokenEnd);
+            TokenBuilder.addUnchecked(lineTokens, state.currentTokenStart, state.currentTokenEnd); // record the bounds markLine just set
         }
 
-        return { data: state.data, count, tokens: lineTokens.indices };
+        return { data: state.data, count, indices: lineTokens.indices }; // plain Tokens object; builder-only fields are left out
     }
 
     /**
@@ -170,38 +176,43 @@ export function trimStr(data: string, start: number, end: number) {
     return data.substring(s, e + 1);
 }
 
-export interface Tokens {
-    indicesLenMinus2: number,
-    count: number,
-    indices: Uint32Array
-}
+export namespace TokenBuilder {
+    interface Builder extends Tokens { // mutable form of Tokens used while tokens are being appended
+        offset: number, // next free slot in `indices`; advances by two per token
+        indices: Uint32Array, // narrows Tokens.indices to the concrete buffer that resize() replaces
+        indicesLenMinus2: number // cached `indices.length - 2`, compared against `offset` in add()
+    }
 
-export namespace Tokens {
-    function resize(tokens: Tokens) {
+    function resize(builder: Builder) { // replaces builder.indices with a larger buffer holding the same values
-        // scale the size using golden ratio, because why not.
+        // Scale by ~golden ratio but grow by at least 2 slots: (1.61 * n) | 0 leaves no room for another pair when n <= 2, and out-of-range typed-array writes are silently ignored.
-        const newBuffer = new Uint32Array((1.61 * tokens.indices.length) | 0);
-        newBuffer.set(tokens.indices);
-        tokens.indices = newBuffer;
-        tokens.indicesLenMinus2 = (newBuffer.length - 2) | 0;
+        const newBuffer = new Uint32Array(Math.max(builder.indices.length + 2, (1.61 * builder.indices.length) | 0));
+        newBuffer.set(builder.indices);
+        builder.indices = newBuffer;
+        builder.indicesLenMinus2 = (newBuffer.length - 2) | 0;
     }
 
     export function add(tokens: Tokens, start: number, end: number) {
-        if (tokens.count > tokens.indicesLenMinus2) {
-            resize(tokens);
+        const builder = tokens as Builder; // assumes `tokens` came from create(), which returns a Builder typed as Tokens
+        if (builder.offset > builder.indicesLenMinus2) { // fewer than two free slots remain
+            resize(builder);
         }
-        tokens.indices[tokens.count++] = start;
-        tokens.indices[tokens.count++] = end;
+        builder.indices[builder.offset++] = start;
+        builder.indices[builder.offset++] = end;
+        tokens.count++; // count tracks tokens (pairs); offset tracks slots written
     }
 
     export function addUnchecked(tokens: Tokens, start: number, end: number) {
-        tokens.indices[tokens.count++] = start;
-        tokens.indices[tokens.count++] = end;
+        (tokens as Builder).indices[(tokens as Builder).offset++] = start; // no capacity check or resize, unlike add()
+        (tokens as Builder).indices[(tokens as Builder).offset++] = end;
+        tokens.count++; // one token appended (two slots written)
     }
 
-    export function create(size: number): Tokens {
-        return {
+    export function create(tokenizer: Tokenizer, size: number): Tokens { // `size` is the initial slot capacity (two slots per token)
+        return <Builder>{ // built as a Builder but exposed as Tokens; add()/addUnchecked() cast back
+            data: tokenizer.data,
             indicesLenMinus2: (size - 2) | 0,
             count: 0,
+            offset: 0, // next free slot in `indices`
             indices: new Uint32Array(size)
         }
     }
diff --git a/src/reader/gro/parser.ts b/src/reader/gro/parser.ts
index bca4df98a..28a1e26ca 100644
--- a/src/reader/gro/parser.ts
+++ b/src/reader/gro/parser.ts
@@ -40,14 +40,11 @@ function State(tokenizer: Tokenizer): State {
  */
 function handleTitleString(state: State) {
     const { tokenizer, header } = state;
-    Tokenizer.markLine(tokenizer);
-
-    let line = Tokenizer.getTokenString(tokenizer);
+    let line = Tokenizer.readLine(tokenizer);
 
     // skip potential empty lines...
     if (line.trim().length === 0) {
-        Tokenizer.markLine(tokenizer);
-        line = Tokenizer.getTokenString(tokenizer);
+        line = Tokenizer.readLine(tokenizer);
     }
 
     const timeOffset = line.lastIndexOf('t=');
@@ -92,7 +89,7 @@ function handleAtoms(state: State): Schema.Atoms {
     const { tokenizer, numberOfAtoms } = state;
     const lines = Tokenizer.readLines(tokenizer, numberOfAtoms);
 
-    const positionSample = tokenizer.data.substring(lines.tokens[0], lines.tokens[1]).substring(20);
+    const positionSample = tokenizer.data.substring(lines.indices[0], lines.indices[1]).substring(20);
     const precisions = positionSample.match(/\.\d+/g)!;
     const hasVelocities = precisions.length === 6;
 
@@ -133,8 +130,7 @@ function handleAtoms(state: State): Schema.Atoms {
  */
 function handleBoxVectors(state: State) {
     const { tokenizer } = state;
-    Tokenizer.markLine(tokenizer);
-    const values = Tokenizer.getTokenString(tokenizer).trim().split(/\s+/g);
+    const values = Tokenizer.readLine(tokenizer).trim().split(/\s+/g); // whitespace-separated fields of the next line; only the first three are used
     state.header.box = [+values[0], +values[1], +values[2]];
 }
 
diff --git a/src/reader/spec/cif.spec.ts b/src/reader/spec/cif.spec.ts
index 80b4c1388..87950cf64 100644
--- a/src/reader/spec/cif.spec.ts
+++ b/src/reader/spec/cif.spec.ts
@@ -10,8 +10,8 @@ import * as Schema from '../cif/schema'
 
 const columnData = `123abc`;
 
-const intField = TextField(columnData, [0, 1, 1, 2, 2, 3], 3);
-const strField = TextField(columnData, [3, 4, 4, 5, 5, 6], 3);
+const intField = TextField({ data: columnData, indices: [0, 1, 1, 2, 2, 3], count: 3 }, 3); // tokens '1', '2', '3'
+const strField = TextField({ data: columnData, indices: [3, 4, 4, 5, 5, 6], count: 3 }, 3); // tokens 'a', 'b', 'c'
 
 const testBlock = Data.Block({
     'atoms': Data.Category(3, {
diff --git a/src/reader/spec/fixed-column.spec.ts b/src/reader/spec/text-column.spec.ts
similarity index 65%
rename from src/reader/spec/fixed-column.spec.ts
rename to src/reader/spec/text-column.spec.ts
index c91bfb742..e37d18d6a 100644
--- a/src/reader/spec/fixed-column.spec.ts
+++ b/src/reader/spec/text-column.spec.ts
@@ -6,6 +6,7 @@
  */
 
 import FixedColumn from '../common/text/column/fixed'
+import TokenColumn from '../common/text/column/token'
 import { ColumnType } from '../common/column'
 
 const lines = [
@@ -16,7 +17,7 @@ const lines = [
     ' 5'
 ]
 
-const data = lines.join('\n');
+const linesData = lines.join('\n');
 
 const linesTokens = (function () {
     const tokens: number[] = [];
@@ -25,12 +26,12 @@ const linesTokens = (function () {
         tokens.push(last, last + l.length);
         last += l.length + 1;
     }
-    if (tokens[tokens.length - 1] > data.length) tokens[tokens.length - 1] = data.length;
+    if (tokens[tokens.length - 1] > linesData.length) tokens[tokens.length - 1] = linesData.length;
     return tokens;
 }());
 
 describe('fixed text column', () => {
-    const col = FixedColumn({ data, tokens: linesTokens, count: lines.length });
+    const col = FixedColumn({ data: linesData, indices: linesTokens, count: lines.length });
     const col1 = col(0, 5, ColumnType.float);
     const col2 = col(5, 4, ColumnType.str);
     it('number', () => {
@@ -48,3 +49,14 @@ describe('fixed text column', () => {
         expect(col2.value(4)).toBe('');
     })
 });
+
+// Token column spec: '321' split into three one-character tokens and read as ints.
+describe('token text column', () => {
+    const tokens = { data: '321', indices: [0, 1, 1, 2, 2, 3], count: 3 };
+    const intColumn = TokenColumn(tokens)(ColumnType.int);
+    it('number', () => {
+        // row i must parse to the digit stored at position i
+        const expected = [3, 2, 1];
+        expected.forEach((digit, row) => expect(intColumn.value(row)).toBe(digit));
+    })
+});
diff --git a/src/script.ts b/src/script.ts
index ea36fe76a..06615a362 100644
--- a/src/script.ts
+++ b/src/script.ts
@@ -8,7 +8,7 @@
 import * as fs from 'fs'
 
 import Gro from './reader/gro/parser'
-import CIF from './reader/cif/text-parser'
+import CIF from './reader/cif/index'
 
 // const file = '1crn.gro'
 // const file = 'water.gro'
@@ -81,7 +81,7 @@ export function _cif() {
         }
 
         console.time('parseCIF');
-        const parsed = CIF(input);
+        const parsed = CIF.parseText(input);
         console.timeEnd('parseCIF');
         if (parsed.isError) {
             console.log(parsed);
@@ -92,7 +92,12 @@ export function _cif() {
 
         const atom_site = data.categories._atom_site;
         console.log(atom_site.getField('Cartn_x')!.float(0));
-        console.log(atom_site.getField('label_atom_id')!.toStringArray());
+        //console.log(atom_site.getField('label_atom_id')!.toStringArray());
+
+        const mmcif = CIF.applySchema(CIF.schema.mmCIF, data);
+        console.log(mmcif.atom_site.Cartn_x.value(0));
+        console.log(mmcif.entity.type.toArray());
+        console.log(mmcif.pdbx_struct_oper_list.matrix.value(0));
     });
 }
 
-- 
GitLab