From c6c2994267ff50b0f7e566e9a17424485529aee8 Mon Sep 17 00:00:00 2001
From: David Sehnal <david.sehnal@gmail.com>
Date: Thu, 9 Nov 2017 19:03:54 +0100
Subject: [PATCH] assembly parsing from mmCIF

---
 src/mol-math/geometry/symmetry-operator.ts    |   2 +-
 src/mol-math/linear-algebra/3d.ts             |  27 +++-
 .../structure/model/formats/mmcif.ts          |   5 +-
 .../structure/model/formats/mmcif/assembly.ts | 149 ++++++++++++++++++
 .../structure/model/properties/symmetry.ts    |  14 +-
 src/perf-tests/structure.ts                   |  10 +-
 6 files changed, 191 insertions(+), 16 deletions(-)
 create mode 100644 src/mol-model/structure/model/formats/mmcif/assembly.ts

diff --git a/src/mol-math/geometry/symmetry-operator.ts b/src/mol-math/geometry/symmetry-operator.ts
index 1e3a57de3..2e7bfabc8 100644
--- a/src/mol-math/geometry/symmetry-operator.ts
+++ b/src/mol-math/geometry/symmetry-operator.ts
@@ -24,7 +24,7 @@ namespace SymmetryOperator {
     export function create(name: string, matrix: Mat4, hkl?: number[]): SymmetryOperator {
         const _hkl = hkl ? Vec3.create(hkl[0], hkl[1], hkl[2]) : Vec3.zero();
         if (Mat4.isIdentity(matrix)) return { name, matrix, inverse: Mat4.identity(), isIdentity: true, hkl: _hkl };
-        if (!Mat4.isRotationAndTranslation(matrix)) throw new Error('Symmetry operator must be a composition of rotation and translation.');
+        if (!Mat4.isRotationAndTranslation(matrix)) throw new Error(`Symmetry operator (${name}) must be a composition of rotation and translation.`);
         return { name, matrix, inverse: Mat4.invert(Mat4.zero(), matrix), isIdentity: false, hkl: _hkl };
     }
 
diff --git a/src/mol-math/linear-algebra/3d.ts b/src/mol-math/linear-algebra/3d.ts
index f989a61c0..ed1a97900 100644
--- a/src/mol-math/linear-algebra/3d.ts
+++ b/src/mol-math/linear-algebra/3d.ts
@@ -214,7 +214,7 @@ export namespace Mat4 {
         return mul(out, mul(out, a, b), c);
     }
 
-    export function translate(out: Mat4, a: Mat4, v: Mat4) {
+    export function translate(out: Mat4, a: Mat4, v: Vec3) {
         const x = v[0], y = v[1], z = v[2];
         let a00: number, a01: number, a02: number, a03: number,
             a10: number, a11: number, a12: number, a13: number,
@@ -243,7 +243,7 @@ export namespace Mat4 {
         return out;
     }
 
-    export function fromTranslation(out: Mat4, v: Mat4) {
+    export function fromTranslation(out: Mat4, v: Vec3) {
         out[0] = 1;
         out[1] = 0;
         out[2] = 0;
@@ -263,6 +263,13 @@ export namespace Mat4 {
         return out;
     }
 
+    export function setTranslation(out: Mat4, v: Vec3) {
+        out[12] = v[0];
+        out[13] = v[1];
+        out[14] = v[2];
+        return out;
+    }
+
     export function rotate(out: Mat4, a: Mat4, rad: number, axis: Mat4) {
         let x = axis[0], y = axis[1], z = axis[2],
             len = Math.sqrt(x * x + y * y + z * z),
@@ -432,15 +439,23 @@ export namespace Mat4 {
         return b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06;
     }
 
-    export function isRotationAndTranslation(a: Mat4) {
+    export function isRotationAndTranslation(a: Mat4, eps?: number) {
+        return _isRotationAndTranslation(a, typeof eps !== 'undefined' ? eps : EPSILON.Value)
+    }
+
+    function _isRotationAndTranslation(a: Mat4, eps: number) {
         const a00 = a[0], a01 = a[1], a02 = a[2], a03 = a[3],
             a10 = a[4], a11 = a[5], a12 = a[6], a13 = a[7],
             a20 = a[8], a21 = a[9], a22 = a[10], a23 = a[11],
             /* a30 = a[12], a31 = a[13], a32 = a[14],*/ a33 = a[15];
 
-        if (a33 !== 1 || a03 !== 0 || a13 !== 0 || a23 !== 0) return false;
-        const det3x3 = a00 * (a11 * a22 - a21 * a21) - a01 * (a10 * a22 - a12 * a20) + a02 * (a10 * a21 - a11 * a20);
-        if (det3x3 < 1 - EPSILON.Value || det3x3 > 1 + EPSILON.Value) return false;
+        if (a33 !== 1 || a03 !== 0 || a13 !== 0 || a23 !== 0) {
+            return false;
+        }
+        const det3x3 = a00 * (a11 * a22 - a12 * a21) - a01 * (a10 * a22 - a12 * a20) + a02 * (a10 * a21 - a11 * a20);
+        if (det3x3 < 1 - eps || det3x3 > 1 + eps) {
+            return false;
+        }
         return true;
     }
 }
diff --git a/src/mol-model/structure/model/formats/mmcif.ts b/src/mol-model/structure/model/formats/mmcif.ts
index 01974ab7a..db14e11e7 100644
--- a/src/mol-model/structure/model/formats/mmcif.ts
+++ b/src/mol-model/structure/model/formats/mmcif.ts
@@ -14,6 +14,7 @@ import Conformation from '../properties/conformation'
 import Symmetry from '../properties/symmetry'
 import findHierarchyKeys from '../utils/hierarchy-keys'
 import { ElementSymbol} from '../types'
+import createAssemblies from './mmcif/assembly'
 
 import mmCIF_Format = Format.mmCIF
 
@@ -77,8 +78,8 @@ function getConformation({ data }: mmCIF_Format, bounds: Interval): Conformation
     }
 }
 
-function getSymmetry({ data }: mmCIF_Format): Symmetry {
-    return Symmetry.Empty;
+function getSymmetry(format: mmCIF_Format): Symmetry {
+    return { assemblies: createAssemblies(format) };
 }
 
 function isHierarchyDataEqual(a: Hierarchy.Hierarchy, b: Hierarchy.Data) {
diff --git a/src/mol-model/structure/model/formats/mmcif/assembly.ts b/src/mol-model/structure/model/formats/mmcif/assembly.ts
new file mode 100644
index 000000000..35bbb7d65
--- /dev/null
+++ b/src/mol-model/structure/model/formats/mmcif/assembly.ts
@@ -0,0 +1,149 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Mat4, Tensor } from 'mol-math/linear-algebra'
+import SymmetryOperator from 'mol-math/geometry/symmetry-operator'
+import Format from '../../format'
+import { Assembly, OperatorGroup, OperatorGroups } from '../../properties/symmetry'
+import { Queries as Q } from '../../../query'
+
+import mmCIF_Format = Format.mmCIF
+
+export default function create(format: mmCIF_Format): ReadonlyArray<Assembly> {
+    const { pdbx_struct_assembly } = format.data;
+    if (!pdbx_struct_assembly._rowCount) return [];
+
+    const matrices = getMatrices(format);
+    const assemblies: Assembly[] = [];
+    for (let i = 0; i < pdbx_struct_assembly._rowCount; i++) {
+        assemblies[assemblies.length] = createAssembly(format, i, matrices);
+    }
+    return assemblies;
+}
+
+type Matrices = Map<string, Mat4>
+type Generator = { expression: string, asymIds: string[] }
+
+function createAssembly(format: mmCIF_Format, index: number, matrices: Matrices): Assembly {
+    const { pdbx_struct_assembly, pdbx_struct_assembly_gen } = format.data;
+
+    const id = pdbx_struct_assembly.id.value(index);
+    const details = pdbx_struct_assembly.details.value(index);
+    const generators: Generator[] = [];
+
+    const { assembly_id, oper_expression, asym_id_list } = pdbx_struct_assembly_gen;
+
+    for (let i = 0, _i = pdbx_struct_assembly_gen._rowCount; i < _i; i++) {
+        if (assembly_id.value(i) !== id) continue;
+        generators[generators.length] = {
+            expression: oper_expression.value(i),
+            asymIds: asym_id_list.value(i).split(',').map(x => x.trim()).filter(x => !!x)
+        };
+    }
+
+    return Assembly.create(id, details, operatorGroupsProvider(generators, matrices));
+}
+
+function operatorGroupsProvider(generators: Generator[], matrices: Matrices): () => OperatorGroups {
+    return () => {
+        const groups: OperatorGroup[] = [];
+
+        let operatorOffset = 0;
+        for (let i = 0; i < generators.length; i++) {
+            const gen = generators[i];
+            const operatorList = parseOperatorList(gen.expression);
+            const operatorNames = expandOperators(operatorList);
+            const operators = getAssemblyOperators(matrices, operatorNames, operatorOffset);
+            const selector = Q.generators.atoms({ chainTest: Q.pred.and(
+                Q.pred.eq(Q.props.unit.operator_name, SymmetryOperator.DefaultName),
+                Q.pred.inSet(Q.props.chain.label_asym_id, gen.asymIds)
+            )});
+            groups[groups.length] = { selector, operators };
+            operatorOffset += operators.length;
+        }
+
+        return groups;
+    }
+}
+
+function getMatrices({ data }: mmCIF_Format): Matrices {
+    const { pdbx_struct_oper_list } = data;
+    const { id, matrix, vector, _schema } = pdbx_struct_oper_list;
+    const matrices = new Map<string, Mat4>();
+
+    for (let i = 0, _i = pdbx_struct_oper_list._rowCount; i < _i; i++) {
+        const m = Tensor.toMat4(_schema.matrix.space, matrix.value(i));
+        const t = Tensor.toVec3(_schema.vector.space, vector.value(i));
+        Mat4.setTranslation(m, t);
+        Mat4.setValue(m, 3, 3, 1);
+        matrices.set(id.value(i), m);
+    }
+
+    return matrices;
+}
+
+function expandOperators(operatorList: string[][]) {
+    const ops: string[][] = [];
+    const currentOp: string[] = [];
+    for (let i = 0; i < operatorList.length; i++) currentOp[i] = '';
+    expandOperators1(operatorList, ops, operatorList.length - 1, currentOp);
+    return ops;
+}
+
+function expandOperators1(operatorNames: string[][], list: string[][], i: number, current: string[]) {
+    if (i < 0) {
+        list[list.length] = current.slice(0);
+        return;
+    }
+
+    let ops = operatorNames[i], len = ops.length;
+    for (let j = 0; j < len; j++) {
+        current[i] = ops[j];
+        expandOperators1(operatorNames, list, i - 1, current);
+    }
+}
+
+function getAssemblyOperators(matrices: Matrices, operatorNames: string[][], startIndex: number) {
+    const operators: SymmetryOperator[] = [];
+
+    let index = startIndex;
+    for (let op of operatorNames) {
+        let m = Mat4.identity();
+        for (let i = 0; i < op.length; i++) {
+            Mat4.mul(m, m, matrices.get(op[i])!);
+        }
+        index++;
+        operators[operators.length] = SymmetryOperator.create(`A-${index}`, m);
+    }
+
+    return operators;
+}
+
+function parseOperatorList(value: string): string[][] {
+    // '(X0)(1-5)' becomes [['X0'], ['1', '2', '3', '4', '5']]
+    // kudos to Glen van Ginkel.
+
+    const oeRegex = /\(?([^\(\)]+)\)?]*/g, groups: string[] = [], ret: string[][] = [];
+
+    let g: any;
+    while (g = oeRegex.exec(value)) groups[groups.length] = g[1];
+
+    groups.forEach(g => {
+        const group: string[] = [];
+        g.split(',').forEach(e => {
+            const dashIndex = e.indexOf('-');
+            if (dashIndex > 0) {
+                const from = parseInt(e.substring(0, dashIndex)), to = parseInt(e.substr(dashIndex + 1));
+                for (let i = from; i <= to; i++) group[group.length] = i.toString();
+            } else {
+                group[group.length] = e.trim();
+            }
+        });
+        ret[ret.length] = group;
+    });
+
+    return ret;
+}
\ No newline at end of file
diff --git a/src/mol-model/structure/model/properties/symmetry.ts b/src/mol-model/structure/model/properties/symmetry.ts
index 4a5403ad4..994d358de 100644
--- a/src/mol-model/structure/model/properties/symmetry.ts
+++ b/src/mol-model/structure/model/properties/symmetry.ts
@@ -8,12 +8,13 @@ import SymmetryOperator from 'mol-math/geometry/symmetry-operator'
 import { Query } from '../../query'
 import { Model } from '../../model'
 
+/** Determine an atom set and a list of operators that should be applied to that set  */
 export interface OperatorGroup {
-    readonly query: Query,
+    readonly selector: Query,
     readonly operators: ReadonlyArray<SymmetryOperator>
 }
 
-export type OperatorGroups = ReadonlyArray<ReadonlyArray<OperatorGroup>>
+export type OperatorGroups = ReadonlyArray<OperatorGroup>
 
 export class Assembly {
     readonly id: string;
@@ -33,7 +34,7 @@ export class Assembly {
 }
 
 export namespace Assembly {
-    export function create(id: string, details: string, operatorsProvider: () => OperatorGroups) {
+    export function create(id: string, details: string, operatorsProvider: () => OperatorGroups): Assembly {
         return new Assembly(id, details, operatorsProvider);
     }
 }
@@ -45,7 +46,12 @@ interface Symmetry {
 namespace Symmetry {
     export const Empty: Symmetry = { assemblies: [] };
 
-    export function findAssembly(model: Model): Assembly | undefined {
+    export function findAssembly(model: Model, id: string): Assembly | undefined {
+        const { assemblies } = model.symmetry;
+        const _id = id.toLocaleLowerCase();
+        for (let i = 0; i < assemblies.length; i++) {
+            if (assemblies[i].id.toLowerCase() === _id) return assemblies[i];
+        }
         return void 0;
     }
 }
diff --git a/src/perf-tests/structure.ts b/src/perf-tests/structure.ts
index 156d441f8..e610c871f 100644
--- a/src/perf-tests/structure.ts
+++ b/src/perf-tests/structure.ts
@@ -242,12 +242,16 @@ export namespace PropertyAccess {
     }
 
     export async function run() {
-        const { structures, models, mmcif } = await readCIF('./examples/1cbs_full.bcif');
+        //const { structures, models, mmcif } = await readCIF('./examples/1cbs_full.bcif');
         //const { structures, models, mmcif } = await readCIF('e:/test/quick/3j3q_full.bcif');
         //const { structures, models, mmcif } = await readCIF('e:/test/quick/1cbs_updated.cif');
+        const { structures, models/*, mmcif*/ } = await readCIF('e:/test/quick/1hrv_updated.cif');
+
+        //console.log(mmcif.pdbx_struct_oper_list.matrix.toArray());
+        // console.log(mmcif.pdbx_struct_oper_list.vector.toArray());
+
+        console.log(models[0].symmetry.assemblies);
 
-        console.log(mmcif.pdbx_struct_oper_list.matrix.toArray());
-        console.log(mmcif.pdbx_struct_oper_list.vector.toArray());
 
         //const { structures, models } = await readCIF('e:/test/molstar/3j3q.bcif');
 
-- 
GitLab