From a81bcf4311624b15525cfe4b376c667fbf40cbdd Mon Sep 17 00:00:00 2001
From: David Sehnal <david.sehnal@gmail.com>
Date: Sat, 2 Mar 2019 12:57:58 +0100
Subject: [PATCH] mol-model: added sourceIndex to AtomicHierarchy

---
 package-lock.json                             | Bin 609362 -> 607562 bytes
 .../structure/mmcif/atomic.ts                 |  12 +++--
 src/mol-model-formats/structure/mmcif/ihm.ts  |   1 +
 .../structure/mmcif/parser.ts                 |  41 +++++++++++++-----
 src/mol-model-formats/structure/mmcif/sort.ts |   9 +++-
 .../model/properties/atomic/hierarchy.ts      |   9 +++-
 6 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 4444de29a715c5abccb6351afde6e97dd2a96c96..00f2081bcfdf7d63fc7301f534af8309da642912 100644
GIT binary patch
delta 1504
zcmcaKL-kab>IQYM$qRJlHoJH|+dKUME2GSIR}Mz$rpXH~h)r)`XJef%@QYP+`jsWD
zg3~{6F^O*9-o<F{1(My)dX>?bV|qaVi^O*SyNm)Z(-lmaB(|3+GOdo7zCnXkV)`ys
zX3^;a^O*!DCuDO^SD4PhGToqtDSEn&F`MxAA5~28jFT_?7oT2W#l$*&LJd>?WQPsX
z)8FNBXl#$KWzvC2?yqA?0ISmK=a8D7Th9~>=KZzd;GeEl#KF5=pn+)yH%M&zoo*%$
zMz9%q*32r~9eSC>Al$w_ra2%L(-RgjvP}Qp%po$}po@`j`hQ1e!Rh)9Os3Nn%s92C
zPuR`MI$eJalhX9pD_Oaw3)nNWOcwYf0nue+&nz<i!yZPy$q%ORPEV*~@|b>LI+Nyf
z0Xt^y>Hk5J4Yf=X(_`xxIj1`~F!N47Fo}_Gdb|U(;PeX}Os3lpOk{H706Ri=9y|NA
znQWZX=a{i^PT%0g%rg1GKfdV`Y*~b+KbXlBJzZ-ehcMWx>HZTr1VKS&3ig%Wc2<?i
z2mbL)xAI|@nl8}5R5iWpH9OCAcOPb6Fn9ZjIZTo4(+~KtOH6-d#>_X}U=9Z_$g2v|
z4H}rDw!dD&w1a8+f#pn0lN~mMPUrv6%sT!3A`aQ<0#{j~O3tlfs$-sf;lJJV3Gdmt
zrz?oDv4ZrfO@1J&yj^xZ(|z{o8#b})Pe0qux^?=2Tg(#Mzi(%<&;X@}>H8Njc2D=X
z&&0d^-UFuDF4Gmdnfa$b$l?&(J~x}gmkGkZpwBEYy|0f+X!_h5Cf?}*_RK=lqw_d|
zr$1=oP}u%HkE5LtNnt`Q6Yul~TFmiCV)fe0l}OxmtJwLsYZY;n^Fjm{gs=#1|8L2x
z&p5q7msx&$o;7nGBgpj2VEJiw%styDv~aL6O>Zz@R-V2gkePk^{WgwhPO$WJu=~zi
zva@Y(n82Y3F=P2ZZ)PqeX@#}yEYmO4aBy!on8R_L8Ent=+sm2Mr%h(zn4Z(aB)+|N
z0mpNQ=ILiO7!{{0_%NwV_cLZ@+x~tL$2Jxu!{XwZxu(Bg&H?tKar^!x=Jx$bEZg@d
zu?ELa7qDlM*sghqRYV3B!Xn!<?y<IWfVkV)p0iFAo9@udsJVUrUsgZS=>n6ORX6`w
zo5VPMK?tMz<c9Uu(-k@y*|z@{VVfWTj=1e-yID6dPB-|-$g|z=AoDS%?SF5u#xZW6
zaE#fQar+q^wxgQU6$;oSwyy|dlYa}=0*dQ{(@(i`@=xzq=j5EeeiD<wWPu~r({HPD
z%4|1vWtSD0J|TxuV*0#~jFQ`%LfM^iK&}HNhq`I4?2^-;2Qf-c_kYU5vwiv@_LFSW
z1^%&1Y%f2_{&XQE7)Fx_Nl9e-N`DT%?Hg8b{Hg?p^L=Gb_U$K>IAz7a+=RufV%vo*
HIUR%n)*d77

delta 1330
zcmX@rrFv<G>IQYM=@Tw9iA}CIV&ClQ^=vPgH+{kv4$ke~9E{>klLdIir~P7O-Co<p
zSmZVR!gfZ9?QB;WjX1yxxBK5^6m)?w%N3c{L`)ZGWHQ-qRmH@_1ZGYT&E$}up0J!<
zaC&DZ2mkc{txUPoe`IoKOkdE##Il{QmPvyVqR_XFDFMRTtH&la{k#S<@AMC~Ol}ad
zD1T=D=`otjywk3*C`>P?Vv5>+ubxSc8zP%t#UVWXf)<Cs^u8(%{^|YgOfl1UTd})M
zYh%ipE?~lGGrj66lk)TiR#p+P-sula*;%Lan=-Rc=kH}ooPJ?0lg0E0s?0*uA6PPr
zOg>j*Gu^?8S$J9>lRCtL2d3;S)6cGCVxPXoikWx%gBoUm=>c}kQ2!XLVlvzQyN@ZG
z5n|E?Csx+U=egLXA86nZ-adZ<6EDZK*-WO>Wp=XhPxtEL;GO<p2Gb*m+Pp3%gYCSt
zm>M`BHfaSh%WOZeh{=c<EWe$18Pij!u?|6utkX9rvYAf5pw7rT{lWwef$cFXm{?gL
zIxE&N#e!HG+aIi9`p!OmLIS(^bOQ~>C)+P?WwOwiu28`&Io-jGy=MA>nT$N!SKnh=
z=`uYaox^c@oIW$h_8MhoSElI>q0A1TkZ75{z>e8r`;SbH4n~OJ{A`XK2y4%DCUE@3
zL%0SDnE1EHXfl`cLil>~nAo=GR&i)Cg3Xw|rjtWpx`GL($n*tX%qrXeRdc98WzJbL
zZ-MC9;Ka%@JwJ*W9KMARefOf6t+v0e<5&$*z1_i<IgJydobN6R+jfU84n>I6@`i2>
zE|84l_61Wpt}=u5Z<h~a)@7P*;Kb}Wy?-l{0>t(W5zKp7KuR5^E#$}qF$|_ZxWcTm
z-FG#!7*o4#0&}}<0?T&W1XkNvNV1-|msL~-9(Y3AFWh8p=K%2~w&y)!ogfAYXZ|2|
z!R_ilS^Y%8x~2>KWais`pouk!ar+;BwjTlzRR$W2XQnGGV&a|NSIH(Y-NBSaa=JnX
z>rqII+*e|g(T8Z)+s!JoT`!1@<t>uN1U*Kc>8`?@{L{nkvC2+g5XNM_z14x;5^DGa
z5e~`i?*rJKazL&FNA=Y0_x7^CV*{JH{qYg@XA4IooEaM7wh7D+n6?+(<6vdoo*&J`
m&$xZUDh`pt=?`Q%<+dM`=9Co!F(sy-oX0A<oy&;RK^OpVug+Ki

diff --git a/src/mol-model-formats/structure/mmcif/atomic.ts b/src/mol-model-formats/structure/mmcif/atomic.ts
index b43be7fe7..38e39f5b6 100644
--- a/src/mol-model-formats/structure/mmcif/atomic.ts
+++ b/src/mol-model-formats/structure/mmcif/atomic.ts
@@ -16,11 +16,8 @@ import { ElementSymbol } from 'mol-model/structure/model/types';
 import { Entities } from 'mol-model/structure/model/properties/common';
 import { getAtomicRanges } from 'mol-model/structure/model/properties/utils/atomic-ranges';
 import { getAtomicDerivedData } from 'mol-model/structure/model/properties/utils/atomic-derived';
-import { ModelFormat } from '../format';
-import mmCIF_Format = ModelFormat.mmCIF
 import { FormatData } from './parser';
 
-
 type AtomSite = mmCIF_Database['atom_site']
 
 function findHierarchyOffsets(atom_site: AtomSite) {
@@ -45,13 +42,14 @@ function findHierarchyOffsets(atom_site: AtomSite) {
     return { residues, chains };
 }
 
-function createHierarchyData(atom_site: AtomSite, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData {
+function createHierarchyData(atom_site: AtomSite, sourceIndex: Column<number>, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData {
     const atoms = Table.ofColumns(AtomsSchema, {
         type_symbol: Column.ofArray({ array: Column.mapToArray(atom_site.type_symbol, ElementSymbol), schema: Column.Schema.Aliased<ElementSymbol>(Column.Schema.str) }),
         label_atom_id: atom_site.label_atom_id,
         auth_atom_id: atom_site.auth_atom_id,
         label_alt_id: atom_site.label_alt_id,
-        pdbx_formal_charge: atom_site.pdbx_formal_charge
+        pdbx_formal_charge: atom_site.pdbx_formal_charge,
+        sourceIndex
     });
     const residues = Table.view(atom_site, ResiduesSchema, offsets.residues);
     // Optimize the numeric columns
@@ -80,9 +78,9 @@ function isHierarchyDataEqual(a: AtomicData, b: AtomicData) {
         && Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>)
 }
 
-export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model) {
+export function getAtomicHierarchyAndConformation(atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, formatData: FormatData, previous?: Model) {
     const hierarchyOffsets = findHierarchyOffsets(atom_site);
-    const hierarchyData = createHierarchyData(atom_site, hierarchyOffsets);
+    const hierarchyData = createHierarchyData(atom_site, sourceIndex, hierarchyOffsets);
 
     if (previous && isHierarchyDataEqual(previous.atomicHierarchy, hierarchyData)) {
         return {
diff --git a/src/mol-model-formats/structure/mmcif/ihm.ts b/src/mol-model-formats/structure/mmcif/ihm.ts
index 731af9e3a..405fec8cf 100644
--- a/src/mol-model-formats/structure/mmcif/ihm.ts
+++ b/src/mol-model-formats/structure/mmcif/ihm.ts
@@ -21,6 +21,7 @@ export interface IHMData {
     model_name: string,
     entities: Entities,
     atom_site: mmCIF['atom_site'],
+    atom_site_sourceIndex: Column<number>,
     ihm_sphere_obj_site: mmCIF['ihm_sphere_obj_site'],
     ihm_gaussian_obj_site: mmCIF['ihm_gaussian_obj_site']
 }
diff --git a/src/mol-model-formats/structure/mmcif/parser.ts b/src/mol-model-formats/structure/mmcif/parser.ts
index 17d4f3a5b..44835f9e9 100644
--- a/src/mol-model-formats/structure/mmcif/parser.ts
+++ b/src/mol-model-formats/structure/mmcif/parser.ts
@@ -169,8 +169,8 @@ function getFormatData(format: mmCIF_Format): FormatData {
     }
 }
 
-function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model {
-    const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous);
+function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, formatData: FormatData, previous?: Model): Model {
+    const atomic = getAtomicHierarchyAndConformation(atom_site, sourceIndex, entities, formatData, previous);
     if (previous && atomic.sameAsPrevious) {
         return {
             ...previous,
@@ -209,7 +209,7 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities
 }
 
 function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model {
-    const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities, formatData);
+    const atomic = getAtomicHierarchyAndConformation(data.atom_site, data.atom_site_sourceIndex, data.entities, formatData);
     const coarse = getIHMCoarse(data, formatData);
 
     return {
@@ -255,8 +255,8 @@ async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatDat
     let modelStart = 0;
     while (modelStart < atomCount) {
         const modelEnd = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, modelStart);
-        const atom_site = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd);
-        const model = createStandardModel(format, atom_site, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0);
+        const { atom_site, sourceIndex } = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd);
+        const model = createStandardModel(format, atom_site, sourceIndex, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0);
         attachProps(model);
         models.push(model);
         modelStart = modelEnd;
@@ -265,14 +265,17 @@ async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatDat
 }
 
 function splitTable<T extends Table<any>>(table: T, col: Column<number>) {
-    const ret = new Map<number, T>()
+    const ret = new Map<number, { table: T, start: number, end: number }>()
     const rowCount = table._rowCount;
     let modelStart = 0;
     while (modelStart < rowCount) {
         const modelEnd = findModelEnd(col, modelStart);
         const id = col.value(modelStart);
-        const window = Table.window(table, table._schema, modelStart, modelEnd) as T;
-        ret.set(id, window);
+        ret.set(id, {
+            table: Table.window(table, table._schema, modelStart, modelEnd) as T,
+            start: modelStart,
+            end: modelEnd
+        });
         modelStart = modelEnd;
     }
     return ret;
@@ -286,8 +289,9 @@ async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: Fo
         throw new Error('expected _atom_site.ihm_model_id to be defined')
     }
 
-    // TODO: will IHM require sorting or will we trust it?
     const atom_sites = splitTable(format.data.atom_site, format.data.atom_site.ihm_model_id);
+    // TODO: will coarse IHM records require sorting or will we trust it?
+    // ==> Probably implement a sort as well and store the sourceIndex, same as with atom_site
     const sphere_sites = splitTable(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site.model_id);
     const gauss_sites = splitTable(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site.model_id);
 
@@ -296,13 +300,26 @@ async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: Fo
     const { model_id, model_name } = ihm_model_list;
     for (let i = 0; i < ihm_model_list._rowCount; i++) {
         const id = model_id.value(i);
+
+        let atom_site, atom_site_sourceIndex;
+        if (atom_sites.has(id)) {
+            const e = atom_sites.get(id)!;
+            const { atom_site: sorted, sourceIndex } = await sortAtomSite(ctx, e.table, e.start, e.end);
+            atom_site = sorted;
+            atom_site_sourceIndex = sourceIndex;
+        } else {
+            atom_site = Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0);
+            atom_site_sourceIndex = Column.ofIntArray([]);
+        }
+
         const data: IHMData = {
             model_id: id,
             model_name: model_name.value(i),
             entities: entities,
-            atom_site: atom_sites.has(id) ? atom_sites.get(id)! : Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0),
-            ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)! : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0),
-            ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)! : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0)
+            atom_site,
+            atom_site_sourceIndex,
+            ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)!.table : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0),
+            ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)!.table : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0)
         };
         const model = createModelIHM(format, data, formatData);
         attachProps(model);
diff --git a/src/mol-model-formats/structure/mmcif/sort.ts b/src/mol-model-formats/structure/mmcif/sort.ts
index 7cbee9e5b..868cd5dd8 100644
--- a/src/mol-model-formats/structure/mmcif/sort.ts
+++ b/src/mol-model-formats/structure/mmcif/sort.ts
@@ -9,6 +9,8 @@ import { createRangeArray, makeBuckets } from 'mol-data/util';
 import { Column, Table } from 'mol-data/db';
 import { RuntimeContext } from 'mol-task';
 
+export type SortedAtomSite = mmCIF_Database['atom_site'] & { sourceIndex: Column<number> }
+
 function isIdentity(xs: ArrayLike<number>) {
     for (let i = 0, _i = xs.length; i < _i; i++) {
         if (xs[i] !== i) return false;
@@ -36,8 +38,11 @@ export async function sortAtomSite(ctx: RuntimeContext, atom_site: mmCIF_Databas
     }
 
     if (isIdentity(indices) && indices.length === atom_site._rowCount) {
-        return atom_site;
+        return { atom_site, sourceIndex: Column.ofIntArray(indices) };
     }
 
-    return Table.view(atom_site, atom_site._schema, indices) as mmCIF_Database['atom_site'];
+    return {
+        atom_site: Table.view(atom_site, atom_site._schema, indices) as mmCIF_Database['atom_site'],
+        sourceIndex: Column.ofIntArray(indices)
+    };
 }
\ No newline at end of file
diff --git a/src/mol-model/structure/model/properties/atomic/hierarchy.ts b/src/mol-model/structure/model/properties/atomic/hierarchy.ts
index 33c599324..7262fe28a 100644
--- a/src/mol-model/structure/model/properties/atomic/hierarchy.ts
+++ b/src/mol-model/structure/model/properties/atomic/hierarchy.ts
@@ -38,7 +38,14 @@ export const AtomsSchema = {
      * The net integer charge assigned to this atom.
      * This is the formal charge assignment normally found in chemical diagrams.
      */
-    pdbx_formal_charge: mmCIF.atom_site.pdbx_formal_charge
+    pdbx_formal_charge: mmCIF.atom_site.pdbx_formal_charge,
+
+    /**
+     * The index of this atom in the input data.
+     * Required because of sorting of atoms.
+     */
+    sourceIndex: Column.Schema.int
+
     // id, occupancy and B_iso_or_equiv are part of conformation
 };
 
-- 
GitLab