From a3c790a54960733f9515faa856cbbf889aae4995 Mon Sep 17 00:00:00 2001
From: Alexander Rose <alex.rose@rcsb.org>
Date: Mon, 22 Apr 2019 14:49:21 -0700
Subject: [PATCH] handle mmcif files without entity category

---
 .../structure/mmcif/parser.ts                 | 52 +++++++++++++++++--
 src/mol-model/structure/model/types.ts        | 13 +++++
 src/mol-model/structure/util.ts               |  4 +-
 src/mol-plugin/util/structure-labels.ts       |  8 +--
 src/mol-theme/color/residue-name.ts           |  2 +-
 src/mol-theme/label.ts                        |  6 +--
 6 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/src/mol-model-formats/structure/mmcif/parser.ts b/src/mol-model-formats/structure/mmcif/parser.ts
index 71063f94d..bf6d29319 100644
--- a/src/mol-model-formats/structure/mmcif/parser.ts
+++ b/src/mol-model-formats/structure/mmcif/parser.ts
@@ -24,11 +24,12 @@ import { getSequence } from './sequence';
 import { sortAtomSite } from './sort';
 import { StructConn } from './bonds/struct_conn';
 import { ChemicalComponent } from 'mol-model/structure/model/properties/chemical-component';
-import { getMoleculeType, MoleculeType } from 'mol-model/structure/model/types';
+import { getMoleculeType, MoleculeType, getEntityType } from 'mol-model/structure/model/types';
 import { ModelFormat } from '../format';
 import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants';
 import mmCIF_Format = ModelFormat.mmCIF
 import { memoize1 } from 'mol-util/memoize';
+import { ElementIndex } from 'mol-model/structure/model';
 
 export async function _parse_mmCif(format: mmCIF_Format, ctx: RuntimeContext) {
     const formatData = getFormatData(format)
@@ -247,9 +248,50 @@ function findModelEnd(num: Column<number>, startIndex: number) {
     return endIndex;
 }
 
+function getEntities(format: mmCIF_Format): Entities {
+    let entityData: Table<mmCIF_Schema['entity']>
+
+    if (!format.data.entity.id.isDefined) {
+        const entityIds = new Set<string>()
+        const entityList: Partial<Table.Row<mmCIF_Schema['entity']>>[] = []
+
+        const { label_entity_id, label_comp_id } = format.data.atom_site;
+        for (let i = 0 as ElementIndex, il = format.data.atom_site._rowCount; i < il; i++) {
+            const entityId = label_entity_id.value(i);
+            if (!entityIds.has(entityId)) {
+                entityList.push({ id: entityId, type: getEntityType(label_comp_id.value(i)) })
+                entityIds.add(entityId)
+            }
+        }
+
+        const { entity_id: sphere_entity_id } = format.data.ihm_sphere_obj_site;
+        for (let i = 0 as ElementIndex, il = format.data.ihm_sphere_obj_site._rowCount; i < il; i++) {
+            const entityId = sphere_entity_id.value(i);
+            if (!entityIds.has(entityId)) {
+                entityList.push({ id: entityId, type: 'polymer' })
+                entityIds.add(entityId)
+            }
+        }
+
+        const { entity_id: gaussian_entity_id } = format.data.ihm_gaussian_obj_site;
+        for (let i = 0 as ElementIndex, il = format.data.ihm_gaussian_obj_site._rowCount; i < il; i++) {
+            const entityId = gaussian_entity_id.value(i);
+            if (!entityIds.has(entityId)) {
+                entityList.push({ id: entityId, type: 'polymer' })
+                entityIds.add(entityId)
+            }
+        }
+
+        entityData = Table.ofRows(mmCIF_Schema.entity, entityList)
+    } else {
+        entityData = format.data.entity;
+    }
+    return { data: entityData, getEntityIndex: Column.createIndexer(entityData.id) };
+}
+
 async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
     const atomCount = format.data.atom_site._rowCount;
-    const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
+    const entities = getEntities(format)
 
     const models: Model[] = [];
     let modelStart = 0;
@@ -282,13 +324,13 @@ function splitTable<T extends Table<any>>(table: T, col: Column<number>) {
 }
 
 async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
-    const { ihm_model_list } = format.data;
-    const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
-
     if (format.data.atom_site._rowCount && !format.data.atom_site.ihm_model_id.isDefined) {
         throw new Error('expected _atom_site.ihm_model_id to be defined')
     }
 
+    const { ihm_model_list } = format.data;
+    const entities = getEntities(format)
+
     const atom_sites = splitTable(format.data.atom_site, format.data.atom_site.ihm_model_id);
     // TODO: will coarse IHM records require sorting or will we trust it?
     // ==> Probably implement a sort as as well and store the sourceIndex same as with atomSite
diff --git a/src/mol-model/structure/model/types.ts b/src/mol-model/structure/model/types.ts
index 8b5e1b947..b4c65a89d 100644
--- a/src/mol-model/structure/model/types.ts
+++ b/src/mol-model/structure/model/types.ts
@@ -219,6 +219,19 @@ export function getComponentType(compId: string): mmCIF_Schema['chem_comp']['typ
     }
 }
 
+export function getEntityType(compId: string): mmCIF_Schema['entity']['type']['T'] {
+    compId = compId.toUpperCase()
+    if (AminoAcidNames.has(compId) || RnaBaseNames.has(compId) || DnaBaseNames.has(compId)) {
+        return 'polymer'
+    } else if (SaccharideCompIdMap.has(compId)) {
+        return 'polymer' // TODO will be 'branched' in the future
+    } else if (WaterNames.has(compId)) {
+        return 'water'
+    } else {
+        return 'non-polymer'
+    }
+}
+
 export function isPolymer(moleculeType: MoleculeType) {
     return moleculeType === MoleculeType.protein || moleculeType === MoleculeType.DNA || moleculeType === MoleculeType.RNA || moleculeType === MoleculeType.PNA
 }
diff --git a/src/mol-model/structure/util.ts b/src/mol-model/structure/util.ts
index 87e34b6e0..92069c444 100644
--- a/src/mol-model/structure/util.ts
+++ b/src/mol-model/structure/util.ts
@@ -56,13 +56,13 @@ export function residueLabel(model: Model, rI: number) {
 export function elementLabel(model: Model, index: ElementIndex) {
     const { atoms, residues, chains, residueAtomSegments, chainAtomSegments } = model.atomicHierarchy
     const { label_atom_id } = atoms
-    const { auth_seq_id, auth_comp_id } = residues
+    const { auth_seq_id, label_comp_id } = residues
     const { auth_asym_id } = chains
 
     const residueIndex = residueAtomSegments.index[index]
     const chainIndex = chainAtomSegments.index[residueIndex]
 
-    return `[${auth_comp_id.value(residueIndex)}]${auth_seq_id.value(residueIndex)}:${auth_asym_id.value(chainIndex)}.${label_atom_id.value(index)}`
+    return `[${label_comp_id.value(residueIndex)}]${auth_seq_id.value(residueIndex)}:${auth_asym_id.value(chainIndex)}.${label_atom_id.value(index)}`
 }
 
 // const centerPos = Vec3.zero()
diff --git a/src/mol-plugin/util/structure-labels.ts b/src/mol-plugin/util/structure-labels.ts
index 932569da9..1ec7fd2b6 100644
--- a/src/mol-plugin/util/structure-labels.ts
+++ b/src/mol-plugin/util/structure-labels.ts
@@ -78,8 +78,8 @@ function getLabelDataComputed(structure: Structure, level: 'elements' | 'residue
     const l = StructureElement.create();
     const { units } = structure;
 
-    const { auth_atom_id } = StructureProperties.atom;
-    const { auth_seq_id, auth_comp_id } = StructureProperties.residue;
+    const { label_atom_id } = StructureProperties.atom;
+    const { auth_seq_id, label_comp_id } = StructureProperties.residue;
     const { auth_asym_id } = StructureProperties.chain;
     const p = Vec3.zero();
 
@@ -97,7 +97,7 @@ function getLabelDataComputed(structure: Structure, level: 'elements' | 'residue
                 l.element = elements[j];
 
                 pos(l.element, p);
-                data.texts.push(auth_atom_id(l));
+                data.texts.push(label_atom_id(l));
                 data.positions.push(Vec3.clone(p));
                 data.sizes.push(1);
                 data.depths.push(2);
@@ -124,7 +124,7 @@ function getLabelDataComputed(structure: Structure, level: 'elements' | 'residue
 
                 l.element = elements[start];
 
-                data.texts.push(`${auth_comp_id(l)} ${auth_seq_id(l)}:${auth_asym_id(l)}`);
+                data.texts.push(`${label_comp_id(l)} ${auth_seq_id(l)}:${auth_asym_id(l)}`);
                 data.positions.push(Vec3.clone(boundaryHelper.center));
                 data.sizes.push(Math.max(1, boundaryHelper.radius / 5));
                 data.depths.push(boundaryHelper.radius);
diff --git a/src/mol-theme/color/residue-name.ts b/src/mol-theme/color/residue-name.ts
index 999bf2a9b..53f601d04 100644
--- a/src/mol-theme/color/residue-name.ts
+++ b/src/mol-theme/color/residue-name.ts
@@ -75,7 +75,7 @@ export function residueNameColor(residueName: string): Color {
 
 function getAtomicCompId(unit: Unit.Atomic, element: ElementIndex) {
     const { modifiedResidues } = unit.model.properties
-    const compId = unit.model.atomicHierarchy.residues.auth_comp_id.value(unit.residueIndex[element])
+    const compId = unit.model.atomicHierarchy.residues.label_comp_id.value(unit.residueIndex[element])
     const parentId = modifiedResidues.parentId.get(compId)
     return parentId === undefined ? compId : parentId
 }
diff --git a/src/mol-theme/label.ts b/src/mol-theme/label.ts
index 69b67a6b9..baeb5d978 100644
--- a/src/mol-theme/label.ts
+++ b/src/mol-theme/label.ts
@@ -66,9 +66,9 @@ export function elementLabel(location: StructureElement) {
 
     if (Unit.isAtomic(location.unit)) {
         const asym_id = Props.chain.auth_asym_id(location)
-        const seq_id = Props.residue.auth_seq_id(location)
-        const comp_id = Props.residue.auth_comp_id(location)
-        const atom_id = Props.atom.auth_atom_id(location)
+        const seq_id = location.unit.model.atomicHierarchy.residues.auth_seq_id.isDefined ? Props.residue.auth_seq_id(location) : Props.residue.label_seq_id(location)
+        const comp_id = Props.residue.label_comp_id(location)
+        const atom_id = Props.atom.label_atom_id(location)
         const alt_id = Props.atom.label_alt_id(location)
         label = `[${comp_id}]${seq_id}:${asym_id}.${atom_id}${alt_id ? `%${alt_id}` : ''}`
     } else if (Unit.isCoarse(location.unit)) {
-- 
GitLab