From d8adf7c5defce0f01afaf5767fdb328b635b4df3 Mon Sep 17 00:00:00 2001
From: Alexander Rose <alex.rose@rcsb.org>
Date: Wed, 24 Oct 2018 15:42:52 -0700
Subject: [PATCH] get saccharide component from 'SNFG CARB SYMBOL'
 pdbx_chem_comp_identifier.type when available

---
 .../structure/model/formats/mmcif.ts          | 27 ++++++++++++++++++-
 src/mol-model/structure/model/model.ts        |  3 +++
 .../structure/carbohydrates/compute.ts        | 10 ++++---
 .../structure/carbohydrates/constants.ts      | 15 +++++++++--
 4 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/src/mol-model/structure/model/formats/mmcif.ts b/src/mol-model/structure/model/formats/mmcif.ts
index 309e19470..e97ee8a41 100644
--- a/src/mol-model/structure/model/formats/mmcif.ts
+++ b/src/mol-model/structure/model/formats/mmcif.ts
@@ -27,6 +27,7 @@ import { ChemicalComponent, ChemicalComponentMap } from '../properties/chemical-
 import { ComponentType, getMoleculeType } from '../types';
 
 import mmCIF_Format = Format.mmCIF
+import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, UnknownSaccharideComponent, SaccharideCompIdMap } from 'mol-model/structure/structure/carbohydrates/constants';
 
 type AtomSite = mmCIF_Database['atom_site']
 
@@ -125,17 +126,41 @@ function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap {
     return map
 }
 
+/**
+ * Maps comp_id to its saccharide component using the 'SNFG CARB SYMBOL' rows of
+ * `pdbx_chem_comp_identifier`; falls back to the static `SaccharideCompIdMap`.
+ */
+function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap {
+    const map = new Map<string, SaccharideComponent>()
+    const { pdbx_chem_comp_identifier } = format.data
+    const { type, comp_id, identifier } = pdbx_chem_comp_identifier
+    for (let i = 0, il = pdbx_chem_comp_identifier._rowCount; i < il; ++i) {
+        if (type.value(i) !== 'SNFG CARB SYMBOL') continue
+        const snfgName = identifier.value(i)
+        const saccharideComp = SaccharidesSnfgMap.get(snfgName)
+        if (saccharideComp) {
+            map.set(comp_id.value(i), saccharideComp)
+        } else {
+            console.warn(`Unknown SNFG name '${snfgName}'`)
+        }
+    }
+    // rows of other identifier types (or only unknown SNFG names) must not disable the static lookup
+    return map.size > 0 ? map : SaccharideCompIdMap
+}
+
 export interface FormatData {
     modifiedResidues: Model['properties']['modifiedResidues']
     asymIdSerialMap: Model['properties']['asymIdSerialMap']
     chemicalComponentMap: Model['properties']['chemicalComponentMap']
+    saccharideComponentMap: Model['properties']['saccharideComponentMap'] // produced by getSaccharideComponentMap
 }
 
 function getFormatData(format: mmCIF_Format): FormatData {
     return {
         modifiedResidues: getModifiedResidueNameMap(format),
         asymIdSerialMap: getAsymIdSerialMap(format),
-        chemicalComponentMap: getChemicalComponentMap(format)
+        chemicalComponentMap: getChemicalComponentMap(format),
+        saccharideComponentMap: getSaccharideComponentMap(format) // residue name -> SaccharideComponent
     }
 }
 
diff --git a/src/mol-model/structure/model/model.ts b/src/mol-model/structure/model/model.ts
index bd9810c19..1eaad6e45 100644
--- a/src/mol-model/structure/model/model.ts
+++ b/src/mol-model/structure/model/model.ts
@@ -16,6 +16,7 @@ import { SecondaryStructure } from './properties/seconday-structure';
 
 import from_mmCIF from './formats/mmcif'
 import { ChemicalComponentMap } from './properties/chemical-component';
+import { SaccharideComponentMap } from '../structure/carbohydrates/constants';
 
 /**
  * Interface to the "source data" of the molecule.
@@ -50,6 +51,8 @@ export interface Model extends Readonly<{
         readonly asymIdSerialMap: ReadonlyMap<string, number>
         /** maps residue name to `ChemicalComponent` data */
         readonly chemicalComponentMap: ChemicalComponentMap
+        /** maps residue name to `SaccharideComponent` data */
+        readonly saccharideComponentMap: SaccharideComponentMap
     },
 
     customProperties: CustomProperties,
diff --git a/src/mol-model/structure/structure/carbohydrates/compute.ts b/src/mol-model/structure/structure/carbohydrates/compute.ts
index 1c0a7f7ea..6048fc52c 100644
--- a/src/mol-model/structure/structure/carbohydrates/compute.ts
+++ b/src/mol-model/structure/structure/carbohydrates/compute.ts
@@ -11,13 +11,13 @@ import { IntAdjacencyGraph } from 'mol-math/graph';
 import { Vec3 } from 'mol-math/linear-algebra';
 import PrincipalAxes from 'mol-math/linear-algebra/matrix/principal-axes';
 import { fillSerial } from 'mol-util/array';
-import { ResidueIndex } from '../../model';
+import { ResidueIndex, Model } from '../../model';
 import { ElementSymbol, MoleculeType } from '../../model/types';
 import { getAtomicMoleculeType, getPositionMatrix } from '../../util';
 import StructureElement from '../element';
 import Structure from '../structure';
 import Unit from '../unit';
-import { SaccharideNameMap, UnknownSaccharideComponent } from './constants';
+import { SaccharideCompIdMap, UnknownSaccharideComponent, SaccharideComponent, SaccharidesSnfgMap } from './constants';
 import { CarbohydrateElement, CarbohydrateLink, Carbohydrates, CarbohydrateTerminalLink, PartialCarbohydrateElement } from './data';
 import { UnitRings, UnitRing } from '../unit/rings';
 import { ElementIndex } from '../../model/indexing';
@@ -118,6 +118,10 @@ function filterFusedRings(unitRings: UnitRings, rings: UnitRings.Index[] | undef
     }
 }
 
+function getSaccharideComp(compId: string, model: Model): SaccharideComponent { // per-model lookup by residue name
+    return model.properties.saccharideComponentMap.get(compId) || UnknownSaccharideComponent // sentinel when compId is unmapped
+}
+
 export function computeCarbohydrates(structure: Structure): Carbohydrates {
     const links: CarbohydrateLink[] = []
     const terminalLinks: CarbohydrateTerminalLink[] = []
@@ -162,7 +166,7 @@ export function computeCarbohydrates(structure: Structure): Carbohydrates {
             while (residueIt.hasNext) {
                 const { index: residueIndex } = residueIt.move();
 
-                const saccharideComp = SaccharideNameMap.get(label_comp_id.value(residueIndex)) || UnknownSaccharideComponent
+                const saccharideComp = getSaccharideComp(label_comp_id.value(residueIndex), model)
                 if (saccharideComp === UnknownSaccharideComponent) {
                     if (getAtomicMoleculeType(unit.model, residueIndex) !== MoleculeType.saccharide) continue
                 }
diff --git a/src/mol-model/structure/structure/carbohydrates/constants.ts b/src/mol-model/structure/structure/carbohydrates/constants.ts
index d68c1f81a..5c74ddfaf 100644
--- a/src/mol-model/structure/structure/carbohydrates/constants.ts
+++ b/src/mol-model/structure/structure/carbohydrates/constants.ts
@@ -174,6 +174,15 @@ const Monosaccharides: SaccharideComponent[] = [
     { abbr: 'Psi', name: 'Psicose', color: SaccharideColors.Pink, type: SaccharideType.Assigned },
 ]
 
+/** Maps each monosaccharide's `abbr` (matched against SNFG symbol names) to its `SaccharideComponent`. */
+export const SaccharidesSnfgMap = (function () {
+    const byAbbr = new Map<string, SaccharideComponent>()
+    Monosaccharides.forEach(function (saccharide) {
+        byAbbr.set(saccharide.abbr, saccharide)
+    })
+    return byAbbr
+})()
+
 export const MonosaccharidesColorTable: [string, Color][] = [
     ['Glc-family', SaccharideColors.Blue],
     ['Man-family', SaccharideColors.Green],
@@ -287,7 +296,7 @@ const CommonSaccharideNames: { [k: string]: string[] } = {
     Psi: [],
 }
 
-export const SaccharideNameMap = (function () {
+export const SaccharideCompIdMap = (function () {
     const map = new Map<string, SaccharideComponent>()
     for (let i = 0, il = Monosaccharides.length; i < il; ++i) {
         const saccharide = Monosaccharides[i]
@@ -299,4 +308,6 @@ export const SaccharideNameMap = (function () {
         }
     }
     return map
-})()
\ No newline at end of file
+})()
+
+export type SaccharideComponentMap = ReadonlyMap<string, SaccharideComponent> // read-only lookup keyed by residue name
-- 
GitLab