From 199b27a8eb963bd3e2d31afb48a625344bed2cf0 Mon Sep 17 00:00:00 2001
From: David Sehnal <david.sehnal@gmail.com>
Date: Fri, 8 Jun 2018 15:47:29 +0200
Subject: [PATCH] Support modified residues in Sequence

---
 src/mol-app/ui/visualization/sequence-view.tsx |  3 ++-
 src/mol-model/sequence/constants.ts            |  2 ++
 src/mol-model/sequence/sequence.ts             | 18 ++++++++++++++----
 src/mol-model/structure/model/formats/mmcif.ts |  6 ++++--
 .../structure/model/formats/mmcif/sequence.ts  |  6 +++---
 .../structure/model/properties/sequence.ts     |  4 ++--
 6 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/src/mol-app/ui/visualization/sequence-view.tsx b/src/mol-app/ui/visualization/sequence-view.tsx
index 4105dea2e..21bef90b8 100644
--- a/src/mol-app/ui/visualization/sequence-view.tsx
+++ b/src/mol-app/ui/visualization/sequence-view.tsx
@@ -42,7 +42,8 @@ class EntitySequence extends React.Component<{ ctx: Context, seq: StructureSeque
 
         const query = createQuery(this.props.seq.entityId, seqId);
         const loci = Selection.toLoci(await query(this.props.structure, SyncRuntimeContext));
-        InteractivityEvents.HighlightElementLoci.dispatch(this.props.ctx, loci);
+        if (loci.elements.length === 0) InteractivityEvents.HighlightElementLoci.dispatch(this.props.ctx, void 0);
+        else InteractivityEvents.HighlightElementLoci.dispatch(this.props.ctx, loci);
     }
 
 
diff --git a/src/mol-model/sequence/constants.ts b/src/mol-model/sequence/constants.ts
index 192d137f5..17796f599 100644
--- a/src/mol-model/sequence/constants.ts
+++ b/src/mol-model/sequence/constants.ts
@@ -7,10 +7,12 @@
 export type AminoAlphabet =
     | 'H' | 'R' | 'K' | 'I' | 'F' | 'L' | 'W' | 'A' | 'M' | 'P' | 'C' | 'N' | 'V' | 'G' | 'S' | 'Q' | 'Y' | 'D' | 'E' | 'T' | 'U' | 'O'
     | 'X' /** = Unknown */
+    | '-' /** = Gap */
 
 export type NuclecicAlphabet =
     | 'A' | 'C' | 'G' | 'T' | 'U'
     | 'X' /** = Unknown */
+    | '-' /** = Gap */
 
 // from NGL
 const ProteinOneLetterCodes: { [name: string]: AminoAlphabet }  = {
diff --git a/src/mol-model/sequence/sequence.ts b/src/mol-model/sequence/sequence.ts
index 7f99ca54c..9401390e5 100644
--- a/src/mol-model/sequence/sequence.ts
+++ b/src/mol-model/sequence/sequence.ts
@@ -29,7 +29,7 @@ namespace Sequence {
     export interface Protein extends Base<Kind.Protein, AminoAlphabet> { }
     export interface RNA extends Base<Kind.RNA, NuclecicAlphabet> { }
     export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> { }
-    export interface Generic extends Base<Kind.Generic, 'X'> { }
+    export interface Generic extends Base<Kind.Generic, 'X' | '-'> { }
 
     export function create(kind: Kind, sequence: string, offset: number = 0): Sequence {
         return { kind: kind as any, sequence: sequence as any, offset };
@@ -49,11 +49,21 @@ namespace Sequence {
         return { kind: Kind.Generic, code: (v: string) => 'X' };
     }
 
-    export function ofResidueNames(residueName: Column<string>, seqId: Column<number>): Sequence {
+    function modCode(code: (name: string) => string, map: Map<string, string>): (name: string) => string { // wraps `code` with a one-step fallback lookup through `map`
+        return n => {
+            const ret = code(n);
+            if (ret !== 'X' || !map.has(n)) return ret; // keep the direct result unless it is 'X' and `map` has an entry for `n`
+            return code(map.get(n)!); // otherwise retry once with the name `n` maps to; `has(n)` was checked on the previous line
+        }
+    }
+
+    export function ofResidueNames(residueName: Column<string>, seqId: Column<number>, modifiedMap?: Map<string, string>): Sequence {
         if (seqId.rowCount === 0) throw new Error('cannot be empty');
 
         const { kind, code } = determineKind(residueName);
-        return new Impl(kind, residueName, seqId, code) as Sequence;
+
+        if (!modifiedMap || modifiedMap.size === 0) return new Impl(kind, residueName, seqId, code) as Sequence; // map missing or empty: use `code` unchanged
+        return new Impl(kind, residueName, seqId, modCode(code, modifiedMap)) as Sequence; // otherwise wrap `code` so 'X' results are retried through `modifiedMap`
     }
 
     class Impl implements Base<any, any> {
@@ -83,7 +93,7 @@ namespace Sequence {
             const count = maxSeqId - minSeqId + 1;
             const sequenceArray = new Array(maxSeqId + 1);
             for (let i = 0; i < count; i++) {
-                sequenceArray[i] = 'X';
+                sequenceArray[i] = '-';
             }
 
             for (let i = 0, _i = this.seqId.rowCount; i < _i; i++) {
diff --git a/src/mol-model/structure/model/formats/mmcif.ts b/src/mol-model/structure/model/formats/mmcif.ts
index 48d8c4754..49bcfc52c 100644
--- a/src/mol-model/structure/model/formats/mmcif.ts
+++ b/src/mol-model/structure/model/formats/mmcif.ts
@@ -159,6 +159,8 @@ function createModel(format: mmCIF_Format, bounds: Interval, previous?: Model):
         ? format.data.entry.id.value(0)
         : format.data._name;
 
+    const modifiedResidueNameMap = modResMap(format);
+
     return {
         id: UUID.create(),
         label,
@@ -166,13 +168,13 @@ function createModel(format: mmCIF_Format, bounds: Interval, previous?: Model):
         modelNum: format.data.atom_site.pdbx_PDB_model_num.value(Interval.start(bounds)),
         entities,
         atomicHierarchy,
-        sequence: getSequence(format.data, entities, atomicHierarchy),
+        sequence: getSequence(format.data, entities, atomicHierarchy, modifiedResidueNameMap),
         atomicConformation: getConformation(format, bounds),
         coarseHierarchy: coarse.hierarchy,
         coarseConformation: coarse.conformation,
         properties: {
             secondaryStructure: getSecondaryStructureMmCif(format.data, atomicHierarchy),
-            modifiedResidueNameMap: modResMap(format)
+            modifiedResidueNameMap
         },
         symmetry: getSymmetry(format)
     };
diff --git a/src/mol-model/structure/model/formats/mmcif/sequence.ts b/src/mol-model/structure/model/formats/mmcif/sequence.ts
index 67e617ef3..a279b6122 100644
--- a/src/mol-model/structure/model/formats/mmcif/sequence.ts
+++ b/src/mol-model/structure/model/formats/mmcif/sequence.ts
@@ -21,8 +21,8 @@ import { Sequence } from '../../../../sequence';
 // corresponding ATOM_SITE entries should reflect this
 // heterogeneity.
 
-export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy): StructureSequence {
-    if (!cif.entity_poly_seq._rowCount) return StructureSequence.fromAtomicHierarchy(entities, hierarchy);
+export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy, modResMap: Map<string, string>): StructureSequence {
+    if (!cif.entity_poly_seq._rowCount) return StructureSequence.fromAtomicHierarchy(entities, hierarchy, modResMap);
 
     const { entity_id, num, mon_id } = cif.entity_poly_seq;
 
@@ -45,7 +45,7 @@ export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHie
             entityId: id,
             compId: _compId,
             num: _num,
-            sequence: Sequence.ofResidueNames(_compId, _num)
+            sequence: Sequence.ofResidueNames(_compId, _num, modResMap)
         };
 
         sequences.push(byEntityKey[entityKey]);
diff --git a/src/mol-model/structure/model/properties/sequence.ts b/src/mol-model/structure/model/properties/sequence.ts
index 3cd6e601a..7725eb561 100644
--- a/src/mol-model/structure/model/properties/sequence.ts
+++ b/src/mol-model/structure/model/properties/sequence.ts
@@ -23,7 +23,7 @@ namespace StructureSequence {
         readonly sequence: Sequence
     }
 
-    export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy): StructureSequence {
+    export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy, modResMap?: Map<string, string>): StructureSequence {
         const { label_comp_id, label_seq_id } = hierarchy.residues
         const { chainSegments, residueSegments } = hierarchy
 
@@ -52,7 +52,7 @@ namespace StructureSequence {
                 entityId: entities.data.id.value(entityKey),
                 compId,
                 num,
-                sequence: Sequence.ofResidueNames(compId, num)
+                sequence: Sequence.ofResidueNames(compId, num, modResMap)
             };
 
             sequences.push(byEntityKey[entityKey]);
-- 
GitLab