diff --git a/src/apps/structure-info/model.ts b/src/apps/structure-info/model.ts index c5888bd12fa548b52ceb8bae489bf00dc3a9c068..77e0263c238a9ae58d11d1a14fa8016ecb1b69a2 100644 --- a/src/apps/structure-info/model.ts +++ b/src/apps/structure-info/model.ts @@ -15,6 +15,7 @@ import { OrderedSet } from '../../mol-data/int'; import { openCif, downloadCif } from './helpers'; import { Vec3 } from '../../mol-math/linear-algebra'; import { trajectoryFromMmCIF } from '../../mol-model-formats/structure/mmcif'; +import { Sequence } from '../../mol-model/sequence'; async function downloadFromPdb(pdb: string) { @@ -110,9 +111,10 @@ export function printSequence(model: Model) { console.log('\nSequence\n============='); const { byEntityKey } = model.sequence; for (const key of Object.keys(byEntityKey)) { - const seq = byEntityKey[+key]; - console.log(`${seq.entityId} (${seq.sequence.kind} ${seq.num.value(0)} (offset ${seq.sequence.offset}), ${seq.num.value(seq.num.rowCount - 1)}) (${seq.compId.value(0)}, ${seq.compId.value(seq.compId.rowCount - 1)})`); - console.log(`${seq.sequence.sequence}`); + const { sequence, entityId } = byEntityKey[+key]; + const { seqId, compId } = sequence + console.log(`${entityId} (${sequence.kind} ${seqId.value(0)} (offset ${sequence.offset}), ${seqId.value(seqId.rowCount - 1)}) (${compId.value(0)}, ${compId.value(compId.rowCount - 1)})`); + console.log(`${Sequence.getSequenceString(sequence)}`); } console.log(); } @@ -159,14 +161,14 @@ export function printUnits(structure: Structure) { console.log(`Coarse unit ${unit.id} ${unit.conformation.operator.name} (${Unit.isSpheres(l.unit) ? 'spheres' : 'gaussians'}): ${size} elements.`); const props = StructureProperties.coarse; - const seq = l.unit.model.sequence; + const modelSeq = l.unit.model.sequence; for (let j = 0, _j = Math.min(size, 3); j < _j; j++) { l.element = OrderedSet.getAt(elements, j); const residues: string[] = []; const start = props.seq_id_begin(l), end = props.seq_id_end(l); - const compId = seq.byEntityKey[props.entityKey(l)].compId.value; + const compId = modelSeq.byEntityKey[props.entityKey(l)].sequence.compId.value; for (let e = start; e <= end; e++) residues.push(compId(e)); console.log(`${props.asym_id(l)}:${start}-${end} (${residues.join('-')}) ${props.asym_id(l)} [${props.x(l).toFixed(2)}, ${props.y(l).toFixed(2)}, ${props.z(l).toFixed(2)}]`); } diff --git a/src/mol-model-formats/structure/mmcif/parser.ts b/src/mol-model-formats/structure/mmcif/parser.ts index 12e6b99cf3552f3d7e90efb34aaed668f3616964..e9b3d713b50814f058574a1f7b1b335d2463bd50 100644 --- a/src/mol-model-formats/structure/mmcif/parser.ts +++ b/src/mol-model-formats/structure/mmcif/parser.ts @@ -225,7 +225,7 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, sourceIn modelNum, entities, symmetry: getSymmetry(format), - sequence: getSequence(format.data, entities, atomic.hierarchy, formatData.modifiedResidues.parentId), + sequence: getSequence(format.data, entities, atomic.hierarchy, coarse.hierarchy, formatData.modifiedResidues.parentId), atomicHierarchy: atomic.hierarchy, atomicConformation: atomic.conformation, coarseHierarchy: coarse.hierarchy, @@ -262,7 +262,7 @@ function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatD modelNum: data.model_id, entities: data.entities, symmetry: getSymmetry(format), - sequence: getSequence(format.data, data.entities, atomic.hierarchy, formatData.modifiedResidues.parentId), + sequence: getSequence(format.data, data.entities, atomic.hierarchy, coarse.hierarchy, formatData.modifiedResidues.parentId), atomicHierarchy: atomic.hierarchy, atomicConformation: atomic.conformation, coarseHierarchy: coarse.hierarchy, diff --git a/src/mol-model-formats/structure/mmcif/sequence.ts b/src/mol-model-formats/structure/mmcif/sequence.ts index dd2a48a36f90a37636c0844de79aed721b4a6189..2b455a2a74c9103ed984be005c0b93a6886c41ec 100644 --- a/src/mol-model-formats/structure/mmcif/sequence.ts +++ b/src/mol-model-formats/structure/mmcif/sequence.ts @@ -1,7 +1,8 @@ /** - * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * Copyright (c) 2018-2019 mol* contributors, licensed under MIT, See LICENSE file for more info. * * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> */ import { mmCIF_Database as mmCIF } from '../../../mol-io/reader/cif/schema/mmcif' @@ -10,19 +11,12 @@ import { Column } from '../../../mol-data/db'; import { AtomicHierarchy } from '../../../mol-model/structure/model/properties/atomic'; import { Entities } from '../../../mol-model/structure/model/properties/common'; import { Sequence } from '../../../mol-model/sequence'; +import { CoarseHierarchy } from '../../../mol-model/structure/model/properties/coarse'; -// TODO how to handle microheterogeneity -// see http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html -// -// Data items in the ENTITY_POLY_SEQ category specify the sequence -// of monomers in a polymer. Allowance is made for the possibility -// of microheterogeneity in a sample by allowing a given sequence -// number to be correlated with more than one monomer ID. The -// corresponding ATOM_SITE entries should reflect this -// heterogeneity. - -export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy, modResMap: ReadonlyMap<string, string>): StructureSequence { - if (!cif.entity_poly_seq._rowCount) return StructureSequence.fromAtomicHierarchy(entities, hierarchy, modResMap); +export function getSequence(cif: mmCIF, entities: Entities, atomicHierarchy: AtomicHierarchy, coarseHierarchy: CoarseHierarchy, modResMap: ReadonlyMap<string, string>): StructureSequence { + if (!cif.entity_poly_seq._rowCount) { + return StructureSequence.fromHierarchy(entities, atomicHierarchy, coarseHierarchy, modResMap); + } const { entity_id, num, mon_id } = cif.entity_poly_seq; @@ -37,15 +31,13 @@ export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHie i++; const id = entity_id.value(start); - const _compId = Column.window(mon_id, start, i); - const _num = Column.window(num, start, i); + const compId = Column.window(mon_id, start, i); + const seqId = Column.window(num, start, i); const entityKey = entities.getEntityIndex(id); byEntityKey[entityKey] = { entityId: id, - compId: _compId, - num: _num, - sequence: Sequence.ofResidueNames(_compId, _num, modResMap) + sequence: Sequence.ofResidueNames(compId, seqId, modResMap) }; sequences.push(byEntityKey[entityKey]); diff --git a/src/mol-model/sequence/sequence.ts b/src/mol-model/sequence/sequence.ts index 20776def8b5b1e52101e21bce263a95f295ffbd6..72a8c13e62c5d3303ba524842bb39cd224b47739 100644 --- a/src/mol-model/sequence/sequence.ts +++ b/src/mol-model/sequence/sequence.ts @@ -23,9 +23,15 @@ namespace Sequence { export interface Base<K extends Kind, Alphabet extends string> { readonly kind: K, + readonly length: number, readonly offset: number, - readonly sequence: ArrayLike<Alphabet> - readonly labels: ArrayLike<string> + + readonly code: Column<Alphabet> + readonly label: Column<string> + + readonly seqId: Column<number> + readonly compId: Column<string> + /** maps seqId to list of compIds */ readonly microHet: ReadonlyMap<number, string[]> } @@ -35,12 +41,14 @@ namespace Sequence { export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> { } export interface Generic extends Base<Kind.Generic, 'X' | '-'> { } - export function create<K extends Kind, Alphabet extends string>(kind: K, sequence: Alphabet[], labels: string[], microHet: Map<number, string[]>, offset: number = 0): Base<K, Alphabet> { - return { kind: kind, sequence: sequence, labels, microHet, offset }; + export function create<K extends Kind, Alphabet extends string>(kind: K, code: Column<Alphabet>, label: Column<string>, seqId: Column<number>, compId: Column<string>, microHet: Map<number, string[]>, offset: number = 0): Base<K, Alphabet> { + const length = code.rowCount + return { kind, code, label, seqId, compId, microHet, offset, length }; } export function getSequenceString(seq: Sequence) { - return seq.sequence as string; + const array = seq.code.toArray() + return (array instanceof Array ? array : Array.from(array)).join('') } function determineKind(names: Column<string>) { @@ -61,39 +69,46 @@ namespace Sequence { } } - export function ofResidueNames(residueName: Column<string>, seqId: Column<number>, modifiedMap?: ReadonlyMap<string, string>): Sequence { + export function ofResidueNames(compId: Column<string>, seqId: Column<number>, modifiedMap?: ReadonlyMap<string, string>): Sequence { if (seqId.rowCount === 0) throw new Error('cannot be empty'); - const { kind, code } = determineKind(residueName); + const { kind, code } = determineKind(compId); if (!modifiedMap || modifiedMap.size === 0) { - return new Impl(kind, residueName, seqId, code) as Sequence; + return new ResidueNamesImpl(kind, compId, seqId, code) as Sequence; } - return new Impl(kind, residueName, seqId, modCode(code, modifiedMap)) as Sequence; + return new ResidueNamesImpl(kind, compId, seqId, modCode(code, modifiedMap)) as Sequence; } - class Impl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> { + class ResidueNamesImpl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> { private _offset = 0; - private _seq: ArrayLike<Alphabet> | undefined = void 0; - private _labels: ArrayLike<string> | undefined = void 0; + private _length = 0; private _microHet: ReadonlyMap<number, string[]> | undefined = void 0; + private _code: Column<Alphabet> | undefined = undefined + private _label: Column<string> | undefined = undefined - get offset() { - if (this._seq !== void 0) return this._offset; + get code(): Column<Alphabet> { + if (this._code !== void 0) return this._code; this.create(); - return this._offset; + return this._code!; } - get sequence(): ArrayLike<Alphabet> { - if (this._seq !== void 0) return this._seq; + get label(): Column<string> { + if (this._label !== void 0) return this._label; this.create(); - return this._seq!; + return this._label!; } - get labels(): ArrayLike<string> { - if (this._labels !== void 0) return this._labels; + get offset() { + if (this._code !== void 0) return this._offset; this.create(); - return this._labels!; + return this._offset; + } + + get length() { + if (this._code !== void 0) return this._length; + this.create(); + return this._length; } get microHet(): ReadonlyMap<number, string[]> { @@ -126,8 +141,8 @@ namespace Sequence { for (let i = 0, _i = this.seqId.rowCount; i < _i; i++) { const seqId = this.seqId.value(i) const idx = seqId - minSeqId; - const name = this.residueName.value(i); - const code = this.code(name); + const name = this.compId.value(i); + const code = this.getCode(name); // in case of MICROHETEROGENEITY `sequenceArray[idx]` may already be set if (!sequenceArray[idx] || sequenceArray[idx] === '-') { sequenceArray[idx] = code; @@ -141,14 +156,62 @@ namespace Sequence { if (compIds[i].length > 1) microHet.set(i, compIds[i]) } - this._seq = sequenceArray.join('') as unknown as ArrayLike<Alphabet>; - this._labels = labels.map(l => l.length > 1 ? `(${l.join('|')})` : l.join('')); + this._code = Column.ofStringArray(sequenceArray) as Column<Alphabet> + this._label = Column.ofLambda({ + value: i => { + const l = labels[i] + return l.length > 1 ? `(${l.join('|')})` : l.join('') + }, + rowCount: labels.length, + schema: Column.Schema.str + }) this._microHet = microHet this._offset = minSeqId - 1; + this._length = count + } + + constructor(public kind: K, public compId: Column<string>, public seqId: Column<number>, private getCode: (name: string) => string) { + } + } + + export function ofSequenceRanges(seqIdBegin: Column<number>, seqIdEnd: Column<number>): Sequence { + const kind = Kind.Generic + + return new SequenceRangesImpl(kind, seqIdBegin, seqIdEnd) as Sequence; + } - constructor(public kind: K, private residueName: Column<string>, private seqId: Column<number>, private code: (name: string) => string) { + class SequenceRangesImpl<K extends Kind, Alphabet extends string> implements Base<K, Alphabet> { + public offset: number + public length: number + public code: Column<Alphabet> + public label: Column<string> + public seqId: Column<number> + public compId: Column<string> + public microHet: ReadonlyMap<number, string[]> + + constructor(public kind: K, private seqIdStart: Column<number>, private seqIdEnd: Column<number>) { + let maxSeqId = 0, minSeqId = Number.MAX_SAFE_INTEGER; + for (let i = 0, _i = this.seqIdStart.rowCount; i < _i; i++) { + const idStart = this.seqIdStart.value(i); + const idEnd = this.seqIdEnd.value(i); + if (idStart < minSeqId) minSeqId = idStart; + if (maxSeqId < idEnd) maxSeqId = idEnd; + } + + const count = maxSeqId - minSeqId + 1; + this.code = Column.ofConst('X', count, Column.Schema.str) as Column<Alphabet> + this.label = Column.ofConst('', count, Column.Schema.str) + this.seqId = Column.ofLambda({ + value: row => row + minSeqId + 1, + rowCount: count, + schema: Column.Schema.int + }) + this.compId = Column.ofConst('', count, Column.Schema.str) + + this.offset = minSeqId - 1; + this.length = count } } } diff --git a/src/mol-model/structure/model/properties/coarse/hierarchy.ts b/src/mol-model/structure/model/properties/coarse/hierarchy.ts index db20568e808d4e83805b15805cdff095363f40f6..9b492981242a325cdf661d128e0e9a322f1c157e 100644 --- a/src/mol-model/structure/model/properties/coarse/hierarchy.ts +++ b/src/mol-model/structure/model/properties/coarse/hierarchy.ts @@ -11,14 +11,17 @@ import { ElementIndex, ChainIndex, EntityIndex } from '../../indexing'; import SortedRanges from '../../../../../mol-data/int/sorted-ranges'; export interface CoarsedElementKeys { - // assign a key to each element + /** Assign a key to each element */ chainKey: ArrayLike<ChainIndex>, - // assign a key to each element, index to the Model.entities.data table + /** Assign a key to each element, index to the Model.entities.data table */ entityKey: ArrayLike<EntityIndex>, - /** find index of the residue/feature element where seq_id is included */ + /** Find index of the residue/feature element where seq_id is included */ findSequenceKey(entityId: string, asym_id: string, seq_id: number): ElementIndex findChainKey(entityId: string, asym_id: string): ChainIndex + + /** Returns index or -1 if not present. */ + getEntityFromChain(cI: ChainIndex): EntityIndex } export interface CoarseElementData { diff --git a/src/mol-model/structure/model/properties/sequence.ts b/src/mol-model/structure/model/properties/sequence.ts index a9cd6715eb3fb1718821fabca0e7e9da6fcef96f..547be817970fac1fb813c2791e39432950483f32 100644 --- a/src/mol-model/structure/model/properties/sequence.ts +++ b/src/mol-model/structure/model/properties/sequence.ts @@ -1,7 +1,8 @@ /** - * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * Copyright (c) 2018-2019 mol* contributors, licensed under MIT, See LICENSE file for more info. * * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> */ import { Column } from '../../../../mol-data/db' @@ -9,6 +10,8 @@ import { AtomicHierarchy } from './atomic/hierarchy'; import { Entities } from './common'; import { Sequence } from '../../../sequence'; import { ChainIndex } from '../indexing'; +import { CoarseHierarchy } from './coarse'; +import { CoarseElements } from './coarse/hierarchy'; interface StructureSequence { readonly sequences: ReadonlyArray<StructureSequence.Entity>, @@ -18,19 +21,39 @@ interface StructureSequence { namespace StructureSequence { export interface Entity { readonly entityId: string, - readonly num: Column<number>, - /** Corresponds to _entity_poly_seq.mon_id */ - readonly compId: Column<string>, readonly sequence: Sequence } + function merge(...entitySeqs: StructureSequence[]): StructureSequence { + const sequences: StructureSequence.Entity[] = [] + const byEntityKey: { [key: number]: StructureSequence.Entity } = {} + + for (let i = 0, il = entitySeqs.length; i < il; ++i) { + sequences.push(...entitySeqs[i].sequences) + Object.assign(byEntityKey, entitySeqs[i].byEntityKey) + } + return { sequences, byEntityKey } + } + + export function fromHierarchy(entities: Entities, atomicHierarchy: AtomicHierarchy, coarseHierarchy: CoarseHierarchy, modResMap?: ReadonlyMap<string, string>): StructureSequence { + const atomic = fromAtomicHierarchy(entities, atomicHierarchy, modResMap) + const coarse = fromCoarseHierarchy(entities, coarseHierarchy) + return merge(atomic, coarse) + } + export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy, modResMap?: ReadonlyMap<string, string>): StructureSequence { const { label_comp_id, label_seq_id } = hierarchy.residues const { chainAtomSegments, residueAtomSegments } = hierarchy + const { count, offsets } = chainAtomSegments const byEntityKey: StructureSequence['byEntityKey'] = { }; const sequences: StructureSequence.Entity[] = []; + // check if chain segments are empty + if (count === 1 && offsets[0] === 0 && offsets[1] === 0) { + return { byEntityKey, sequences }; + } + for (let cI = 0 as ChainIndex, _cI = hierarchy.chains._rowCount; cI < _cI; cI++) { const entityKey = hierarchy.index.getEntityFromChain(cI); // Only for polymers, trying to mirror _entity_poly_seq @@ -43,16 +66,14 @@ namespace StructureSequence { } cI--; - const rStart = residueAtomSegments.index[chainAtomSegments.offsets[start]]; - const rEnd = residueAtomSegments.index[chainAtomSegments.offsets[cI + 1]]; + const rStart = residueAtomSegments.index[offsets[start]]; + const rEnd = residueAtomSegments.index[offsets[cI + 1]]; const compId = Column.window(label_comp_id, rStart, rEnd); const num = Column.window(label_seq_id, rStart, rEnd); byEntityKey[entityKey] = { entityId: entities.data.id.value(entityKey), - compId, - num, sequence: Sequence.ofResidueNames(compId, num, modResMap) }; @@ -61,6 +82,52 @@ namespace StructureSequence { return { byEntityKey, sequences }; } + + export function fromCoarseHierarchy(entities: Entities, hierarchy: CoarseHierarchy): StructureSequence { + const spheres = fromCoarseElements(entities, hierarchy.spheres) + const gaussians = fromCoarseElements(entities, hierarchy.gaussians) + return merge(spheres, gaussians) + } + + export function fromCoarseElements(entities: Entities, elements: CoarseElements): StructureSequence { + const { chainElementSegments, seq_id_begin, seq_id_end } = elements + const { count, offsets } = chainElementSegments + + const byEntityKey: StructureSequence['byEntityKey'] = { }; + const sequences: StructureSequence.Entity[] = []; + + // check if chain segments are empty + if (count === 1 && offsets[0] === 0 && offsets[1] === 0) { + return { byEntityKey, sequences }; + } + + for (let cI = 0 as ChainIndex, _cI = count; cI < _cI; cI++) { + const eK = elements.getEntityFromChain(cI); + if (byEntityKey[eK] !== void 0) continue; + + let start = cI; + cI++; + while (cI < _cI && eK === elements.getEntityFromChain(cI)) { + cI++; + } + cI--; + + const eStart = offsets[start]; + const eEnd = offsets[cI + 1]; + + const seqIdBegin = Column.window(seq_id_begin, eStart, eEnd); + const seqIdEnd = Column.window(seq_id_end, eStart, eEnd); + + byEntityKey[eK] = { + entityId: entities.data.id.value(eK), + sequence: Sequence.ofSequenceRanges(seqIdBegin, seqIdEnd) + }; + + sequences.push(byEntityKey[eK]); + } + + return { byEntityKey, sequences }; + } } export default StructureSequence \ No newline at end of file diff --git a/src/mol-model/structure/model/properties/utils/coarse-keys.ts b/src/mol-model/structure/model/properties/utils/coarse-keys.ts index 26d91007c660bf353e9652e6851d326ce1dee7af..888932dff1987a24ce737c2a9090489e03b29ef0 100644 --- a/src/mol-model/structure/model/properties/utils/coarse-keys.ts +++ b/src/mol-model/structure/model/properties/utils/coarse-keys.ts @@ -8,6 +8,8 @@ import { Entities } from '../common'; import { CoarseElementData, CoarsedElementKeys } from '../coarse'; import { ChainIndex, ElementIndex, EntityIndex } from '../../indexing'; +import SortedRanges from '../../../../../mol-data/int/sorted-ranges'; +import { OrderedSet } from '../../../../../mol-data/int'; function getElementKey(map: Map<string, number>, key: string, counter: { index: number }) { if (map.has(key)) return map.get(key)!; @@ -23,7 +25,7 @@ function getElementSubstructureKeyMap(map: Map<number, Map<string, number>>, key return ret; } -function createLookUp(entities: Entities, chain: Map<number, Map<string, number>>, seq: Map<number, Map<number, number>>) { +function createLookUp(entities: Entities, chain: Map<number, Map<string, number>>, seq: Map<number, SeqMap>) { const getEntKey = entities.getEntityIndex; const findChainKey: CoarsedElementKeys['findChainKey'] = (e, c) => { const eKey = getEntKey(e); @@ -32,7 +34,6 @@ function createLookUp(entities: Entities, chain: Map<number, Map<string, number> if (!cm.has(c)) return -1 as ChainIndex; return cm.get(c)! as ChainIndex; } - // TODO consider implementing as binary search const findSequenceKey: CoarsedElementKeys['findSequenceKey'] = (e, c, s) => { const eKey = getEntKey(e); if (eKey < 0) return -1 as ElementIndex; @@ -41,8 +42,9 @@ function createLookUp(entities: Entities, chain: Map<number, Map<string, number> const cKey = cm.get(c) if (cKey === undefined) return -1 as ElementIndex const sm = seq.get(cKey)! - if (!sm.has(s)) return -1 as ElementIndex; - return sm.get(s)! as ElementIndex + const { elementIndices, seqRanges } = sm + const idx = SortedRanges.firstIntersectionIndex(seqRanges, OrderedSet.ofSingleton(s)) + return (idx !== -1 ? elementIndices[idx] : -1) as ElementIndex } return { findChainKey, findSequenceKey }; } @@ -51,39 +53,53 @@ function missingEntity(k: string) { throw new Error(`Missing entity entry for entity id '${k}'.`); } +type SeqMap = { elementIndices: number[], seqRanges: SortedRanges } + export function getCoarseKeys(data: CoarseElementData, entities: Entities): CoarsedElementKeys { const { entity_id, asym_id, seq_id_begin, seq_id_end, count, chainElementSegments } = data; - const seqMaps = new Map<number, Map<number, number>>(); + const seqMaps = new Map<number, SeqMap>(); const chainMaps = new Map<number, Map<string, number>>(), chainCounter = { index: 0 }; const chainKey = new Int32Array(count) as any as ChainIndex[]; const entityKey = new Int32Array(count) as any as EntityIndex[]; + const chainToEntity = new Int32Array(chainElementSegments.count) as any as EntityIndex[]; + for (let i = 0; i < count; i++) { entityKey[i] = entities.getEntityIndex(entity_id.value(i)); if (entityKey[i] < 0) missingEntity(entity_id.value(i)); } for (let cI = 0; cI < chainElementSegments.count; cI++) { - const start = chainElementSegments.offsets[cI], end = chainElementSegments.offsets[cI + 1]; - const map = getElementSubstructureKeyMap(chainMaps, entityKey[start]); + const start = chainElementSegments.offsets[cI] + const end = chainElementSegments.offsets[cI + 1]; + const eK = entityKey[start] + + chainToEntity[cI] = eK + + const map = getElementSubstructureKeyMap(chainMaps, eK); const key = getElementKey(map, asym_id.value(start), chainCounter) as ChainIndex; for (let i = start; i < end; i++) chainKey[i] = key; // create seq_id map for the ranges defined by seq_id_begin and seq_id_end - const seqMap: Map<number, number> = new Map() - seqMaps.set(key, seqMap) + const elementIndices: number[] = [] + const seqRanges: number[] = [] for (let i = start; i < end; i++) { const seqStart = seq_id_begin.value(i) const seqEnd = seq_id_end.value(i) - for (let j = seqStart; j <= seqEnd; j++) { - seqMap.set(j, i) - } + elementIndices.push(i) + seqRanges.push(seqStart, seqEnd) } + const seqMap = { elementIndices, seqRanges: SortedRanges.ofSortedRanges(seqRanges) } + seqMaps.set(key, seqMap) } const { findChainKey, findSequenceKey } = createLookUp(entities, chainMaps, seqMaps); - return { chainKey, entityKey, findSequenceKey, findChainKey }; + const getEntityFromChain: CoarsedElementKeys['getEntityFromChain'] = c => { + return chainToEntity[c] + } + + return { chainKey, entityKey, findSequenceKey, findChainKey, getEntityFromChain }; } \ No newline at end of file diff --git a/src/mol-model/structure/util.ts b/src/mol-model/structure/util.ts index f43a6cc6d9ee6c37bf4b33f9cfe718954c2eea0f..f1622f24ade5d773f3141deaba08afd3f3450e07 100644 --- a/src/mol-model/structure/util.ts +++ b/src/mol-model/structure/util.ts @@ -12,7 +12,7 @@ import Matrix from '../../mol-math/linear-algebra/matrix/matrix'; export function getCoarseBegCompId(unit: Unit.Spheres | Unit.Gaussians, element: ElementIndex) { const entityKey = unit.coarseElements.entityKey[element] - const seq = unit.model.sequence.byEntityKey[entityKey] + const seq = unit.model.sequence.byEntityKey[entityKey].sequence const seq_id_begin = unit.coarseElements.seq_id_begin.value(element) return seq.compId.value(seq_id_begin - 1) // 1-indexed } diff --git a/src/mol-plugin/ui/sequence.tsx b/src/mol-plugin/ui/sequence.tsx index 2a6ad283df367ce25a2d6760c3db0647379e8114..f356805e6be8cbbe092f1e6a74f7ba06e445e9f0 100644 --- a/src/mol-plugin/ui/sequence.tsx +++ b/src/mol-plugin/ui/sequence.tsx @@ -19,6 +19,8 @@ import { ParamDefinition as PD } from '../../mol-util/param-definition'; import { HeteroSequenceWrapper } from './sequence/hetero'; import { State, StateSelection } from '../../mol-state'; +const MaxDisplaySequenceLength = 10000 + function opKey(l: StructureElement.Location) { const ids = SP.unit.pdbx_struct_oper_list_ids(l) const ncs = SP.unit.struct_ncs_oper_id(l) @@ -32,7 +34,7 @@ function splitModelEntityId(modelEntityId: string) { return [ parseInt(modelIdx), entityId ] } -function getSequenceWrapper(state: SequenceViewState, structureSelection: StructureElementSelectionManager): SequenceWrapper.Any | undefined { +function getSequenceWrapper(state: SequenceViewState, structureSelection: StructureElementSelectionManager): SequenceWrapper.Any | string { const { structure, modelEntityId, invariantUnitId, operatorKey } = state const l = StructureElement.Location.create() const [ modelIdx, entityId ] = splitModelEntityId(modelEntityId) @@ -43,11 +45,21 @@ function getSequenceWrapper(state: SequenceViewState, structureSelection: Struct if (unit.invariantId !== invariantUnitId) continue if (opKey(l) !== operatorKey) continue + if (unit.polymerElements.length) { + const l = StructureElement.Location.create(unit, unit.elements[0]) + const entitySeq = unit.model.sequence.byEntityKey[SP.entity.key(l)] + // check if entity sequence is available + if (!entitySeq) return 'No sequence available' + // check if sequence is too long + if (entitySeq.sequence.length > MaxDisplaySequenceLength) return 'Sequence too long' + } + const Wrapper = unit.polymerElements.length ? PolymerSequenceWrapper : HeteroSequenceWrapper const sw = new Wrapper({ structure, unit }) sw.markResidue(structureSelection.get(structure), MarkerAction.Select) return sw } + return 'No sequence available' } function getModelEntityOptions(structure: Structure) { @@ -266,12 +278,9 @@ export class SequenceView extends PluginUIComponent<{ }, SequenceViewState> { <ParameterControls params={this.params} values={this.values} onChange={this.setParamProps} /> </div> - {sequenceWrapper !== undefined - ? (sequenceWrapper.length <= 10000 - ? <Sequence sequenceWrapper={sequenceWrapper} /> - : <div className='msp-sequence-wrapper'>Sequence too long</div> - ) - : <div className='msp-sequence-wrapper'>No sequence available</div>} + {typeof sequenceWrapper === 'string' + ? <div className='msp-sequence-wrapper'>{sequenceWrapper}</div> + : <Sequence sequenceWrapper={sequenceWrapper} />} </div>; } } \ No newline at end of file diff --git a/src/mol-plugin/ui/sequence/polymer.ts b/src/mol-plugin/ui/sequence/polymer.ts index 63fcd6e6f3cf1d99453953872a9924bdddcf3839..52376995ebad4939458a040ed66f00a809150b43 100644 --- a/src/mol-plugin/ui/sequence/polymer.ts +++ b/src/mol-plugin/ui/sequence/polymer.ts @@ -20,12 +20,12 @@ export class PolymerSequenceWrapper extends SequenceWrapper<StructureUnit> { private readonly modelNum: number private readonly asymId: string - seqId(seqIdx: number) { - return this.sequence.offset + seqIdx + 1 + private seqId(seqIdx: number) { + return this.sequence.seqId.value(seqIdx) } residueLabel(seqIdx: number) { - return this.sequence.labels[seqIdx] + return this.sequence.label.value(seqIdx) } residueColor(seqIdx: number) { return this.missing.has(this.modelNum, this.asymId, this.seqId(seqIdx)) @@ -63,13 +63,14 @@ export class PolymerSequenceWrapper extends SequenceWrapper<StructureUnit> { constructor(data: StructureUnit) { const l = StructureElement.Location.create(data.unit, data.unit.elements[0]) - const sequence = data.unit.model.sequence.byEntityKey[SP.entity.key(l)].sequence - const length = sequence.sequence.length + const entitySeq = data.unit.model.sequence.byEntityKey[SP.entity.key(l)] + + const length = entitySeq.sequence.length const markerArray = new Uint8Array(length) super(data, markerArray, length) - this.sequence = sequence + this.sequence = entitySeq.sequence this.missing = data.unit.model.properties.missingResidues this.modelNum = data.unit.model.modelNum diff --git a/src/mol-theme/color/entity-source.ts b/src/mol-theme/color/entity-source.ts index 92b4d60ee6aef963eaffbe3e8c836268f58cfdf2..5a553b87cd8ec79edeafdb1dee5ca44e97b3ea15 100644 --- a/src/mol-theme/color/entity-source.ts +++ b/src/mol-theme/color/entity-source.ts @@ -58,7 +58,7 @@ function addSrc(seqToSrcByModelEntity: Map<string, Int16Array>, srcKeySerialMap: if (!seqToSrcByModelEntity.has(mK)) { const entityIndex = model.entities.getEntityIndex(entityId) const seq = model.sequence.sequences[entityIndex].sequence - seqToSrc = new Int16Array(seq.sequence.length) + seqToSrc = new Int16Array(seq.length) seqToSrcByModelEntity.set(mK, seqToSrc) } else { seqToSrc = seqToSrcByModelEntity.get(mK)! diff --git a/src/mol-theme/color/hydrophobicity.ts b/src/mol-theme/color/hydrophobicity.ts index f0db44b7f3f749f6eb23e6266a220ece15251aaf..934f4300872393992b0bf3bc4b853961d3d0b820 100644 --- a/src/mol-theme/color/hydrophobicity.ts +++ b/src/mol-theme/color/hydrophobicity.ts @@ -44,7 +44,7 @@ function getCoarseCompId(unit: Unit.Spheres | Unit.Gaussians, element: ElementIn if (seqIdBegin === seqIdEnd) { const { modifiedResidues } = unit.model.properties const entityKey = unit.coarseElements.entityKey[element] - const seq = unit.model.sequence.byEntityKey[entityKey] + const seq = unit.model.sequence.byEntityKey[entityKey].sequence let compId = seq.compId.value(seqIdBegin - 1) // 1-indexed const parentId = modifiedResidues.parentId.get(compId) return parentId === undefined ? compId : parentId diff --git a/src/mol-theme/color/residue-name.ts b/src/mol-theme/color/residue-name.ts index c2e98072409845f197ef2f377f0d079e0c67bd38..5ff36bf56c40942d9f2386ed78340276293c8dd1 100644 --- a/src/mol-theme/color/residue-name.ts +++ b/src/mol-theme/color/residue-name.ts @@ -86,7 +86,7 @@ function getCoarseCompId(unit: Unit.Spheres | Unit.Gaussians, element: ElementIn if (seqIdBegin === seqIdEnd) { const { modifiedResidues } = unit.model.properties const entityKey = unit.coarseElements.entityKey[element] - const seq = unit.model.sequence.byEntityKey[entityKey] + const seq = unit.model.sequence.byEntityKey[entityKey].sequence let compId = seq.compId.value(seqIdBegin - 1) // 1-indexed const parentId = modifiedResidues.parentId.get(compId) return parentId === undefined ? compId : parentId diff --git a/src/mol-theme/color/sequence-id.ts b/src/mol-theme/color/sequence-id.ts index 35d7f31c944192fe9744dbe3720f4747e2bd50e9..b71e97d3e8db4b0d01a6d52b540b699200ae0962 100644 --- a/src/mol-theme/color/sequence-id.ts +++ b/src/mol-theme/color/sequence-id.ts @@ -61,7 +61,7 @@ function getSequenceLength(unit: Unit, element: ElementIndex) { if (entityId === '') return 0 const entityIndex = model.entities.getEntityIndex(entityId) if (entityIndex === -1) return 0 - return model.sequence.byEntityKey[entityIndex].sequence.sequence.length + return model.sequence.byEntityKey[entityIndex].sequence.length } export function SequenceIdColorTheme(ctx: ThemeDataContext, props: PD.Values<SequenceIdColorThemeParams>): ColorTheme<SequenceIdColorThemeParams> { diff --git a/src/mol-theme/label.ts b/src/mol-theme/label.ts index 4771f77da600da3094fdbb752268901f16014df5..839cda9729c27b6ce7e4f35b80925270090244d4 100644 --- a/src/mol-theme/label.ts +++ b/src/mol-theme/label.ts @@ -129,16 +129,26 @@ export function atomicElementLabel(location: StructureElement.Location<Unit.Atom } export function coarseElementLabel(location: StructureElement.Location<Unit.Spheres | Unit.Gaussians>, granularity: LabelGranularity) { - // TODO handle granularity const asym_id = Props.coarse.asym_id(location) const seq_id_begin = Props.coarse.seq_id_begin(location) const seq_id_end = Props.coarse.seq_id_end(location) - if (seq_id_begin === seq_id_end) { - const entityIndex = Props.coarse.entityKey(location) - const seq = location.unit.model.sequence.byEntityKey[entityIndex] - const comp_id = seq.compId.value(seq_id_begin - 1) // 1-indexed - return `${comp_id} ${seq_id_begin}:${asym_id}` - } else { - return `${seq_id_begin}-${seq_id_end}:${asym_id}` + + const label: string[] = [] + + switch (granularity) { + case 'element': + case 'residue': + if (seq_id_begin === seq_id_end) { + const entityIndex = Props.coarse.entityKey(location) + const seq = location.unit.model.sequence.byEntityKey[entityIndex] + const comp_id = seq.sequence.compId.value(seq_id_begin - 1) // 1-indexed + label.push(`${comp_id} ${seq_id_begin}-${seq_id_end}`) + } else { + label.push(`${seq_id_begin}-${seq_id_end}`) + } + case 'chain': + label.push(`Chain ${asym_id}`) } + + return label.reverse().join(' | ') } \ No newline at end of file