diff --git a/src/mol-model/structure/model/formats/mmcif/sequence.ts b/src/mol-model/structure/model/formats/mmcif/sequence.ts index 3b30419696303332c6c91fec2fbc66b6de3f2451..c9f0825208a9538a3bac331162776d6d84989fbc 100644 --- a/src/mol-model/structure/model/formats/mmcif/sequence.ts +++ b/src/mol-model/structure/model/formats/mmcif/sequence.ts @@ -10,8 +10,18 @@ import { Column } from 'mol-data/db'; import { AtomicHierarchy } from '../../properties/atomic'; import { Entities } from '../../properties/common'; +// TODO how to handle microheterogeneity +// see http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html +// +// Data items in the ENTITY_POLY_SEQ category specify the sequence +// of monomers in a polymer. Allowance is made for the possibility +// of microheterogeneity in a sample by allowing a given sequence +// number to be correlated with more than one monomer ID. The +// corresponding ATOM_SITE entries should reflect this +// heterogeneity. + export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy): Sequence { - if (!cif.entity_poly_seq._rowCount) return Sequence.fromAtomicHierarchy(hierarchy); + if (!cif.entity_poly_seq._rowCount) return Sequence.fromAtomicHierarchy(entities, hierarchy); const { entity_id, num, mon_id } = cif.entity_poly_seq; diff --git a/src/mol-model/structure/model/properties/sequence.ts b/src/mol-model/structure/model/properties/sequence.ts index b34b1ef61b624307682ef17e577b670444bbec36..229c5509b72ccff1ec691a867bdfa3f1e1f44a1b 100644 --- a/src/mol-model/structure/model/properties/sequence.ts +++ b/src/mol-model/structure/model/properties/sequence.ts @@ -6,6 +6,7 @@ import { Column } from 'mol-data/db' import { AtomicHierarchy } from './atomic/hierarchy'; +import { Entities } from './common'; interface Sequence { readonly byEntityKey: { [key: number]: Sequence.Entity } @@ -19,10 +20,36 @@ namespace Sequence { readonly compId: Column<string> } - export function 
/**
 * Builds per-entity sequences from the atomic hierarchy alone.
 *
 * Chains are visited in row order. A chain contributes only if its entity
 * is of type 'polymer', and only the first such chain seen for an entity
 * key is used; later chains with the same entity key are skipped.
 *
 * @param entities  Entity table, used to resolve a chain's `label_entity_id`
 *                  to an entity index and to read that entity's type.
 * @param hierarchy Atomic hierarchy supplying the chain and residue tables
 *                  together with their segmentations.
 * @returns An object whose `byEntityKey` maps an entity key to the entity
 *          id plus windows over the residue `label_comp_id` (`compId`) and
 *          `label_seq_id` (`num`) columns.
 */
export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy): Sequence {
    const { label_entity_id } = hierarchy.chains
    const { label_comp_id, label_seq_id } = hierarchy.residues
    const { chainSegments, residueSegments } = hierarchy

    const byEntityKey: Sequence['byEntityKey'] = {};

    const chainCount = hierarchy.chains._rowCount
    for (let i = 0; i < chainCount; ++i) {
        const entityId = label_entity_id.value(i)
        const entityIndex = entities.getEntityIndex(entityId)
        // TODO only for polymers, mirroring _entity_poly_seq, ok???
        // The entity table has to be addressed with the entity index resolved
        // above; the chain row index `i` only matches it by coincidence.
        if (entities.data.type.value(entityIndex) !== 'polymer') continue

        // NOTE(review): entityKey is looked up by entityIndex here; confirm it
        // is not a per-chain array that should be indexed with `i` instead.
        const entityKey = hierarchy.entityKey[entityIndex]
        if (byEntityKey[entityKey] !== undefined) continue

        // Translate the chain's segment boundaries into residue indices.
        const start = residueSegments.segmentMap[chainSegments.segments[i]]
        let end = residueSegments.segmentMap[chainSegments.segments[i + 1]]
        // TODO better way to handle end???
        // When the lookup for the next boundary yields nothing (presumably the
        // last chain), the window runs to the end of the residue table.
        if (end === undefined) end = hierarchy.residues._rowCount

        byEntityKey[entityKey] = {
            entityId,
            compId: Column.window(label_comp_id, start, end),
            num: Column.window(label_seq_id, start, end)
        }
    }

    return { byEntityKey }
}