From 59f5f0caec89e3a0404255b1feb4009e334f236a Mon Sep 17 00:00:00 2001 From: David Sehnal <david.sehnal@gmail.com> Date: Fri, 11 May 2018 20:07:15 +0200 Subject: [PATCH] Parse sec struct from mmCIF --- src/apps/structure-info/model.ts | 34 +++- .../structure/model/formats/mmcif.ts | 4 + .../formats/mmcif/secondary-structure.ts | 154 ++++++++++++++++++ src/mol-model/structure/model/model.ts | 3 + .../model/properties/seconday-structure.ts | 15 +- src/mol-model/structure/model/types.ts | 1 + src/mol-model/structure/query/properties.ts | 6 +- src/mol-util/bit-flags.ts | 2 +- 8 files changed, 210 insertions(+), 9 deletions(-) create mode 100644 src/mol-model/structure/model/formats/mmcif/secondary-structure.ts diff --git a/src/apps/structure-info/model.ts b/src/apps/structure-info/model.ts index ad61b5d13..c6d606cfb 100644 --- a/src/apps/structure-info/model.ts +++ b/src/apps/structure-info/model.ts @@ -16,6 +16,8 @@ import { OrderedSet } from 'mol-data/int'; import { Table } from 'mol-data/db'; import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif'; import { openCif, downloadCif } from './helpers'; +import { BitFlags } from 'mol-util'; +import { SecondaryStructureType } from 'mol-model/structure/model/types'; async function downloadFromPdb(pdb: string) { @@ -39,6 +41,35 @@ export function atomLabel(model: Model, aI: number) { return `${label_asym_id.value(cI)} ${label_comp_id.value(rI)} ${label_seq_id.value(rI)} ${label_atom_id.value(aI)}` } +export function residueLabel(model: Model, rI: number) { + const { residues, chains, residueSegments, chainSegments } = model.atomicHierarchy + const { label_comp_id, label_seq_id } = residues + const { label_asym_id } = chains + const cI = chainSegments.segmentMap[residueSegments.segments[rI]] + return `${label_asym_id.value(cI)} ${label_comp_id.value(rI)} ${label_seq_id.value(rI)}` +} + +export function printSecStructure(model: Model) { + console.log('Secondary Structure\n============='); + const { residues } = model.atomicHierarchy; + const { type, key } = model.properties.secondaryStructure; + + const count = residues._rowCount; + let rI = 0; + while (rI < count) { + let start = rI; + while (rI < count && key[start] === key[rI]) rI++; + rI--; + + if (BitFlags.has(type[start], SecondaryStructureType.Flag.Beta)) { + console.log(`Sheet: ${residueLabel(model, start)} - ${residueLabel(model, rI)} (key ${key[start]})`); + } else if (BitFlags.has(type[start], SecondaryStructureType.Flag.Helix)) { + console.log(`Helix: ${residueLabel(model, start)} - ${residueLabel(model, rI)} (key ${key[start]})`); + } + + rI++; + } +} export function printBonds(structure: Structure) { for (const unit of structure.units) { @@ -122,7 +153,8 @@ async function run(mmcif: mmCIF_Database) { printSequence(models[0]); printIHMModels(models[0]); printUnits(structure); - printBonds(structure); + // printBonds(structure); + printSecStructure(models[0]); } async function runDL(pdb: string) { diff --git a/src/mol-model/structure/model/formats/mmcif.ts b/src/mol-model/structure/model/formats/mmcif.ts index d2a2338f3..6f135c720 100644 --- a/src/mol-model/structure/model/formats/mmcif.ts +++ b/src/mol-model/structure/model/formats/mmcif.ts @@ -22,6 +22,7 @@ import { getSequence } from './mmcif/sequence'; import mmCIF_Format = Format.mmCIF import { Task } from 'mol-task'; +import { getSecondaryStructureMmCif } from './mmcif/secondary-structure'; function findModelBounds({ data }: mmCIF_Format, startIndex: number) { const num = data.atom_site.pdbx_PDB_model_num; @@ -151,6 +152,9 @@ function createModel(format: mmCIF_Format, bounds: Interval, previous?: Model): atomicConformation: getConformation(format, bounds), coarseHierarchy: coarse.hierarchy, coarseConformation: coarse.conformation, + properties: { + secondaryStructure: getSecondaryStructureMmCif(format.data, atomicHierarchy) + }, symmetry: getSymmetry(format) }; } diff --git a/src/mol-model/structure/model/formats/mmcif/secondary-structure.ts b/src/mol-model/structure/model/formats/mmcif/secondary-structure.ts new file mode 100644 index 000000000..ed07db38b --- /dev/null +++ b/src/mol-model/structure/model/formats/mmcif/secondary-structure.ts @@ -0,0 +1,154 @@ + +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { mmCIF_Database as mmCIF, mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif' +import { SecondaryStructureType } from '../../types'; +import { AtomicHierarchy } from '../../properties/atomic'; +import { SecondaryStructure } from '../../properties/seconday-structure'; +import { Column } from 'mol-data/db'; + +export function getSecondaryStructureMmCif(data: mmCIF_Database, hierarchy: AtomicHierarchy): SecondaryStructure { + const map: SecondaryStructureMap = new Map(); + addHelices(data.struct_conf, map); + // must add Helices 1st because of 'key' value assignment. + addSheets(data.struct_sheet_range, map, data.struct_conf._rowCount); + + const secStruct: SecondaryStructureData = { + type: new Int32Array(hierarchy.residues._rowCount) as any, + key: new Int32Array(hierarchy.residues._rowCount) as any + }; + + if (map.size > 0) assignSecondaryStructureRanges(hierarchy, map, secStruct); + return secStruct; +} + +type SecondaryStructureEntry = { + startSeqNumber: number, + startInsCode: string | null, + endSeqNumber: number, + endInsCode: string | null, + type: SecondaryStructureType, + key: number +} +type SecondaryStructureMap = Map<string, Map<number, SecondaryStructureEntry>> +type SecondaryStructureData = { type: SecondaryStructureType[], key: number[] } + +function addHelices(cat: mmCIF['struct_conf'], map: SecondaryStructureMap) { + if (!cat._rowCount) return; + + const { beg_label_asym_id, beg_label_seq_id, pdbx_beg_PDB_ins_code } = cat; + const { end_label_seq_id, pdbx_end_PDB_ins_code } = cat; + const { pdbx_PDB_helix_class, conf_type_id } = cat; + + for (let i = 0, _i = cat._rowCount; i < _i; i++) { + const type = pdbx_PDB_helix_class.valueKind(i) === Column.ValueKind.Present + ? SecondaryStructureType.SecondaryStructurePdb[pdbx_PDB_helix_class.value(i)] + : conf_type_id.valueKind(i) === Column.ValueKind.Present + ? SecondaryStructureType.SecondaryStructureMmcif[conf_type_id.value(i)] + : SecondaryStructureType.Flag.NA + + const entry: SecondaryStructureEntry = { + startSeqNumber: beg_label_seq_id.value(i), + startInsCode: pdbx_beg_PDB_ins_code.value(i), + endSeqNumber: end_label_seq_id.value(i), + endInsCode: pdbx_end_PDB_ins_code.value(i), + type: SecondaryStructureType.create(type), + key: i + 1 + }; + + const asymId = beg_label_asym_id.value(i)!; + if (map.has(asymId)) { + map.get(asymId)!.set(entry.startSeqNumber, entry); + } else { + map.set(asymId, new Map([[entry.startSeqNumber, entry]])); + } + } +} + +function addSheets(cat: mmCIF['struct_sheet_range'], map: SecondaryStructureMap, sheetCount: number) { + if (!cat._rowCount) return; + + const { beg_label_asym_id, beg_label_seq_id, pdbx_beg_PDB_ins_code } = cat; + const { end_label_seq_id, pdbx_end_PDB_ins_code } = cat; + const { sheet_id } = cat; + + const sheet_id_key = new Map<string, number>(); + let currentKey = sheetCount + 1; + + for (let i = 0, _i = cat._rowCount; i < _i; i++) { + const id = sheet_id.value(i); + let key: number; + if (sheet_id_key.has(id)) key = sheet_id_key.get(id)!; + else { + key = currentKey++; + sheet_id_key.set(id, key); + } + + const entry: SecondaryStructureEntry = { + startSeqNumber: beg_label_seq_id.value(i), + startInsCode: pdbx_beg_PDB_ins_code.value(i), + endSeqNumber: end_label_seq_id.value(i), + endInsCode: pdbx_end_PDB_ins_code.value(i), + type: SecondaryStructureType.create(SecondaryStructureType.Flag.Beta | SecondaryStructureType.Flag.BetaSheet), + key + }; + + const asymId = beg_label_asym_id.value(i)!; + if (map.has(asymId)) { + map.get(asymId)!.set(entry.startSeqNumber, entry); + } else { + map.set(asymId, new Map([[entry.startSeqNumber, entry]])); + } + } + + return; +} + +function assignSecondaryStructureEntry(hierarchy: AtomicHierarchy, entry: SecondaryStructureEntry, resStart: number, resEnd: number, data: SecondaryStructureData) { + const { label_seq_id, pdbx_PDB_ins_code } = hierarchy.residues; + const { endSeqNumber, endInsCode, type, key } = entry; + + let rI = resStart; + while (rI < resEnd) { + const seqNumber = label_seq_id.value(rI); + + data.type[rI] = type; + data.key[rI] = key; + + if ((seqNumber > endSeqNumber) || + (seqNumber === endSeqNumber && pdbx_PDB_ins_code.value(rI) === endInsCode)) { + break; + } + + rI++; + } +} + +function assignSecondaryStructureRanges(hierarchy: AtomicHierarchy, map: SecondaryStructureMap, data: SecondaryStructureData) { + const { segments: chainSegments, count: chainCount } = hierarchy.chainSegments; + const { label_asym_id } = hierarchy.chains; + const { label_seq_id, pdbx_PDB_ins_code } = hierarchy.residues; + + for (let cI = 0; cI < chainCount; cI++) { + const resStart = chainSegments[cI], resEnd = chainSegments[cI + 1]; + const asymId = label_asym_id.value(cI); + + if (map.has(asymId)) { + const entries = map.get(asymId)!; + + for (let rI = resStart; rI < resEnd; rI++) { + const seqNumber = label_seq_id.value(rI); + if (entries.has(seqNumber)) { + const entry = entries.get(seqNumber)!; + const insCode = pdbx_PDB_ins_code.value(rI); + if (entry.startInsCode !== insCode) continue; + assignSecondaryStructureEntry(hierarchy, entry, rI, resEnd, data); + } + } + } + } +} \ No newline at end of file diff --git a/src/mol-model/structure/model/model.ts b/src/mol-model/structure/model/model.ts index 0a1725d48..1053bde91 100644 --- a/src/mol-model/structure/model/model.ts +++ b/src/mol-model/structure/model/model.ts @@ -11,6 +11,7 @@ import { AtomicHierarchy, AtomicConformation } from './properties/atomic' import { ModelSymmetry } from './properties/symmetry' import { CoarseHierarchy, CoarseConformation } from './properties/coarse' import { Entities } from './properties/common'; +import { SecondaryStructure } from './properties/seconday-structure'; //import from_gro from './formats/gro' import from_mmCIF from './formats/mmcif' @@ -34,6 +35,8 @@ interface Model extends Readonly<{ atomicHierarchy: AtomicHierarchy, atomicConformation: AtomicConformation, + properties: { secondaryStructure: SecondaryStructure }, + coarseHierarchy: CoarseHierarchy, coarseConformation: CoarseConformation }> { diff --git a/src/mol-model/structure/model/properties/seconday-structure.ts b/src/mol-model/structure/model/properties/seconday-structure.ts index 04eef7fb9..dfde631a3 100644 --- a/src/mol-model/structure/model/properties/seconday-structure.ts +++ b/src/mol-model/structure/model/properties/seconday-structure.ts @@ -4,11 +4,14 @@ * @author David Sehnal <david.sehnal@gmail.com> */ +import { SecondaryStructureType } from '../types'; + /** Secondary structure "indexed" by residues. */ -export interface SecondaryStructure { - type: number[], - index: number[], - flags: number[], +interface SecondaryStructure { + // assign flags to each residue + readonly type: ArrayLike<SecondaryStructureType>, /** unique value for each "element". This is because single sheet is speficied by multiple records. */ - key: number[] -} \ No newline at end of file + readonly key: ArrayLike<number> +} + +export { SecondaryStructure } \ No newline at end of file diff --git a/src/mol-model/structure/model/types.ts b/src/mol-model/structure/model/types.ts index caa1ee2c8..36d0cb935 100644 --- a/src/mol-model/structure/model/types.ts +++ b/src/mol-model/structure/model/types.ts @@ -94,6 +94,7 @@ export namespace SecondaryStructureType { export const Turn = ['s', 't', 'l', ''] export const is: (ss: SecondaryStructureType, f: Flag) => boolean = BitFlags.has + export const create: (fs: Flag) => SecondaryStructureType = BitFlags.create export const enum Flag { None = 0x0, diff --git a/src/mol-model/structure/query/properties.ts b/src/mol-model/structure/query/properties.ts index a4d4ad5b3..3299acaae 100644 --- a/src/mol-model/structure/query/properties.ts +++ b/src/mol-model/structure/query/properties.ts @@ -52,7 +52,11 @@ const residue = { auth_comp_id: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.auth_comp_id.value(l.unit.residueIndex[l.element])), label_seq_id: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.label_seq_id.value(l.unit.residueIndex[l.element])), auth_seq_id: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.element])), - pdbx_PDB_ins_code: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.pdbx_PDB_ins_code.value(l.unit.residueIndex[l.element])) + pdbx_PDB_ins_code: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.atomicHierarchy.residues.pdbx_PDB_ins_code.value(l.unit.residueIndex[l.element])), + + // Properties + secondary_structure_type: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.properties.secondaryStructure.type[l.unit.residueIndex[l.element]]), + secondary_structure_key: Element.property(l => !Unit.isAtomic(l.unit) ? notAtomic() : l.unit.model.properties.secondaryStructure.key[l.unit.residueIndex[l.element]]), } const chain = { diff --git a/src/mol-util/bit-flags.ts b/src/mol-util/bit-flags.ts index 0cf3b761d..8143eb160 100644 --- a/src/mol-util/bit-flags.ts +++ b/src/mol-util/bit-flags.ts @@ -4,7 +4,7 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -interface BitFlags<Flags> { '@type': Flags } +interface BitFlags<Flags> extends Number { '@type': Flags } namespace BitFlags { export function create<F>(flags: F): BitFlags<F> { return flags as any; } -- GitLab