diff --git a/src/mol-model/structure/model/formats/mmcif.ts b/src/mol-model/structure/model/formats/mmcif.ts index 17b7ee9cf9f4df0ad0ab90bc398af82163679569..11e899c90e65bdce4fc65332787f04346bc2850a 100644 --- a/src/mol-model/structure/model/formats/mmcif.ts +++ b/src/mol-model/structure/model/formats/mmcif.ts @@ -70,6 +70,7 @@ function getNcsOperators(format: mmCIF_Format) { } return opers; } + function getModifiedResidueNameMap(format: mmCIF_Format) { const data = format.data.pdbx_struct_mod_residue; const map = new Map<string, string>(); @@ -121,8 +122,22 @@ function getChemicalComponentMap(format: mmCIF_Format) { return map } -function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, previous?: Model): Model { - const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, previous); +export interface FormatData { + modifiedResidueNameMap: Map<string, string> + asymIdSerialMap: Map<string, number> + chemicalComponentMap: Map<string, ChemicalComponent> +} + +function getFormatData(format: mmCIF_Format): FormatData { + return { + modifiedResidueNameMap: getModifiedResidueNameMap(format), + asymIdSerialMap: getAsymIdSerialMap(format), + chemicalComponentMap: getChemicalComponentMap(format) + } +} + +function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model { + const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous); if (previous && atomic.sameAsPrevious) { return { ...previous, atomicConformation: atomic.conformation }; } @@ -132,10 +147,6 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities ? format.data.entry.id.value(0) : format.data._name; - const modifiedResidueNameMap = getModifiedResidueNameMap(format); - const asymIdSerialMap = getAsymIdSerialMap(format) - const chemicalComponentMap = getChemicalComponentMap(format) - return { id: UUID.create(), label, @@ -143,16 +154,14 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities modelNum: atom_site.pdbx_PDB_model_num.value(0), entities, symmetry: getSymmetry(format), - sequence: getSequence(format.data, entities, atomic.hierarchy, modifiedResidueNameMap), + sequence: getSequence(format.data, entities, atomic.hierarchy, formatData.modifiedResidueNameMap), atomicHierarchy: atomic.hierarchy, atomicConformation: atomic.conformation, coarseHierarchy: coarse.hierarchy, coarseConformation: coarse.conformation, properties: { secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy), - modifiedResidueNameMap, - asymIdSerialMap, - chemicalComponentMap + ...formatData }, customProperties: new CustomProperties(), _staticPropertyData: Object.create(null), @@ -160,12 +169,9 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities }; } -function createModelIHM(format: mmCIF_Format, data: IHMData): Model { - const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities); - const coarse = getIHMCoarse(data); - const modifiedResidueNameMap = getModifiedResidueNameMap(format); - const asymIdSerialMap = getAsymIdSerialMap(format) - const chemicalComponentMap = getChemicalComponentMap(format) +function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model { + const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities, formatData); + const coarse = getIHMCoarse(data, formatData); return { id: UUID.create(), @@ -174,16 +180,14 @@ function createModelIHM(format: mmCIF_Format, data: IHMData): Model { modelNum: data.model_id, entities: data.entities, symmetry: getSymmetry(format), - sequence: getSequence(format.data, data.entities, atomic.hierarchy, modifiedResidueNameMap), + sequence: getSequence(format.data, data.entities, atomic.hierarchy, formatData.modifiedResidueNameMap), atomicHierarchy: atomic.hierarchy, atomicConformation: atomic.conformation, coarseHierarchy: coarse.hierarchy, coarseConformation: coarse.conformation, properties: { secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy), - modifiedResidueNameMap, - asymIdSerialMap, - chemicalComponentMap + ...formatData }, customProperties: new CustomProperties(), _staticPropertyData: Object.create(null), @@ -204,7 +208,7 @@ function findModelEnd(num: Column<number>, startIndex: number) { return endIndex; } -async function readStandard(ctx: RuntimeContext, format: mmCIF_Format) { +async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) { const atomCount = format.data.atom_site._rowCount; const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) }; @@ -213,7 +217,7 @@ async function readStandard(ctx: RuntimeContext, format: mmCIF_Format) { while (modelStart < atomCount) { const modelEnd = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, modelStart); const atom_site = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd); - const model = createStandardModel(format, atom_site, entities, models.length > 0 ? models[models.length - 1] : void 0); + const model = createStandardModel(format, atom_site, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0); attachProps(model); models.push(model); modelStart = modelEnd; @@ -235,7 +239,7 @@ function splitTable<T extends Table<any>>(table: T, col: Column<number>) { return ret; } -async function readIHM(ctx: RuntimeContext, format: mmCIF_Format) { +async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) { const { ihm_model_list } = format.data; const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) }; @@ -257,7 +261,7 @@ async function readIHM(ctx: RuntimeContext, format: mmCIF_Format) { ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)! : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0), ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)! : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0) }; - const model = createModelIHM(format, data); + const model = createModelIHM(format, data, formatData); attachProps(model); models.push(model); } @@ -266,9 +270,10 @@ async function readIHM(ctx: RuntimeContext, format: mmCIF_Format) { } function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> { + const formatData = getFormatData(format) return Task.create('Create mmCIF Model', async ctx => { const isIHM = format.data.ihm_model_list._rowCount > 0; - return isIHM ? await readIHM(ctx, format) : await readStandard(ctx, format); + return isIHM ? await readIHM(ctx, format, formatData) : await readStandard(ctx, format, formatData); }); } diff --git a/src/mol-model/structure/model/formats/mmcif/atomic.ts b/src/mol-model/structure/model/formats/mmcif/atomic.ts index e9f69f9e607507b6da761812cc92eb041a0d8008..677914582d91d39e0806903bea388fa2914ad54a 100644 --- a/src/mol-model/structure/model/formats/mmcif/atomic.ts +++ b/src/mol-model/structure/model/formats/mmcif/atomic.ts @@ -17,13 +17,16 @@ import { ElementSymbol } from '../../types'; import { Entities } from '../../properties/common'; import mmCIF_Format = Format.mmCIF +import { getAtomicRanges } from '../../properties/utils/atomic-ranges'; +import { FormatData } from '../mmcif'; + type AtomSite = mmCIF_Database['atom_site'] function findHierarchyOffsets(atom_site: AtomSite) { - if (atom_site._rowCount === 0) return { residues: [], polymers: [], chains: [] }; + if (atom_site._rowCount === 0) return { residues: [], chains: [] }; const start = 0, end = atom_site._rowCount; - const residues = [start as Element], chains = [start as Element], polymers = [start as Element]; + const residues = [start as Element], chains = [start as Element]; const { label_entity_id, label_asym_id, label_seq_id, auth_seq_id, pdbx_PDB_ins_code, label_comp_id } = atom_site; @@ -34,13 +37,11 @@ function findHierarchyOffsets(atom_site: AtomSite) { || !auth_seq_id.areValuesEqual(i - 1, i) || !pdbx_PDB_ins_code.areValuesEqual(i - 1, i) || !label_comp_id.areValuesEqual(i - 1, i); - const newPolymer = newResidue && label_seq_id.value(i - 1) !== label_seq_id.value(i) - 1; if (newResidue) residues[residues.length] = i as Element; - if (newPolymer) polymers[polymers.length] = i as Element; if (newChain) chains[chains.length] = i as Element; } - return { residues, polymers, chains }; + return { residues, chains }; } function createHierarchyData(atom_site: AtomSite, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData { @@ -78,7 +79,7 @@ function isHierarchyDataEqual(a: AtomicData, b: AtomicData) { && Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>) } -export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, previous?: Model) { +export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model) { const hierarchyOffsets = findHierarchyOffsets(atom_site); const hierarchyData = createHierarchyData(atom_site, hierarchyOffsets); @@ -93,11 +94,11 @@ export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_sit const hierarchySegments: AtomicSegments = { residueSegments: Segmentation.ofOffsets(hierarchyOffsets.residues, Interval.ofBounds(0, atom_site._rowCount)), chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, Interval.ofBounds(0, atom_site._rowCount)), - polymerSegments: Segmentation.ofOffsets(hierarchyOffsets.polymers, Interval.ofBounds(0, atom_site._rowCount)), } const hierarchyKeys = getAtomicKeys(hierarchyData, entities, hierarchySegments); - const hierarchy: AtomicHierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments }; + const hierarchyRanges = getAtomicRanges(hierarchyData, hierarchySegments, formatData.chemicalComponentMap); + const hierarchy: AtomicHierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments, ...hierarchyRanges }; return { sameAsPrevious: false, hierarchy, diff --git a/src/mol-model/structure/model/formats/mmcif/ihm.ts b/src/mol-model/structure/model/formats/mmcif/ihm.ts index 1c93c716194329740575cde23028e263816f727e..d9b401d46ead15b02552f13ae2709ca23075e9be 100644 --- a/src/mol-model/structure/model/formats/mmcif/ihm.ts +++ b/src/mol-model/structure/model/formats/mmcif/ihm.ts @@ -13,6 +13,8 @@ import { UUID } from 'mol-util'; import { Segmentation, Interval } from 'mol-data/int'; import { Mat3, Tensor } from 'mol-math/linear-algebra'; import { Element } from '../../../structure' +import { getCoarseRanges } from '../../properties/utils/coarse-ranges'; +import { FormatData } from '../mmcif'; export interface IHMData { model_id: number, @@ -25,7 +27,7 @@ export interface IHMData { export const EmptyIHMCoarse = { hierarchy: CoarseHierarchy.Empty, conformation: void 0 as any } -export function getIHMCoarse(data: IHMData): { hierarchy: CoarseHierarchy, conformation: CoarseConformation } { +export function getIHMCoarse(data: IHMData, formatData: FormatData): { hierarchy: CoarseHierarchy, conformation: CoarseConformation } { const { ihm_sphere_obj_site, ihm_gaussian_obj_site } = data; if (ihm_sphere_obj_site._rowCount === 0 && ihm_gaussian_obj_site._rowCount === 0) return EmptyIHMCoarse; @@ -33,16 +35,18 @@ export function getIHMCoarse(data: IHMData): { hierarchy: CoarseHierarchy, confo const sphereData = getData(ihm_sphere_obj_site); const sphereConformation = getSphereConformation(ihm_sphere_obj_site); const sphereKeys = getCoarseKeys(sphereData, data.entities); + const sphereRanges = getCoarseRanges(sphereData, formatData.chemicalComponentMap); const gaussianData = getData(ihm_gaussian_obj_site); const gaussianConformation = getGaussianConformation(ihm_gaussian_obj_site); const gaussianKeys = getCoarseKeys(gaussianData, data.entities); + const gaussianRanges = getCoarseRanges(gaussianData, formatData.chemicalComponentMap); return { hierarchy: { isDefined: true, - spheres: { ...sphereData, ...sphereKeys }, - gaussians: { ...gaussianData, ...gaussianKeys }, + spheres: { ...sphereData, ...sphereKeys, ...sphereRanges }, + gaussians: { ...gaussianData, ...gaussianKeys, ...gaussianRanges }, }, conformation: { id: UUID.create(), @@ -81,19 +85,14 @@ function getGaussianConformation(data: mmCIF['ihm_gaussian_obj_site']): CoarseGa } function getSegments(asym_id: Column<string>, seq_id_begin: Column<number>, seq_id_end: Column<number>) { - const polymerOffsets = [0 as Element], chainOffsets = [0 as Element]; + const chainOffsets = [0 as Element]; for (let i = 1, _i = asym_id.rowCount; i < _i; i++) { const newChain = !asym_id.areValuesEqual(i - 1, i); - const newPolymer = newChain - || seq_id_end.value(i - 1) !== seq_id_begin.value(i) - 1; - - if (newPolymer) polymerOffsets[polymerOffsets.length] = i as Element; if (newChain) chainOffsets[chainOffsets.length] = i as Element; } return { - chainSegments: Segmentation.ofOffsets(chainOffsets, Interval.ofBounds(0, asym_id.rowCount)), - polymerSegments: Segmentation.ofOffsets(polymerOffsets, Interval.ofBounds(0, asym_id.rowCount)) + chainSegments: Segmentation.ofOffsets(chainOffsets, Interval.ofBounds(0, asym_id.rowCount)) } } diff --git a/src/mol-model/structure/model/properties/atomic/hierarchy.ts b/src/mol-model/structure/model/properties/atomic/hierarchy.ts index 12c9de4c1ed01dfbaaa055a7d2e7756ced2ed743..8870dd849eac8e355595057099055fc13d65c5d9 100644 --- a/src/mol-model/structure/model/properties/atomic/hierarchy.ts +++ b/src/mol-model/structure/model/properties/atomic/hierarchy.ts @@ -9,6 +9,7 @@ import { Segmentation } from 'mol-data/int' import { mmCIF_Schema as mmCIF } from 'mol-io/reader/cif/schema/mmcif' import { ElementSymbol } from '../../types' import { Element } from '../../../structure' +import SortedRanges from 'mol-data/int/sorted-ranges'; export const AtomsSchema = { type_symbol: Column.Schema.Aliased<ElementSymbol>(mmCIF.atom_site.type_symbol), @@ -52,11 +53,6 @@ export interface AtomicSegments { residueSegments: Segmentation<Element>, /** Maps chainIndex to a range of atoms [segments[cI], segments[cI + 1]) */ chainSegments: Segmentation<Element>, - /** - * bonded/connected stretches of polymer chains, i.e. a chain will be - * broken into multiple polymer segments if there are missing residues - */ - polymerSegments: Segmentation<Element> // TODO: include entity segments? } @@ -78,5 +74,10 @@ export interface AtomicKeys { findResidueKey(entityId: string, label_asym_id: string, label_comp_id: string, auth_seq_id: number, pdbx_PDB_ins_code: string): number } -type _Hierarchy = AtomicData & AtomicSegments & AtomicKeys +export interface AtomicRanges { + polymerRanges: SortedRanges<Element> + gapRanges: SortedRanges<Element> +} + +type _Hierarchy = AtomicData & AtomicSegments & AtomicKeys & AtomicRanges export interface AtomicHierarchy extends _Hierarchy { } \ No newline at end of file diff --git a/src/mol-model/structure/model/properties/coarse/hierarchy.ts b/src/mol-model/structure/model/properties/coarse/hierarchy.ts index 57d3da0acaac976009976324824438a94ae7ee2e..f9057f34894e3604588df1325fa9f1a5ae2795af 100644 --- a/src/mol-model/structure/model/properties/coarse/hierarchy.ts +++ b/src/mol-model/structure/model/properties/coarse/hierarchy.ts @@ -8,6 +8,7 @@ import { Column } from 'mol-data/db' import { Segmentation } from 'mol-data/int'; import { Element } from '../../../structure' +import SortedRanges from 'mol-data/int/sorted-ranges'; export interface CoarsedElementKeys { // assign a key to each element @@ -28,14 +29,14 @@ export interface CoarseElementData { seq_id_end: Column<number>, chainSegments: Segmentation<Element>, - /** - * bonded/connected stretches of polymer chains, i.e. a chain will be - * broken into multiple polymer segments if there are missing residues - */ - polymerSegments: Segmentation<Element> } -export type CoarseElements = CoarsedElementKeys & CoarseElementData +export interface CoarseRanges { + polymerRanges: SortedRanges<Element> + gapRanges: SortedRanges<Element> +} + +export type CoarseElements = CoarsedElementKeys & CoarseElementData & CoarseRanges export interface CoarseHierarchy { isDefined: boolean, diff --git a/src/mol-model/structure/model/properties/utils/atomic-ranges.ts b/src/mol-model/structure/model/properties/utils/atomic-ranges.ts new file mode 100644 index 0000000000000000000000000000000000000000..a28eca281de545c9079cbea9618b19f7dbbdf9ae --- /dev/null +++ b/src/mol-model/structure/model/properties/utils/atomic-ranges.ts @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { AtomicSegments } from '../atomic'; +import { AtomicData, AtomicRanges } from '../atomic/hierarchy'; +import { Segmentation, Interval } from 'mol-data/int'; +import SortedRanges from 'mol-data/int/sorted-ranges'; +import { Element } from '../../../../structure'; +import { ChemicalComponent } from '../chemical-component'; +import { MoleculeType, isPolymer } from '../../types'; + +export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, chemicalComponentMap: Map<string, ChemicalComponent>): AtomicRanges { + const polymerRanges: number[] = [] + const gapRanges: number[] = [] + const chainIt = Segmentation.transientSegments(segments.chainSegments, Interval.ofBounds(0, data.atoms._rowCount)) + const residueIt = Segmentation.transientSegments(segments.residueSegments, Interval.ofBounds(0, data.atoms._rowCount)) + const { label_seq_id, label_comp_id } = data.residues + + let prevSeqId: number + let prevEnd: number + let startIndex: number + + while (chainIt.hasNext) { + const chainSegment = chainIt.move(); + residueIt.setSegment(chainSegment); + prevSeqId = -1 + prevEnd = -1 + startIndex = -1 + + while (residueIt.hasNext) { + const residueSegment = residueIt.move(); + const residueIndex = residueSegment.index + const cc = chemicalComponentMap.get(label_comp_id.value(residueIndex)) + const moleculeType = cc ? cc.moleculeType : MoleculeType.unknown + const seqId = label_seq_id.value(residueIndex) + if (isPolymer(moleculeType)) { + if (startIndex !== -1) { + if (seqId !== prevSeqId + 1) { + polymerRanges.push(startIndex, prevEnd - 1) + gapRanges.push(prevEnd - 1, residueSegment.start) + startIndex = residueSegment.start + } else if (!residueIt.hasNext) { + polymerRanges.push(startIndex, residueSegment.end - 1) + } + } else { + startIndex = residueSegment.start // start polymer + } + } else { + if (startIndex !== -1) { + polymerRanges.push(startIndex, prevEnd - 1) + startIndex = -1 + } + } + + prevEnd = residueSegment.end + prevSeqId = seqId + } + } + + console.log(polymerRanges, gapRanges) + + return { + polymerRanges: SortedRanges.ofSortedRanges(polymerRanges as Element[]), + gapRanges: SortedRanges.ofSortedRanges(gapRanges as Element[]) + } +} \ No newline at end of file diff --git a/src/mol-model/structure/model/properties/utils/coarse-ranges.ts b/src/mol-model/structure/model/properties/utils/coarse-ranges.ts new file mode 100644 index 0000000000000000000000000000000000000000..635bc51847a5fbc6681ab6019e6016f988d605ee --- /dev/null +++ b/src/mol-model/structure/model/properties/utils/coarse-ranges.ts @@ -0,0 +1,52 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { CoarseRanges, CoarseElementData } from '../coarse/hierarchy'; +import { Segmentation, Interval } from 'mol-data/int'; +import SortedRanges from 'mol-data/int/sorted-ranges'; +import { Element } from '../../../../structure'; +import { ChemicalComponent } from '../chemical-component'; + +// TODO assumes all coarse elements are part of a polymer + +export function getCoarseRanges(data: CoarseElementData, chemicalComponentMap: Map<string, ChemicalComponent>): CoarseRanges { + const polymerRanges: number[] = [] + const gapRanges: number[] = [] + const chainIt = Segmentation.transientSegments(data.chainSegments, Interval.ofBounds(0, data.count)) + + const { seq_id_begin, seq_id_end } = data + + while (chainIt.hasNext) { + const { start, end } = chainIt.move(); + console.log('chain', start, end) + + let startIndex = -1 + let prevSeqEnd = -1 + for (let i = start; i < end; ++i) { + const seqEnd = seq_id_end.value(i) + if (i === start) { + startIndex = i + prevSeqEnd = seq_id_end.value(i) + } else { + if (prevSeqEnd !== seq_id_begin.value(i) - 1) { + polymerRanges.push(startIndex, i - 1) + startIndex = i + } + } + if (i === end - 1) { + polymerRanges.push(startIndex, i) + } + prevSeqEnd = seqEnd + } + } + + console.log(polymerRanges, gapRanges) + + return { + polymerRanges: SortedRanges.ofSortedRanges(polymerRanges as Element[]), + gapRanges: SortedRanges.ofSortedRanges(gapRanges as Element[]) + } +} \ No newline at end of file