diff --git a/src/mol-model-formats/structure/mmcif/atomic.ts b/src/mol-model-formats/structure/mmcif/atomic.ts index 7c4fbfbbfba1b8a266302d7bfbb2627186153ead..422d45776d1fdd1f8b89378a0f7f4e902c8de3f5 100644 --- a/src/mol-model-formats/structure/mmcif/atomic.ts +++ b/src/mol-model-formats/structure/mmcif/atomic.ts @@ -101,7 +101,7 @@ export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_sit const index = getAtomicIndex(hierarchyData, entities, hierarchySegments); const derived = getAtomicDerivedData(hierarchyData, index, formatData.chemicalComponentMap); - const hierarchyRanges = getAtomicRanges(hierarchyData, hierarchySegments, conformation, formatData.chemicalComponentMap); + const hierarchyRanges = getAtomicRanges(hierarchyData, hierarchySegments, conformation, derived.residue.moleculeType); const hierarchy: AtomicHierarchy = { ...hierarchyData, ...hierarchySegments, ...hierarchyRanges, index, derived }; return { sameAsPrevious: false, hierarchy, conformation }; } \ No newline at end of file diff --git a/src/mol-model-formats/structure/mmcif/parser.ts b/src/mol-model-formats/structure/mmcif/parser.ts index 5d435458b246b19a6efc80e5d43705c89e22a1c6..8a8e4012695ad8af3f777f8ea12e7bd1c6cb6c81 100644 --- a/src/mol-model-formats/structure/mmcif/parser.ts +++ b/src/mol-model-formats/structure/mmcif/parser.ts @@ -23,11 +23,12 @@ import { getSecondaryStructureMmCif } from './secondary-structure'; import { getSequence } from './sequence'; import { sortAtomSite } from './sort'; import { StructConn } from './bonds/struct_conn'; -import { ChemicalComponent, ChemicalComponentMap, CommonChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component'; +import { ChemicalComponent, ChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component'; import { ComponentType, getMoleculeType, MoleculeType } from 'mol-model/structure/model/types'; import { ModelFormat } from '../format'; import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants'; import mmCIF_Format = ModelFormat.mmCIF +import { memoize1 } from 'mol-util/memoize'; export async function _parse_mmCif(format: mmCIF_Format, ctx: RuntimeContext) { const formatData = getFormatData(format) @@ -78,6 +79,7 @@ function getNcsOperators(format: mmCIF_Format) { } return opers; } + function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['modifiedResidues'] { const data = format.data.pdbx_struct_mod_residue; const parentId = new Map<string, string>(); @@ -95,9 +97,9 @@ function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['m } function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap { + const map = new Map<string, ChemicalComponent>(); const { chem_comp } = format.data if (chem_comp._rowCount > 0) { - const map = new Map<string, ChemicalComponent>(); const { id, type, name, pdbx_synonyms, formula, formula_weight } = format.data.chem_comp for (let i = 0, il = id.rowCount; i < il; ++i) { const _id = id.value(i) @@ -113,10 +115,8 @@ function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap { } map.set(_id, cc) } - return map - } else { - return CommonChemicalComponentMap } + return map } function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap { @@ -147,12 +147,24 @@ function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap } } } else { - // TODO check if present in format.data.atom_site.label_comp_id - SaccharideCompIdMap.forEach((v, k) => map.set(k, v)) + const uniqueNames = getUniqueComponentNames(format) + SaccharideCompIdMap.forEach((v, k) => { + if (uniqueNames.has(k)) map.set(k, v) + }) } return map } +const getUniqueComponentNames = memoize1((format: mmCIF_Format) => { + const uniqueNames = new Set<string>() + const data = format.data.atom_site + const comp_id = data.label_comp_id.isDefined ? data.label_comp_id : data.auth_comp_id; + for (let i = 0, il = comp_id.rowCount; i < il; ++i) { + uniqueNames.add(comp_id.value(i)) + } + return uniqueNames +}) + export interface FormatData { modifiedResidues: Model['properties']['modifiedResidues'] chemicalComponentMap: Model['properties']['chemicalComponentMap'] diff --git a/src/mol-model/structure/model/properties/chemical-component.ts b/src/mol-model/structure/model/properties/chemical-component.ts index 76d23851d5daf220e16c881a0a8c986a995c13b0..35d34e571790872c8e1a110bc125705d7a4b49c4 100644 --- a/src/mol-model/structure/model/properties/chemical-component.ts +++ b/src/mol-model/structure/model/properties/chemical-component.ts @@ -16,209 +16,4 @@ export interface ChemicalComponent { formulaWeight: number } -export type ChemicalComponentMap = ReadonlyMap<string, ChemicalComponent> - -const CommonChemicalComponents: ChemicalComponent[] = [ - { - id: 'ALA', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'ALANINE', - synonyms: [], - formula: 'C3 H7 N O2', - formulaWeight: 89.093 - }, - { - id: 'ARG', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'ARGININE', - synonyms: [], - formula: 'C6 H15 N4 O2 1', - formulaWeight: 175.209 - }, - { - id: 'ASN', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'ASPARAGINE', - synonyms: [], - formula: 'C4 H8 N2 O3', - formulaWeight: 132.118 - }, - { - id: 'ASP', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'ASPARTIC ACID', - synonyms: [], - formula: 'C4 H7 N O4', - formulaWeight: 133.103 - }, - { - id: 'CYS', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'CYSTEINE', - synonyms: [], - formula: 'C3 H7 N O2 S', - formulaWeight: 121.158 - }, - { - id: 'GLN', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'GLUTAMINE', - synonyms: [], - formula: 'C5 H10 N2 O3', - formulaWeight: 146.144 - }, - { - id: 'GLU', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'GLUTAMIC ACID', - synonyms: [], - formula: 'C5 H9 N O4', - formulaWeight: 147.129 - }, - { - id: 'GLY', - type: ComponentType['peptide linking'], - moleculeType: MoleculeType.protein, - name: 'GLYCINE', - synonyms: [], - formula: 'C2 H5 N O2', - formulaWeight: 75.067 - }, - { - id: 'HIS', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'HISTIDINE', - synonyms: [], - formula: 'C6 H10 N3 O2 1', - formulaWeight: 156.162 - }, - { - id: 'ILE', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'ISOLEUCINE', - synonyms: [], - formula: 'C6 H13 N O2', - formulaWeight: 131.173 - }, - { - id: 'LEU', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'LEUCINE', - synonyms: [], - formula: 'C6 H13 N O2', - formulaWeight: 131.173 - }, - { - id: 'LYS', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'LYSINE', - synonyms: [], - formula: 'C6 H15 N2 O2 1', - formulaWeight: 147.195 - }, - { - id: 'MET', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'METHIONINE', - synonyms: [], - formula: 'C5 H11 N O2 S', - formulaWeight: 149.211 - }, - { - id: 'PHE', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'PHENYLALANINE', - synonyms: [], - formula: 'C9 H11 N O2', - formulaWeight: 165.19 - }, - { - id: 'PRO', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'PROLINE', - synonyms: [], - formula: 'C5 H9 N O2', - formulaWeight: 115.13 - }, - { // 'O' as per IUPAC definition - id: 'PYL', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'PYRROLYSINE', - synonyms: [], - formula: 'C12 H21 N3 O3', - formulaWeight: 255.31 - }, - { // 'U' as per IUPAC definition - id: 'SEC', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'SELENOCYSTEINE', - synonyms: [], - formula: 'C3 H7 N O2 Se', - formulaWeight: 168.05 - }, - { - id: 'SER', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'SERINE', - synonyms: [], - formula: 'C3 H7 N O3', - formulaWeight: 105.09 - }, - { - id: 'THR', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'THREONINE', - synonyms: [], - formula: 'C4 H9 N O3', - formulaWeight: 119.12 - }, - { - id: 'TRP', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'TRYPTOPHAN', - synonyms: [], - formula: 'C11 H12 N2 O2', - formulaWeight: 204.22 - }, - { - id: 'TYR', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'TYROSINE', - synonyms: [], - formula: 'C9 H11 N O3', - formulaWeight: 181.19 - }, - { - id: 'VAL', - type: ComponentType['L-peptide linking'], - moleculeType: MoleculeType.protein, - name: 'VALINE', - synonyms: [], - formula: 'C5 H11 N O2', - formulaWeight: 117.15 - } -] -export const CommonChemicalComponentMap = new Map() -for (let i = 0, il = CommonChemicalComponents.length; i < il; ++i) { - CommonChemicalComponentMap.set(CommonChemicalComponents[i].id, CommonChemicalComponents[i]) -} \ No newline at end of file +export type ChemicalComponentMap = ReadonlyMap<string, ChemicalComponent> \ No newline at end of file diff --git a/src/mol-model/structure/model/properties/utils/atomic-derived.ts b/src/mol-model/structure/model/properties/utils/atomic-derived.ts index c998a2513010e4ea9315d3202f148d33b982dfce..5dcb94da5b0f869eee578eca33f241d1846c31af 100644 --- a/src/mol-model/structure/model/properties/utils/atomic-derived.ts +++ b/src/mol-model/structure/model/properties/utils/atomic-derived.ts @@ -1,5 +1,5 @@ /** - * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * Copyright (c) 2018-2019 mol* contributors, licensed under MIT, See LICENSE file for more info. * * @author Alexander Rose <alexander.rose@weirdbyte.de> */ @@ -8,22 +8,32 @@ import { AtomicData } from '../atomic'; import { ChemicalComponentMap } from '../chemical-component'; import { AtomicIndex, AtomicDerivedData } from '../atomic/hierarchy'; import { ElementIndex, ResidueIndex } from '../../indexing'; -import { MoleculeType } from '../../types'; +import { MoleculeType, getMoleculeType, getComponentType } from '../../types'; import { getAtomIdForAtomRole } from 'mol-model/structure/util'; export function getAtomicDerivedData(data: AtomicData, index: AtomicIndex, chemicalComponentMap: ChemicalComponentMap): AtomicDerivedData { - const { label_comp_id, _rowCount: n } = data.residues const traceElementIndex = new Uint32Array(n) const directionElementIndex = new Uint32Array(n) const moleculeType = new Uint8Array(n) + const moleculeTypeMap = new Map<string, MoleculeType>() + for (let i = 0; i < n; ++i) { const compId = label_comp_id.value(i) const chemCompMap = chemicalComponentMap const cc = chemCompMap.get(compId) - const molType = cc ? cc.moleculeType : MoleculeType.unknown + let molType: MoleculeType + if (cc) { + molType = cc.moleculeType + } else if (moleculeTypeMap.has(compId)){ + molType = moleculeTypeMap.get(compId)! + } else { + molType = getMoleculeType(getComponentType(compId), compId) + // TODO if unknown molecule type, use atom names to guess molecule type + moleculeTypeMap.set(compId, molType) + } moleculeType[i] = molType const traceAtomId = getAtomIdForAtomRole(molType, 'trace') @@ -33,7 +43,6 @@ export function getAtomicDerivedData(data: AtomicData, index: AtomicIndex, chemi directionElementIndex[i] = index.findAtomOnResidue(i as ResidueIndex, directionAtomId) } - return { residue: { traceElementIndex: traceElementIndex as unknown as ArrayLike<ElementIndex>, diff --git a/src/mol-model/structure/model/properties/utils/atomic-ranges.ts b/src/mol-model/structure/model/properties/utils/atomic-ranges.ts index 7853fb37776f4f9abc625c332e08b55883357a20..ed6816364115f26aa64dbf6a981bf35b3a8c751d 100644 --- a/src/mol-model/structure/model/properties/utils/atomic-ranges.ts +++ b/src/mol-model/structure/model/properties/utils/atomic-ranges.ts @@ -8,7 +8,6 @@ import { AtomicSegments } from '../atomic'; import { AtomicData, AtomicRanges } from '../atomic/hierarchy'; import { Segmentation, Interval } from 'mol-data/int'; import SortedRanges from 'mol-data/int/sorted-ranges'; -import { ChemicalComponentMap } from '../chemical-component'; import { MoleculeType, isPolymer } from '../../types'; import { ElementIndex, ResidueIndex } from '../../indexing'; import { getAtomIdForAtomRole } from '../../../util'; @@ -17,11 +16,6 @@ import { Vec3 } from 'mol-math/linear-algebra'; // TODO add gaps at the ends of the chains by comparing to the polymer sequence data -function getMoleculeType(compId: string, chemicalComponentMap: ChemicalComponentMap) { - const cc = chemicalComponentMap.get(compId) - return cc ? cc.moleculeType : MoleculeType.unknown -} - function getElementIndexForAtomId(rI: ResidueIndex, atomId: string, data: AtomicData, segments: AtomicSegments): ElementIndex { const { offsets } = segments.residueAtomSegments const { label_atom_id } = data.atoms @@ -31,10 +25,9 @@ function getElementIndexForAtomId(rI: ResidueIndex, atomId: string, data: Atomic return offsets[rI] } -function areBackboneConnected(riStart: ResidueIndex, riEnd: ResidueIndex, data: AtomicData, segments: AtomicSegments, conformation: AtomicConformation, chemicalComponentMap: ChemicalComponentMap) { - const { label_comp_id } = data.residues - const mtStart = getMoleculeType(label_comp_id.value(riStart), chemicalComponentMap) - const mtEnd = getMoleculeType(label_comp_id.value(riEnd), chemicalComponentMap) +function areBackboneConnected(riStart: ResidueIndex, riEnd: ResidueIndex, data: AtomicData, segments: AtomicSegments, conformation: AtomicConformation, moleculeType: ArrayLike<MoleculeType>) { + const mtStart = moleculeType[riStart] + const mtEnd = moleculeType[riEnd] if (!isPolymer(mtStart) || !isPolymer(mtEnd)) return false const startId = getAtomIdForAtomRole(mtStart, 'backboneStart') @@ -49,13 +42,13 @@ function areBackboneConnected(riStart: ResidueIndex, riEnd: ResidueIndex, data: return Vec3.distance(pStart, pEnd) < 10 } -export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conformation: AtomicConformation, chemicalComponentMap: ChemicalComponentMap): AtomicRanges { +export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conformation: AtomicConformation, moleculeType: ArrayLike<MoleculeType>): AtomicRanges { const polymerRanges: number[] = [] const gapRanges: number[] = [] const cyclicPolymerMap = new Map<ResidueIndex, ResidueIndex>() const chainIt = Segmentation.transientSegments(segments.chainAtomSegments, Interval.ofBounds(0, data.atoms._rowCount)) const residueIt = Segmentation.transientSegments(segments.residueAtomSegments, Interval.ofBounds(0, data.atoms._rowCount)) - const { label_seq_id, label_comp_id } = data.residues + const { label_seq_id } = data.residues let prevSeqId: number let prevStart: number @@ -72,7 +65,7 @@ export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conf const riStart = segments.residueAtomSegments.index[chainSegment.start] const riEnd = segments.residueAtomSegments.index[chainSegment.end - 1] - if (areBackboneConnected(riStart, riEnd, data, segments, conformation, chemicalComponentMap)) { + if (areBackboneConnected(riStart, riEnd, data, segments, conformation, moleculeType)) { cyclicPolymerMap.set(riStart, riEnd) cyclicPolymerMap.set(riEnd, riStart) } @@ -80,9 +73,8 @@ export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conf while (residueIt.hasNext) { const residueSegment = residueIt.move(); const residueIndex = residueSegment.index - const moleculeType = getMoleculeType(label_comp_id.value(residueIndex), chemicalComponentMap) const seqId = label_seq_id.value(residueIndex) - if (isPolymer(moleculeType)) { + if (isPolymer(moleculeType[residueIndex])) { if (startIndex !== -1) { if (seqId !== prevSeqId + 1) { polymerRanges.push(startIndex, prevEnd - 1) @@ -93,7 +85,7 @@ export function getAtomicRanges(data: AtomicData, segments: AtomicSegments, conf } else { const riStart = segments.residueAtomSegments.index[residueSegment.start] const riEnd = segments.residueAtomSegments.index[prevEnd - 1] - if (!areBackboneConnected(riStart, riEnd, data, segments, conformation, chemicalComponentMap)) { + if (!areBackboneConnected(riStart, riEnd, data, segments, conformation, moleculeType)) { polymerRanges.push(startIndex, prevEnd - 1) startIndex = residueSegment.start } diff --git a/src/mol-model/structure/model/types.ts b/src/mol-model/structure/model/types.ts index 4f2b7d525341c3162d4ae59d58e6a2d1964604e2..20d44428bb70f4bbfcc9d7d7ea7990a172e3a4cd 100644 --- a/src/mol-model/structure/model/types.ts +++ b/src/mol-model/structure/model/types.ts @@ -6,6 +6,7 @@ */ import BitFlags from 'mol-util/bit-flags' +import { SaccharideCompIdMap } from '../structure/carbohydrates/constants'; const _esCache = (function () { const cache = Object.create(null); @@ -158,9 +159,32 @@ export const WaterNames = [ 'SOL', 'WAT', 'HOH', 'H2O', 'W', 'DOD', 'D3O', 'TIP3', 'TIP4', 'SPC' ] -export const ExtraSaccharideNames = [ - 'MLR' -] +export const AminoAcidOneLetterCodeMap = { + 'HIS': 'H', + 'ARG': 'R', + 'LYS': 'K', + 'ILE': 'I', + 'PHE': 'F', + 'LEU': 'L', + 'TRP': 'W', + 'ALA': 'A', + 'MET': 'M', + 'PRO': 'P', + 'CYS': 'C', + 'ASN': 'N', + 'VAL': 'V', + 'GLY': 'G', + 'SER': 'S', + 'GLN': 'Q', + 'TYR': 'Y', + 'ASP': 'D', + 'GLU': 'E', + 'THR': 'T', + + 'SEC': 'U', // as per IUPAC definition + 'PYL': 'O', // as per IUPAC definition +} +export const AminoAcidNames = Object.keys(AminoAcidOneLetterCodeMap) export const RnaBaseNames = [ 'A', 'C', 'T', 'G', 'I', 'U' ] export const DnaBaseNames = [ 'DA', 'DC', 'DT', 'DG', 'DI', 'DU' ] @@ -184,7 +208,7 @@ export function getMoleculeType(compType: string, compId: string) { return MoleculeType.RNA } else if (DNAComponentTypeNames.includes(compType)) { return MoleculeType.DNA - } else if (SaccharideComponentTypeNames.includes(compType) || ExtraSaccharideNames.includes(compId)) { + } else if (SaccharideComponentTypeNames.includes(compType)) { return MoleculeType.saccharide } else if (WaterNames.includes(compId)) { return MoleculeType.water @@ -197,6 +221,21 @@ export function getMoleculeType(compType: string, compId: string) { } } +export function getComponentType(compId: string) { + compId = compId.toUpperCase() + if (AminoAcidNames.includes(compId)) { + return 'peptide linking' + } else if (RnaBaseNames.includes(compId)) { + return 'RNA linking' + } else if (DnaBaseNames.includes(compId)) { + return 'DNA linking' + } else if (SaccharideCompIdMap.has(compId)) { + return 'saccharide' + } else { + return 'other' + } +} + export function isPolymer(moleculeType: MoleculeType) { return moleculeType === MoleculeType.protein || moleculeType === MoleculeType.DNA || moleculeType === MoleculeType.RNA || moleculeType === MoleculeType.PNA }