diff --git a/src/mol-model-formats/structure/pdb/entity.ts b/src/mol-model-formats/structure/pdb/entity.ts
new file mode 100644
index 0000000000000000000000000000000000000000..99cd0a1e54a9532d2774256e536a048707c27080
--- /dev/null
+++ b/src/mol-model-formats/structure/pdb/entity.ts
@@ -0,0 +1,170 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Tokens } from '../../../mol-io/reader/common/text/tokenizer';
+import { CifCategory, CifField } from '../../../mol-io/reader/cif';
+import { WaterNames } from '../../../mol-model/structure/model/types';
+
+/** Specification tokens of COMPND records; only the keys are used, for lookup. */
+const Spec = {
+    'MOL_ID': '',
+    'MOLECULE': '',
+    'CHAIN': '',
+    'FRAGMENT': '',
+    'SYNONYM': '',
+    'EC': '',
+    'ENGINEERED': '',
+    'MUTATION': '',
+    'OTHER_DETAILS': ''
+}
+type Spec = keyof typeof Spec
+
+/** Own-key check: with `in`, inherited names such as `constructor` would also pass. */
+function isSpec(token: string): token is Spec {
+    return Object.prototype.hasOwnProperty.call(Spec, token)
+}
+
+/** One molecule from the COMPND records: its name and the ids of its chains. */
+type Compound = { chains: string[], name: string }
+
+/**
+ * Parse COMPND records into one compound per `MOL_ID` token.
+ *
+ * Only `MOLECULE` (name) and `CHAIN` (chain ids) values are kept. A record
+ * that does not start with a known token continues the value of the previous one.
+ *
+ * @param lines line tokens; `indices` holds start/end offsets into `data`
+ * @param lineStart index of the first COMPND line (inclusive)
+ * @param lineEnd index after the last COMPND line (exclusive)
+ */
+export function parseCmpnd(lines: Tokens, lineStart: number, lineEnd: number) {
+    const getLine = (n: number) => lines.data.substring(lines.indices[2 * n], lines.indices[2 * n + 1])
+
+    let currentSpec: Spec | undefined
+    let currentCompound: Compound = { chains: [], name: '' }
+    const Compounds: Compound[] = []
+
+    for (let i = lineStart; i < lineEnd; i++) {
+        const line = getLine(i)
+        // COLUMNS       DATA TYPE       FIELD          DEFINITION
+        // ----------------------------------------------------------------------------------
+        //  1 -  6       Record name     "COMPND"
+        //  8 - 10       Continuation    continuation   Allows concatenation of multiple records.
+        // 11 - 80       Specification   compound       Description of the molecular components.
+        //               list
+
+        const cmpnd = line.substring(10, 80).trim()
+        const cmpndSpecEnd = cmpnd.indexOf(':')
+        const cmpndSpec = cmpnd.substring(0, cmpndSpecEnd)
+
+        let value: string
+        let startsSpec = false
+
+        if (isSpec(cmpndSpec)) {
+            currentSpec = cmpndSpec
+            startsSpec = true
+            // the blank after the colon is optional, so trim instead of skipping it
+            value = cmpnd.substring(cmpndSpecEnd + 1).trim()
+        } else {
+            value = cmpnd
+        }
+        value = value.replace(/;$/, '')
+
+        if (currentSpec === 'MOL_ID') {
+            // a continuation record must not start another compound
+            if (startsSpec) {
+                currentCompound = { chains: [], name: '' }
+                Compounds.push(currentCompound)
+            }
+        } else if (currentSpec === 'MOLECULE') {
+            if (currentCompound.name) currentCompound.name += ' '
+            currentCompound.name += value
+        } else if (currentSpec === 'CHAIN') {
+            // a list continued on the next record ends with a comma, skip the empty id
+            for (const chain of value.split(/\s*,\s*/)) {
+                if (chain) currentCompound.chains.push(chain)
+            }
+        }
+    }
+
+    return Compounds
+}
+
+/**
+ * Hands out entity ids for atoms and collects the rows of the `entity` category.
+ *
+ * Ids are consecutive numbers (as strings) in order of creation.
+ */
+export class EntityBuilder {
+    private count = 0
+    private ids: string[] = []
+    private types: string[] = []
+    private descriptions: string[] = []
+
+    private compoundsMap = new Map<string, string>()
+    private heteroMap = new Map<string, string>()
+    private chainMap = new Map<string, string>()
+    private waterId?: string
+
+    private add(type: string, description: string) {
+        this.count += 1
+        this.ids.push(`${this.count}`)
+        this.types.push(type)
+        this.descriptions.push(description)
+    }
+
+    /**
+     * Get the entity id for an atom, adding the entity on first use: one shared
+     * water entity, one non-polymer entity per hetero residue name, and for other
+     * atoms the compound set for the chain or else one polymer entity per chain.
+     */
+    getEntityId(residueName: string, chainId: string, isHet: boolean): string {
+        if (isHet) {
+            if (WaterNames.has(residueName)) {
+                if (this.waterId === undefined) {
+                    this.add('water', 'Water')
+                    this.waterId = `${this.count}`
+                }
+                return this.waterId;
+            } else {
+                if (!this.heteroMap.has(residueName)) {
+                    this.add('non-polymer', residueName)
+                    this.heteroMap.set(residueName, `${this.count}`)
+                }
+                return this.heteroMap.get(residueName)!
+            }
+        } else if (this.compoundsMap.has(chainId)) {
+            return this.compoundsMap.get(chainId)!
+        } else {
+            if (!this.chainMap.has(chainId)) {
+                this.add('polymer', chainId)
+                this.chainMap.set(chainId, `${this.count}`)
+            }
+            return this.chainMap.get(chainId)!
+        }
+    }
+
+    /** Build the `entity` category from the entities added so far. */
+    getEntityCategory() {
+        const entity = {
+            id: CifField.ofStrings(this.ids),
+            type: CifField.ofStrings(this.types),
+            pdbx_description: CifField.ofStrings(this.descriptions)
+        }
+        return CifCategory.ofFields('entity', entity)
+    }
+
+    /** Add a polymer entity per compound and assign it to the compound's chains. */
+    setCompounds(compounds: Compound[]) {
+        for (let i = 0, il = compounds.length; i < il; ++i) {
+            const { chains, name } = compounds[i]
+            this.add('polymer', name)
+            for (let j = 0, jl = chains.length; j < jl; ++j) {
+                this.compoundsMap.set(chains[j], `${this.count}`)
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/mol-model-formats/structure/pdb/to-cif.ts b/src/mol-model-formats/structure/pdb/to-cif.ts
index 362f8c26b2529edbbc8daa281f6a9a4065559314..a7e1aaf826c1508c51685674205e8b4fdd2604f4 100644
--- a/src/mol-model-formats/structure/pdb/to-cif.ts
+++ b/src/mol-model-formats/structure/pdb/to-cif.ts
@@ -11,16 +11,9 @@ import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
 import { TokenBuilder, Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
 import { PdbFile } from '../../../mol-io/reader/pdb/schema';
 import { parseCryst1, parseRemark350, parseMtrix } from './assembly';
-import { WaterNames } from '../../../mol-model/structure/model/types';
 import { parseHelix, parseSheet } from './secondary-structure';
 import { guessElementSymbolTokens } from '../util';
-
-function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
-    return {
-        id: CifField.ofStrings(['1', '2', '3']),
-        type: CifField.ofStrings(['polymer', 'non-polymer', 'water'])
-    }
-}
+import { parseCmpnd, EntityBuilder } from './entity';
 
 type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? T : never
 function atom_site_template(data: string, count: number) {
@@ -82,15 +75,7 @@ function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_s
     };
 }
 
-function getEntityId(residueName: string, isHet: boolean) {
-    if (isHet) {
-        if (WaterNames.has(residueName)) return '3';
-        return '2';
-    }
-    return '1';
-}
-
-function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number, isHet: boolean) {
+function addAtom(sites: AtomSiteTemplate, entityBuilder: EntityBuilder, model: string, data: Tokenizer, s: number, e: number, isHet: boolean) {
     const { data: str } = data;
     const length = e - s;
 
@@ -122,6 +107,7 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num
 
     // 22 Character Chain identifier.
     TokenBuilder.add(sites.auth_asym_id, s + 21, s + 22);
+    const chainId = str.substring(s + 21, s + 22);
 
     // 23 - 26 Integer Residue sequence number.
     // TODO: support HEX
@@ -169,7 +155,7 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num
         guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16)
     }
 
-    sites.label_entity_id[sites.index] = getEntityId(residueName, isHet);
+    sites.label_entity_id[sites.index] = entityBuilder.getEntityId(residueName, chainId, isHet);
     sites.pdbx_PDB_model_num[sites.index] = model;
 
     sites.index++;
@@ -195,7 +181,7 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
     }
 
     const atom_site = atom_site_template(data, atomCount);
-
+    const entityBuilder = new EntityBuilder();
     const helperCategories: CifCategory[] = [];
 
     let modelNum = 0, modelStr = '';
@@ -206,19 +192,28 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
             case 'A':
                 if (!substringStartsWith(data, s, e, 'ATOM ')) continue;
                 if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
-                addAtom(atom_site, modelStr, tokenizer, s, e, false);
+                addAtom(atom_site, entityBuilder, modelStr, tokenizer, s, e, false);
                 break;
             case 'C':
                 if (substringStartsWith(data, s, e, 'CRYST1')) {
                     helperCategories.push(...parseCryst1(pdb.id || '?', data.substring(s, e)));
+                } else if (substringStartsWith(data, s, e, 'CONECT')) {
+                    // TODO: CONECT records => struct_conn
+                } else if (substringStartsWith(data, s, e, 'COMPND')) {
+                    let j = i + 1;
+                    while (true) {
+                        s = indices[2 * j]; e = indices[2 * j + 1];
+                        if (!substringStartsWith(data, s, e, 'COMPND')) break;
+                        j++;
+                    }
+                    entityBuilder.setCompounds(parseCmpnd(lines, i, j));
+                    i = j - 1;
                 }
-                // TODO CONNECT records => struct_conn
-                // TODO COMPND records => entity
                 break;
             case 'H':
                 if (substringStartsWith(data, s, e, 'HETATM')) {
                     if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
-                    addAtom(atom_site, modelStr, tokenizer, s, e, true);
+                    addAtom(atom_site, entityBuilder, modelStr, tokenizer, s, e, true);
                 } else if (substringStartsWith(data, s, e, 'HELIX')) {
                     let j = i + 1;
                     while (true) {
@@ -229,7 +224,7 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
                     helperCategories.push(parseHelix(lines, i, j));
                     i = j - 1;
                 }
-                // TODO HETNAM records => chem_comp (at least partially, needs to be completed with common bases and amino acids)
+                // TODO: HETNAM records => chem_comp (at least partially, needs to be completed with common bases and amino acids)
                 break;
             case 'M':
                 if (substringStartsWith(data, s, e, 'MODEL ')) {
@@ -246,10 +241,10 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
                     helperCategories.push(...parseMtrix(lines, i, j));
                     i = j - 1;
                 }
-                // TODO MODRES records => pdbx_struct_mod_residue
+                // TODO: MODRES records => pdbx_struct_mod_residue
                 break;
             case 'O':
-                // TODO ORIGX record => cif.database_PDB_matrix.origx, cif.database_PDB_matrix.origx_vector
+                // TODO: ORIGX record => cif.database_PDB_matrix.origx, cif.database_PDB_matrix.origx_vector
                 break;
             case 'R':
                 if (substringStartsWith(data, s, e, 'REMARK 350')) {
@@ -274,13 +269,13 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
                     helperCategories.push(parseSheet(lines, i, j));
                     i = j - 1;
                 }
-                // TODO SCALE record => cif.atom_sites.fract_transf_matrix, cif.atom_sites.fract_transf_vector
+                // TODO: SCALE record => cif.atom_sites.fract_transf_matrix, cif.atom_sites.fract_transf_vector
                 break;
         }
     }
 
     const categories = {
-        entity: CifCategory.ofFields('entity', _entity()),
+        entity: entityBuilder.getEntityCategory(),
         atom_site: CifCategory.ofFields('atom_site', _atom_site(atom_site))
     } as any;
 
diff --git a/src/mol-plugin/ui/sequence/polymer.ts b/src/mol-plugin/ui/sequence/polymer.ts
index b86f783defd3b14cbc4d22314d297a667cd93db5..14e8bf64d921a55fe673ebb3e2188ea01587388d 100644
--- a/src/mol-plugin/ui/sequence/polymer.ts
+++ b/src/mol-plugin/ui/sequence/polymer.ts
@@ -39,10 +39,11 @@ export class PolymerSequenceWrapper extends SequenceWrapper<StructureUnit> {
         if (StructureElement.isLoci(loci)) {
             if (!Structure.areParentsEqual(loci.structure, structure)) return false
 
+            const { offset } = this.sequence
             for (const e of loci.elements) {
                 if (e.unit.id === unit.id) {
                     OrderedSet.forEach(e.indices, v => {
-                        if (apply(getSeqIndices(e.unit, e.unit.elements[v]))) changed = true
+                        if (apply(getSeqIndices(e.unit, e.unit.elements[v], offset))) changed = true
                     })
                 }
             }
@@ -99,22 +100,22 @@ function createResidueQuery(unitId: number, label_seq_id: number) {
     });
 }
 
-function getSeqIndices(unit: Unit, element: ElementIndex): Interval {
+function getSeqIndices(unit: Unit, element: ElementIndex, offset: number): Interval {
     const { model } = unit
     switch (unit.kind) {
         case Unit.Kind.Atomic:
             const residueIndex = model.atomicHierarchy.residueAtomSegments.index[element]
             const seqId = model.atomicHierarchy.residues.label_seq_id.value(residueIndex)
-            return Interval.ofSingleton(seqId - 1)
+            return Interval.ofSingleton(seqId - 1 - offset)
         case Unit.Kind.Spheres:
             return Interval.ofRange(
-                model.coarseHierarchy.spheres.seq_id_begin.value(element) - 1,
-                model.coarseHierarchy.spheres.seq_id_end.value(element) - 1
+                model.coarseHierarchy.spheres.seq_id_begin.value(element) - 1 - offset,
+                model.coarseHierarchy.spheres.seq_id_end.value(element) - 1 - offset
             )
         case Unit.Kind.Gaussians:
             return Interval.ofRange(
-                model.coarseHierarchy.gaussians.seq_id_begin.value(element) - 1,
-                model.coarseHierarchy.gaussians.seq_id_end.value(element) - 1
+                model.coarseHierarchy.gaussians.seq_id_begin.value(element) - 1 - offset,
+                model.coarseHierarchy.gaussians.seq_id_end.value(element) - 1 - offset
             )
     }
 }
\ No newline at end of file