diff --git a/src/apps/structure-info/model.ts b/src/apps/structure-info/model.ts index 0fdb0d735ab1ac460b42fe91892aa3a35dce24fd..8d06ce4f5d3190c6ccd3706a90f48a8ec00cbb32 100644 --- a/src/apps/structure-info/model.ts +++ b/src/apps/structure-info/model.ts @@ -14,7 +14,7 @@ import { Model, Structure, StructureElement, Unit, StructureProperties, UnitRing import { OrderedSet } from 'mol-data/int'; import { openCif, downloadCif } from './helpers'; import { Vec3 } from 'mol-math/linear-algebra'; -import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif'; +import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif'; import { ModelFormat } from 'mol-model-parsers/structure/format'; @@ -200,7 +200,7 @@ export function printModelStats(models: ReadonlyArray<Model>) { } export async function getModelsAndStructure(frame: CifFrame) { - const models = await parse_mmCIF(ModelFormat.mmCIF(frame)).run(); + const models = await trajecotryFromMmCIF(ModelFormat.mmCIF(frame)).run(); const structure = Structure.ofModel(models[0]); return { models, structure }; } diff --git a/src/mol-model-parsers/structure/mmcif.ts b/src/mol-model-parsers/structure/mmcif.ts index bc10993ac5114966373df6aa3b722b3256c43dac..c3ea2870b77de63aa2218cb8db0e4604eeb52f1b 100644 --- a/src/mol-model-parsers/structure/mmcif.ts +++ b/src/mol-model-parsers/structure/mmcif.ts @@ -5,304 +5,12 @@ * @author Alexander Rose <alexander.rose@weirdbyte.de> */ -import { Column, Table } from 'mol-data/db'; -import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; -import { Spacegroup, SpacegroupCell, SymmetryOperator } from 'mol-math/geometry'; -import { Tensor, Vec3 } from 'mol-math/linear-algebra'; -import { Task, RuntimeContext } from 'mol-task'; -import UUID from 'mol-util/uuid'; import { Model } from 'mol-model/structure/model/model'; -import { Entities } from 'mol-model/structure/model/properties/common'; -import { CustomProperties } from 'mol-model/structure/model/properties/custom'; -import { ModelSymmetry } from 'mol-model/structure/model/properties/symmetry'; -import { createAssemblies } from './mmcif/assembly'; -import { getAtomicHierarchyAndConformation } from './mmcif/atomic'; -import { ComponentBond } from './mmcif/bonds'; -import { getIHMCoarse, EmptyIHMCoarse, IHMData } from './mmcif/ihm'; -import { getSecondaryStructureMmCif } from './mmcif/secondary-structure'; -import { getSequence } from './mmcif/sequence'; -import { sortAtomSite } from './mmcif/sort'; -import { StructConn } from './mmcif/bonds/struct_conn'; -import { ChemicalComponent, ChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component'; -import { ComponentType, getMoleculeType, MoleculeType } from 'mol-model/structure/model/types'; +import { Task } from 'mol-task'; import { ModelFormat } from './format'; -import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants'; +import { _parse_mmCif } from './mmcif/parser'; import mmCIF_Format = ModelFormat.mmCIF -export function parse_mmCIF(format: mmCIF_Format): Task<Model.Trajectory> { - const formatData = getFormatData(format) - return Task.create('Create mmCIF Model', async ctx => { - const isIHM = format.data.ihm_model_list._rowCount > 0; - return isIHM ? await readIHM(ctx, format, formatData) : await readStandard(ctx, format, formatData); - }); -} - -type AtomSite = mmCIF_Database['atom_site'] - -function getSymmetry(format: mmCIF_Format): ModelSymmetry { - const assemblies = createAssemblies(format); - const spacegroup = getSpacegroup(format); - const isNonStandardCrytalFrame = checkNonStandardCrystalFrame(format, spacegroup); - return { assemblies, spacegroup, isNonStandardCrytalFrame, ncsOperators: getNcsOperators(format) }; -} - -function checkNonStandardCrystalFrame(format: mmCIF_Format, spacegroup: Spacegroup) { - const { atom_sites } = format.data; - if (atom_sites._rowCount === 0) return false; - // TODO: parse atom_sites transform and check if it corresponds to the toFractional matrix - return false; -} - -function getSpacegroup(format: mmCIF_Format): Spacegroup { - const { symmetry, cell } = format.data; - if (symmetry._rowCount === 0 || cell._rowCount === 0) return Spacegroup.ZeroP1; - const groupName = symmetry['space_group_name_H-M'].value(0); - const spaceCell = SpacegroupCell.create(groupName, - Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0)), - Vec3.scale(Vec3.zero(), Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0)), Math.PI / 180)); - - return Spacegroup.create(spaceCell); -} - -function getNcsOperators(format: mmCIF_Format) { - const { struct_ncs_oper } = format.data; - if (struct_ncs_oper._rowCount === 0) return void 0; - const { id, matrix, vector } = struct_ncs_oper; - - const matrixSpace = mmCIF_Schema.struct_ncs_oper.matrix.space, vectorSpace = mmCIF_Schema.struct_ncs_oper.vector.space; - - const opers: SymmetryOperator[] = []; - for (let i = 0; i < struct_ncs_oper._rowCount; i++) { - const m = Tensor.toMat3(matrixSpace, matrix.value(i)); - const v = Tensor.toVec3(vectorSpace, vector.value(i)); - if (!SymmetryOperator.checkIfRotationAndTranslation(m, v)) continue; - opers[opers.length] = SymmetryOperator.ofRotationAndOffset(`ncs_${id.value(i)}`, m, v); - } - return opers; -} -function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['modifiedResidues'] { - const data = format.data.pdbx_struct_mod_residue; - const parentId = new Map<string, string>(); - const details = new Map<string, string>(); - const comp_id = data.label_comp_id.isDefined ? data.label_comp_id : data.auth_comp_id; - const parent_id = data.parent_comp_id, details_data = data.details; - - for (let i = 0; i < data._rowCount; i++) { - const id = comp_id.value(i); - parentId.set(id, parent_id.value(i)); - details.set(id, details_data.value(i)); - } - - return { parentId, details }; -} - -function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap { - const map = new Map<string, ChemicalComponent>(); - const { id, type, name, pdbx_synonyms, formula, formula_weight } = format.data.chem_comp - for (let i = 0, il = id.rowCount; i < il; ++i) { - const _id = id.value(i) - const _type = type.value(i) - const cc: ChemicalComponent = { - id: _id, - type: ComponentType[_type], - moleculeType: getMoleculeType(_type, _id), - name: name.value(i), - synonyms: pdbx_synonyms.value(i), - formula: formula.value(i), - formulaWeight: formula_weight.value(i), - } - map.set(_id, cc) - } - return map -} - -function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap { - const map = new Map<string, SaccharideComponent>(); - const { pdbx_chem_comp_identifier } = format.data - if (pdbx_chem_comp_identifier._rowCount > 0) { - const { comp_id, type, identifier } = pdbx_chem_comp_identifier - for (let i = 0, il = pdbx_chem_comp_identifier._rowCount; i < il; ++i) { - if (type.value(i) === 'SNFG CARB SYMBOL') { - const snfgName = identifier.value(i) - const saccharideComp = SaccharidesSnfgMap.get(snfgName) - if (saccharideComp) { - map.set(comp_id.value(i), saccharideComp) - } else { - console.warn(`Unknown SNFG name '${snfgName}'`) - } - } - } - } else if (format.data.chem_comp._rowCount > 0) { - const { id, type } = format.data.chem_comp - for (let i = 0, il = id.rowCount; i < il; ++i) { - const _id = id.value(i) - const _type = type.value(i) - if (SaccharideCompIdMap.has(_id)) { - map.set(_id, SaccharideCompIdMap.get(_id)!) - } else if (!map.has(_id) && getMoleculeType(_type, _id) === MoleculeType.saccharide) { - map.set(_id, UnknownSaccharideComponent) - } - } - } else { - // TODO check if present in format.data.atom_site.label_comp_id - SaccharideCompIdMap.forEach((v, k) => map.set(k, v)) - } - return map -} - -export interface FormatData { - modifiedResidues: Model['properties']['modifiedResidues'] - chemicalComponentMap: Model['properties']['chemicalComponentMap'] - saccharideComponentMap: Model['properties']['saccharideComponentMap'] -} - -function getFormatData(format: mmCIF_Format): FormatData { - return { - modifiedResidues: getModifiedResidueNameMap(format), - chemicalComponentMap: getChemicalComponentMap(format), - saccharideComponentMap: getSaccharideComponentMap(format) - } -} - -function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model { - const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous); - if (previous && atomic.sameAsPrevious) { - return { - ...previous, - id: UUID.create22(), - modelNum: atom_site.pdbx_PDB_model_num.value(0), - atomicConformation: atomic.conformation, - _dynamicPropertyData: Object.create(null) - }; - } - - const coarse = EmptyIHMCoarse; - const label = format.data.entry.id.valueKind(0) === Column.ValueKind.Present - ? format.data.entry.id.value(0) - : format.data._name; - - return { - id: UUID.create22(), - label, - sourceData: format, - modelNum: atom_site.pdbx_PDB_model_num.value(0), - entities, - symmetry: getSymmetry(format), - sequence: getSequence(format.data, entities, atomic.hierarchy, formatData.modifiedResidues.parentId), - atomicHierarchy: atomic.hierarchy, - atomicConformation: atomic.conformation, - coarseHierarchy: coarse.hierarchy, - coarseConformation: coarse.conformation, - properties: { - secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy), - ...formatData - }, - customProperties: new CustomProperties(), - _staticPropertyData: Object.create(null), - _dynamicPropertyData: Object.create(null) - }; -} - -function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model { - const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities, formatData); - const coarse = getIHMCoarse(data, formatData); - - return { - id: UUID.create22(), - label: data.model_name, - sourceData: format, - modelNum: data.model_id, - entities: data.entities, - symmetry: getSymmetry(format), - sequence: getSequence(format.data, data.entities, atomic.hierarchy, formatData.modifiedResidues.parentId), - atomicHierarchy: atomic.hierarchy, - atomicConformation: atomic.conformation, - coarseHierarchy: coarse.hierarchy, - coarseConformation: coarse.conformation, - properties: { - secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy), - ...formatData - }, - customProperties: new CustomProperties(), - _staticPropertyData: Object.create(null), - _dynamicPropertyData: Object.create(null) - }; -} - -function attachProps(model: Model) { - ComponentBond.attachFromMmCif(model); - StructConn.attachFromMmCif(model); -} - -function findModelEnd(num: Column<number>, startIndex: number) { - const rowCount = num.rowCount; - if (!num.isDefined) return rowCount; - let endIndex = startIndex + 1; - while (endIndex < rowCount && num.areValuesEqual(startIndex, endIndex)) endIndex++; - return endIndex; -} - -async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) { - const atomCount = format.data.atom_site._rowCount; - const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) }; - - const models: Model[] = []; - let modelStart = 0; - while (modelStart < atomCount) { - const modelEnd = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, modelStart); - const atom_site = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd); - const model = createStandardModel(format, atom_site, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0); - attachProps(model); - models.push(model); - modelStart = modelEnd; - } - return models; -} - -function splitTable<T extends Table<any>>(table: T, col: Column<number>) { - const ret = new Map<number, T>() - const rowCount = table._rowCount; - let modelStart = 0; - while (modelStart < rowCount) { - const modelEnd = findModelEnd(col, modelStart); - const id = col.value(modelStart); - const window = Table.window(table, table._schema, modelStart, modelEnd) as T; - ret.set(id, window); - modelStart = modelEnd; - } - return ret; -} - -async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) { - const { ihm_model_list } = format.data; - const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) }; - - if (!format.data.atom_site.ihm_model_id.isDefined) { - throw new Error('expected _atom_site.ihm_model_id to be defined') - } - - // TODO: will IHM require sorting or will we trust it? - const atom_sites = splitTable(format.data.atom_site, format.data.atom_site.ihm_model_id); - const sphere_sites = splitTable(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site.model_id); - const gauss_sites = splitTable(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site.model_id); - - const models: Model[] = []; - - const { model_id, model_name } = ihm_model_list; - for (let i = 0; i < ihm_model_list._rowCount; i++) { - const id = model_id.value(i); - const data: IHMData = { - model_id: id, - model_name: model_name.value(i), - entities: entities, - atom_site: atom_sites.has(id) ? atom_sites.get(id)! : Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0), - ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)! : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0), - ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)! : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0) - }; - const model = createModelIHM(format, data, formatData); - attachProps(model); - models.push(model); - } - - return models; +export function trajecotryFromMmCIF(format: mmCIF_Format): Task<Model.Trajectory> { + return Task.create('Create mmCIF Model', ctx => _parse_mmCif(format, ctx)); } \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/atomic.ts b/src/mol-model-parsers/structure/mmcif/atomic.ts index 259933942c37b3615eae0ff08ae789b3ade7775e..7c4fbfbbfba1b8a266302d7bfbb2627186153ead 100644 --- a/src/mol-model-parsers/structure/mmcif/atomic.ts +++ b/src/mol-model-parsers/structure/mmcif/atomic.ts @@ -15,10 +15,10 @@ import { getAtomicIndex } from 'mol-model/structure/model/properties/utils/atomi import { ElementSymbol } from 'mol-model/structure/model/types'; import { Entities } from 'mol-model/structure/model/properties/common'; import { getAtomicRanges } from 'mol-model/structure/model/properties/utils/atomic-ranges'; -import { FormatData } from '../mmcif'; import { getAtomicDerivedData } from 'mol-model/structure/model/properties/utils/atomic-derived'; import { ModelFormat } from '../format'; import mmCIF_Format = ModelFormat.mmCIF +import { FormatData } from './parser'; type AtomSite = mmCIF_Database['atom_site'] diff --git a/src/mol-model-parsers/structure/mmcif/ihm.ts b/src/mol-model-parsers/structure/mmcif/ihm.ts index 7ca51db554a8c12a6229e147e971684a451f66a1..731af9e3af83de9784c51bba90da32d0c15de777 100644 --- a/src/mol-model-parsers/structure/mmcif/ihm.ts +++ b/src/mol-model-parsers/structure/mmcif/ihm.ts @@ -14,7 +14,7 @@ import { Segmentation, Interval } from 'mol-data/int'; import { Mat3, Tensor } from 'mol-math/linear-algebra'; import { ElementIndex, ChainIndex } from 'mol-model/structure/model/indexing'; import { getCoarseRanges } from 'mol-model/structure/model/properties/utils/coarse-ranges'; -import { FormatData } from '../mmcif'; +import { FormatData } from './parser'; export interface IHMData { model_id: number, diff --git a/src/mol-model-parsers/structure/mmcif/parser.ts b/src/mol-model-parsers/structure/mmcif/parser.ts new file mode 100644 index 0000000000000000000000000000000000000000..a51afcb2787881faed93d3d3e48ffb94a744f69c --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/parser.ts @@ -0,0 +1,306 @@ +/** + * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { Column, Table } from 'mol-data/db'; +import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; +import { Spacegroup, SpacegroupCell, SymmetryOperator } from 'mol-math/geometry'; +import { Tensor, Vec3 } from 'mol-math/linear-algebra'; +import { RuntimeContext } from 'mol-task'; +import UUID from 'mol-util/uuid'; +import { Model } from 'mol-model/structure/model/model'; +import { Entities } from 'mol-model/structure/model/properties/common'; +import { CustomProperties } from 'mol-model/structure/model/properties/custom'; +import { ModelSymmetry } from 'mol-model/structure/model/properties/symmetry'; +import { createAssemblies } from './assembly'; +import { getAtomicHierarchyAndConformation } from './atomic'; +import { ComponentBond } from './bonds'; +import { getIHMCoarse, EmptyIHMCoarse, IHMData } from './ihm'; +import { getSecondaryStructureMmCif } from './secondary-structure'; +import { getSequence } from './sequence'; +import { sortAtomSite } from './sort'; +import { StructConn } from './bonds/struct_conn'; +import { ChemicalComponent, ChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component'; +import { ComponentType, getMoleculeType, MoleculeType } from 'mol-model/structure/model/types'; +import { ModelFormat } from '../format'; +import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants'; +import mmCIF_Format = ModelFormat.mmCIF + +export async function _parse_mmCif(format: mmCIF_Format, ctx: RuntimeContext) { + const formatData = getFormatData(format) + const isIHM = format.data.ihm_model_list._rowCount > 0; + return isIHM ? await readIHM(ctx, format, formatData) : await readStandard(ctx, format, formatData); +} + +type AtomSite = mmCIF_Database['atom_site'] + +function getSymmetry(format: mmCIF_Format): ModelSymmetry { + const assemblies = createAssemblies(format); + const spacegroup = getSpacegroup(format); + const isNonStandardCrytalFrame = checkNonStandardCrystalFrame(format, spacegroup); + return { assemblies, spacegroup, isNonStandardCrytalFrame, ncsOperators: getNcsOperators(format) }; +} + +function checkNonStandardCrystalFrame(format: mmCIF_Format, spacegroup: Spacegroup) { + const { atom_sites } = format.data; + if (atom_sites._rowCount === 0) return false; + // TODO: parse atom_sites transform and check if it corresponds to the toFractional matrix + return false; +} + +function getSpacegroup(format: mmCIF_Format): Spacegroup { + const { symmetry, cell } = format.data; + if (symmetry._rowCount === 0 || cell._rowCount === 0) return Spacegroup.ZeroP1; + const groupName = symmetry['space_group_name_H-M'].value(0); + const spaceCell = SpacegroupCell.create(groupName, + Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0)), + Vec3.scale(Vec3.zero(), Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0)), Math.PI / 180)); + + return Spacegroup.create(spaceCell); +} + +function getNcsOperators(format: mmCIF_Format) { + const { struct_ncs_oper } = format.data; + if (struct_ncs_oper._rowCount === 0) return void 0; + const { id, matrix, vector } = struct_ncs_oper; + + const matrixSpace = mmCIF_Schema.struct_ncs_oper.matrix.space, vectorSpace = mmCIF_Schema.struct_ncs_oper.vector.space; + + const opers: SymmetryOperator[] = []; + for (let i = 0; i < struct_ncs_oper._rowCount; i++) { + const m = Tensor.toMat3(matrixSpace, matrix.value(i)); + const v = Tensor.toVec3(vectorSpace, vector.value(i)); + if (!SymmetryOperator.checkIfRotationAndTranslation(m, v)) continue; + opers[opers.length] = SymmetryOperator.ofRotationAndOffset(`ncs_${id.value(i)}`, m, v); + } + return opers; +} +function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['modifiedResidues'] { + const data = format.data.pdbx_struct_mod_residue; + const parentId = new Map<string, string>(); + const details = new Map<string, string>(); + const comp_id = data.label_comp_id.isDefined ? data.label_comp_id : data.auth_comp_id; + const parent_id = data.parent_comp_id, details_data = data.details; + + for (let i = 0; i < data._rowCount; i++) { + const id = comp_id.value(i); + parentId.set(id, parent_id.value(i)); + details.set(id, details_data.value(i)); + } + + return { parentId, details }; +} + +function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap { + const map = new Map<string, ChemicalComponent>(); + const { id, type, name, pdbx_synonyms, formula, formula_weight } = format.data.chem_comp + for (let i = 0, il = id.rowCount; i < il; ++i) { + const _id = id.value(i) + const _type = type.value(i) + const cc: ChemicalComponent = { + id: _id, + type: ComponentType[_type], + moleculeType: getMoleculeType(_type, _id), + name: name.value(i), + synonyms: pdbx_synonyms.value(i), + formula: formula.value(i), + formulaWeight: formula_weight.value(i), + } + map.set(_id, cc) + } + return map +} + +function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap { + const map = new Map<string, SaccharideComponent>(); + const { pdbx_chem_comp_identifier } = format.data + if (pdbx_chem_comp_identifier._rowCount > 0) { + const { comp_id, type, identifier } = pdbx_chem_comp_identifier + for (let i = 0, il = pdbx_chem_comp_identifier._rowCount; i < il; ++i) { + if (type.value(i) === 'SNFG CARB SYMBOL') { + const snfgName = identifier.value(i) + const saccharideComp = SaccharidesSnfgMap.get(snfgName) + if (saccharideComp) { + map.set(comp_id.value(i), saccharideComp) + } else { + console.warn(`Unknown SNFG name '${snfgName}'`) + } + } + } + } else if (format.data.chem_comp._rowCount > 0) { + const { id, type } = format.data.chem_comp + for (let i = 0, il = id.rowCount; i < il; ++i) { + const _id = id.value(i) + const _type = type.value(i) + if (SaccharideCompIdMap.has(_id)) { + map.set(_id, SaccharideCompIdMap.get(_id)!) + } else if (!map.has(_id) && getMoleculeType(_type, _id) === MoleculeType.saccharide) { + map.set(_id, UnknownSaccharideComponent) + } + } + } else { + // TODO check if present in format.data.atom_site.label_comp_id + SaccharideCompIdMap.forEach((v, k) => map.set(k, v)) + } + return map +} + +export interface FormatData { + modifiedResidues: Model['properties']['modifiedResidues'] + chemicalComponentMap: Model['properties']['chemicalComponentMap'] + saccharideComponentMap: Model['properties']['saccharideComponentMap'] +} + +function getFormatData(format: mmCIF_Format): FormatData { + return { + modifiedResidues: getModifiedResidueNameMap(format), + chemicalComponentMap: getChemicalComponentMap(format), + saccharideComponentMap: getSaccharideComponentMap(format) + } +} + +function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model { + const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous); + if (previous && atomic.sameAsPrevious) { + return { + ...previous, + id: UUID.create22(), + modelNum: atom_site.pdbx_PDB_model_num.value(0), + atomicConformation: atomic.conformation, + _dynamicPropertyData: Object.create(null) + }; + } + + const coarse = EmptyIHMCoarse; + const label = format.data.entry.id.valueKind(0) === Column.ValueKind.Present + ? format.data.entry.id.value(0) + : format.data._name; + + return { + id: UUID.create22(), + label, + sourceData: format, + modelNum: atom_site.pdbx_PDB_model_num.value(0), + entities, + symmetry: getSymmetry(format), + sequence: getSequence(format.data, entities, atomic.hierarchy, formatData.modifiedResidues.parentId), + atomicHierarchy: atomic.hierarchy, + atomicConformation: atomic.conformation, + coarseHierarchy: coarse.hierarchy, + coarseConformation: coarse.conformation, + properties: { + secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy), + ...formatData + }, + customProperties: new CustomProperties(), + _staticPropertyData: Object.create(null), + _dynamicPropertyData: Object.create(null) + }; +} + +function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model { + const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities, formatData); + const coarse = getIHMCoarse(data, formatData); + + return { + id: UUID.create22(), + label: data.model_name, + sourceData: format, + modelNum: data.model_id, + entities: data.entities, + symmetry: getSymmetry(format), + sequence: getSequence(format.data, data.entities, atomic.hierarchy, formatData.modifiedResidues.parentId), + atomicHierarchy: atomic.hierarchy, + atomicConformation: atomic.conformation, + coarseHierarchy: coarse.hierarchy, + coarseConformation: coarse.conformation, + properties: { + secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy), + ...formatData + }, + customProperties: new CustomProperties(), + _staticPropertyData: Object.create(null), + _dynamicPropertyData: Object.create(null) + }; +} + +function attachProps(model: Model) { + ComponentBond.attachFromMmCif(model); + StructConn.attachFromMmCif(model); +} + +function findModelEnd(num: Column<number>, startIndex: number) { + const rowCount = num.rowCount; + if (!num.isDefined) return rowCount; + let endIndex = startIndex + 1; + while (endIndex < rowCount && num.areValuesEqual(startIndex, endIndex)) endIndex++; + return endIndex; +} + +async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) { + const atomCount = format.data.atom_site._rowCount; + const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) }; + + const models: Model[] = []; + let modelStart = 0; + while (modelStart < atomCount) { + const modelEnd = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, modelStart); + const atom_site = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd); + const model = createStandardModel(format, atom_site, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0); + attachProps(model); + models.push(model); + modelStart = modelEnd; + } + return models; +} + +function splitTable<T extends Table<any>>(table: T, col: Column<number>) { + const ret = new Map<number, T>() + const rowCount = table._rowCount; + let modelStart = 0; + while (modelStart < rowCount) { + const modelEnd = findModelEnd(col, modelStart); + const id = col.value(modelStart); + const window = Table.window(table, table._schema, modelStart, modelEnd) as T; + ret.set(id, window); + modelStart = modelEnd; + } + return ret; +} + +async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) { + const { ihm_model_list } = format.data; + const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) }; + + if (!format.data.atom_site.ihm_model_id.isDefined) { + throw new Error('expected _atom_site.ihm_model_id to be defined') + } + + // TODO: will IHM require sorting or will we trust it? + const atom_sites = splitTable(format.data.atom_site, format.data.atom_site.ihm_model_id); + const sphere_sites = splitTable(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site.model_id); + const gauss_sites = splitTable(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site.model_id); + + const models: Model[] = []; + + const { model_id, model_name } = ihm_model_list; + for (let i = 0; i < ihm_model_list._rowCount; i++) { + const id = model_id.value(i); + const data: IHMData = { + model_id: id, + model_name: model_name.value(i), + entities: entities, + atom_site: atom_sites.has(id) ? atom_sites.get(id)! : Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0), + ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)! : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0), + ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)! : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0) + }; + const model = createModelIHM(format, data, formatData); + attachProps(model); + models.push(model); + } + + return models; +} \ No newline at end of file diff --git a/src/mol-model-parsers/structure/pdb.ts b/src/mol-model-parsers/structure/pdb.ts new file mode 100644 index 0000000000000000000000000000000000000000..a8695876900d671bf514d1c8ec01872fdcfbae31 --- /dev/null +++ b/src/mol-model-parsers/structure/pdb.ts @@ -0,0 +1,21 @@ +/** + * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { PdbFile } from 'mol-io/reader/pdb/schema'; +import { pdbToMmCif } from './pdb/to-cif'; +import { Model } from 'mol-model/structure/model'; +import { Task } from 'mol-task'; +import { ModelFormat } from './format'; +import { _parse_mmCif } from './mmcif/parser'; + +export function trajectoryFromPDB(pdb: PdbFile): Task<Model.Trajectory> { + return Task.create('Parse PDB', async ctx => { + await ctx.update('Converting to mmCIF'); + const cif = await pdbToMmCif(pdb); + const format = ModelFormat.mmCIF(cif); + return _parse_mmCif(format, ctx); + }) +} diff --git a/src/mol-io/reader/pdb/to-cif.ts b/src/mol-model-parsers/structure/pdb/to-cif.ts similarity index 92% rename from src/mol-io/reader/pdb/to-cif.ts rename to src/mol-model-parsers/structure/pdb/to-cif.ts index c6862994be0c603ce22227635e62d292ec10c03f..799ab8c9a4ed7b7240a38c1134f9e2d6b30abe48 100644 --- a/src/mol-io/reader/pdb/to-cif.ts +++ b/src/mol-model-parsers/structure/pdb/to-cif.ts @@ -4,13 +4,11 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -import { CifField, CifCategory } from '../cif'; -import { mmCIF_Schema } from '../cif/schema/mmcif'; -import { TokenBuilder, Tokenizer } from '../common/text/tokenizer'; -import { PdbFile } from './schema'; -import { CifFile } from '../cif/data-model'; import { substringStartsWith } from 'mol-util/string'; -import { Task } from 'mol-task'; +import { CifField, CifCategory, CifFrame } from 'mol-io/reader/cif'; +import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; +import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer'; +import { PdbFile } from 'mol-io/reader/pdb/schema'; function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory { return { @@ -208,7 +206,7 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num sites.index++; } -async function pdbToMmCIF(pdb: PdbFile): Promise<CifFile> { +export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> { const { lines } = pdb; const { data, indices } = lines; const tokenizer = Tokenizer(data); @@ -260,19 +258,8 @@ async function pdbToMmCIF(pdb: PdbFile): Promise<CifFile> { } return { - name: pdb.id, - blocks: [{ - saveFrames: [], - header: pdb.id || 'PDB', - categoryNames: Object.keys(categories), - categories - }] + header: pdb.id || 'PDB', + categoryNames: Object.keys(categories), + categories }; -} - -export function convertPDBtoMmCif(pdb: PdbFile): Task<CifFile> { - return Task.create('Convert PDB to mmCIF', async ctx => { - await ctx.update('Converting to mmCIF...'); - return pdbToMmCIF(pdb); - }); } \ No newline at end of file diff --git a/src/mol-plugin/state/actions/basic.ts b/src/mol-plugin/state/actions/basic.ts index 48c855fbcc57c4d8e3892d0715c743aee30c30b6..483e317e3e5edf2ee7ed63d51253082a772b520c 100644 --- a/src/mol-plugin/state/actions/basic.ts +++ b/src/mol-plugin/state/actions/basic.ts @@ -94,7 +94,7 @@ export const OpenStructure = StateAction.build({ function createModelTree(b: StateTreeBuilder.To<PluginStateObject.Data.Binary | PluginStateObject.Data.String>, format: 'pdb' | 'cif' = 'cif') { const parsed = format === 'cif' ? b.apply(StateTransforms.Data.ParseCif).apply(StateTransforms.Model.TrajectoryFromMmCif) - : b.apply(StateTransforms.Data.ConvertPDBtoMmCif).apply(StateTransforms.Model.TrajectoryFromMmCif); + : b.apply(StateTransforms.Model.TrajectoryFromPDB); return parsed.apply(StateTransforms.Model.ModelFromTrajectory, { modelIndex: 0 }); } diff --git a/src/mol-plugin/state/transforms/data.ts b/src/mol-plugin/state/transforms/data.ts index c8395300b2d6b25a0e000220390ef33b51b0949f..59ed1b3a0d9e8e3eb3fb6fb615a4a3bfbb18b017 100644 --- a/src/mol-plugin/state/transforms/data.ts +++ b/src/mol-plugin/state/transforms/data.ts @@ -15,8 +15,6 @@ import { Transformer } from 'mol-state'; import { readFromFile } from 'mol-util/data-source'; import * as CCP4 from 'mol-io/reader/ccp4/parser' import * as DSN6 from 'mol-io/reader/dsn6/parser' -import { parsePDB } from 'mol-io/reader/pdb/parser'; -import { convertPDBtoMmCif } from 'mol-io/reader/pdb/to-cif'; export { Download } type Download = typeof Download @@ -97,24 +95,6 @@ const ParseCif = PluginStateTransform.BuiltIn({ } }); -export { ConvertPDBtoMmCif } -type ConvertPDBtoMmCif = typeof ConvertPDBtoMmCif -const ConvertPDBtoMmCif = PluginStateTransform.BuiltIn({ - name: 'convert-pdb-to-mmcif', - display: { name: 'Convert PDB string to mmCIF' }, - from: [SO.Data.String], - to: SO.Format.Cif -})({ - apply({ a }) { - return Task.create('Parse CIF', async ctx => { - const parsed = await parsePDB(a.data).runInContext(ctx); - if (parsed.isError) throw new Error(parsed.message); - const cif = await convertPDBtoMmCif(parsed.result).runInContext(ctx); - return new SO.Format.Cif(cif); - }); - } -}); - export { ParseCcp4 } type ParseCcp4 = typeof ParseCcp4 const ParseCcp4 = PluginStateTransform.BuiltIn({ diff --git a/src/mol-plugin/state/transforms/model.ts b/src/mol-plugin/state/transforms/model.ts index 7f1dedacf625f8ea13f13e0f0f744f719412520b..1d2a96e8c8bf1e0de221251c0d0cb0ad6a97cc1d 100644 --- a/src/mol-plugin/state/transforms/model.ts +++ b/src/mol-plugin/state/transforms/model.ts @@ -19,8 +19,10 @@ import { stringToWords } from 'mol-util/string'; import { volumeFromCcp4 } from 'mol-model/volume/formats/ccp4'; import { Vec3 } from 'mol-math/linear-algebra'; import { volumeFromDsn6 } from 'mol-model/volume/formats/dsn6'; -import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif'; +import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif'; import { ModelFormat } from 'mol-model-parsers/structure/format'; +import { parsePDB } from 'mol-io/reader/pdb/parser'; +import { trajectoryFromPDB } from 'mol-model-parsers/structure/pdb'; export { TrajectoryFromMmCif } type TrajectoryFromMmCif = typeof TrajectoryFromMmCif @@ -47,7 +49,7 @@ const TrajectoryFromMmCif = PluginStateTransform.BuiltIn({ const header = params.blockHeader || a.data.blocks[0].header; const block = a.data.blocks.find(b => b.header === header); if (!block) throw new Error(`Data block '${[header]}' not found.`); - const models = await parse_mmCIF(ModelFormat.mmCIF(block)).runInContext(ctx); + const models = await trajecotryFromMmCIF(ModelFormat.mmCIF(block)).runInContext(ctx); if (models.length === 0) throw new Error('No models found.'); const props = { label: models[0].label, description: `${models.length} model${models.length === 1 ? '' : 's'}` }; return new SO.Molecule.Trajectory(models, props); @@ -55,6 +57,27 @@ const TrajectoryFromMmCif = PluginStateTransform.BuiltIn({ } }); + +export { TrajectoryFromPDB } +type TrajectoryFromPDB = typeof TrajectoryFromPDB +const TrajectoryFromPDB = PluginStateTransform.BuiltIn({ + name: 'trajectory-from-pdb', + display: { name: 'Parse PDB string and create trajectory' }, + from: [SO.Data.String], + to: SO.Molecule.Trajectory +})({ + apply({ a }) { + return Task.create('Parse PDB', async ctx => { + const parsed = await parsePDB(a.data).runInContext(ctx); + if (parsed.isError) throw new Error(parsed.message); + const models = await trajectoryFromPDB(parsed.result).runInContext(ctx); + const props = { label: models[0].label, description: `${models.length} model${models.length === 1 ? '' : 's'}` }; + return new SO.Molecule.Trajectory(models, props); + }); + } +}); + + export { ModelFromTrajectory } const plus1 = (v: number) => v + 1, minus1 = (v: number) => v - 1; type ModelFromTrajectory = typeof ModelFromTrajectory diff --git a/src/perf-tests/lookup3d.ts b/src/perf-tests/lookup3d.ts index b74a2d4bbea45f2986085e1701ab3a2358d593e8..fb75917fc0d9299310e6c4c79ef5c673f1cd03cd 100644 --- a/src/perf-tests/lookup3d.ts +++ b/src/perf-tests/lookup3d.ts @@ -8,7 +8,7 @@ import { GridLookup3D } from 'mol-math/geometry'; // import { sortArray } from 'mol-data/util'; import { OrderedSet } from 'mol-data/int'; import { ModelFormat } from 'mol-model-parsers/structure/format'; -import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif'; +import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif'; require('util.promisify').shim(); const readFileAsync = util.promisify(fs.readFile); @@ -34,7 +34,7 @@ export async function readCIF(path: string) { } const mmcif = ModelFormat.mmCIF(parsed.result.blocks[0]); - const models = await parse_mmCIF(mmcif).run(); + const models = await trajecotryFromMmCIF(mmcif).run(); const structures = models.map(Structure.ofModel); return { mmcif: mmcif.data, models, structures }; diff --git a/src/perf-tests/structure.ts b/src/perf-tests/structure.ts index 91c326e4efc2dfc62037e9586d45923ec6f97928..60f485c27ecc8ead2305c85ee7f45973f16b28be 100644 --- a/src/perf-tests/structure.ts +++ b/src/perf-tests/structure.ts @@ -17,7 +17,7 @@ import { Structure, Model, Queries as Q, StructureElement, StructureSelection, S import to_mmCIF from 'mol-model/structure/export/mmcif' import { Vec3 } from 'mol-math/linear-algebra'; import { ModelFormat } from 'mol-model-parsers/structure/format'; -import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif'; +import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif'; // import { printUnits } from 'apps/structure-info/model'; // import { EquivalenceClasses } from 'mol-data/util'; @@ -76,7 +76,7 @@ export async function readCIF(path: string) { console.timeEnd('schema') console.time('buildModels') - const models = await parse_mmCIF(mmcif).run(); + const models = await trajecotryFromMmCIF(mmcif).run(); console.timeEnd('buildModels') const structures = models.map(Structure.ofModel); diff --git a/src/servers/model/server/structure-wrapper.ts b/src/servers/model/server/structure-wrapper.ts index 2e9944c5161d0ad4a31f953cdefc4306361b8e2b..b124bdd2869147a7aae429cb2487cabbe7243969 100644 --- a/src/servers/model/server/structure-wrapper.ts +++ b/src/servers/model/server/structure-wrapper.ts @@ -15,7 +15,7 @@ import * as zlib from 'zlib' import { Job } from './jobs'; import { ConsoleLogger } from 'mol-util/console-logger'; import { ModelPropertiesProvider } from '../property-provider'; -import { parse_mmCIF } from 'mol-model-parsers/structure/mmcif'; +import { trajecotryFromMmCIF } from 'mol-model-parsers/structure/mmcif'; import { ModelFormat } from 'mol-model-parsers/structure/format'; require('util.promisify').shim(); @@ -110,7 +110,7 @@ export async function readStructureWrapper(key: string, sourceId: string | '_loc const frame = (await parseCif(data)).blocks[0]; perf.end('parse'); perf.start('createModel'); - const models = await parse_mmCIF(ModelFormat.mmCIF(frame)).run(); + const models = await trajecotryFromMmCIF(ModelFormat.mmCIF(frame)).run(); perf.end('createModel'); const modelMap = new Map<number, Model>();