From e90ccfdd209fdb837a847ce11ef5142f010746ed Mon Sep 17 00:00:00 2001 From: Alexander Rose <alexander.rose@weirdbyte.de> Date: Thu, 9 Apr 2020 18:42:33 -0700 Subject: [PATCH] basic support for models from cif-core files --- data/cif-field-names/cif-core-field-names.csv | 4 +- src/mol-io/reader/cif/schema/cif-core.ts | 27 +++ src/mol-model-formats/structure/cif-core.ts | 165 ++++++++++++++++++ src/mol-plugin-state/actions/data-format.ts | 9 +- src/mol-plugin-state/actions/structure.ts | 22 ++- src/mol-plugin-state/formats/registry.ts | 6 +- src/mol-plugin-state/formats/trajectory.ts | 27 ++- src/mol-plugin-state/transforms/model.ts | 33 ++++ src/mol-plugin/index.ts | 1 + 9 files changed, 284 insertions(+), 10 deletions(-) create mode 100644 src/mol-model-formats/structure/cif-core.ts diff --git a/data/cif-field-names/cif-core-field-names.csv b/data/cif-field-names/cif-core-field-names.csv index 3cc24d659..c104fefdb 100644 --- a/data/cif-field-names/cif-core-field-names.csv +++ b/data/cif-field-names/cif-core-field-names.csv @@ -59,5 +59,7 @@ atom_site_aniso.U_12 geom_bond.atom_site_label_1 geom_bond.atom_site_label_2 geom_bond.distance +geom_bond.site_symmetry_1 geom_bond.site_symmetry_2 -geom_bond.publ_flag \ No newline at end of file +geom_bond.publ_flag +geom_bond.valence \ No newline at end of file diff --git a/src/mol-io/reader/cif/schema/cif-core.ts b/src/mol-io/reader/cif/schema/cif-core.ts index 853649733..211b208aa 100644 --- a/src/mol-io/reader/cif/schema/cif-core.ts +++ b/src/mol-io/reader/cif/schema/cif-core.ts @@ -280,6 +280,29 @@ export const CifCore_Schema = { * publication or should be placed in a table of significant angles. */ publ_flag: str, + /** + * The set of data items which specify the symmetry operation codes + * which must be applied to the atom sites involved in the geometry angle. + * + * The symmetry code of each atom site as the symmetry-equivalent position + * number 'n' and the cell translation number 'pqr'. 
These numbers are + * combined to form the code 'n pqr' or n_pqr. + * + * The character string n_pqr is composed as follows: + * + * n refers to the symmetry operation that is applied to the + * coordinates stored in _atom_site.fract_xyz. It must match a + * number given in _symmetry_equiv.pos_site_id. + * + * p, q and r refer to the translations that are subsequently + * applied to the symmetry transformed coordinates to generate + * the atom used in calculating the angle. These translations + * (x,y,z) are related to (p,q,r) by the relations + * p = 5 + x + * q = 5 + y + * r = 5 + z + */ + site_symmetry_1: str, /** * The set of data items which specify the symmetry operation codes * which must be applied to the atom sites involved in the geometry angle. @@ -303,6 +326,10 @@ export const CifCore_Schema = { * r = 5 + z */ site_symmetry_2: str, + /** + * Bond valence calculated from the bond distance. + */ + valence: float, }, /** * The CATEGORY of data items used to record details about the diff --git a/src/mol-model-formats/structure/cif-core.ts b/src/mol-model-formats/structure/cif-core.ts new file mode 100644 index 000000000..4513d0415 --- /dev/null +++ b/src/mol-model-formats/structure/cif-core.ts @@ -0,0 +1,165 @@ +/** + * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info. 
/**
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

import { Column, Table } from '../../mol-data/db';
import { Model, Symmetry } from '../../mol-model/structure/model';
import { MoleculeType } from '../../mol-model/structure/model/types';
import { RuntimeContext, Task } from '../../mol-task';
import { createModels } from './basic/parser';
import { BasicSchema, createBasic } from './basic/schema';
import { ComponentBuilder } from './common/component';
import { EntityBuilder } from './common/entity';
import { ModelFormat } from './format';
import { CifCore_Database } from '../../mol-io/reader/cif/schema/cif-core';
import { CifFrame, CIF } from '../../mol-io/reader/cif';
import { Spacegroup, SpacegroupCell } from '../../mol-math/geometry';
import { Vec3 } from '../../mol-math/linear-algebra';
import { ModelSymmetry } from './property/symmetry';
import { IndexPairBonds } from './property/bonds/index-pair';

/**
 * Picks the space group identifier from the first row of `space_group`.
 *
 * @returns the `IT_number` value when that column is defined, otherwise the
 * `name_H-M_full` value.
 */
function getSpacegroupNameOrNumber(space_group: CifCore_Database['space_group']) {
    const groupNumber = space_group.IT_number.value(0);
    const groupName = space_group['name_H-M_full'].value(0);
    return space_group.IT_number.isDefined ? groupNumber : groupName;
}

/**
 * Builds the `Symmetry` of the data block from the first row of its `cell`
 * and `space_group` categories. Assemblies and NCS operators are left empty.
 */
function getSymmetry(db: CifCore_Database): Symmetry {
    const { cell, space_group } = db;
    const nameOrNumber = getSpacegroupNameOrNumber(space_group);

    const lengths = Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0));
    const anglesInDegrees = Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0));
    // the cell angles are scaled by PI / 180 (degrees to radians) before being handed to the spacegroup cell
    const anglesInRadians = Vec3.scale(Vec3.zero(), anglesInDegrees, Math.PI / 180);
    const spaceCell = SpacegroupCell.create(nameOrNumber, lengths, anglesInRadians);

    return {
        spacegroup: Spacegroup.create(spaceCell),
        assemblies: [],
        isNonStandardCrytalFrame: false,
        ncsOperators: []
    };
}

/**
 * Collects the bonds listed in `geom_bond` as pairs of `atom_site` row indices.
 *
 * Only bonds whose two site symmetry codes are equal are kept, and bonds
 * that reference a label missing from `atom_site` are skipped. The returned
 * arrays are dense and of equal length.
 */
function getBondPairs(db: CifCore_Database) {
    const { label } = db.atom_site;
    const labelIndexMap = new Map<string, number>();
    for (let i = 0, il = label.rowCount; i < il; ++i) {
        labelIndexMap.set(label.value(i), i);
    }

    const indexA: number[] = [];
    const indexB: number[] = [];
    const order: number[] = [];

    const { atom_site_label_1, atom_site_label_2, valence, site_symmetry_1, site_symmetry_2 } = db.geom_bond;
    for (let i = 0, il = db.geom_bond._rowCount; i < il; ++i) {
        // TODO support symmetry
        if (site_symmetry_1.value(i) !== site_symmetry_2.value(i)) continue;

        const a = labelIndexMap.get(atom_site_label_1.value(i));
        const b = labelIndexMap.get(atom_site_label_2.value(i));
        // a bond to an unknown atom label cannot be expressed as an index pair
        if (a === undefined || b === undefined) continue;

        // append instead of writing at `i` so that skipped bonds leave no holes
        indexA.push(a);
        indexB.push(b);
        // TODO derive from bond length if undefined
        // NOTE(review): `valence` is a float column but ends up in an int column - confirm rounding is intended
        order.push(valence.isDefined ? valence.value(i) : 1);
    }

    return { indexA, indexB, order };
}

/**
 * Creates models from a CIF Core data block.
 *
 * All atoms are placed in a single component 'MOL' of chain 'A'. Fractional
 * coordinates are converted to cartesian ones with the `fromFractional`
 * matrix of the spacegroup cell. If any model is created, the symmetry and,
 * when `geom_bond` has rows, the listed bonds are attached to the first model.
 *
 * @param db the typed CIF Core data block
 * @param format the format object passed on to `createModels`
 * @param ctx runtime context passed on to `createModels`
 * @returns the models created by `createModels`
 */
async function getModels(db: CifCore_Database, format: CifCoreFormat, ctx: RuntimeContext): Promise<Model[]> {
    const atomCount = db.atom_site._rowCount;
    const MOL = Column.ofConst('MOL', atomCount, Column.Schema.str);
    const A = Column.ofConst('A', atomCount, Column.Schema.str);
    const seq_id = Column.ofConst(1, atomCount, Column.Schema.int);

    const symmetry = getSymmetry(db);
    const m = symmetry.spacegroup.cell.fromFractional;

    const { fract_x, fract_y, fract_z } = db.atom_site;
    const x = new Float32Array(atomCount);
    const y = new Float32Array(atomCount);
    const z = new Float32Array(atomCount);
    const v = Vec3();
    for (let i = 0; i < atomCount; ++i) {
        Vec3.set(v, fract_x.value(i), fract_y.value(i), fract_z.value(i));
        Vec3.transformMat4(v, v, m);
        x[i] = v[0];
        y[i] = v[1];
        z[i] = v[2];
    }

    const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, {
        auth_asym_id: A,
        auth_atom_id: db.atom_site.label,
        auth_comp_id: MOL,
        auth_seq_id: seq_id,
        Cartn_x: Column.ofFloatArray(x),
        Cartn_y: Column.ofFloatArray(y),
        Cartn_z: Column.ofFloatArray(z),
        id: Column.range(0, atomCount - 1),

        label_asym_id: A,
        label_atom_id: db.atom_site.label,
        label_comp_id: MOL,
        label_seq_id: seq_id,
        label_entity_id: Column.ofConst('1', atomCount, Column.Schema.str),

        occupancy: db.atom_site.occupancy,
        type_symbol: db.atom_site.type_symbol,

        pdbx_PDB_model_num: Column.ofConst(1, atomCount, Column.Schema.int),
    }, atomCount);

    // the common name wins over the systematic one; an empty name falls back to a placeholder below
    const name = db.chemical.name_common.value(0) || db.chemical.name_systematic.value(0);

    const entityBuilder = new EntityBuilder();
    entityBuilder.setNames([['MOL', name || 'Unknown Entity']]);
    entityBuilder.getEntityId('MOL', MoleculeType.Unknown, 'A');

    const componentBuilder = new ComponentBuilder(seq_id, db.atom_site.type_symbol);
    componentBuilder.setNames([['MOL', name || 'Unknown Molecule']]);
    componentBuilder.add('MOL', 0);

    const basics = createBasic({
        entity: entityBuilder.getEntityTable(),
        chem_comp: componentBuilder.getChemCompTable(),
        atom_site
    });

    const models = await createModels(basics, format, ctx);

    if (models.length > 0) {
        ModelSymmetry.Provider.set(models[0], symmetry);

        if (db.geom_bond._rowCount > 0) {
            const { indexA, indexB, order } = getBondPairs(db);
            // NOTE(review): `count` is the number of kept pairs, as before - confirm that
            // `IndexPairBonds.fromData` does not expect the number of atoms here
            IndexPairBonds.Provider.set(models[0], IndexPairBonds.fromData({ pairs: {
                indexA: Column.ofIntArray(indexA),
                indexB: Column.ofIntArray(indexB),
                order: Column.ofIntArray(order)
            }, count: indexA.length }));
        }
    }

    return models;
}

//

export { CifCoreFormat };

/** Model format of kind 'cifCore' carrying the typed database and the CIF frame it was read from. */
type CifCoreFormat = ModelFormat<CifCoreFormat.Data>

namespace CifCoreFormat {
    export type Data = { db: CifCore_Database, frame: CifFrame }

    /** Type guard checking the `kind` tag of a model format. */
    export function is(x: ModelFormat): x is CifCoreFormat {
        return x.kind === 'cifCore';
    }

    /**
     * Wraps a CIF frame as a `CifCoreFormat`.
     *
     * @param frame the CIF frame (data block)
     * @param db typed view of `frame`; created with `CIF.schema.cifCore` when omitted
     */
    export function fromFrame(frame: CifFrame, db?: CifCore_Database): CifCoreFormat {
        if (!db) db = CIF.schema.cifCore(frame);
        return { kind: 'cifCore', name: db._name, data: { db, frame } };
    }
}

/** Creates a task named 'Parse CIF Core' that builds the models of the given CIF frame. */
export function trajectoryFromCifCore(frame: CifFrame): Task<Model.Trajectory> {
    const format = CifCoreFormat.fromFrame(frame);
    return Task.create('Parse CIF Core', ctx => getModels(format.data.db, format, ctx));
}
a/src/mol-plugin-state/actions/data-format.ts +++ b/src/mol-plugin-state/actions/data-format.ts @@ -12,7 +12,7 @@ import { FileInfo, getFileInfo } from '../../mol-util/file-info'; import { ParamDefinition as PD } from '../../mol-util/param-definition'; import { PluginStateObject } from '../objects'; import { PlyProvider } from './shape'; -import { DcdProvider, GroProvider, MmcifProvider, PdbProvider, Provider3dg, PsfProvider, MolProvider } from './structure'; +import { DcdProvider, GroProvider, MmcifProvider, PdbProvider, Provider3dg, PsfProvider, MolProvider, CifCoreProvider } from './structure'; import { Ccp4Provider, DscifProvider, Dsn6Provider } from './volume'; export class DataFormatRegistry<D extends PluginStateObject.Data.Binary | PluginStateObject.Data.String> { @@ -56,6 +56,7 @@ export class DataFormatRegistry<D extends PluginStateObject.Data.Binary | Plugin constructor() { this.add('3dg', Provider3dg) this.add('ccp4', Ccp4Provider) + this.add('cifCore', CifCoreProvider) this.add('dcd', DcdProvider) this.add('dscif', DscifProvider) this.add('dsn6', Dsn6Provider) @@ -157,7 +158,7 @@ export const OpenFiles = StateAction.build({ // -type cifVariants = 'dscif' | -1 +type cifVariants = 'dscif' | 'coreCif' | -1 export function guessCifVariant(info: FileInfo, data: Uint8Array | string): cifVariants { if (info.ext === 'bcif') { try { @@ -166,7 +167,9 @@ export function guessCifVariant(info: FileInfo, data: Uint8Array | string): cifV if (msgpackDecode(data as Uint8Array).encoder.startsWith('VolumeServer')) return 'dscif' } catch { } } else if (info.ext === 'cif') { - if ((data as string).startsWith('data_SERVER\n#\n_density_server_result')) return 'dscif' + const str = data as string + if (str.startsWith('data_SERVER\n#\n_density_server_result')) return 'dscif' + if (str.includes('atom_site_fract_x') || str.includes('atom_site.fract_x')) return 'coreCif' } return -1 } \ No newline at end of file diff --git a/src/mol-plugin-state/actions/structure.ts 
/**
 * Data format provider for CIF Core text files.
 *
 * Applies to `.cif` files that `guessCifVariant` identifies as 'coreCif'.
 * Its default builder parses the data as a 'cifCore' trajectory and applies
 * the 'default' hierarchy preset, showing the unit cell when visuals are
 * requested.
 */
export const CifCoreProvider: DataFormatProvider<any> = {
    label: 'cifCore',
    description: 'CIF Core',
    stringExtensions: ['cif'],
    binaryExtensions: [],
    isApplicable: (info: FileInfo, data: Uint8Array | string) => {
        // the `cif` extension is also claimed by the mmCIF provider, so the content has to decide
        return info.ext === 'cif' && guessCifVariant(info, data) === 'coreCif';
    },
    getDefaultBuilder: (ctx: PluginContext, data, options) => {
        // the task label used to read 'mmCIF default builder', copied from the mmCIF provider
        return Task.create('CIF Core default builder', async () => {
            const trajectory = await ctx.builders.structure.parseTrajectory(data, 'cifCore');
            const representationPreset = options.visuals ? 'auto' : 'empty';
            await ctx.builders.structure.hierarchy.applyPreset(trajectory, 'default', { showUnitcell: options.visuals, representationPreset });
        });
    }
};
/**
 * Trajectory format provider for CIF Core text files.
 *
 * Applies to `.cif` files that `guessCifVariant` identifies as 'coreCif'.
 * Parsing adds a ghost `ParseCif` node followed by a `TrajectoryFromCifCore`
 * node; the CIF node is un-ghosted when the file holds more than one block.
 */
export const CifCoreProvider: TrajectoryFormatProvider = {
    label: 'cifCore',
    description: 'CIF Core',
    stringExtensions: ['cif'],
    binaryExtensions: [],
    isApplicable: (info: FileInfo, data: Uint8Array | string) => {
        if (info.ext !== 'cif') return false;
        return guessCifVariant(info, data) === 'coreCif';
    },
    parse: async (plugin, data, params) => {
        const dataState = plugin.state.data;

        const parsedCif = dataState.build()
            .to(data)
            .apply(StateTransforms.Data.ParseCif, void 0, { state: { isGhost: true } });
        const parsedTrajectory = parsedCif.apply(StateTransforms.Model.TrajectoryFromCifCore, void 0, { tags: params?.trajectoryTags });

        await plugin.updateDataState(parsedTrajectory, { revertOnError: true });

        // a missing cell or object counts as zero blocks
        const blockCount = parsedCif.selector.cell?.obj?.data.blocks.length || 0;
        if (blockCount > 1) {
            plugin.state.data.updateCellState(parsedCif.ref, { isGhost: false });
        }

        return { trajectory: parsedTrajectory.selector };
    }
};
/**
 * State transform turning parsed CIF data into a trajectory of CIF Core models.
 *
 * The `blockHeader` parameter selects the data block to parse; when it is
 * empty the first block of the file is used. Applying the transform throws
 * if the file has no data blocks, if no block has the requested header, or
 * if no models could be created from the block.
 */
type TrajectoryFromCifCore = typeof TrajectoryFromCifCore
const TrajectoryFromCifCore = PluginStateTransform.BuiltIn({
    name: 'trajectory-from-cif-core',
    display: { name: 'Parse CIF Core', description: 'Identify and create all separate models in the specified CIF data block' },
    from: SO.Format.Cif,
    to: SO.Molecule.Trajectory,
    params(a) {
        if (!a) {
            // without a source object there are no block headers to choose from, so offer free text
            return {
                blockHeader: PD.Optional(PD.Text(void 0, { description: 'Header of the block to parse. If none is specified, the 1st data block in the file is used.' }))
            };
        }
        const { blocks } = a.data;
        return {
            blockHeader: PD.Optional(PD.Select(blocks[0] && blocks[0].header, blocks.map(b => [b.header, b.header] as [string, string]), { description: 'Header of the block to parse' }))
        };
    }
})({
    apply({ a, params }) {
        return Task.create('Parse CIF Core', async ctx => {
            const { blocks } = a.data;
            // fall back to the first block, guarding against files without any block
            const header = params.blockHeader || (blocks.length > 0 ? blocks[0].header : void 0);
            if (header === void 0) throw new Error('No data blocks found.');

            const block = blocks.find(b => b.header === header);
            if (!block) throw new Error(`Data block '${header}' not found.`);

            const models = await trajectoryFromCifCore(block).runInContext(ctx);
            if (models.length === 0) throw new Error('No models found.');

            const props = { label: `${models[0].entry}`, description: `${models.length} model${models.length === 1 ? '' : 's'}` };
            return new SO.Molecule.Trajectory(models, props);
        });
    }
});