From e9e27f32408d524820501c2a57ed3203ee1ed50d Mon Sep 17 00:00:00 2001 From: Alexander Rose <alexander.rose@weirdbyte.de> Date: Thu, 28 Feb 2019 00:08:38 -0800 Subject: [PATCH] wip, basic gro format support --- src/mol-io/reader/_spec/gro.spec.ts | 6 +- src/mol-io/reader/cif/data-model.ts | 42 +++- src/mol-io/reader/gro/parser.ts | 4 +- src/mol-model-formats/structure/gro.ts | 236 +++++++------------- src/mol-model-formats/structure/pdb.ts | 2 +- src/mol-plugin/state/actions/data-format.ts | 3 +- src/mol-plugin/state/actions/structure.ts | 42 +++- src/mol-plugin/state/transforms/model.ts | 21 +- 8 files changed, 185 insertions(+), 171 deletions(-) diff --git a/src/mol-io/reader/_spec/gro.spec.ts b/src/mol-io/reader/_spec/gro.spec.ts index 055e4e61b..16a120a38 100644 --- a/src/mol-io/reader/_spec/gro.spec.ts +++ b/src/mol-io/reader/_spec/gro.spec.ts @@ -5,7 +5,7 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -import Gro from '../gro/parser' +import { parseGRO } from '../gro/parser' const groString = `MD of 2 waters, t= 4.2 6 @@ -26,7 +26,7 @@ const groStringHighPrecision = `Generated by trjconv : 2168 system t= 15.00000 describe('gro reader', () => { it('basic', async () => { - const parsed = await Gro(groString).run(); + const parsed = await parseGRO(groString).run(); if (parsed.isError) { console.log(parsed) @@ -57,7 +57,7 @@ describe('gro reader', () => { }); it('high precision', async () => { - const parsed = await Gro(groStringHighPrecision).run(); + const parsed = await parseGRO(groStringHighPrecision).run(); if (parsed.isError) { console.log(parsed) diff --git a/src/mol-io/reader/cif/data-model.ts b/src/mol-io/reader/cif/data-model.ts index 855cecb9b..2800437dc 100644 --- a/src/mol-io/reader/cif/data-model.ts +++ b/src/mol-io/reader/cif/data-model.ts @@ -132,7 +132,7 @@ export namespace CifField { } } - export function ofNumbers(values: number[]): CifField { + export function ofNumbers(values: ArrayLike<number>): CifField { const rowCount = values.length; const str: CifField['str'] = row => { return '' + values[row]; }; const float: CifField['float'] = row => values[row]; @@ -196,6 +196,46 @@ export namespace CifField { toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params) } } + + export function ofColumn(column: Column<any>): CifField { + const { rowCount, valueKind, areValuesEqual } = column; + + let str: CifField['str'] + let int: CifField['int'] + let float: CifField['float'] + + switch (column.schema.valueType) { + case 'float': + case 'int': + str = row => { return '' + column.value(row); }; + int = row => column.value(row); + float = row => column.value(row); + break + case 'str': + str = row => column.value(row); + int = row => { const v = column.value(row); return fastParseInt(v, 0, v.length) || 0; }; + float = row => { const v = column.value(row); return fastParseFloat(v, 0, v.length) || 0; }; + break + default: + throw new Error('unsupported') + } + + + return { + __array: void 0, + binaryEncoding: void 0, + isDefined: true, + rowCount, + str, + int, + float, + valueKind, + areValuesEqual, + toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params), + toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params), + toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params) + } + } } export function getTensor(category: CifCategory, field: string, space: Tensor.Space, row: number, zeroIndexed: boolean): Tensor.Data { diff --git a/src/mol-io/reader/gro/parser.ts b/src/mol-io/reader/gro/parser.ts index 0367a3ee8..1a181f1a6 100644 --- a/src/mol-io/reader/gro/parser.ts +++ b/src/mol-io/reader/gro/parser.ts @@ -155,10 +155,8 @@ async function parseInternal(data: string, ctx: RuntimeContext): Promise<Result< return Result.success(result); } -export function parse(data: string) { +export function parseGRO(data: string) { return Task.create<Result<Schema.GroFile>>('Parse GRO', async ctx => { return await parseInternal(data, ctx); }); } - -export default parse; \ No newline at end of file diff --git a/src/mol-model-formats/structure/gro.ts b/src/mol-model-formats/structure/gro.ts index 1e04fb1e7..bd75839a3 100644 --- a/src/mol-model-formats/structure/gro.ts +++ b/src/mol-model-formats/structure/gro.ts @@ -1,154 +1,82 @@ -// TODO: make this work when the time comes. -// /** -// * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. -// * -// * @author Alexander Rose <alexander.rose@weirdbyte.de> -// */ - -// import { Column, Table } from 'mol-data/db'; -// import { Interval, Segmentation } from 'mol-data/int'; -// import { mmCIF_Schema as mmCIF } from 'mol-io/reader/cif/schema/mmcif'; -// import { Atoms } from 'mol-io/reader/gro/schema'; -// import UUID from 'mol-util/uuid'; -// import Format from '../format'; -// import Model from '../model'; -// import { AtomicConformation, AtomicData, AtomicSegments, AtomsSchema, ChainsSchema, ResiduesSchema } from '../properties/atomic'; -// import { CoarseHierarchy } from '../properties/coarse'; -// import { Entities } from '../properties/common'; -// import Sequence from '../properties/sequence'; -// import { ModelSymmetry } from '../properties/symmetry'; -// import { guessElement } from '../properties/utils/guess-element'; -// import { getAtomicKeys } from '../properties/utils/keys'; -// import { ElementSymbol } from '../types'; - -// import gro_Format = Format.gro - -// type HierarchyOffsets = { residues: ArrayLike<number>, chains: ArrayLike<number> } - -// function findHierarchyOffsets(atomsData: Atoms, bounds: Interval) { -// const start = Interval.start(bounds), end = Interval.end(bounds); -// const residues = [start], chains = [start]; - -// const { residueName, residueNumber } = atomsData; - -// for (let i = start + 1; i < end; i++) { -// const newResidue = !residueNumber.areValuesEqual(i - 1, i) -// || !residueName.areValuesEqual(i - 1, i); -// console.log(residueName.value(i - 1), residueName.value(i), residueNumber.value(i - 1), residueNumber.value(i), newResidue) -// if (newResidue) residues[residues.length] = i; -// } -// console.log(residues, residues.length) -// return { residues, chains }; -// } - -// function guessElementSymbol (value: string) { -// return ElementSymbol(guessElement(value)); -// } - -// function createHierarchyData(atomsData: Atoms, offsets: HierarchyOffsets): AtomicData { -// console.log(atomsData.atomName) -// const atoms = Table.ofColumns(AtomsSchema, { -// type_symbol: Column.ofArray({ array: Column.mapToArray(atomsData.atomName, guessElementSymbol), schema: Column.Schema.Aliased<ElementSymbol>(Column.Schema.str) }), -// label_atom_id: atomsData.atomName, -// auth_atom_id: atomsData.atomName, -// label_alt_id: Column.Undefined(atomsData.count, Column.Schema.str), -// pdbx_formal_charge: Column.Undefined(atomsData.count, Column.Schema.int) -// }); - -// const residues = Table.view(Table.ofColumns(ResiduesSchema, { -// group_PDB: Column.Undefined(atomsData.count, Column.Schema.Aliased<'ATOM' | 'HETATM'>(Column.Schema.str)), -// label_comp_id: atomsData.residueName, -// auth_comp_id: atomsData.residueName, -// label_seq_id: atomsData.residueNumber, -// auth_seq_id: atomsData.residueNumber, -// pdbx_PDB_ins_code: Column.Undefined(atomsData.count, Column.Schema.str), -// }), ResiduesSchema, offsets.residues); -// // Optimize the numeric columns -// Table.columnToArray(residues, 'label_seq_id', Int32Array); -// Table.columnToArray(residues, 'auth_seq_id', Int32Array); - -// // const chains = Table.ofColumns(Hierarchy.ChainsSchema, { -// // label_asym_id: Column.ofConst('A', atomsData.count, Column.Schema.str), -// // auth_asym_id: Column.ofConst('A', atomsData.count, Column.Schema.str), -// // label_entity_id: Column.Undefined(atomsData.count, Column.Schema.str) -// // }); - -// const chains = Table.ofUndefinedColumns(ChainsSchema, 0); - -// return { atoms, residues, chains }; -// } - -// function getConformation(atoms: Atoms): AtomicConformation { -// return { -// id: UUID.create(), -// atomId: atoms.atomNumber, -// occupancy: Column.Undefined(atoms.count, Column.Schema.int), -// B_iso_or_equiv: Column.Undefined(atoms.count, Column.Schema.float), -// x: Column.mapToArray(atoms.x, x => x * 10, Float32Array), -// y: Column.mapToArray(atoms.y, y => y * 10, Float32Array), -// z: Column.mapToArray(atoms.z, z => z * 10, Float32Array) -// } -// } - -// function isHierarchyDataEqual(a: AtomicData, b: AtomicData) { -// // need to cast because of how TS handles type resolution for interfaces https://github.com/Microsoft/TypeScript/issues/15300 -// return Table.areEqual(a.residues as Table<ResiduesSchema>, b.residues as Table<ResiduesSchema>) -// && Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>) -// } - -// function createModel(format: gro_Format, modelNum: number, previous?: Model): Model { -// const structure = format.data.structures[modelNum]; -// const bounds = Interval.ofBounds(0, structure.atoms.count); - -// const hierarchyOffsets = findHierarchyOffsets(structure.atoms, bounds); -// const hierarchyData = createHierarchyData(structure.atoms, hierarchyOffsets); - -// if (previous && isHierarchyDataEqual(previous.atomicHierarchy, hierarchyData)) { -// return { -// ...previous, -// atomicConformation: getConformation(structure.atoms) -// }; -// } - -// const hierarchySegments: AtomicSegments = { -// residueSegments: Segmentation.ofOffsets(hierarchyOffsets.residues, bounds), -// chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, bounds), -// } - -// // TODO: create a better mock entity -// const entityTable = Table.ofRows<mmCIF['entity']>(mmCIF.entity, [{ -// id: '0', -// src_method: 'syn', -// type: 'polymer', -// pdbx_number_of_molecules: 1 -// }]); - -// const entities: Entities = { data: entityTable, getEntityIndex: Column.createIndexer(entityTable.id) }; - -// const hierarchyKeys = getAtomicKeys(hierarchyData, entities, hierarchySegments); -// const atomicHierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments }; -// return { -// id: UUID.create(), -// sourceData: format, -// modelNum, -// atomicHierarchy, -// entities, -// sequence: Sequence.fromAtomicHierarchy(atomicHierarchy), -// atomicConformation: getConformation(structure.atoms), -// coarseHierarchy: CoarseHierarchy.Empty, -// coarseConformation: void 0 as any, -// symmetry: ModelSymmetry.Default -// }; -// } - -// function buildModels(format: gro_Format): ReadonlyArray<Model> { -// const models: Model[] = []; - -// format.data.structures.forEach((_, i) => { -// const model = createModel(format, i, models.length > 0 ? models[models.length - 1] : void 0); -// models.push(model); -// }); -// return models; -// } - -// export default buildModels; +/** + * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { Model } from 'mol-model/structure/model'; +import { Task } from 'mol-task'; +import { ModelFormat } from './format'; +import { _parse_mmCif } from './mmcif/parser'; +import { GroFile, GroAtoms } from 'mol-io/reader/gro/schema'; +import { CifCategory, CifField } from 'mol-io/reader/cif'; +import { Column } from 'mol-data/db'; +import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; + +// TODO multi model files +// TODO seperate chains +// TODO better entity handling +// TODO improve performance + +function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } { + return { + id: CifField.ofStrings(['1', '2', '3']), + type: CifField.ofStrings(['polymer', 'non-polymer', 'water']) + } +} + +function _atom_site(atoms: GroAtoms): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } { + const auth_asym_id = CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)) + const auth_atom_id = CifField.ofColumn(atoms.atomName) + const auth_comp_id = CifField.ofColumn(atoms.residueName) + const auth_seq_id = CifField.ofColumn(atoms.residueNumber) + + return { + auth_asym_id, + auth_atom_id, + auth_comp_id, + auth_seq_id, + B_iso_or_equiv: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.float)), + Cartn_x: CifField.ofNumbers(Column.mapToArray(atoms.x, x => x * 10, Float32Array)), + Cartn_y: CifField.ofNumbers(Column.mapToArray(atoms.y, y => y * 10, Float32Array)), + Cartn_z: CifField.ofNumbers(Column.mapToArray(atoms.z, z => z * 10, Float32Array)), + group_PDB: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), + id: CifField.ofColumn(atoms.atomNumber), + + label_alt_id: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), + + label_asym_id: auth_asym_id, + label_atom_id: auth_atom_id, + label_comp_id: auth_comp_id, + label_seq_id: auth_seq_id, + label_entity_id: CifField.ofColumn(Column.ofConst('1', atoms.count, Column.Schema.str)), + + occupancy: CifField.ofColumn(Column.ofConst(1, atoms.count, Column.Schema.float)), + type_symbol: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), + + pdbx_PDB_ins_code: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), + pdbx_PDB_model_num: CifField.ofColumn(Column.ofConst('1', atoms.count, Column.Schema.str)), + } +} + +async function groToMmCif(gro: GroFile) { + const categories = { + entity: CifCategory.ofFields('entity', _entity()), + atom_site: CifCategory.ofFields('atom_site', _atom_site(gro.structures[0].atoms)) + } as any; + + return { + header: 'GRO', + categoryNames: Object.keys(categories), + categories + }; +} + +export function trajectoryFromGRO(gro: GroFile): Task<Model.Trajectory> { + return Task.create('Parse GRO', async ctx => { + await ctx.update('Converting to mmCIF'); + const cif = await groToMmCif(gro); + const format = ModelFormat.mmCIF(cif); + return _parse_mmCif(format, ctx); + }) +} diff --git a/src/mol-model-formats/structure/pdb.ts b/src/mol-model-formats/structure/pdb.ts index a86958769..cce4dc869 100644 --- a/src/mol-model-formats/structure/pdb.ts +++ b/src/mol-model-formats/structure/pdb.ts @@ -1,5 +1,5 @@ /** - * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info. * * @author David Sehnal <david.sehnal@gmail.com> */ diff --git a/src/mol-plugin/state/actions/data-format.ts b/src/mol-plugin/state/actions/data-format.ts index eb9ad4612..7e4bbd656 100644 --- a/src/mol-plugin/state/actions/data-format.ts +++ b/src/mol-plugin/state/actions/data-format.ts @@ -12,7 +12,7 @@ import { PluginStateObject } from '../objects'; import { ParamDefinition as PD } from 'mol-util/param-definition'; import { Ccp4Provider, Dsn6Provider, DscifProvider } from './volume'; import { StateTransforms } from '../transforms'; -import { MmcifProvider, PdbProvider } from './structure'; +import { MmcifProvider, PdbProvider, GroProvider } from './structure'; export class DataFormatRegistry<D extends PluginStateObject.Data.Binary | PluginStateObject.Data.String> { private _list: { name: string, provider: DataFormatProvider<D> }[] = [] @@ -56,6 +56,7 @@ export class DataFormatRegistry<D extends PluginStateObject.Data.Binary | Plugin this.add('ccp4', Ccp4Provider) this.add('dscif', DscifProvider) this.add('dsn6', Dsn6Provider) + this.add('gro', GroProvider) this.add('mmcif', MmcifProvider) this.add('pdb', PdbProvider) }; diff --git a/src/mol-plugin/state/actions/structure.ts b/src/mol-plugin/state/actions/structure.ts index 53696f1eb..0a5e93d99 100644 --- a/src/mol-plugin/state/actions/structure.ts +++ b/src/mol-plugin/state/actions/structure.ts @@ -22,7 +22,7 @@ export const MmcifProvider: DataFormatProvider<any> = { description: 'mmCIF', stringExtensions: ['cif', 'mmcif', 'mcif'], binaryExtensions: ['bcif'], - isApplicable: (info: FileInfo, data: Uint8Array) => { + isApplicable: (info: FileInfo, data: Uint8Array | string) => { return info.ext === 'cif' || info.ext === 'mmcif' || info.ext === 'mcif' || info.ext === 'bcif' }, getDefaultBuilder: (ctx: PluginContext, data: StateBuilder.To<PluginStateObject.Data.Binary | PluginStateObject.Data.String>, state: State) => { @@ -38,17 +38,33 @@ export const PdbProvider: DataFormatProvider<any> = { description: 'PDB', stringExtensions: ['pdb', 'ent'], binaryExtensions: [], - isApplicable: (info: FileInfo, data: Uint8Array) => { + isApplicable: (info: FileInfo, data: string) => { return info.ext === 'pdb' || info.ext === 'ent' }, getDefaultBuilder: (ctx: PluginContext, data: StateBuilder.To<PluginStateObject.Data.String>, state: State) => { - return Task.create('mmCIF default builder', async taskCtx => { + return Task.create('PDB default builder', async taskCtx => { const traj = createModelTree(data, 'pdb'); await state.updateTree(createStructureTree(ctx, traj, false)).runInContext(taskCtx) }) } } +export const GroProvider: DataFormatProvider<any> = { + label: 'GRO', + description: 'GRO', + stringExtensions: ['gro'], + binaryExtensions: [], + isApplicable: (info: FileInfo, data: string) => { + return info.ext === 'gro' + }, + getDefaultBuilder: (ctx: PluginContext, data: StateBuilder.To<PluginStateObject.Data.String>, state: State) => { + return Task.create('GRO default builder', async taskCtx => { + const traj = createModelTree(data, 'gro'); + await state.updateTree(createStructureTree(ctx, traj, false)).runInContext(taskCtx) + }) + } +} + // export { DownloadStructure }; @@ -111,10 +127,22 @@ const DownloadStructure = StateAction.build({ return state.updateTree(createStructureTree(ctx, traj, params.source.params.supportProps)); }); -function createModelTree(b: StateBuilder.To<PluginStateObject.Data.Binary | PluginStateObject.Data.String>, format: 'pdb' | 'cif' = 'cif') { - const parsed = format === 'cif' - ? b.apply(StateTransforms.Data.ParseCif, void 0, { props: { isGhost: true }}).apply(StateTransforms.Model.TrajectoryFromMmCif, void 0, { props: { isGhost: true }}) - : b.apply(StateTransforms.Model.TrajectoryFromPDB, void 0, { props: { isGhost: true }}); +function createModelTree(b: StateBuilder.To<PluginStateObject.Data.Binary | PluginStateObject.Data.String>, format: 'pdb' | 'cif' | 'gro' = 'cif') { + let parsed: StateBuilder.To<PluginStateObject.Molecule.Trajectory> + switch (format) { + case 'cif': + parsed = b.apply(StateTransforms.Data.ParseCif, void 0, { props: { isGhost: true }}) + .apply(StateTransforms.Model.TrajectoryFromMmCif, void 0, { props: { isGhost: true }}) + break + case 'pdb': + parsed = b.apply(StateTransforms.Model.TrajectoryFromPDB, void 0, { props: { isGhost: true }}); + break + case 'gro': + parsed = b.apply(StateTransforms.Model.TrajectoryFromGRO, void 0, { props: { isGhost: true }}); + break + default: + throw new Error('unsupported format') + } return parsed.apply(StateTransforms.Model.ModelFromTrajectory, { modelIndex: 0 }); } diff --git a/src/mol-plugin/state/transforms/model.ts b/src/mol-plugin/state/transforms/model.ts index 7131485bb..67b29a021 100644 --- a/src/mol-plugin/state/transforms/model.ts +++ b/src/mol-plugin/state/transforms/model.ts @@ -20,9 +20,12 @@ import { RuntimeContext, Task } from 'mol-task'; import { ParamDefinition as PD } from 'mol-util/param-definition'; import { stringToWords } from 'mol-util/string'; import { PluginStateObject as SO, PluginStateTransform } from '../objects'; +import { trajectoryFromGRO } from 'mol-model-formats/structure/gro'; +import { parseGRO } from 'mol-io/reader/gro/parser'; export { TrajectoryFromMmCif }; export { TrajectoryFromPDB }; +export { TrajectoryFromGRO }; export { ModelFromTrajectory }; export { StructureFromModel }; export { StructureAssemblyFromModel }; @@ -62,7 +65,6 @@ const TrajectoryFromMmCif = PluginStateTransform.BuiltIn({ } }); - type TrajectoryFromPDB = typeof TrajectoryFromPDB const TrajectoryFromPDB = PluginStateTransform.BuiltIn({ name: 'trajectory-from-pdb', @@ -81,6 +83,23 @@ const TrajectoryFromPDB = PluginStateTransform.BuiltIn({ } }); +type TrajectoryFromGRO = typeof TrajectoryFromGRO +const TrajectoryFromGRO = PluginStateTransform.BuiltIn({ + name: 'trajectory-from-gro', + display: { name: 'Parse GRO', description: 'Parse GRO string and create trajectory.' }, + from: [SO.Data.String], + to: SO.Molecule.Trajectory +})({ + apply({ a }) { + return Task.create('Parse GRO', async ctx => { + const parsed = await parseGRO(a.data).runInContext(ctx); + if (parsed.isError) throw new Error(parsed.message); + const models = await trajectoryFromGRO(parsed.result).runInContext(ctx); + const props = { label: models[0].label, description: `${models.length} model${models.length === 1 ? '' : 's'}` }; + return new SO.Molecule.Trajectory(models, props); + }); + } +}); const plus1 = (v: number) => v + 1, minus1 = (v: number) => v - 1; type ModelFromTrajectory = typeof ModelFromTrajectory -- GitLab