diff --git a/README.md b/README.md index d35442edd4eaefce2553a3486341583f9c5b44ad..d0e4fb4a20e1e903552075c531185d1360d08db3 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ The core of Mol* currently consists of these modules: - `mol-math` Math related (loosely) algorithms and data structures. - `mol-io` Parsing library. Each format is parsed into an interface that corresponds to the data stored by it. Support for common coordinate, experimental/map, and annotation data formats. - `mol-model` Data structures and algorithms (such as querying) for representing molecular data (including coordinate, experimental/map, and annotation data). +- `mol-model-parsers` Data format parsers for `mol-model`. - `mol-model-props` Common "custom properties". - `mol-script` A scriting language for creating representations/scenes and querying (includes the [MolQL query language](https://molql.github.io)). - `mol-geo` Creating (molecular) geometries. diff --git a/package.json b/package.json index b139054660070dab572be66c51017642c92bcb75..4e959aeb69ec07ba6d21bf3d0708cdd3310ed93f 100644 --- a/package.json +++ b/package.json @@ -54,6 +54,7 @@ "mol-math($|/.*)": "<rootDir>/src/mol-math$1", "mol-model($|/.*)": "<rootDir>/src/mol-model$1", "mol-model-props($|/.*)": "<rootDir>/src/mol-model-props$1", + "mol-model-parsers($|/.*)": "<rootDir>/src/mol-model-parsers$1", "mol-plugin($|/.*)": "<rootDir>/src/mol-plugin$1", "mol-ql($|/.*)": "<rootDir>/src/mol-ql$1", "mol-repr($|/.*)": "<rootDir>/src/mol-repr$1", diff --git a/src/mol-io/reader/_spec/cif.spec.ts b/src/mol-io/reader/_spec/cif.spec.ts index a2fb03ed952f69e1c9c93878a7b5904d174f3ddd..43e5cd4c5139e3da21b9d7de00d57cae59bae5c0 100644 --- a/src/mol-io/reader/_spec/cif.spec.ts +++ b/src/mol-io/reader/_spec/cif.spec.ts @@ -6,17 +6,16 @@ */ import * as Data from '../cif/data-model' -import TextField from '../cif/text/field' import * as Schema from '../cif/schema' import { Column } from 'mol-data/db' const columnData = `123abc d,e,f '4 5 
6'`; // 123abc d,e,f '4 5 6' -const intField = TextField({ data: columnData, indices: [0, 1, 1, 2, 2, 3], count: 3 }, 3); -const strField = TextField({ data: columnData, indices: [3, 4, 4, 5, 5, 6], count: 3 }, 3); -const strListField = TextField({ data: columnData, indices: [7, 12], count: 1 }, 1); -const intListField = TextField({ data: columnData, indices: [14, 19], count: 1 }, 1); +const intField = Data.CifField.ofTokens({ data: columnData, indices: [0, 1, 1, 2, 2, 3], count: 3 }); +const strField = Data.CifField.ofTokens({ data: columnData, indices: [3, 4, 4, 5, 5, 6], count: 3 }); +const strListField = Data.CifField.ofTokens({ data: columnData, indices: [7, 12], count: 1 }); +const intListField = Data.CifField.ofTokens({ data: columnData, indices: [14, 19], count: 1 }); const testBlock = Data.CifBlock(['test'], { test: Data.CifCategory('test', 3, ['int', 'str', 'strList', 'intList'], { diff --git a/src/mol-io/reader/csv/field.ts b/src/mol-io/reader/csv/field.ts index fdc4c5135d4037d72dbd06385accae9bd805bbfa..48d1f1072aa1a54cc9dd77e94318ecfce6ce14e5 100644 --- a/src/mol-io/reader/csv/field.ts +++ b/src/mol-io/reader/csv/field.ts @@ -4,6 +4,6 @@ * @author Alexander Rose <alexander.rose@weirdbyte.de> */ -import Field from '../cif/text/field' +import { CifField } from '../cif/data-model'; -export default Field \ No newline at end of file +export default CifField.ofTokens \ No newline at end of file diff --git a/src/mol-io/reader/csv/parser.ts b/src/mol-io/reader/csv/parser.ts index 6b7e14c5fe5e137206b98d5e6a17e0d4091aa7c4..4207202703338f1300bce9ebc0142c8ac6b39294 100644 --- a/src/mol-io/reader/csv/parser.ts +++ b/src/mol-io/reader/csv/parser.ts @@ -254,7 +254,7 @@ async function handleRecords(state: State): Promise<Data.CsvTable> { const columns: Data.CsvColumns = Object.create(null); for (let i = 0; i < state.columnCount; ++i) { - columns[state.columnNames[i]] = Field(state.tokens[i], state.recordCount); + columns[state.columnNames[i]] = Field(state.tokens[i]); 
} return Data.CsvTable(state.recordCount, state.columnNames, columns)
diff --git a/src/mol-model-parsers/structure/format.ts b/src/mol-model-parsers/structure/format.ts
new file mode 100644
index 0000000000000000000000000000000000000000..d8ecc3d822830da08eb6718c50166457d89d644a
--- /dev/null
+++ b/src/mol-model-parsers/structure/format.ts
@@ -0,0 +1,18 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
+import CIF, { CifFrame } from 'mol-io/reader/cif';
+
+/**
+ * Source-data descriptor a model is built from. It is a union discriminated by `kind`;
+ * mmCIF is the only member declared here.
+ */
+type ModelFormat =
+    | ModelFormat.mmCIF
+
+namespace ModelFormat {
+    /** mmCIF source: the typed database view (`data`) together with the CIF frame it came from. */
+    export interface mmCIF { kind: 'mmCIF', data: mmCIF_Database, frame: CifFrame }
+    /** Wraps a CIF frame as an mmCIF format; when `data` is omitted (or falsy) it is derived with `CIF.schema.mmCIF(frame)`. */
+    export function mmCIF(frame: CifFrame, data?: mmCIF_Database): mmCIF { return { kind: 'mmCIF', data: data || CIF.schema.mmCIF(frame), frame }; }
+}
+
+export { ModelFormat }
\ No newline at end of file
diff --git a/src/mol-model-parsers/structure/mmcif.ts b/src/mol-model-parsers/structure/mmcif.ts
new file mode 100644
index 0000000000000000000000000000000000000000..2c3126f371e7827017112945733b2f54c4777273
--- /dev/null
+++ b/src/mol-model-parsers/structure/mmcif.ts
@@ -0,0 +1,311 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { Column, Table } from 'mol-data/db';
+import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
+import { Spacegroup, SpacegroupCell, SymmetryOperator } from 'mol-math/geometry';
+import { Tensor, Vec3 } from 'mol-math/linear-algebra';
+import { Task, RuntimeContext } from 'mol-task';
+import UUID from 'mol-util/uuid';
+import { Model } from 'mol-model/structure/model/model';
+import { Entities } from 'mol-model/structure/model/properties/common';
+import { CustomProperties } from 'mol-model/structure/model/properties/custom';
+import { ModelSymmetry } from 'mol-model/structure/model/properties/symmetry';
+import { createAssemblies } from './mmcif/assembly';
+import { getAtomicHierarchyAndConformation } from './mmcif/atomic';
+import { ComponentBond } from './mmcif/bonds';
+import { getIHMCoarse, EmptyIHMCoarse, IHMData } from './mmcif/ihm';
+import { getSecondaryStructureMmCif } from './mmcif/secondary-structure';
+import { getSequence } from './mmcif/sequence';
+import { sortAtomSite } from './mmcif/sort';
+import { StructConn } from './mmcif/bonds/struct_conn';
+import { ChemicalComponent, ChemicalComponentMap } from 'mol-model/structure/model/properties/chemical-component';
+import { ComponentType, getMoleculeType, MoleculeType } from 'mol-model/structure/model/types';
+import { ModelFormat } from './format';
+import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from 'mol-model/structure/structure/carbohydrates/constants';
+
+import mmCIF_Format = ModelFormat.mmCIF
+
+type AtomSite = mmCIF_Database['atom_site']
+
+/** Gathers the symmetry properties of a model: assemblies, spacegroup, the non-standard-frame flag and NCS operators. */
+function getSymmetry(format: mmCIF_Format): ModelSymmetry {
+    const assemblies = createAssemblies(format);
+    const spacegroup = getSpacegroup(format);
+    const isNonStandardCrytalFrame = checkNonStandardCrystalFrame(format, spacegroup);
+    return { assemblies, spacegroup, isNonStandardCrytalFrame, ncsOperators: getNcsOperators(format) };
+}
+
+/**
+ * Placeholder: currently returns `false` on every path (see the TODO below).
+ * `spacegroup` is accepted but not read yet.
+ */
+function checkNonStandardCrystalFrame(format: mmCIF_Format, spacegroup: Spacegroup) {
+    const { atom_sites } = format.data;
+    if (atom_sites._rowCount === 0) return false;
+    // TODO: parse atom_sites transform and check if it corresponds to the toFractional matrix
+    return false;
+}
+
+/**
+ * Builds the spacegroup from the first row of the `symmetry` and `cell` categories.
+ * Returns `Spacegroup.ZeroP1` when either category has no rows.
+ */
+function getSpacegroup(format: mmCIF_Format): Spacegroup {
+    const { symmetry, cell } = format.data;
+    if (symmetry._rowCount === 0 || cell._rowCount === 0) return Spacegroup.ZeroP1;
+    const groupName = symmetry['space_group_name_H-M'].value(0);
+    // The cell angles are scaled by PI / 180 (degrees to radians) before being handed to SpacegroupCell.create.
+    const spaceCell = SpacegroupCell.create(groupName,
+        Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0)),
+        Vec3.scale(Vec3.zero(), Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0)), Math.PI / 180));
+
+    return Spacegroup.create(spaceCell);
+}
+
+/**
+ * Converts `struct_ncs_oper` rows to symmetry operators named `ncs_<id>`.
+ * Returns `undefined` when the category is empty; rows rejected by
+ * `SymmetryOperator.checkIfRotationAndTranslation` are skipped.
+ */
+function getNcsOperators(format: mmCIF_Format) {
+    const { struct_ncs_oper } = format.data;
+    if (struct_ncs_oper._rowCount === 0) return void 0;
+    const { id, matrix, vector } = struct_ncs_oper;
+
+    const matrixSpace = mmCIF_Schema.struct_ncs_oper.matrix.space, vectorSpace = mmCIF_Schema.struct_ncs_oper.vector.space;
+
+    const opers: SymmetryOperator[] = [];
+    for (let i = 0; i < struct_ncs_oper._rowCount; i++) {
+        const m = Tensor.toMat3(matrixSpace, matrix.value(i));
+        const v = Tensor.toVec3(vectorSpace, vector.value(i));
+        if (!SymmetryOperator.checkIfRotationAndTranslation(m, v)) continue;
+        opers[opers.length] = SymmetryOperator.ofRotationAndOffset(`ncs_${id.value(i)}`, m, v);
+    }
+    return opers;
+}
+/**
+ * Reads `pdbx_struct_mod_residue` into two maps keyed by component id:
+ * parent component id and details text. The key column is `label_comp_id`
+ * when that column is defined, otherwise `auth_comp_id`. A later row with
+ * the same key overwrites the earlier one.
+ */
+function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['modifiedResidues'] {
+    const data = format.data.pdbx_struct_mod_residue;
+    const parentId = new Map<string, string>();
+    const details = new Map<string, string>();
+    const comp_id = data.label_comp_id.isDefined ? data.label_comp_id : data.auth_comp_id;
+    const parent_id = data.parent_comp_id, details_data = data.details;
+
+    for (let i = 0; i < data._rowCount; i++) {
+        const id = comp_id.value(i);
+        parentId.set(id, parent_id.value(i));
+        details.set(id, details_data.value(i));
+    }
+
+    return { parentId, details };
+}
+
+/** Turns every `chem_comp` row into a `ChemicalComponent`, keyed by the component id. */
+function getChemicalComponentMap(format: mmCIF_Format): ChemicalComponentMap {
+    const map = new Map<string, ChemicalComponent>();
+    const { id, type, name, pdbx_synonyms, formula, formula_weight } = format.data.chem_comp
+    for (let i = 0, il = id.rowCount; i < il; ++i) {
+        const _id = id.value(i)
+        const _type = type.value(i)
+        const cc: ChemicalComponent = {
+            id: _id,
+            type: ComponentType[_type],
+            moleculeType: getMoleculeType(_type, _id),
+            name: name.value(i),
+            synonyms: pdbx_synonyms.value(i),
+            formula: formula.value(i),
+            formulaWeight: formula_weight.value(i),
+        }
+        map.set(_id, cc)
+    }
+    return map
+}
+
+/**
+ * Maps component ids to saccharide descriptions, using the first source that has rows:
+ *  1. `pdbx_chem_comp_identifier` rows of type 'SNFG CARB SYMBOL', looked up in `SaccharidesSnfgMap`
+ *     (an unknown SNFG name is reported with `console.warn` and skipped);
+ *  2. `chem_comp` rows: ids found in `SaccharideCompIdMap`, or `UnknownSaccharideComponent`
+ *     when `getMoleculeType` classifies the component as a saccharide;
+ *  3. otherwise a copy of the whole `SaccharideCompIdMap`.
+ */
+function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap {
+    const map = new Map<string, SaccharideComponent>();
+    const { pdbx_chem_comp_identifier } = format.data
+    if (pdbx_chem_comp_identifier._rowCount > 0) {
+        const { comp_id, type, identifier } = pdbx_chem_comp_identifier
+        for (let i = 0, il = pdbx_chem_comp_identifier._rowCount; i < il; ++i) {
+            if (type.value(i) === 'SNFG CARB SYMBOL') {
+                const snfgName = identifier.value(i)
+                const saccharideComp = SaccharidesSnfgMap.get(snfgName)
+                if (saccharideComp) {
+                    map.set(comp_id.value(i), saccharideComp)
+                } else {
+                    console.warn(`Unknown SNFG name '${snfgName}'`)
+                }
+            }
+        }
+    } else if (format.data.chem_comp._rowCount > 0) {
+        const { id, type } = format.data.chem_comp
+        for (let i = 0, il = id.rowCount; i < il; ++i) {
+            const _id = id.value(i)
+            const _type = type.value(i)
+            if (SaccharideCompIdMap.has(_id)) {
+                map.set(_id, SaccharideCompIdMap.get(_id)!)
+            } else if (!map.has(_id) && getMoleculeType(_type, _id) === MoleculeType.saccharide) {
+                map.set(_id, UnknownSaccharideComponent)
+            }
+        }
+    } else {
+        // TODO check if present in format.data.atom_site.label_comp_id
+        SaccharideCompIdMap.forEach((v, k) => map.set(k, v))
+    }
+    return map
+}
+
+/** Per-file lookup data that is computed once and spread into `properties` of every model built from the file. */
+export interface FormatData {
+    modifiedResidues: Model['properties']['modifiedResidues']
+    chemicalComponentMap: Model['properties']['chemicalComponentMap']
+    saccharideComponentMap: Model['properties']['saccharideComponentMap']
+}
+
+/** Computes the `FormatData` lookups for one mmCIF source. */
+function getFormatData(format: mmCIF_Format): FormatData {
+    return {
+        modifiedResidues: getModifiedResidueNameMap(format),
+        chemicalComponentMap: getChemicalComponentMap(format),
+        saccharideComponentMap: getSaccharideComponentMap(format)
+    }
+}
+
+/**
+ * Builds one model from a window of `atom_site`.
+ * When `previous` is given and the atomic hierarchy is reported as identical, the previous
+ * model is copied and only `id`, `modelNum`, `atomicConformation` and `_dynamicPropertyData`
+ * are replaced. Otherwise a full model is assembled; the coarse part is always `EmptyIHMCoarse`.
+ */
+function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model {
+    const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous);
+    if (previous && atomic.sameAsPrevious) {
+        return {
+            ...previous,
+            id: UUID.create22(),
+            modelNum: atom_site.pdbx_PDB_model_num.value(0),
+            atomicConformation: atomic.conformation,
+            _dynamicPropertyData: Object.create(null)
+        };
+    }
+
+    const coarse = EmptyIHMCoarse;
+    // Label: the entry id when it is present, otherwise the name of the data block.
+    const label = format.data.entry.id.valueKind(0) === Column.ValueKind.Present
+        ? format.data.entry.id.value(0)
+        : format.data._name;
+
+    return {
+        id: UUID.create22(),
+        label,
+        sourceData: format,
+        modelNum: atom_site.pdbx_PDB_model_num.value(0),
+        entities,
+        symmetry: getSymmetry(format),
+        sequence: getSequence(format.data, entities, atomic.hierarchy, formatData.modifiedResidues.parentId),
+        atomicHierarchy: atomic.hierarchy,
+        atomicConformation: atomic.conformation,
+        coarseHierarchy: coarse.hierarchy,
+        coarseConformation: coarse.conformation,
+        properties: {
+            secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy),
+            ...formatData
+        },
+        customProperties: new CustomProperties(),
+        _staticPropertyData: Object.create(null),
+        _dynamicPropertyData: Object.create(null)
+    };
+}
+
+/**
+ * Builds one integrative/hybrid (IHM) model from pre-split per-model data.
+ * Unlike `createStandardModel` there is no reuse of a previous model, and the
+ * coarse hierarchy/conformation come from `getIHMCoarse`.
+ */
+function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model {
+    const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities, formatData);
+    const coarse = getIHMCoarse(data, formatData);
+
+    return {
+        id: UUID.create22(),
+        label: data.model_name,
+        sourceData: format,
+        modelNum: data.model_id,
+        entities: data.entities,
+        symmetry: getSymmetry(format),
+        sequence: getSequence(format.data, data.entities, atomic.hierarchy, formatData.modifiedResidues.parentId),
+        atomicHierarchy: atomic.hierarchy,
+        atomicConformation: atomic.conformation,
+        coarseHierarchy: coarse.hierarchy,
+        coarseConformation: coarse.conformation,
+        properties: {
+            secondaryStructure: getSecondaryStructureMmCif(format.data, atomic.hierarchy),
+            ...formatData
+        },
+        customProperties: new CustomProperties(),
+        _staticPropertyData: Object.create(null),
+        _dynamicPropertyData: Object.create(null)
+    };
+}
+
+/** Attaches the bond-related custom properties (component bonds, then struct_conn) to a freshly built model. */
+function attachProps(model: Model) {
+    ComponentBond.attachFromMmCif(model);
+    StructConn.attachFromMmCif(model);
+}
+
+/**
+ * Returns the exclusive end index of the run of equal values in `num` that starts at `startIndex`.
+ * When the column is not defined the whole column is treated as one run and `rowCount` is returned.
+ */
+function findModelEnd(num: Column<number>, startIndex: number) {
+    const rowCount = num.rowCount;
+    if (!num.isDefined) return rowCount;
+    let endIndex = startIndex + 1;
+    while (endIndex < rowCount && num.areValuesEqual(startIndex, endIndex)) endIndex++;
+    return endIndex;
+}
+
+/**
+ * Reads a standard (non-IHM) file: walks `atom_site` in runs of equal `pdbx_PDB_model_num`,
+ * sorts each run with `sortAtomSite` and builds one model per run. Each model receives the
+ * previously built model so an unchanged hierarchy can be reused.
+ */
+async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
+    const atomCount = format.data.atom_site._rowCount;
+    const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
+
+    const models: Model[] = [];
+    let modelStart = 0;
+    while (modelStart < atomCount) {
+        const modelEnd = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, modelStart);
+        const atom_site = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd);
+        const model = createStandardModel(format, atom_site, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0);
+        attachProps(model);
+        models.push(model);
+        modelStart = modelEnd;
+    }
+    return models;
+}
+
+/**
+ * Splits `table` into windows of consecutive rows that share the same value in `col`,
+ * keyed by that value. If a value occurs again in a later, non-adjacent run, the later
+ * window replaces the earlier one in the returned map.
+ */
+function splitTable<T extends Table<any>>(table: T, col: Column<number>) {
+    const ret = new Map<number, T>()
+    const rowCount = table._rowCount;
+    let modelStart = 0;
+    while (modelStart < rowCount) {
+        const modelEnd = findModelEnd(col, modelStart);
+        const id = col.value(modelStart);
+        const window = Table.window(table, table._schema, modelStart, modelEnd) as T;
+        ret.set(id, window);
+        modelStart = modelEnd;
+    }
+    return ret;
+}
+
+/**
+ * Reads an IHM file: one model per `ihm_model_list` row. Atom, sphere and gaussian sites are
+ * split by model id up front; a model without rows in one of those tables gets an empty
+ * window (rows 0..0) of that table.
+ *
+ * Throws when `_atom_site.ihm_model_id` is not defined.
+ */
+async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
+    const { ihm_model_list } = format.data;
+    const entities: Entities = { data: format.data.entity, getEntityIndex: Column.createIndexer(format.data.entity.id) };
+
+    if (!format.data.atom_site.ihm_model_id.isDefined) {
+        throw new Error('expected _atom_site.ihm_model_id to be defined')
+    }
+
+    // TODO: will IHM require sorting or will we trust it?
+    const atom_sites = splitTable(format.data.atom_site, format.data.atom_site.ihm_model_id);
+    const sphere_sites = splitTable(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site.model_id);
+    const gauss_sites = splitTable(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site.model_id);
+
+    const models: Model[] = [];
+
+    const { model_id, model_name } = ihm_model_list;
+    for (let i = 0; i < ihm_model_list._rowCount; i++) {
+        const id = model_id.value(i);
+        const data: IHMData = {
+            model_id: id,
+            model_name: model_name.value(i),
+            entities: entities,
+            atom_site: atom_sites.has(id) ? atom_sites.get(id)! : Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0),
+            ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)! : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0),
+            ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)! : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0)
+        };
+        const model = createModelIHM(format, data, formatData);
+        attachProps(model);
+        models.push(model);
+    }
+
+    return models;
+}
+
+/**
+ * Entry point: creates a task that builds all models of an mmCIF source.
+ * The file is treated as IHM when `ihm_model_list` has at least one row, otherwise as standard.
+ * `FormatData` is computed eagerly, before the task runs.
+ */
+function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> {
+    const formatData = getFormatData(format)
+    return Task.create('Create mmCIF Model', async ctx => {
+        const isIHM = format.data.ihm_model_list._rowCount > 0;
+        return isIHM ? await readIHM(ctx, format, formatData) : await readStandard(ctx, format, formatData);
+    });
+}
+
+export default buildModels;
\ No newline at end of file
diff --git a/src/mol-model-parsers/structure/mmcif/assembly.ts b/src/mol-model-parsers/structure/mmcif/assembly.ts
new file mode 100644
index 0000000000000000000000000000000000000000..ad0fa39b9b9cf687478b2d6b5fc6730b4ea5bc03
--- /dev/null
+++ b/src/mol-model-parsers/structure/mmcif/assembly.ts
@@ -0,0 +1,150 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Mat4, Tensor } from 'mol-math/linear-algebra'
+import { SymmetryOperator } from 'mol-math/geometry/symmetry-operator'
+import { Assembly, OperatorGroup, OperatorGroups } from 'mol-model/structure/model/properties/symmetry'
+import { Queries as Q } from 'mol-model/structure'
+import { StructureProperties } from 'mol-model/structure';
+import { ModelFormat } from '../format';
+import mmCIF_Format = ModelFormat.mmCIF
+
+/**
+ * Creates one `Assembly` per row of `pdbx_struct_assembly`.
+ * Returns an empty array when the category has no rows.
+ */
+export function createAssemblies(format: mmCIF_Format): ReadonlyArray<Assembly> {
+    const { pdbx_struct_assembly } = format.data;
+    if (!pdbx_struct_assembly._rowCount) return [];
+
+    const matrices = getMatrices(format);
+    const assemblies: Assembly[] = [];
+    for (let i = 0; i < pdbx_struct_assembly._rowCount; i++) {
+        assemblies[assemblies.length] = createAssembly(format, i, matrices);
+    }
+    return assemblies;
+}
+
+/** Operator id (from `pdbx_struct_oper_list`) to its 4x4 transform. */
+type Matrices = Map<string, Mat4>
+/** One `pdbx_struct_assembly_gen` row: operator expression applied to a list of asym ids. */
+type Generator = { assemblyId: string, expression: string, asymIds: string[] }
+
+/**
+ * Creates the assembly described by row `index` of `pdbx_struct_assembly`, collecting every
+ * `pdbx_struct_assembly_gen` row whose `assembly_id` equals the assembly id.
+ */
+function createAssembly(format: mmCIF_Format, index: number, matrices: Matrices): Assembly {
+    const { pdbx_struct_assembly, pdbx_struct_assembly_gen } = format.data;
+
+    const id = pdbx_struct_assembly.id.value(index);
+    const details = pdbx_struct_assembly.details.value(index);
+    const generators: Generator[] = [];
+
+    const { assembly_id, oper_expression, asym_id_list } = pdbx_struct_assembly_gen;
+
+    for (let i = 0, _i = pdbx_struct_assembly_gen._rowCount; i < _i; i++) {
+        if (assembly_id.value(i) !== id) continue;
+        generators[generators.length] = {
+            assemblyId: id,
+            expression: oper_expression.value(i),
+            asymIds: asym_id_list.value(i)
+        };
+    }
+
+    return Assembly.create(id, details, operatorGroupsProvider(generators, matrices));
+}
+
+/**
+ * Returns a thunk that builds the operator groups on demand, so operator expressions are only
+ * parsed when the groups are requested. `operatorOffset` carries the operator count over from
+ * one generator to the next, so operator names keep counting up across generators.
+ */
+function operatorGroupsProvider(generators: Generator[], matrices: Matrices): () => OperatorGroups {
+    return () => {
+        const groups: OperatorGroup[] = [];
+
+        let operatorOffset = 0;
+        for (let i = 0; i < generators.length; i++) {
+            const gen = generators[i];
+            const operatorList = parseOperatorList(gen.expression);
+            const operatorNames = expandOperators(operatorList);
+            const operators = getAssemblyOperators(matrices, operatorNames, operatorOffset, gen.assemblyId);
+            const selector = Q.generators.atoms({ chainTest: Q.pred.and(
+                Q.pred.eq(ctx => StructureProperties.unit.operator_name(ctx.element), SymmetryOperator.DefaultName),
+                Q.pred.inSet(ctx => StructureProperties.chain.label_asym_id(ctx.element), gen.asymIds)
+            )});
+            groups[groups.length] = { selector, operators };
+            operatorOffset += operators.length;
+        }
+
+        return groups;
+    }
+}
+
+/**
+ * Builds the operator id to matrix map from `pdbx_struct_oper_list`: the `matrix` tensor is
+ * converted with `Tensor.toMat4`, the translation is set from `vector`, and element (3, 3) is set to 1.
+ */
+function getMatrices({ data }: mmCIF_Format): Matrices {
+    const { pdbx_struct_oper_list } = data;
+    const { id, matrix, vector, _schema } = pdbx_struct_oper_list;
+    const matrices = new Map<string, Mat4>();
+
+    for (let i = 0, _i = pdbx_struct_oper_list._rowCount; i < _i; i++) {
+        const m = Tensor.toMat4(_schema.matrix.space, matrix.value(i));
+        const t = Tensor.toVec3(_schema.vector.space, vector.value(i));
+        Mat4.setTranslation(m, t);
+        Mat4.setValue(m, 3, 3, 1);
+        matrices.set(id.value(i), m);
+    }
+
+    return matrices;
+}
+
+/**
+ * Expands a parsed operator expression into every combination that takes one id from each group
+ * (cartesian product). The first group varies fastest:
+ * [['X0'], ['1', '2']] becomes [['X0', '1'], ['X0', '2']].
+ */
+function expandOperators(operatorList: string[][]) {
+    const ops: string[][] = [];
+    const currentOp: string[] = [];
+    for (let i = 0; i < operatorList.length; i++) currentOp[i] = '';
+    expandOperators1(operatorList, ops, operatorList.length - 1, currentOp);
+    return ops;
+}
+
+/** Recursive step of `expandOperators`: fills slot `i` of `current`, then recurses towards slot 0. */
+function expandOperators1(operatorNames: string[][], list: string[][], i: number, current: string[]) {
+    if (i < 0) {
+        // All slots are filled: store a copy, because `current` is reused for the next combination.
+        list[list.length] = current.slice(0);
+        return;
+    }
+
+    const ops = operatorNames[i], len = ops.length;
+    for (let j = 0; j < len; j++) {
+        current[i] = ops[j];
+        expandOperators1(operatorNames, list, i - 1, current);
+    }
+}
+
+/**
+ * Creates one symmetry operator per operator-id combination. The matrices of a combination are
+ * multiplied in list order onto an identity matrix; operators are named `A-<n>` with `n`
+ * starting at `startIndex + 1`.
+ *
+ * @throws Error when a combination references an operator id that has no matrix in `matrices`
+ *         (previously the missing value was passed on to `Mat4.mul` unchecked).
+ */
+function getAssemblyOperators(matrices: Matrices, operatorNames: string[][], startIndex: number, assemblyId: string) {
+    const operators: SymmetryOperator[] = [];
+
+    let index = startIndex;
+    for (const op of operatorNames) {
+        const m = Mat4.identity();
+        for (let i = 0; i < op.length; i++) {
+            const operMatrix = matrices.get(op[i]);
+            if (!operMatrix) {
+                throw new Error(`Assembly '${assemblyId}' references operator '${op[i]}' which is not defined in pdbx_struct_oper_list.`);
+            }
+            Mat4.mul(m, m, operMatrix);
+        }
+        index++
+        operators[operators.length] = SymmetryOperator.create(`A-${index}`, m, { id: assemblyId, operList: op });
+    }
+
+    return operators;
+}
+
+/**
+ * Parses an operator expression into groups of operator ids.
+ * Each parenthesised part is one group; inside a group, entries are comma separated and an
+ * entry of the form `<int>-<int>` is expanded to the inclusive integer range.
+ * An entry that contains a dash but whose bounds are not both integers is kept as a literal
+ * operator id instead of being dropped.
+ */
+function parseOperatorList(value: string): string[][] {
+    // '(X0)(1-5)' becomes [['X0'], ['1', '2', '3', '4', '5']]
+    // kudos to Glen van Ginkel.
+
+    const oeRegex = /\(?([^\(\)]+)\)?]*/g, groups: string[] = [], ret: string[][] = [];
+
+    let g: RegExpExecArray | null;
+    while ((g = oeRegex.exec(value)) !== null) groups[groups.length] = g[1];
+
+    groups.forEach(g => {
+        const group: string[] = [];
+        g.split(',').forEach(e => {
+            const dashIndex = e.indexOf('-');
+            // Explicit radix: operator ranges are decimal.
+            const from = dashIndex > 0 ? parseInt(e.substring(0, dashIndex), 10) : NaN;
+            const to = dashIndex > 0 ? parseInt(e.substring(dashIndex + 1), 10) : NaN;
+            if (!isNaN(from) && !isNaN(to)) {
+                for (let i = from; i <= to; i++) group[group.length] = i.toString();
+            } else {
+                // Plain id, or an id that merely contains a dash.
+                group[group.length] = e.trim();
+            }
+        });
+        ret[ret.length] = group;
+    });
+
+    return ret;
+}
\ No newline at end of file
diff --git a/src/mol-model-parsers/structure/mmcif/atomic.ts b/src/mol-model-parsers/structure/mmcif/atomic.ts
new file mode 100644
index 0000000000000000000000000000000000000000..259933942c37b3615eae0ff08ae789b3ade7775e
--- /dev/null
+++ b/src/mol-model-parsers/structure/mmcif/atomic.ts
@@ -0,0 +1,107 @@
+/**
+ * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { Column, Table } from 'mol-data/db';
+import { Interval, Segmentation } from 'mol-data/int';
+import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
+import UUID from 'mol-util/uuid';
+import { ElementIndex } from 'mol-model/structure';
+import { Model } from 'mol-model/structure/model/model';
+import { AtomicConformation, AtomicData, AtomicHierarchy, AtomicSegments, AtomsSchema, ChainsSchema, ResiduesSchema } from 'mol-model/structure/model/properties/atomic';
+import { getAtomicIndex } from 'mol-model/structure/model/properties/utils/atomic-index';
+import { ElementSymbol } from 'mol-model/structure/model/types';
+import { Entities } from 'mol-model/structure/model/properties/common';
+import { getAtomicRanges } from 'mol-model/structure/model/properties/utils/atomic-ranges';
+import { FormatData } from '../mmcif';
+import { getAtomicDerivedData } from 'mol-model/structure/model/properties/utils/atomic-derived';
+import { ModelFormat } from '../format';
+import mmCIF_Format = ModelFormat.mmCIF
+
+
+type AtomSite = mmCIF_Database['atom_site']
+
+/**
+ * Scans `atom_site` row by row and records the row index at which each residue and each chain starts.
+ * A new chain starts where `label_entity_id` or `label_asym_id` differs from the previous row.
+ * A new residue starts at every chain start, and also where `label_seq_id`, `auth_seq_id`,
+ * `pdbx_PDB_ins_code` or `label_comp_id` differs from the previous row.
+ * Both lists are empty when the table has no rows; otherwise both begin with 0.
+ */
+function findHierarchyOffsets(atom_site: AtomSite) {
+    if (atom_site._rowCount === 0) return { residues: [], chains: [] };
+
+    const start = 0, end = atom_site._rowCount;
+    const residues = [start as ElementIndex], chains = [start as ElementIndex];
+
+    const { label_entity_id, label_asym_id, label_seq_id, auth_seq_id, pdbx_PDB_ins_code, label_comp_id } = atom_site;
+
+    for (let i = start + 1 as ElementIndex; i < end; i++) {
+        const newChain = !label_entity_id.areValuesEqual(i - 1, i) || !label_asym_id.areValuesEqual(i - 1, i);
+        const newResidue = newChain
+            || !label_seq_id.areValuesEqual(i - 1, i)
+            || !auth_seq_id.areValuesEqual(i - 1, i)
+            || !pdbx_PDB_ins_code.areValuesEqual(i - 1, i)
+            || !label_comp_id.areValuesEqual(i - 1, i);
+
+        if (newResidue) residues[residues.length] = i as ElementIndex;
+        if (newChain) chains[chains.length] = i as ElementIndex;
+    }
+    return { residues, chains };
+}
+
+/**
+ * Builds the atom, residue and chain tables of the hierarchy.
+ * Atoms reuse the `atom_site` columns directly, except `type_symbol`, which is mapped through
+ * `ElementSymbol` into a new array. Residues and chains are views of `atom_site` at the given
+ * start offsets.
+ */
+function createHierarchyData(atom_site: AtomSite, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData {
+    const atoms = Table.ofColumns(AtomsSchema, {
+        type_symbol: Column.ofArray({ array: Column.mapToArray(atom_site.type_symbol, ElementSymbol), schema: Column.Schema.Aliased<ElementSymbol>(Column.Schema.str) }),
+        label_atom_id: atom_site.label_atom_id,
+        auth_atom_id: atom_site.auth_atom_id,
+        label_alt_id: atom_site.label_alt_id,
+        pdbx_formal_charge: atom_site.pdbx_formal_charge
+    });
+    const residues = Table.view(atom_site, ResiduesSchema, offsets.residues);
+    // Optimize the numeric columns
+    Table.columnToArray(residues, 'label_seq_id', Int32Array);
+    Table.columnToArray(residues, 'auth_seq_id', Int32Array);
+    const chains = Table.view(atom_site, ChainsSchema, offsets.chains);
+    return { atoms, residues, chains };
+}
+
+/**
+ * Extracts the per-atom conformation under a freshly generated id: `id`, `occupancy` and
+ * `B_iso_or_equiv` columns are passed through, and the Cartesian coordinates are converted
+ * to `Float32Array`s.
+ */
+function getConformation(atom_site: AtomSite): AtomicConformation {
+    return {
+        id: UUID.create22(),
+        atomId: atom_site.id,
+        occupancy: atom_site.occupancy,
+        B_iso_or_equiv: atom_site.B_iso_or_equiv,
+        x: atom_site.Cartn_x.toArray({ array: Float32Array }),
+        y: atom_site.Cartn_y.toArray({ array: Float32Array }),
+        z: atom_site.Cartn_z.toArray({ array: Float32Array }),
+    }
+}
+
+/** Compares two hierarchies table by table (chains, then residues, then atoms) with `Table.areEqual`. */
+function isHierarchyDataEqual(a: AtomicData, b: AtomicData) {
+    // TODO need to cast because of how TS handles type resolution for interfaces https://github.com/Microsoft/TypeScript/issues/15300
+    return Table.areEqual(a.chains as Table<ChainsSchema>, b.chains as Table<ChainsSchema>)
+        && Table.areEqual(a.residues as Table<ResiduesSchema>, b.residues as Table<ResiduesSchema>)
+        && Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>)
+}
+
+/**
+ * Builds the atomic hierarchy and conformation for one `atom_site` window.
+ * When `previous` is supplied and its hierarchy tables equal the newly computed ones, the
+ * previous hierarchy object is returned with `sameAsPrevious: true` and only the conformation
+ * is new. Otherwise segments, index, derived data and ranges are computed from scratch.
+ * `format` is not read in this function.
+ */
+export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model) {
+    const hierarchyOffsets = findHierarchyOffsets(atom_site);
+    const hierarchyData = createHierarchyData(atom_site, hierarchyOffsets);
+
+    if (previous && isHierarchyDataEqual(previous.atomicHierarchy, hierarchyData)) {
+        return {
+            sameAsPrevious: true,
+            hierarchy: previous.atomicHierarchy,
+            conformation: getConformation(atom_site)
+        };
+    }
+
+    const conformation = getConformation(atom_site)
+
+    // Both segmentations cover the interval [0, rowCount) of this atom_site window.
+    const hierarchySegments: AtomicSegments = {
+        residueAtomSegments: Segmentation.ofOffsets(hierarchyOffsets.residues, Interval.ofBounds(0, atom_site._rowCount)),
+        chainAtomSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, Interval.ofBounds(0, atom_site._rowCount)),
+    }
+
+    const index = getAtomicIndex(hierarchyData, entities, hierarchySegments);
+    const derived = getAtomicDerivedData(hierarchyData, index, formatData.chemicalComponentMap);
+    const hierarchyRanges = getAtomicRanges(hierarchyData, hierarchySegments, conformation, formatData.chemicalComponentMap);
+    const hierarchy: AtomicHierarchy = { ...hierarchyData, ...hierarchySegments, ...hierarchyRanges, index, derived };
+    return { sameAsPrevious: false, hierarchy, conformation };
+}
\ No newline at end of file
diff --git a/src/mol-model-parsers/structure/mmcif/bonds.ts b/src/mol-model-parsers/structure/mmcif/bonds.ts
new file mode 100644
index 0000000000000000000000000000000000000000..0f69970236251826f5c529d481726b6d2c82b8fc
--- /dev/null
+++ b/src/mol-model-parsers/structure/mmcif/bonds.ts
@@ -0,0 +1,9 @@
+/**
+ * Copyright (c) 2017-2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * + * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +export * from './bonds/comp' +export * from './bonds/struct_conn' \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/bonds/comp.ts b/src/mol-model-parsers/structure/mmcif/bonds/comp.ts new file mode 100644 index 0000000000000000000000000000000000000000..eceb64760607d51d544a8a9a587a46471bd9334d --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/bonds/comp.ts @@ -0,0 +1,164 @@ +/** + * Copyright (c) 2017-2018 Mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { Model } from 'mol-model/structure/model/model' +import { LinkType } from 'mol-model/structure/model/types' +import { ModelPropertyDescriptor } from 'mol-model/structure/model/properties/custom'; +import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif'; +import { Structure, Unit, StructureProperties, StructureElement } from 'mol-model/structure'; +import { Segmentation } from 'mol-data/int'; +import { CifWriter } from 'mol-io/writer/cif' + +export interface ComponentBond { + entries: Map<string, ComponentBond.Entry> +} + +export namespace ComponentBond { + export const Descriptor: ModelPropertyDescriptor = { + isStatic: true, + name: 'chem_comp_bond', + cifExport: { + prefix: '', + categories: [{ + name: 'chem_comp_bond', + instance(ctx) { + const chem_comp_bond = getChemCompBond(ctx.structures[0].model); + if (!chem_comp_bond) return CifWriter.Category.Empty; + + const comp_names = getUniqueResidueNames(ctx.structures[0]); + const { comp_id, _rowCount } = chem_comp_bond; + const indices: number[] = []; + for (let i = 0; i < _rowCount; i++) { + if (comp_names.has(comp_id.value(i))) indices[indices.length] = i; + } + + return CifWriter.Category.ofTable(chem_comp_bond, indices) + } + }] + } + } + + export 
function attachFromMmCif(model: Model): boolean { + if (model.customProperties.has(Descriptor)) return true; + if (model.sourceData.kind !== 'mmCIF') return false; + const { chem_comp_bond } = model.sourceData.data; + if (chem_comp_bond._rowCount === 0) return false; + + model.customProperties.add(Descriptor); + model._staticPropertyData.__ComponentBondData__ = chem_comp_bond; + return true; + } + + export function attachFromExternalData(model: Model, bonds: ComponentBond, force = false) { + if (!force && model.customProperties.has(Descriptor)) return true; + if (model._staticPropertyData.__ComponentBondData__) delete model._staticPropertyData.__ComponentBondData__; + model.customProperties.add(Descriptor); + model._staticPropertyData[PropName] = bonds; + return true; + } + + export class ComponentBondImpl implements ComponentBond { + entries: Map<string, ComponentBond.Entry> = new Map(); + + addEntry(id: string) { + let e = new Entry(id); + this.entries.set(id, e); + return e; + } + } + + export class Entry { + map: Map<string, Map<string, { order: number, flags: number }>> = new Map(); + + add(a: string, b: string, order: number, flags: number, swap = true) { + let e = this.map.get(a); + if (e !== void 0) { + let f = e.get(b); + if (f === void 0) { + e.set(b, { order, flags }); + } + } else { + let map = new Map<string, { order: number, flags: number }>(); + map.set(b, { order, flags }); + this.map.set(a, map); + } + + if (swap) this.add(b, a, order, flags, false); + } + + constructor(public id: string) { + } + } + + export function parseChemCompBond(data: mmCIF_Database['chem_comp_bond']): ComponentBond { + const { comp_id, atom_id_1, atom_id_2, value_order, pdbx_aromatic_flag, _rowCount: rowCount } = data; + + const compBond = new ComponentBondImpl(); + let entry = compBond.addEntry(comp_id.value(0)!); + for (let i = 0; i < rowCount; i++) { + const id = comp_id.value(i)!; + const nameA = atom_id_1.value(i)!; + const nameB = atom_id_2.value(i)!; + const order = 
value_order.value(i)!; + const aromatic = pdbx_aromatic_flag.value(i) === 'Y'; + + if (entry.id !== id) { + entry = compBond.addEntry(id); + } + + let flags: number = LinkType.Flag.Covalent; + let ord = 1; + if (aromatic) flags |= LinkType.Flag.Aromatic; + switch (order.toLowerCase()) { + case 'doub': + case 'delo': + ord = 2; + break; + case 'trip': ord = 3; break; + case 'quad': ord = 4; break; + } + + entry.add(nameA, nameB, ord, flags); + } + + return compBond; + } + + function getChemCompBond(model: Model) { + return model._staticPropertyData.__ComponentBondData__ as mmCIF_Database['chem_comp_bond']; + } + + export const PropName = '__ComponentBond__'; + export function get(model: Model): ComponentBond | undefined { + if (model._staticPropertyData[PropName]) return model._staticPropertyData[PropName]; + if (!model.customProperties.has(Descriptor)) return void 0; + + const chem_comp_bond = getChemCompBond(model); + if (!chem_comp_bond) return void 0; + + const chemComp = parseChemCompBond(chem_comp_bond); + model._staticPropertyData[PropName] = chemComp; + return chemComp; + } + + function getUniqueResidueNames(s: Structure) { + const prop = StructureProperties.residue.label_comp_id; + const names = new Set<string>(); + const loc = StructureElement.create(); + for (const unit of s.units) { + if (!Unit.isAtomic(unit)) continue; + const residues = Segmentation.transientSegments(unit.model.atomicHierarchy.residueAtomSegments, unit.elements); + loc.unit = unit; + while (residues.hasNext) { + const seg = residues.move(); + loc.element = unit.elements[seg.start]; + names.add(prop(loc)); + } + } + return names; + } +} \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/bonds/struct_conn.ts b/src/mol-model-parsers/structure/mmcif/bonds/struct_conn.ts new file mode 100644 index 0000000000000000000000000000000000000000..21a5b9e57f6b076c6b7184738dee962ecd58164d --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/bonds/struct_conn.ts @@ 
-0,0 +1,249 @@ +/** + * Copyright (c) 2017-2018 Mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { Model } from 'mol-model/structure/model/model' +import { Structure } from 'mol-model/structure' +import { LinkType } from 'mol-model/structure/model/types' +import { findEntityIdByAsymId, findAtomIndexByLabelName } from '../util' +import { Column } from 'mol-data/db' +import { ModelPropertyDescriptor } from 'mol-model/structure/model/properties/custom'; +import { mmCIF_Database, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; +import { SortedArray } from 'mol-data/int'; +import { CifWriter } from 'mol-io/writer/cif' +import { ElementIndex, ResidueIndex } from 'mol-model/structure/model/indexing'; + +export interface StructConn { + getResidueEntries(residueAIndex: ResidueIndex, residueBIndex: ResidueIndex): ReadonlyArray<StructConn.Entry>, + getAtomEntries(atomIndex: ElementIndex): ReadonlyArray<StructConn.Entry>, + readonly entries: ReadonlyArray<StructConn.Entry> +} + +export namespace StructConn { + export const Descriptor: ModelPropertyDescriptor = { + isStatic: true, + name: 'struct_conn', + cifExport: { + prefix: '', + categories: [{ + name: 'struct_conn', + instance(ctx) { + const structure = ctx.structures[0], model = structure.model; + const struct_conn = getStructConn(model); + if (!struct_conn) return CifWriter.Category.Empty; + + const strConn = get(model); + if (!strConn || strConn.entries.length === 0) return CifWriter.Category.Empty; + + const foundAtoms = new Set<ElementIndex>(); + const indices: number[] = []; + for (const entry of strConn.entries) { + const { partners } = entry; + let hasAll = true; + for (let i = 0, _i = partners.length; i < _i; i++) { + const atom = partners[i].atomIndex; + if (foundAtoms.has(atom)) continue; + if (hasAtom(structure, atom)) { + foundAtoms.add(atom); + } else { + 
hasAll = false; + break; + } + } + if (hasAll) { + indices[indices.length] = entry.rowIndex; + } + } + + return CifWriter.Category.ofTable(struct_conn, indices); + } + }] + } + } + + function hasAtom({ units }: Structure, element: ElementIndex) { + for (let i = 0, _i = units.length; i < _i; i++) { + if (SortedArray.indexOf(units[i].elements, element) >= 0) return true; + } + return false; + } + + function _resKey(rA: number, rB: number) { + if (rA < rB) return `${rA}-${rB}`; + return `${rB}-${rA}`; + } + const _emptyEntry: Entry[] = []; + + class StructConnImpl implements StructConn { + private _residuePairIndex: Map<string, StructConn.Entry[]> | undefined = void 0; + private _atomIndex: Map<number, StructConn.Entry[]> | undefined = void 0; + + private getResiduePairIndex() { + if (this._residuePairIndex) return this._residuePairIndex; + this._residuePairIndex = new Map(); + for (const e of this.entries) { + const ps = e.partners; + const l = ps.length; + for (let i = 0; i < l - 1; i++) { + for (let j = i + 1; j < l; j++) { /* j starts after i: index each unordered partner pair once, never a partner with itself */ + const key = _resKey(ps[i].residueIndex, ps[j].residueIndex); + if (this._residuePairIndex.has(key)) { + this._residuePairIndex.get(key)!.push(e); + } else { + this._residuePairIndex.set(key, [e]); + } + } + } + } + return this._residuePairIndex; + } + + private getAtomIndex() { + if (this._atomIndex) return this._atomIndex; + this._atomIndex = new Map(); + for (const e of this.entries) { + for (const p of e.partners) { + const key = p.atomIndex; + if (this._atomIndex.has(key)) { + this._atomIndex.get(key)!.push(e); + } else { + this._atomIndex.set(key, [e]); + } + } + } + return this._atomIndex; + } + + + getResidueEntries(residueAIndex: ResidueIndex, residueBIndex: ResidueIndex): ReadonlyArray<StructConn.Entry> { + return this.getResiduePairIndex().get(_resKey(residueAIndex, residueBIndex)) || 
_emptyEntry; + } + + constructor(public entries: StructConn.Entry[]) { + } + } + + export interface Entry { + rowIndex: number, + distance: number, + order: number, + flags: number, + partners: { residueIndex: ResidueIndex, atomIndex: ElementIndex, symmetry: string }[] + } + + type StructConnType = typeof mmCIF_Schema.struct_conn.conn_type_id.T + + export function attachFromMmCif(model: Model): boolean { + if (model.customProperties.has(Descriptor)) return true; + if (model.sourceData.kind !== 'mmCIF') return false; + const { struct_conn } = model.sourceData.data; + if (struct_conn._rowCount === 0) return false; + model.customProperties.add(Descriptor); + model._staticPropertyData.__StructConnData__ = struct_conn; + return true; + } + + function getStructConn(model: Model) { + return model._staticPropertyData.__StructConnData__ as mmCIF_Database['struct_conn']; + } + + export const PropName = '__StructConn__'; + export function get(model: Model): StructConn | undefined { + if (model._staticPropertyData[PropName]) return model._staticPropertyData[PropName]; + if (!model.customProperties.has(Descriptor)) return void 0; + + const struct_conn = getStructConn(model); + + const { conn_type_id, pdbx_dist_value, pdbx_value_order } = struct_conn; + const p1 = { + label_asym_id: struct_conn.ptnr1_label_asym_id, + label_seq_id: struct_conn.ptnr1_label_seq_id, + auth_seq_id: struct_conn.ptnr1_auth_seq_id, + label_atom_id: struct_conn.ptnr1_label_atom_id, + label_alt_id: struct_conn.pdbx_ptnr1_label_alt_id, + ins_code: struct_conn.pdbx_ptnr1_PDB_ins_code, + symmetry: struct_conn.ptnr1_symmetry + }; + const p2: typeof p1 = { + label_asym_id: struct_conn.ptnr2_label_asym_id, + label_seq_id: struct_conn.ptnr2_label_seq_id, + auth_seq_id: struct_conn.ptnr2_auth_seq_id, + label_atom_id: struct_conn.ptnr2_label_atom_id, + label_alt_id: struct_conn.pdbx_ptnr2_label_alt_id, + ins_code: struct_conn.pdbx_ptnr2_PDB_ins_code, + symmetry: struct_conn.ptnr2_symmetry + }; + + const _p = (row: 
number, ps: typeof p1) => { + if (ps.label_asym_id.valueKind(row) !== Column.ValueKind.Present) return void 0; + const asymId = ps.label_asym_id.value(row); + const residueIndex = model.atomicHierarchy.index.findResidue( + findEntityIdByAsymId(model, asymId), + asymId, + ps.auth_seq_id.value(row), + ps.ins_code.value(row) + ); + if (residueIndex < 0) return void 0; + const atomName = ps.label_atom_id.value(row); + // turns out "mismat" records might not have atom name value + if (!atomName) return void 0; + const atomIndex = findAtomIndexByLabelName(model, residueIndex, atomName, ps.label_alt_id.value(row)); + if (atomIndex < 0) return void 0; + return { residueIndex, atomIndex, symmetry: ps.symmetry.value(row) || '1_555' }; + } + + const _ps = (row: number) => { + const ret = []; + let p = _p(row, p1); + if (p) ret.push(p); + p = _p(row, p2); + if (p) ret.push(p); + return ret; + } + + const entries: StructConn.Entry[] = []; + for (let i = 0; i < struct_conn._rowCount; i++) { + const partners = _ps(i); + if (partners.length < 2) continue; + + const type = conn_type_id.value(i)! 
as StructConnType; + const orderType = (pdbx_value_order.value(i) || '').toLowerCase(); + let flags = LinkType.Flag.None; + let order = 1; + + switch (orderType) { + case 'sing': order = 1; break; + case 'doub': order = 2; break; + case 'trip': order = 3; break; + case 'quad': order = 4; break; + } + + switch (type) { + case 'covale': + case 'covale_base': + case 'covale_phosphate': + case 'covale_sugar': + case 'modres': + flags = LinkType.Flag.Covalent; + break; + case 'disulf': flags = LinkType.Flag.Covalent | LinkType.Flag.Sulfide; break; + case 'hydrog': flags = LinkType.Flag.Hydrogen; break; + case 'metalc': flags = LinkType.Flag.MetallicCoordination; break; + case 'saltbr': flags = LinkType.Flag.Ionic; break; + } + + entries.push({ rowIndex: i, flags, order, distance: pdbx_dist_value.value(i), partners }); + } + + const ret = new StructConnImpl(entries); + model._staticPropertyData[PropName] = ret; + return ret; + } +} \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/ihm.ts b/src/mol-model-parsers/structure/mmcif/ihm.ts new file mode 100644 index 0000000000000000000000000000000000000000..7ca51db554a8c12a6229e147e971684a451f66a1 --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/ihm.ts @@ -0,0 +1,102 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { mmCIF_Database as mmCIF, mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif' +import { CoarseHierarchy, CoarseConformation, CoarseElementData, CoarseSphereConformation, CoarseGaussianConformation } from 'mol-model/structure/model/properties/coarse' +import { Entities } from 'mol-model/structure/model/properties/common'; +import { Column } from 'mol-data/db'; +import { getCoarseKeys } from 'mol-model/structure/model/properties/utils/coarse-keys'; +import { UUID } from 'mol-util'; +import { Segmentation, Interval } from 'mol-data/int'; +import { Mat3, Tensor } from 'mol-math/linear-algebra'; +import { ElementIndex, ChainIndex } from 'mol-model/structure/model/indexing'; +import { getCoarseRanges } from 'mol-model/structure/model/properties/utils/coarse-ranges'; +import { FormatData } from '../mmcif'; + +export interface IHMData { + model_id: number, + model_name: string, + entities: Entities, + atom_site: mmCIF['atom_site'], + ihm_sphere_obj_site: mmCIF['ihm_sphere_obj_site'], + ihm_gaussian_obj_site: mmCIF['ihm_gaussian_obj_site'] +} + +export const EmptyIHMCoarse = { hierarchy: CoarseHierarchy.Empty, conformation: void 0 as any } + +export function getIHMCoarse(data: IHMData, formatData: FormatData): { hierarchy: CoarseHierarchy, conformation: CoarseConformation } { + const { ihm_sphere_obj_site, ihm_gaussian_obj_site } = data; + + if (ihm_sphere_obj_site._rowCount === 0 && ihm_gaussian_obj_site._rowCount === 0) return EmptyIHMCoarse; + + const sphereData = getData(ihm_sphere_obj_site); + const sphereConformation = getSphereConformation(ihm_sphere_obj_site); + const sphereKeys = getCoarseKeys(sphereData, data.entities); + const sphereRanges = getCoarseRanges(sphereData, formatData.chemicalComponentMap); + + const gaussianData = getData(ihm_gaussian_obj_site); + const gaussianConformation = getGaussianConformation(ihm_gaussian_obj_site); + const gaussianKeys = getCoarseKeys(gaussianData, 
data.entities); + const gaussianRanges = getCoarseRanges(gaussianData, formatData.chemicalComponentMap); + + return { + hierarchy: { + isDefined: true, + spheres: { ...sphereData, ...sphereKeys, ...sphereRanges }, + gaussians: { ...gaussianData, ...gaussianKeys, ...gaussianRanges }, + }, + conformation: { + id: UUID.create22(), + spheres: sphereConformation, + gaussians: gaussianConformation + } + }; +} + +function getSphereConformation(data: mmCIF['ihm_sphere_obj_site']): CoarseSphereConformation { + return { + x: data.Cartn_x.toArray({ array: Float32Array }), + y: data.Cartn_y.toArray({ array: Float32Array }), + z: data.Cartn_z.toArray({ array: Float32Array }), + radius: data.object_radius.toArray({ array: Float32Array }), + rmsf: data.rmsf.toArray({ array: Float32Array }) + }; +} + +function getGaussianConformation(data: mmCIF['ihm_gaussian_obj_site']): CoarseGaussianConformation { + const matrix_space = mmCIF_Schema.ihm_gaussian_obj_site.covariance_matrix.space; + const covariance_matrix: Mat3[] = []; + const { covariance_matrix: cm } = data; + + for (let i = 0, _i = cm.rowCount; i < _i; i++) { + covariance_matrix[i] = Tensor.toMat3(matrix_space, cm.value(i)); + } + + return { + x: data.mean_Cartn_x.toArray({ array: Float32Array }), + y: data.mean_Cartn_y.toArray({ array: Float32Array }), + z: data.mean_Cartn_z.toArray({ array: Float32Array }), + weight: data.weight.toArray({ array: Float32Array }), + covariance_matrix + }; +} + +function getSegments(asym_id: Column<string>, seq_id_begin: Column<number>, seq_id_end: Column<number>) { + const chainOffsets = [0 as ElementIndex]; + for (let i = 1, _i = asym_id.rowCount; i < _i; i++) { + const newChain = !asym_id.areValuesEqual(i - 1, i); + if (newChain) chainOffsets[chainOffsets.length] = i as ElementIndex; + } + + return { + chainElementSegments: Segmentation.ofOffsets<ElementIndex, ChainIndex>(chainOffsets, Interval.ofBounds(0, asym_id.rowCount)) + } +} + +function getData(data: mmCIF['ihm_sphere_obj_site'] | 
mmCIF['ihm_gaussian_obj_site']): CoarseElementData { + const { entity_id, seq_id_begin, seq_id_end, asym_id } = data; + return { count: entity_id.rowCount, entity_id, asym_id, seq_id_begin, seq_id_end, ...getSegments(asym_id, seq_id_begin, seq_id_end) }; +} \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/pair-restraint.ts b/src/mol-model-parsers/structure/mmcif/pair-restraint.ts new file mode 100644 index 0000000000000000000000000000000000000000..40cec49d41786cc3576f29bca18c357e87cb715e --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/pair-restraint.ts @@ -0,0 +1,8 @@ +/** + * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +export * from './pair-restraints/cross-links' +// export * from './pair-restraints/predicted-contacts' \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/pair-restraints/cross-links.ts b/src/mol-model-parsers/structure/mmcif/pair-restraints/cross-links.ts new file mode 100644 index 0000000000000000000000000000000000000000..2964745e9b5723e3621f97551592d9961e1277df --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/pair-restraints/cross-links.ts @@ -0,0 +1,107 @@ +/** + * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { Model } from 'mol-model/structure/model/model' +import { Table } from 'mol-data/db' +import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; +import { findAtomIndexByLabelName } from '../util'; +import { Unit } from 'mol-model/structure'; +import { ElementIndex } from 'mol-model/structure/model/indexing'; + +function findAtomIndex(model: Model, entityId: string, asymId: string, seqId: number, atomId: string) { + if (!model.atomicHierarchy.atoms.auth_atom_id.isDefined) return -1 + const residueIndex = model.atomicHierarchy.index.findResidue(entityId, asymId, seqId) + if (residueIndex < 0) return -1 + return findAtomIndexByLabelName(model, residueIndex, atomId, '') as ElementIndex +} + +export interface IHMCrossLinkRestraint { + getIndicesByElement: (element: ElementIndex, kind: Unit.Kind) => number[] + data: Table<mmCIF_Schema['ihm_cross_link_restraint']> +} + +export namespace IHMCrossLinkRestraint { + export const PropName = '__CrossLinkRestraint__'; + export function fromModel(model: Model): IHMCrossLinkRestraint | undefined { + if (model._staticPropertyData[PropName]) return model._staticPropertyData[PropName] + + if (model.sourceData.kind !== 'mmCIF') return + const { ihm_cross_link_restraint } = model.sourceData.data; + if (!ihm_cross_link_restraint._rowCount) return + + const p1 = { + entity_id: ihm_cross_link_restraint.entity_id_1, + asym_id: ihm_cross_link_restraint.asym_id_1, + seq_id: ihm_cross_link_restraint.seq_id_1, + atom_id: ihm_cross_link_restraint.atom_id_1, + } + + const p2: typeof p1 = { + entity_id: ihm_cross_link_restraint.entity_id_2, + asym_id: ihm_cross_link_restraint.asym_id_2, + seq_id: ihm_cross_link_restraint.seq_id_2, + atom_id: ihm_cross_link_restraint.atom_id_2, + } + + function _add(map: Map<ElementIndex, number[]>, element: ElementIndex, row: number) { + const indices = map.get(element) + if (indices) indices.push(row) + else map.set(element, [ row 
]) + } + + function add(row: number, ps: typeof p1) { + const entityId = ps.entity_id.value(row) + const asymId = ps.asym_id.value(row) + const seqId = ps.seq_id.value(row) + + if (ihm_cross_link_restraint.model_granularity.value(row) === 'by-atom') { + const atomicElement = findAtomIndex(model, entityId, asymId, seqId, ps.atom_id.value(row)) + if (atomicElement >= 0) _add(atomicElementMap, atomicElement as ElementIndex, row) + } else if (model.coarseHierarchy.isDefined) { + const sphereElement = model.coarseHierarchy.spheres.findSequenceKey(entityId, asymId, seqId) + if (sphereElement >= 0) { + _add(sphereElementMap, sphereElement, row) + } else { + const gaussianElement = model.coarseHierarchy.gaussians.findSequenceKey(entityId, asymId, seqId) + if (gaussianElement >= 0) _add(gaussianElementMap, gaussianElement, row) + } + } + } + + function getMapByKind(kind: Unit.Kind) { + switch (kind) { + case Unit.Kind.Atomic: return atomicElementMap; + case Unit.Kind.Spheres: return sphereElementMap; + case Unit.Kind.Gaussians: return gaussianElementMap; + } + } + + /** map from atomic element to cross link indices */ + const atomicElementMap: Map<ElementIndex, number[]> = new Map() + /** map from sphere element to cross link indices */ + const sphereElementMap: Map<ElementIndex, number[]> = new Map() + /** map from gaussian element to cross link indices */ + const gaussianElementMap: Map<ElementIndex, number[]> = new Map() + + const emptyIndexArray: number[] = []; + + for (let i = 0; i < ihm_cross_link_restraint._rowCount; ++i) { + add(i, p1) + add(i, p2) + } + + const crossLinkRestraint = { + getIndicesByElement: (element: ElementIndex, kind: Unit.Kind) => { + const map = getMapByKind(kind) + const idx = map.get(element) + return idx !== undefined ? 
idx : emptyIndexArray + }, + data: ihm_cross_link_restraint + } + model._staticPropertyData[PropName] = crossLinkRestraint + return crossLinkRestraint + } +} \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/pair-restraints/predicted-contacts.ts b/src/mol-model-parsers/structure/mmcif/pair-restraints/predicted-contacts.ts new file mode 100644 index 0000000000000000000000000000000000000000..d736eabdf2c073838585736ba735813d76143d75 --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/pair-restraints/predicted-contacts.ts @@ -0,0 +1,26 @@ +/** + * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +// TODO +// ihm_predicted_contact_restraint: { +// id: int, +// entity_id_1: str, +// entity_id_2: str, +// asym_id_1: str, +// asym_id_2: str, +// comp_id_1: str, +// comp_id_2: str, +// seq_id_1: int, +// seq_id_2: int, +// atom_id_1: str, +// atom_id_2: str, +// distance_upper_limit: float, +// probability: float, +// restraint_type: Aliased<'lower bound' | 'upper bound' | 'lower and upper bound'>(str), +// model_granularity: Aliased<'by-residue' | 'by-feature' | 'by-atom'>(str), +// dataset_list_id: int, +// software_id: int, +// }, diff --git a/src/mol-model-parsers/structure/mmcif/secondary-structure.ts b/src/mol-model-parsers/structure/mmcif/secondary-structure.ts new file mode 100644 index 0000000000000000000000000000000000000000..3c0def78910279781a359199a4a06a0e841dad20 --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/secondary-structure.ts @@ -0,0 +1,175 @@ + +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { mmCIF_Database as mmCIF, mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif' +import { SecondaryStructureType } from 'mol-model/structure/model/types'; +import { AtomicHierarchy } from 'mol-model/structure/model/properties/atomic'; +import { SecondaryStructure } from 'mol-model/structure/model/properties/seconday-structure'; +import { Column } from 'mol-data/db'; +import { ChainIndex, ResidueIndex } from 'mol-model/structure/model/indexing'; + +export function getSecondaryStructureMmCif(data: mmCIF_Database, hierarchy: AtomicHierarchy): SecondaryStructure { + const map: SecondaryStructureMap = new Map(); + const elements: SecondaryStructure.Element[] = [{ kind: 'none' }]; + addHelices(data.struct_conf, map, elements); + // must add Helices 1st because of 'key' value assignment. + addSheets(data.struct_sheet_range, map, data.struct_conf._rowCount, elements); + + const secStruct: SecondaryStructureData = { + type: new Int32Array(hierarchy.residues._rowCount) as any, + key: new Int32Array(hierarchy.residues._rowCount) as any, + elements + }; + + if (map.size > 0) assignSecondaryStructureRanges(hierarchy, map, secStruct); + return secStruct; +} + +type SecondaryStructureEntry = { + startSeqNumber: number, + startInsCode: string | null, + endSeqNumber: number, + endInsCode: string | null, + type: SecondaryStructureType, + key: number +} +type SecondaryStructureMap = Map<string, Map<number, SecondaryStructureEntry>> +type SecondaryStructureData = { type: SecondaryStructureType[], key: number[], elements: SecondaryStructure.Element[] } + +function addHelices(cat: mmCIF['struct_conf'], map: SecondaryStructureMap, elements: SecondaryStructure.Element[]) { + if (!cat._rowCount) return; + + const { beg_label_asym_id, beg_label_seq_id, pdbx_beg_PDB_ins_code } = cat; + const { end_label_seq_id, pdbx_end_PDB_ins_code } = cat; + const { pdbx_PDB_helix_class, conf_type_id, details } = cat; + + for (let i = 
0, _i = cat._rowCount; i < _i; i++) { + const type = SecondaryStructureType.create(pdbx_PDB_helix_class.valueKind(i) === Column.ValueKind.Present + ? SecondaryStructureType.SecondaryStructurePdb[pdbx_PDB_helix_class.value(i)] + : conf_type_id.valueKind(i) === Column.ValueKind.Present + ? SecondaryStructureType.SecondaryStructureMmcif[conf_type_id.value(i)] + : SecondaryStructureType.Flag.NA); + + const element: SecondaryStructure.Helix = { + kind: 'helix', + flags: type, + type_id: conf_type_id.valueKind(i) === Column.ValueKind.Present ? conf_type_id.value(i) : 'HELIX_P', + helix_class: pdbx_PDB_helix_class.value(i), + details: details.valueKind(i) === Column.ValueKind.Present ? details.value(i) : void 0 + }; + const entry: SecondaryStructureEntry = { + startSeqNumber: beg_label_seq_id.value(i), + startInsCode: pdbx_beg_PDB_ins_code.value(i), + endSeqNumber: end_label_seq_id.value(i), + endInsCode: pdbx_end_PDB_ins_code.value(i), + type, + key: elements.length + }; + + + elements[elements.length] = element; + + const asymId = beg_label_asym_id.value(i)!; + if (map.has(asymId)) { + map.get(asymId)!.set(entry.startSeqNumber, entry); + } else { + map.set(asymId, new Map([[entry.startSeqNumber, entry]])); + } + } +} + +function addSheets(cat: mmCIF['struct_sheet_range'], map: SecondaryStructureMap, sheetCount: number, elements: SecondaryStructure.Element[]) { + if (!cat._rowCount) return; + + const { beg_label_asym_id, beg_label_seq_id, pdbx_beg_PDB_ins_code } = cat; + const { end_label_seq_id, pdbx_end_PDB_ins_code } = cat; + const { sheet_id } = cat; + + const sheet_id_key = new Map<string, number>(); + let currentKey = sheetCount + 1; + + for (let i = 0, _i = cat._rowCount; i < _i; i++) { + const id = sheet_id.value(i); + let key: number; + if (sheet_id_key.has(id)) key = sheet_id_key.get(id)!; + else { + key = currentKey++; + sheet_id_key.set(id, key); + } + + const type = SecondaryStructureType.create(SecondaryStructureType.Flag.Beta | 
SecondaryStructureType.Flag.BetaSheet); + const element: SecondaryStructure.Sheet = { + kind: 'sheet', + flags: type, + sheet_id: id, + symmetry: void 0 + } + const entry: SecondaryStructureEntry = { + startSeqNumber: beg_label_seq_id.value(i), + startInsCode: pdbx_beg_PDB_ins_code.value(i), + endSeqNumber: end_label_seq_id.value(i), + endInsCode: pdbx_end_PDB_ins_code.value(i), + type, + key: elements.length + }; + + elements[elements.length] = element; + + + const asymId = beg_label_asym_id.value(i)!; + if (map.has(asymId)) { + map.get(asymId)!.set(entry.startSeqNumber, entry); + } else { + map.set(asymId, new Map([[entry.startSeqNumber, entry]])); + } + } + + return; +} + +function assignSecondaryStructureEntry(hierarchy: AtomicHierarchy, entry: SecondaryStructureEntry, resStart: ResidueIndex, resEnd: ResidueIndex, data: SecondaryStructureData) { + const { label_seq_id, pdbx_PDB_ins_code } = hierarchy.residues; + const { endSeqNumber, endInsCode, key, type } = entry; + + let rI = resStart; + while (rI < resEnd) { + const seqNumber = label_seq_id.value(rI); + data.type[rI] = type; + data.key[rI] = key; + + if ((seqNumber > endSeqNumber) || + (seqNumber === endSeqNumber && pdbx_PDB_ins_code.value(rI) === endInsCode)) { + break; + } + + rI++; + } +} + +function assignSecondaryStructureRanges(hierarchy: AtomicHierarchy, map: SecondaryStructureMap, data: SecondaryStructureData) { + const { count: chainCount } = hierarchy.chainAtomSegments; + const { label_asym_id } = hierarchy.chains; + const { label_seq_id, pdbx_PDB_ins_code } = hierarchy.residues; + + for (let cI = 0 as ChainIndex; cI < chainCount; cI++) { + const resStart = AtomicHierarchy.chainStartResidueIndex(hierarchy, cI), resEnd = AtomicHierarchy.chainEndResidueIndexExcl(hierarchy, cI); + const asymId = label_asym_id.value(cI); + if (map.has(asymId)) { + const entries = map.get(asymId)!; + + for (let rI = resStart; rI < resEnd; rI++) { + const seqNumber = label_seq_id.value(rI); + if (entries.has(seqNumber)) 
{ + const entry = entries.get(seqNumber)!; + const insCode = pdbx_PDB_ins_code.value(rI); + if (entry.startInsCode !== insCode) continue; + assignSecondaryStructureEntry(hierarchy, entry, rI, resEnd, data); + } + } + } + } +} \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/sequence.ts b/src/mol-model-parsers/structure/mmcif/sequence.ts new file mode 100644 index 0000000000000000000000000000000000000000..9915651b634980c5c522935caa45eabc3c702aee --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/sequence.ts @@ -0,0 +1,55 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { mmCIF_Database as mmCIF } from 'mol-io/reader/cif/schema/mmcif' +import StructureSequence from 'mol-model/structure/model/properties/sequence' +import { Column } from 'mol-data/db'; +import { AtomicHierarchy } from 'mol-model/structure/model/properties/atomic'; +import { Entities } from 'mol-model/structure/model/properties/common'; +import { Sequence } from 'mol-model/sequence'; + +// TODO how to handle microheterogeneity +// see http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html +// +// Data items in the ENTITY_POLY_SEQ category specify the sequence +// of monomers in a polymer. Allowance is made for the possibility +// of microheterogeneity in a sample by allowing a given sequence +// number to be correlated with more than one monomer ID. The +// corresponding ATOM_SITE entries should reflect this +// heterogeneity. 
+ +export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy, modResMap: ReadonlyMap<string, string>): StructureSequence { + if (!cif.entity_poly_seq._rowCount) return StructureSequence.fromAtomicHierarchy(entities, hierarchy, modResMap); + + const { entity_id, num, mon_id } = cif.entity_poly_seq; + + const byEntityKey: StructureSequence['byEntityKey'] = {}; + const sequences: StructureSequence.Entity[] = []; + const count = entity_id.rowCount; + + let i = 0; + while (i < count) { + const start = i; + while (i < count - 1 && entity_id.areValuesEqual(i, i + 1)) i++; + i++; + + const id = entity_id.value(start); + const _compId = Column.window(mon_id, start, i); + const _num = Column.window(num, start, i); + const entityKey = entities.getEntityIndex(id); + + byEntityKey[entityKey] = { + entityId: id, + compId: _compId, + num: _num, + sequence: Sequence.ofResidueNames(_compId, _num, modResMap) + }; + + sequences.push(byEntityKey[entityKey]); + } + + return { byEntityKey, sequences }; +} \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/sort.ts b/src/mol-model-parsers/structure/mmcif/sort.ts new file mode 100644 index 0000000000000000000000000000000000000000..7cbee9e5b102854bb2c80120fbc4fbfe2baa57bf --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/sort.ts @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif'; +import { createRangeArray, makeBuckets } from 'mol-data/util'; +import { Column, Table } from 'mol-data/db'; +import { RuntimeContext } from 'mol-task'; + +function isIdentity(xs: ArrayLike<number>) { + for (let i = 0, _i = xs.length; i < _i; i++) { + if (xs[i] !== i) return false; + } + return true; +} + +export async function sortAtomSite(ctx: RuntimeContext, atom_site: mmCIF_Database['atom_site'], start: number, end: number) { + const indices = createRangeArray(start, end - 1); + + const { label_entity_id, label_asym_id, label_seq_id } = atom_site; + const entityBuckets = makeBuckets(indices, label_entity_id.value); + if (ctx.shouldUpdate) await ctx.update(); + for (let ei = 0, _eI = entityBuckets.length - 1; ei < _eI; ei++) { + const chainBuckets = makeBuckets(indices, label_asym_id.value, { start: entityBuckets[ei], end: entityBuckets[ei + 1] }); + for (let cI = 0, _cI = chainBuckets.length - 1; cI < _cI; cI++) { + const aI = chainBuckets[cI]; + // are we in HETATM territory? 
+ if (label_seq_id.valueKind(aI) !== Column.ValueKind.Present) continue; + + makeBuckets(indices, label_seq_id.value, { sort: true, start: aI, end: chainBuckets[cI + 1] }); + if (ctx.shouldUpdate) await ctx.update(); + } + if (ctx.shouldUpdate) await ctx.update(); + } + + if (isIdentity(indices) && indices.length === atom_site._rowCount) { + return atom_site; + } + + return Table.view(atom_site, atom_site._schema, indices) as mmCIF_Database['atom_site']; +} \ No newline at end of file diff --git a/src/mol-model-parsers/structure/mmcif/util.ts b/src/mol-model-parsers/structure/mmcif/util.ts new file mode 100644 index 0000000000000000000000000000000000000000..1f398a11bee1cfd425447854ecc9e0bc36bfe71a --- /dev/null +++ b/src/mol-model-parsers/structure/mmcif/util.ts @@ -0,0 +1,26 @@ +/** + * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { Model } from 'mol-model/structure/model' +import { ElementIndex } from 'mol-model/structure/model/indexing'; + +export function findEntityIdByAsymId(model: Model, asymId: string) { + if (model.sourceData.kind !== 'mmCIF') return '' + const { struct_asym } = model.sourceData.data + for (let i = 0, n = struct_asym._rowCount; i < n; ++i) { + if (struct_asym.id.value(i) === asymId) return struct_asym.entity_id.value(i) + } + return '' +} + +export function findAtomIndexByLabelName(model: Model, residueIndex: number, atomName: string, altLoc: string | null): ElementIndex { + const { offsets } = model.atomicHierarchy.residueAtomSegments; + const { label_atom_id, label_alt_id } = model.atomicHierarchy.atoms; + for (let i = offsets[residueIndex], n = offsets[residueIndex + 1]; i < n; ++i) { + if (label_atom_id.value(i) === atomName && (!altLoc || label_alt_id.value(i) === altLoc)) return i as ElementIndex; + } + return -1 as ElementIndex; +} \ No newline at end of file diff --git 
a/src/mol-model/structure/model/formats/pdb.ts b/src/mol-model/structure/model/formats/pdb.ts deleted file mode 100644 index 6ff81ec997b08de3118495b0167ca80b8a327a47..0000000000000000000000000000000000000000 --- a/src/mol-model/structure/model/formats/pdb.ts +++ /dev/null @@ -1,269 +0,0 @@ -/** - * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info. - * - * @author David Sehnal <david.sehnal@gmail.com> - */ - -import Format from '../format'; -import { Model } from '../model'; -import { Task } from 'mol-task'; -import { PdbFile } from 'mol-io/reader/pdb/schema'; -import from_mmCIF from './mmcif'; -import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; -import { substringStartsWith } from 'mol-util/string'; -import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer'; -import { CifField, CifCategory } from 'mol-io/reader/cif'; -import CifTextField, { CifTextValueField } from 'mol-io/reader/cif/text/field'; - -function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory { - return { - name, - fieldNames: Object.keys(fields), - rowCount, - getField(f: string) { - return fields[f]; - } - } -} - -function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } { - return { - id: CifTextValueField(['1', '2', '3']), - type: CifTextValueField(['polymer', 'non-polymer', 'water']) - } -} - -function atom_site_template(data: string, count: number) { - const str = () => new Array(count) as string[]; - const ts = () => TokenBuilder.create(data, 2 * count); - return { - count, - group_PDB: ts(), - id: str(), - auth_atom_id: ts(), - label_alt_id: ts(), - auth_comp_id: ts(), - auth_asym_id: ts(), - auth_seq_id: ts(), - pdbx_PDB_ins_code: ts(), - Cartn_x: ts(), - Cartn_y: ts(), - Cartn_z: ts(), - occupancy: ts(), - B_iso_or_equiv: ts(), - type_symbol: ts(), - pdbx_PDB_model_num: str(), - label_entity_id: str() - }; -} - -function _atom_site(sites: 
AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } { - const auth_asym_id = CifTextField(sites.auth_asym_id, sites.count); - const auth_atom_id = CifTextField(sites.auth_atom_id, sites.count); - const auth_comp_id = CifTextField(sites.auth_comp_id, sites.count); - const auth_seq_id = CifTextField(sites.auth_seq_id, sites.count); - - return { - auth_asym_id, - auth_atom_id, - auth_comp_id, - auth_seq_id, - B_iso_or_equiv: CifTextField(sites.B_iso_or_equiv, sites.count), - Cartn_x: CifTextField(sites.Cartn_x, sites.count), - Cartn_y: CifTextField(sites.Cartn_y, sites.count), - Cartn_z: CifTextField(sites.Cartn_z, sites.count), - group_PDB: CifTextField(sites.group_PDB, sites.count), - id: CifTextValueField(sites.id), - - label_alt_id: CifTextField(sites.label_alt_id, sites.count), - - label_asym_id: auth_asym_id, - label_atom_id: auth_atom_id, - label_comp_id: auth_comp_id, - label_seq_id: auth_seq_id, - label_entity_id: CifTextValueField(sites.label_entity_id), - - occupancy: CifTextField(sites.occupancy, sites.count), - type_symbol: CifTextField(sites.type_symbol, sites.count), - - pdbx_PDB_ins_code: CifTextField(sites.pdbx_PDB_ins_code, sites.count), - pdbx_PDB_model_num: CifTextValueField(sites.pdbx_PDB_model_num) - }; -} - -function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number) { - const { data: str } = data; - let startPos = s; - let start = s; - const end = e; - const length = end - start; - - // TODO: filter invalid atoms - - // COLUMNS DATA TYPE CONTENTS - // -------------------------------------------------------------------------------- - // 1 - 6 Record name "ATOM " - Tokenizer.trim(data, start, start + 6); - TokenBuilder.add(sites.group_PDB, data.tokenStart, data.tokenEnd); - - // 7 - 11 Integer Atom serial number. 
- // TODO: support HEX - start = startPos + 6; - Tokenizer.trim(data, start, start + 5); - sites.id[sites.id.length] = data.data.substring(data.tokenStart, data.tokenEnd); - - // 13 - 16 Atom Atom name. - start = startPos + 12; - Tokenizer.trim(data, start, start + 4); - TokenBuilder.add(sites.auth_atom_id, data.tokenStart, data.tokenEnd); - - // 17 Character Alternate location indicator. - if (str.charCodeAt(startPos + 16) === 32) { // ' ' - TokenBuilder.add(sites.label_alt_id, 0, 0); - } else { - TokenBuilder.add(sites.label_alt_id, startPos + 16, startPos + 17); - } - - // 18 - 20 Residue name Residue name. - start = startPos + 17; - Tokenizer.trim(data, start, start + 3); - TokenBuilder.add(sites.auth_comp_id, data.tokenStart, data.tokenEnd); - - // 22 Character Chain identifier. - TokenBuilder.add(sites.auth_asym_id, startPos + 21, startPos + 22); - - // 23 - 26 Integer Residue sequence number. - // TODO: support HEX - start = startPos + 22; - Tokenizer.trim(data, start, start + 4); - TokenBuilder.add(sites.auth_seq_id, data.tokenStart, data.tokenEnd); - - // 27 AChar Code for insertion of residues. - if (str.charCodeAt(startPos + 26) === 32) { // ' ' - TokenBuilder.add(sites.label_alt_id, 0, 0); - } else { - TokenBuilder.add(sites.label_alt_id, startPos + 26, startPos + 27); - } - - // 31 - 38 Real(8.3) Orthogonal coordinates for X in Angstroms. - start = startPos + 30; - Tokenizer.trim(data, start, start + 8); - TokenBuilder.add(sites.Cartn_x, data.tokenStart, data.tokenEnd); - - // 39 - 46 Real(8.3) Orthogonal coordinates for Y in Angstroms. - start = startPos + 38; - Tokenizer.trim(data, start, start + 8); - TokenBuilder.add(sites.Cartn_y, data.tokenStart, data.tokenEnd); - - // 47 - 54 Real(8.3) Orthogonal coordinates for Z in Angstroms. - start = startPos + 46; - Tokenizer.trim(data, start, start + 8); - TokenBuilder.add(sites.Cartn_z, data.tokenStart, data.tokenEnd); - - // 55 - 60 Real(6.2) Occupancy. 
- start = startPos + 54; - Tokenizer.trim(data, start, start + 6); - TokenBuilder.add(sites.occupancy, data.tokenStart, data.tokenEnd); - - // 61 - 66 Real(6.2) Temperature factor (Default = 0.0). - if (length >= 66) { - start = startPos + 60; - Tokenizer.trim(data, start, start + 6); - TokenBuilder.add(sites.B_iso_or_equiv, data.tokenStart, data.tokenEnd); - } else { - TokenBuilder.add(sites.label_alt_id, 0, 0); - } - - // 73 - 76 LString(4) Segment identifier, left-justified. - // ignored - - // 77 - 78 LString(2) Element symbol, right-justified. - if (length >= 78) { - start = startPos + 76; - Tokenizer.trim(data, start, start + 2); - - if (data.tokenStart < data.tokenEnd) { - TokenBuilder.add(sites.type_symbol, data.tokenStart, data.tokenEnd); - } else { - // "guess" the symbol - TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13); - } - } else { - TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13); - } - - // TODO - sites.label_entity_id.push('1'); - sites.pdbx_PDB_model_num.push(model); - -} - -type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? 
T : never - -async function pdbToMmCIF(pdb: PdbFile): Promise<Format.mmCIF> { - const { lines } = pdb; - const { data, indices } = lines; - const tokenizer = Tokenizer(data); - - // Count the atoms - let atomCount = 0; - for (let i = 0, _i = lines.count; i < _i; i++) { - const s = indices[2 * i], e = indices[2 * i + 1]; - switch (data[s]) { - case 'A': - if (substringStartsWith(data, s, e, 'ATOM ')) atomCount++; - break; - case 'H': - if (!substringStartsWith(data, s, e, 'HETATM')) atomCount++; - break; - } - } - - const atom_site = atom_site_template(data, atomCount); - - let modelNum = 0, modelStr = ''; - - for (let i = 0, _i = lines.count; i < _i; i++) { - const s = indices[2 * i], e = indices[2 * i + 1]; - switch (data[s]) { - case 'A': - if (!substringStartsWith(data, s, e, 'ATOM ')) continue; - if (!modelNum) { modelNum++; modelStr = '' + modelNum; } - addAtom(atom_site, modelStr, tokenizer, s, e); - break; - case 'H': - if (!substringStartsWith(data, s, e, 'HETATM')) continue; - if (!modelNum) { modelNum++; modelStr = '' + modelNum; } - addAtom(atom_site, modelStr, tokenizer, s, e); - break; - case 'M': - if (substringStartsWith(data, s, e, 'MODEL ')) { - modelNum++; - modelStr = '' + modelNum; - } - break; - - } - } - - const categories = { - entity: toCategory('entity', _entity(), 3), - atom_site: toCategory('atom_site', _atom_site(atom_site), atomCount) - } - - return Format.mmCIF({ - header: pdb.id || 'PDB', - categoryNames: Object.keys(categories), - categories - }); -} - -function buildModels(format: Format.PDB): Task<ReadonlyArray<Model>> { - return Task.create('Create PDB Model', async ctx => { - await ctx.update('Converting to mmCIF...'); - const cif = await pdbToMmCIF(format.data); - return from_mmCIF(cif).runInContext(ctx); - }); -} - -export default buildModels; \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json index 70aa52c9b25f08ccd1f961c4cf42d4dc0138d4aa..a4dc45eb08df77d06720a54c0635828beabb0956 100644 --- a/tsconfig.json 
+++ b/tsconfig.json @@ -23,6 +23,7 @@ "mol-math": ["./mol-math"], "mol-model": ["./mol-model"], "mol-model-props": ["./mol-model-props", "./mol-model-props/index.ts"], + "mol-model-parsers": ["./mol-model-parsers"], "mol-ql": ["./mol-ql"], "mol-repr": ["./mol-repr"], "mol-script": ["./mol-script"],