Newer
Older
/**
 * Copyright (c) 2017-2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */
import { Column, Table } from '../../../mol-data/db';
import { mmCIF_Database, mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
import { Spacegroup, SpacegroupCell, SymmetryOperator } from '../../../mol-math/geometry';
import { Tensor, Vec3, Mat3 } from '../../../mol-math/linear-algebra';
import { RuntimeContext } from '../../../mol-task';
import UUID from '../../../mol-util/uuid';
import { Model } from '../../../mol-model/structure/model/model';
Alexander Rose
committed
import { Entities, ChemicalComponent, MissingResidue, EntitySubtype } from '../../../mol-model/structure/model/properties/common';
import { CustomProperties } from '../../../mol-model/structure';
import { ModelSymmetry } from '../../../mol-model/structure/model/properties/symmetry';
import { createAssemblies } from './assembly';
import { getAtomicHierarchyAndConformation } from './atomic';
import { ComponentBond } from './bonds';
import { getIHMCoarse, EmptyIHMCoarse, IHMData } from './ihm';
import { getSecondaryStructure } from './secondary-structure';
import { getSequence } from './sequence';
import { sortAtomSite } from './sort';
import { StructConn } from './bonds/struct_conn';
Alexander Rose
committed
import { getMoleculeType, MoleculeType, getEntityType, getEntitySubtype } from '../../../mol-model/structure/model/types';
import { SaccharideComponentMap, SaccharideComponent, SaccharidesSnfgMap, SaccharideCompIdMap, UnknownSaccharideComponent } from '../../../mol-model/structure/structure/carbohydrates/constants';
import { memoize1 } from '../../../mol-util/memoize';
Alexander Rose
committed
import { ElementIndex, EntityIndex } from '../../../mol-model/structure/model';
import { AtomSiteAnisotrop } from './anisotropic';
/**
 * Entry point for turning parsed mmCIF data into models.
 *
 * The per-file format data is computed once up front. When the
 * `ihm_model_list` category has at least one row the file is handed to
 * `readIHM`, otherwise to `readStandard`.
 *
 * @param format the mmCIF format wrapper to read from
 * @param ctx runtime context forwarded to the reader
 * @returns whatever the selected reader resolves to
 */
export async function _parse_mmCif(format: mmCIF_Format, ctx: RuntimeContext) {
    const formatData = getFormatData(format);

    if (format.data.ihm_model_list._rowCount > 0) {
        return await readIHM(ctx, format, formatData);
    }
    return await readStandard(ctx, format, formatData);
}
/** Shorthand for the type of the `atom_site` category in `mmCIF_Database`. */
type AtomSite = mmCIF_Database['atom_site']
/**
 * Collects the symmetry information of a model: assemblies, spacegroup,
 * the non-standard crystal frame flag and the NCS operators.
 *
 * @param format the mmCIF format wrapper to read from
 * @returns the assembled `ModelSymmetry` record
 */
function getSymmetry(format: mmCIF_Format): ModelSymmetry {
    const assemblies = createAssemblies(format);
    const spacegroup = getSpacegroup(format);

    const symmetry: ModelSymmetry = {
        assemblies,
        spacegroup,
        // the flag is derived from the spacegroup computed above
        isNonStandardCrytalFrame: checkNonStandardCrystalFrame(format, spacegroup),
        ncsOperators: getNcsOperators(format)
    };
    return symmetry;
}
/**
 * Reports whether the crystal frame is non-standard.
 *
 * Currently this always yields `false`: the `atom_sites` category is
 * inspected for rows, but the actual transform comparison is not
 * implemented yet (see the TODO below).
 *
 * @param format the mmCIF format wrapper; `data.atom_sites` is read
 * @param spacegroup the spacegroup of the model (unused for now)
 * @returns `false`
 */
function checkNonStandardCrystalFrame(format: mmCIF_Format, spacegroup: Spacegroup) {
    const hasAtomSites = format.data.atom_sites._rowCount !== 0;
    if (!hasAtomSites) return false;

    // TODO: parse atom_sites transform and check if it corresponds to the toFractional matrix
    return false;
}
function getSpacegroup(format: mmCIF_Format): Spacegroup {
const { symmetry, cell } = format.data;
if (symmetry._rowCount === 0 || cell._rowCount === 0) return Spacegroup.ZeroP1;
const groupName = symmetry['space_group_name_H-M'].value(0);
const spaceCell = SpacegroupCell.create(groupName,
Vec3.create(cell.length_a.value(0), cell.length_b.value(0), cell.length_c.value(0)),
Vec3.scale(Vec3.zero(), Vec3.create(cell.angle_alpha.value(0), cell.angle_beta.value(0), cell.angle_gamma.value(0)), Math.PI / 180));
return Spacegroup.create(spaceCell);
function getNcsOperators(format: mmCIF_Format) {
const { struct_ncs_oper } = format.data;
if (struct_ncs_oper._rowCount === 0) return void 0;
const { id, matrix, vector } = struct_ncs_oper;
const matrixSpace = mmCIF_Schema.struct_ncs_oper.matrix.space, vectorSpace = mmCIF_Schema.struct_ncs_oper.vector.space;
const opers: SymmetryOperator[] = [];
for (let i = 0; i < struct_ncs_oper._rowCount; i++) {
const m = Tensor.toMat3(Mat3(), matrixSpace, matrix.value(i));
const v = Tensor.toVec3(Vec3(), vectorSpace, vector.value(i));
if (!SymmetryOperator.checkIfRotationAndTranslation(m, v)) continue;
// ignore non-identity 'given' NCS operators
if (struct_ncs_oper.code.value(i) === 'given' && !Mat3.isIdentity(m) && !Vec3.isZero(v)) continue;
const ncsId = id.value(i)
opers[opers.length] = SymmetryOperator.ofRotationAndOffset(`ncs_${ncsId}`, m, v, ncsId);
function getModifiedResidueNameMap(format: mmCIF_Format): Model['properties']['modifiedResidues'] {
const data = format.data.pdbx_struct_mod_residue;
const parentId = new Map<string, string>();
const details = new Map<string, string>();
const comp_id = data.label_comp_id.isDefined ? data.label_comp_id : data.auth_comp_id;
const parent_id = data.parent_comp_id, details_data = data.details;
for (let i = 0; i < data._rowCount; i++) {
const id = comp_id.value(i);
parentId.set(id, parent_id.value(i));
details.set(id, details_data.value(i));
return { parentId, details };
/**
 * Builds a lookup over the rows of the `pdbx_unobs_or_zero_occ_residues`
 * category.
 *
 * Each row is stored under a key composed of its `PDB_model_num`,
 * `label_asym_id` and `label_seq_id`; the stored value carries the row's
 * `polymer_flag` and `occupancy_flag`. A later row with the same key
 * replaces an earlier one.
 *
 * @param format the mmCIF format wrapper to read from
 * @returns an object with `has` / `get` lookups by (model_num, asym_id, seq_id)
 *          and `size`, the number of distinct keys at construction time
 */
function getMissingResidues(format: mmCIF_Format): Model['properties']['missingResidues'] {
    const residues = format.data.pdbx_unobs_or_zero_occ_residues;
    const byKey = new Map<string, MissingResidue>();

    // single place that defines how the three identifiers are combined
    function keyOf(model_num: number, asym_id: string, seq_id: number) {
        return `${model_num}|${asym_id}|${seq_id}`;
    }

    const rowCount = residues._rowCount;
    let row = 0;
    while (row < rowCount) {
        const key = keyOf(
            residues.PDB_model_num.value(row),
            residues.label_asym_id.value(row),
            residues.label_seq_id.value(row)
        );
        byKey.set(key, {
            polymer_flag: residues.polymer_flag.value(row),
            occupancy_flag: residues.occupancy_flag.value(row)
        });
        ++row;
    }

    return {
        has: (model_num: number, asym_id: string, seq_id: number) => byKey.has(keyOf(model_num, asym_id, seq_id)),
        get: (model_num: number, asym_id: string, seq_id: number) => byKey.get(keyOf(model_num, asym_id, seq_id)),
        size: byKey.size
    };
}
function getChemicalComponentMap(format: mmCIF_Format): Model['properties']['chemicalComponentMap'] {
const map = new Map<string, ChemicalComponent>();
const { id } = chem_comp
for (let i = 0, il = id.rowCount; i < il; ++i) {
map.set(id.value(i), Table.getRow(chem_comp, i))
Alexander Rose
committed
function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap {
const map = new Map<string, SaccharideComponent>();
const { pdbx_chem_comp_identifier } = format.data
if (pdbx_chem_comp_identifier._rowCount > 0) {
const { comp_id, type, identifier } = pdbx_chem_comp_identifier
Alexander Rose
committed
for (let i = 0, il = pdbx_chem_comp_identifier._rowCount; i < il; ++i) {
if (type.value(i) === 'SNFG CARB SYMBOL') {
const snfgName = identifier.value(i)
const saccharideComp = SaccharidesSnfgMap.get(snfgName)
if (saccharideComp) {
map.set(comp_id.value(i), saccharideComp)
} else {
console.warn(`Unknown SNFG name '${snfgName}'`)
}
}
}
} else if (format.data.chem_comp._rowCount > 0) {
const { id, type } = format.data.chem_comp
for (let i = 0, il = id.rowCount; i < il; ++i) {
const _id = id.value(i)
const _type = type.value(i)
if (SaccharideCompIdMap.has(_id)) {
map.set(_id, SaccharideCompIdMap.get(_id)!)
} else if (!map.has(_id) && getMoleculeType(_type, _id) === MoleculeType.saccharide) {
map.set(_id, UnknownSaccharideComponent)
}
}
const uniqueNames = getUniqueComponentNames(format)
SaccharideCompIdMap.forEach((v, k) => {
if (uniqueNames.has(k)) map.set(k, v)
})
Alexander Rose
committed
}
Alexander Rose
committed
}
/**
 * Collects the distinct component ids found in `atom_site`, wrapped in
 * `memoize1`.
 *
 * `label_comp_id` is used when that column is defined, `auth_comp_id`
 * otherwise.
 */
const getUniqueComponentNames = memoize1((format: mmCIF_Format) => {
    const { atom_site } = format.data
    const names = atom_site.label_comp_id.isDefined ? atom_site.label_comp_id : atom_site.auth_comp_id;

    const seen = new Set<string>()
    const rowCount = names.rowCount
    let row = 0
    while (row < rowCount) {
        seen.add(names.value(row))
        ++row
    }
    return seen
})
export interface FormatData {
modifiedResidues: Model['properties']['modifiedResidues']
missingResidues: Model['properties']['missingResidues']
chemicalComponentMap: Model['properties']['chemicalComponentMap']
Alexander Rose
committed
saccharideComponentMap: Model['properties']['saccharideComponentMap']
}
function getFormatData(format: mmCIF_Format): FormatData {
return {
modifiedResidues: getModifiedResidueNameMap(format),
missingResidues: getMissingResidues(format),
Alexander Rose
committed
chemicalComponentMap: getChemicalComponentMap(format),
saccharideComponentMap: getSaccharideComponentMap(format)
function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, formatData: FormatData, previous?: Model): Model {
const atomic = getAtomicHierarchyAndConformation(atom_site, sourceIndex, entities, formatData, previous);
const modelNum = atom_site.pdbx_PDB_model_num.value(0)
if (previous && atomic.sameAsPrevious) {
atomicConformation: atomic.conformation,
_dynamicPropertyData: Object.create(null)
const coarse = EmptyIHMCoarse;
const entry = format.data.entry.id.valueKind(0) === Column.ValueKind.Present
? format.data.entry.id.value(0)
: format.data._name;
const label: string[] = []
if (entry) label.push(entry)
if (format.data.struct.title.valueKind(0) === Column.ValueKind.Present) label.push(format.data.struct.title.value(0))
David Sehnal
committed
entryId: entry,
entities,
sequence: getSequence(format.data, entities, atomic.hierarchy, coarse.hierarchy, formatData.modifiedResidues.parentId),
atomicHierarchy: atomic.hierarchy,
atomicConformation: atomic.conformation,
coarseHierarchy: coarse.hierarchy,
coarseConformation: coarse.conformation,
secondaryStructure: getSecondaryStructure(format.data, atomic.hierarchy),
},
customProperties: new CustomProperties(),
_staticPropertyData: Object.create(null),
_dynamicPropertyData: Object.create(null)
};
}
function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model {
const atomic = getAtomicHierarchyAndConformation(data.atom_site, data.atom_site_sourceIndex, data.entities, formatData);
const coarse = getIHMCoarse(data, formatData);
const entry = format.data.entry.id.valueKind(0) === Column.ValueKind.Present
? format.data.entry.id.value(0)
: format.data._name;
const label: string[] = []
if (entry) label.push(entry)
if (format.data.struct.title.valueKind(0) === Column.ValueKind.Present) label.push(format.data.struct.title.value(0))
if (data.model_group_name) label.push(data.model_name)
if (data.model_group_name) label.push(data.model_group_name)
David Sehnal
committed
entryId: entry,
sourceData: format,
modelNum: data.model_id,
entities: data.entities,
symmetry: getSymmetry(format),
sequence: getSequence(format.data, data.entities, atomic.hierarchy, coarse.hierarchy, formatData.modifiedResidues.parentId),
atomicHierarchy: atomic.hierarchy,
atomicConformation: atomic.conformation,
coarseHierarchy: coarse.hierarchy,
coarseConformation: coarse.conformation,
properties: {
secondaryStructure: getSecondaryStructure(format.data, atomic.hierarchy),
customProperties: new CustomProperties(),
_staticPropertyData: Object.create(null),
_dynamicPropertyData: Object.create(null)
function attachProps(model: Model) {
ComponentBond.attachFromMmCif(model);
StructConn.attachFromMmCif(model);
AtomSiteAnisotrop.attachFromMmCif(model);
/**
 * Finds the exclusive end index of the run of rows that starts at
 * `startIndex` and shares its value in `num`.
 *
 * When the column is not defined the whole column counts as one run and
 * `rowCount` is returned. Otherwise the result is always at least
 * `startIndex + 1`.
 *
 * @param num column whose consecutive equal values delimit a run
 * @param startIndex first row of the run
 * @returns index of the first row after the run
 */
function findModelEnd(num: Column<number>, startIndex: number) {
    const total = num.rowCount;
    if (!num.isDefined) return total;

    let cursor = startIndex + 1;
    for (; cursor < total; cursor++) {
        // stop at the first row whose value differs from the start row
        if (!num.areValuesEqual(startIndex, cursor)) break;
    }
    return cursor;
}
function getEntities(format: mmCIF_Format): Entities {
let entityData: Table<mmCIF_Schema['entity']>
if (!format.data.entity.id.isDefined) {
const entityIds = new Set<string>()
const ids: mmCIF_Schema['entity']['id']['T'][] = []
const types: mmCIF_Schema['entity']['type']['T'][] = []
const { label_entity_id, label_comp_id } = format.data.atom_site;
for (let i = 0 as ElementIndex, il = format.data.atom_site._rowCount; i < il; i++) {
const entityId = label_entity_id.value(i);
if (!entityIds.has(entityId)) {
ids.push(entityId)
types.push(getEntityType(label_comp_id.value(i)))
Alexander Rose
committed
entityIds.add(entityId)
}
}
const { entity_id: sphere_entity_id } = format.data.ihm_sphere_obj_site;
for (let i = 0 as ElementIndex, il = format.data.ihm_sphere_obj_site._rowCount; i < il; i++) {
const entityId = sphere_entity_id.value(i);
if (!entityIds.has(entityId)) {
Alexander Rose
committed
entityIds.add(entityId)
}
}
const { entity_id: gaussian_entity_id } = format.data.ihm_gaussian_obj_site;
for (let i = 0 as ElementIndex, il = format.data.ihm_gaussian_obj_site._rowCount; i < il; i++) {
const entityId = gaussian_entity_id.value(i);
if (!entityIds.has(entityId)) {
Alexander Rose
committed
entityIds.add(entityId)
entityData = Table.ofColumns(mmCIF_Schema.entity, {
...format.data.entity,
id: Column.ofArray({ array: ids, schema: mmCIF_Schema.entity.id }),
type: Column.ofArray({ array: types, schema: mmCIF_Schema.entity.type }),
})
} else {
entityData = format.data.entity;
}
Alexander Rose
committed
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
const getEntityIndex = Column.createIndexer<string, EntityIndex>(entityData.id)
//
const subtypes: EntitySubtype[] = new Array(entityData._rowCount)
subtypes.fill('other')
const entityIds = new Set<string>()
let assignSubtype = false
if (format.data.entity_poly.entity_id.isDefined) {
const { entity_id, type, _rowCount } = format.data.entity_poly
for (let i = 0; i < _rowCount; ++i) {
const entityId = entity_id.value(i)
subtypes[getEntityIndex(entityId)] = type.value(i)
entityIds.add(entityId)
}
} else {
assignSubtype = true
}
if (format.data.pdbx_entity_branch.entity_id.isDefined) {
const { entity_id, type, _rowCount } = format.data.pdbx_entity_branch
for (let i = 0; i < _rowCount; ++i) {
const entityId = entity_id.value(i)
subtypes[getEntityIndex(entityId)] = type.value(i)
entityIds.add(entityId)
}
} else {
assignSubtype = true
}
if (assignSubtype) {
const { label_entity_id, label_comp_id } = format.data.atom_site;
for (let i = 0 as ElementIndex, il = format.data.atom_site._rowCount; i < il; i++) {
const entityId = label_entity_id.value(i);
if (!entityIds.has(entityId)) {
subtypes[getEntityIndex(entityId)] = getEntitySubtype(label_comp_id.value(i))
entityIds.add(entityId)
}
}
// TODO how to handle coarse?
}
const subtypeColumn = Column.ofArray({ array: subtypes, schema: EntitySubtype })
//
return { data: entityData, subtype: subtypeColumn, getEntityIndex };
/**
 * Reads all models of a non-IHM mmCIF file.
 *
 * `atom_site` is walked in consecutive runs of equal `pdbx_PDB_model_num`
 * (as delimited by `findModelEnd`). Each run is sorted, turned into a
 * model via `createStandardModel` (which also receives the previously
 * created model, if any) and then passed to `attachProps`.
 *
 * @param ctx runtime context forwarded to `sortAtomSite`
 * @param format the mmCIF format wrapper to read from
 * @param formatData precomputed per-file data shared by all models
 * @returns the models in the order their runs appear in `atom_site`
 */
async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
    const totalAtoms = format.data.atom_site._rowCount;
    const entities = getEntities(format);
    const models: Model[] = [];

    for (let start = 0; start < totalAtoms;) {
        const end = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, start);
        const sorted = await sortAtomSite(ctx, format.data.atom_site, start, end);
        // the most recently built model, if there is one
        const previous = models.length > 0 ? models[models.length - 1] : void 0;
        const model = createStandardModel(format, sorted.atom_site, sorted.sourceIndex, entities, formatData, previous);
        attachProps(model);
        models.push(model);
        start = end;
    }

    return models;
}
/**
 * Partitions `table` into consecutive row windows, one per run of equal
 * values in `col` (runs are delimited by `findModelEnd`).
 *
 * The result maps the `col` value at the first row of each run to the
 * windowed table and its `[start, end)` row range. Because entries are
 * stored with `Map.set`, a value that occurs in more than one run keeps
 * only the last run.
 *
 * @param table table to split
 * @param col column whose runs define the windows
 * @returns map from run value to its window and row range
 */
function splitTable<T extends Table<any>>(table: T, col: Column<number>) {
    const segments = new Map<number, { table: T, start: number, end: number }>()
    const total = table._rowCount;

    for (let start = 0, end = 0; start < total; start = end) {
        end = findModelEnd(col, start);
        segments.set(col.value(start), {
            table: Table.window(table, table._schema, start, end) as T,
            start,
            end
        });
    }

    return segments;
}
/**
 * Looks up the name of the model group a model belongs to.
 *
 * The first `ihm_model_group_link` row whose `model_id` matches is used to
 * find the first `ihm_model_group` row with the linked `group_id`.
 *
 * @param model_id id of the model to look up
 * @param format the mmCIF format wrapper to read from
 * @returns the group's `name`, or `''` when no link or no group is found
 */
function getModelGroupName(model_id: number, format: mmCIF_Format) {
    const { ihm_model_group, ihm_model_group_link } = format.data;

    const link = Table.pickRow(ihm_model_group_link, row => ihm_model_group_link.model_id.value(row) === model_id)
    if (!link) return ''

    const group = Table.pickRow(ihm_model_group, row => ihm_model_group.id.value(row) === link.group_id)
    if (!group) return ''

    return group.name
}
async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: FormatData) {
// when `atom_site.ihm_model_id` is undefined fall back to `atom_site.pdbx_PDB_model_num`
const atom_sites_modelColumn = format.data.atom_site.ihm_model_id.isDefined ? format.data.atom_site.ihm_model_id : format.data.atom_site.pdbx_PDB_model_num
const { ihm_model_list } = format.data;
const entities = getEntities(format)
const atom_sites = splitTable(format.data.atom_site, atom_sites_modelColumn);
// TODO: will coarse IHM records require sorting or will we trust it?
// ==> Probably implement a sort as well and store the sourceIndex same as with atomSite
// If the sorting is implemented, update mol-model/structure/properties: atom.sourceIndex
const sphere_sites = splitTable(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site.model_id);
const gauss_sites = splitTable(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site.model_id);
const models: Model[] = [];
for (let i = 0; i < ihm_model_list._rowCount; i++) {
const id = model_id.value(i);
let atom_site, atom_site_sourceIndex;
if (atom_sites.has(id)) {
const e = atom_sites.get(id)!;
// need to sort `format.data.atom_site` as `e.start` and `e.end` are indices into that
const { atom_site: sorted, sourceIndex } = await sortAtomSite(ctx, format.data.atom_site, e.start, e.end);
atom_site = sorted;
atom_site_sourceIndex = sourceIndex;
} else {
atom_site = Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0);
atom_site_sourceIndex = Column.ofIntArray([]);
}
const data: IHMData = {
model_id: id,
model_name: model_name.value(i),
entities: entities,
atom_site,
atom_site_sourceIndex,
ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)!.table : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0),
ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)!.table : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0)
const model = createModelIHM(format, data, formatData);
attachProps(model);
}
return models;