diff --git a/data/mmcif-field-names.csv b/data/mmcif-field-names.csv index e7d8b64c4dbc8e512ab6775f8f846f71f7acc807..45602d31ed609b3422c9bff19906fd5ffe8f1ce8 100644 --- a/data/mmcif-field-names.csv +++ b/data/mmcif-field-names.csv @@ -106,6 +106,16 @@ pdbx_entity_branch_link.atom_stereo_config_2 pdbx_entity_branch_link.value_order pdbx_entity_branch_link.details +pdbx_branch_scheme.asym_id +pdbx_branch_scheme.entity_id +pdbx_branch_scheme.mon_id +pdbx_branch_scheme.num +pdbx_branch_scheme.auth_seq_num +pdbx_branch_scheme.auth_mon_id +pdbx_branch_scheme.auth_strand_id +pdbx_branch_scheme.auth_ins_code +pdbx_branch_scheme.hetero + pdbx_entity_descriptor.ordinal pdbx_entity_descriptor.entity_id pdbx_entity_descriptor.descriptor diff --git a/src/apps/schema-generator/schema-from-cif-dic.ts b/src/apps/schema-generator/schema-from-cif-dic.ts index 72dd804af122e1d8442f3f9f99002a2f2d6214b6..ecef972dd62070a65ff748f39688db29b95483c4 100644 --- a/src/apps/schema-generator/schema-from-cif-dic.ts +++ b/src/apps/schema-generator/schema-from-cif-dic.ts @@ -24,11 +24,17 @@ async function runGenerateSchema(name: string, fieldNamesPath?: string, typescri const ihmDic = await CIF.parseText(fs.readFileSync(IHM_DIC_PATH, 'utf8')).run(); if (ihmDic.isError) throw ihmDic + await ensureBranchDicAvailable() + const branchDic = await CIF.parseText(fs.readFileSync(BRANCH_DIC_PATH, 'utf8')).run(); + if (branchDic.isError) throw branchDic + const mmcifDicVersion = CIF.schema.dic(mmcifDic.result.blocks[0]).dictionary.version.value(0) const ihmDicVersion = CIF.schema.dic(ihmDic.result.blocks[0]).dictionary.version.value(0) - const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}.` + // const branchDicVersion = CIF.schema.dic(branchDic.result.blocks[0]).dictionary.version.value(0) + const branchDicVersion = 'draft' + const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, entity_branch ${branchDicVersion}.` - const frames: CifFrame[] = 
[...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames] + const frames: CifFrame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames, ...branchDic.result.blocks[0].saveFrames] const schema = generateSchema(frames) const filter = fieldNamesPath ? await getFieldNamesFilter(fieldNamesPath) : undefined @@ -70,15 +76,22 @@ async function ensureIhmDicAvailable() { await ensureDicAvailable(IHM_DIC_PATH, IHM_DIC_URL) } +async function ensureBranchDicAvailable() { + await ensureDicAvailable(BRANCH_DIC_PATH, BRANCH_DIC_URL) +} + +/** Fetches dicUrl into dicPath when the file is missing or FORCE_DIC_DOWNLOAD is set; throws on a non-OK HTTP response so an error page is never cached as the dictionary. */ async function ensureDicAvailable(dicPath: string, dicUrl: string) { if (FORCE_DIC_DOWNLOAD || !fs.existsSync(dicPath)) { - console.log('downloading mmcif dic...') + const name = dicUrl.slice(dicUrl.lastIndexOf('/') + 1) + console.log(`downloading ${name}...`) const data = await fetch(dicUrl) + if (!data.ok) throw new Error(`failed to download ${name}: HTTP ${data.status}`) if (!fs.existsSync(DIC_DIR)) { fs.mkdirSync(DIC_DIR); } fs.writeFileSync(dicPath, await data.text()) - console.log('done downloading mmcif dic') + console.log(`done downloading ${name}`) } } @@ -87,10 +100,12 @@ const MMCIF_DIC_PATH = `${DIC_DIR}/mmcif_pdbx_v50.dic` const MMCIF_DIC_URL = 'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic' const IHM_DIC_PATH = `${DIC_DIR}/ihm-extension.dic` const IHM_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/IHM-dictionary/master/ihm-extension.dic' +const BRANCH_DIC_PATH = `${DIC_DIR}/entity_branch-extension.dic` +const BRANCH_DIC_URL = 'https://raw.githubusercontent.com/wwpdb-dictionaries/mmcif_pdbx/master/extensions/entity_branch-extension.dic' const parser = new argparse.ArgumentParser({ addHelp: true, - description: 'Create schema from mmcif dictionary (v50, downloaded from wwPDB)' + description: 'Create schema from mmcif dictionary (v50 plus IHM and entity_branch extensions, downloaded from wwPDB)' }); parser.addArgument([ '--name', '-n' ], { defaultValue: 'mmCIF', @@ -123,5 +138,7 @@ const args: Args = parser.parseArgs(); const FORCE_DIC_DOWNLOAD = 
args.forceDicDownload if (args.name) { - runGenerateSchema(args.name, args.fieldNamesPath, args.typescript, args.out) + runGenerateSchema(args.name, args.fieldNamesPath, args.typescript, args.out).catch(e => { + console.error(e); process.exitCode = 1 /* report the failure to the shell instead of exiting 0 */ + }) } diff --git a/src/mol-io/reader/cif/schema/mmcif.ts b/src/mol-io/reader/cif/schema/mmcif.ts index c40dc41590eaaff7db46d245e6ba3c963f05baf0..b0799e894d3049013f18076b7fe913df6b2df5fd 100644 --- a/src/mol-io/reader/cif/schema/mmcif.ts +++ b/src/mol-io/reader/cif/schema/mmcif.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.299, IHM 0.134. + * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.299, IHM 0.134, entity_branch draft. * * @author mol-star package (src/apps/schema-generator/generate) */ @@ -3746,6 +3746,168 @@ export const mmCIF_Schema = { */ dataset_list_id: int, }, + /** + * Data items in the PDBX_ENTITY_BRANCH_LIST category specify the list + * of monomers in a branched entity. Allowance is made for the possibility + * of microheterogeneity in a sample by allowing a given sequence + * number to be correlated with more than one monomer ID. The + * corresponding ATOM_SITE entries should reflect this + * heterogeneity. + */ + pdbx_entity_branch_list: { + /** + * This data item is a pointer to _entity.id in the ENTITY category. + */ + entity_id: str, + /** + * A flag to indicate whether this monomer in the entity is + * heterogeneous in sequence. + */ + hetero: Aliased<'no' | 'n' | 'yes' | 'y'>(str), + /** + * This data item is a pointer to _chem_comp.id in the CHEM_COMP + * category. + */ + comp_id: str, + /** + * The value pair _pdbx_entity_branch_list.num and _pdbx_entity_branch_list.comp_id + * must uniquely identify a record in the PDBX_ENTITY_BRANCH_LIST list. 
+ */ + num: int, + }, + /** + * Data items in the PDBX_ENTITY_BRANCH_LINK category give details about + * the linkages between components within branched entities. + */ + pdbx_entity_branch_link: { + /** + * The value of _pdbx_entity_branch_link.link_id uniquely identifies + * linkages within the branched entity. + */ + link_id: int, + /** + * A description of special aspects of this linkage. + */ + details: str, + /** + * The entity id for this branched entity. + * + * This data item is a pointer to _pdbx_entity_branch_list.entity_id + * in the PDBX_ENTITY_BRANCH_LIST category. + */ + entity_id: str, + /** + * The component number for the first component making the linkage. + * + * This data item is a pointer to _pdbx_entity_branch_list.num + * in the PDBX_ENTITY_BRANCH_LIST category. + */ + entity_branch_list_num_1: int, + /** + * The component number for the second component making the linkage. + * + * This data item is a pointer to _pdbx_entity_branch_list.num + * in the PDBX_ENTITY_BRANCH_LIST category. + */ + entity_branch_list_num_2: int, + /** + * The component identifier for the first component making the linkage. + * + * This data item is a pointer to _pdbx_entity_branch_list.comp_id + * in the PDBX_ENTITY_BRANCH_LIST category. + */ + comp_id_1: str, + /** + * The component identifier for the second component making the linkage. + * + * This data item is a pointer to _pdbx_entity_branch_list.comp_id + * in the PDBX_ENTITY_BRANCH_LIST category. + */ + comp_id_2: str, + /** + * The atom identifier/name for the first atom making the linkage. + */ + atom_id_1: str, + /** + * The leaving atom identifier/name bonded to the first atom making the linkage. + */ + leaving_atom_id_1: str, + /** + * The chiral configuration of the first atom making the linkage. + */ + atom_stereo_config_1: Aliased<'R' | 'S' | 'N'>(str), + /** + * The atom identifier/name for the second atom making the linkage. 
+ */ + atom_id_2: str, + /** + * The leaving atom identifier/name bonded to the second atom making the linkage. + */ + leaving_atom_id_2: str, + /** + * The chiral configuration of the second atom making the linkage. + */ + atom_stereo_config_2: Aliased<'R' | 'S' | 'N'>(str), + /** + * The bond order target for the chemical linkage. + */ + value_order: Aliased<'sing' | 'doub' | 'trip' | 'quad' | 'arom' | 'poly' | 'delo' | 'pi'>(str), + }, + /** + * Data items in the PDBX_ENTITY_BRANCH category specify the list + * of branched entities and the type. + */ + pdbx_entity_branch: { + /** + * The entity id for this branched entity. + * + * This data item is a pointer to _entity.id + */ + entity_id: str, + /** + * The type of this branched oligosaccharide. + */ + type: Aliased<'oligosaccharide'>(str), + }, + /** + * The PDBX_BRANCH_SCHEME category provides residue level nomenclature + * mapping for branch chain entitie. + */ + pdbx_branch_scheme: { + /** + * This data item is a pointer to _entity.id in the ENTITY category. + */ + entity_id: str, + /** + * A flag to indicate whether this monomer in the entity is + * heterogeneous in sequence. + */ + hetero: Aliased<'no' | 'n' | 'yes' | 'y'>(str), + /** + * Pointer to _atom_site.label_asym_id. + */ + asym_id: str, + /** + * This data item is a pointer to _atom_site.label_comp_id in the + * PDBX_ENTITY_BRANCH_LIST category. + */ + mon_id: str, + /** + * This data item is a pointer to _pdbx_entity_branch_list.num in the + * PDBX_ENTITY_BRANCH_LIST category. + */ + num: int, + /** + * This data item is a pointer to _atom_site.pdbx_auth_seq_id in the + * ATOM_SITE category. + */ + auth_seq_num: str, + /** + * This data item is a pointer to _atom_site.pdbx_auth_comp_id in the + * ATOM_SITE category. 
+ */ + auth_mon_id: str, + }, } export type mmCIF_Schema = typeof mmCIF_Schema; diff --git a/src/mol-model/structure/export/mmcif.ts b/src/mol-model/structure/export/mmcif.ts index dd9c71b1f3b64573eacbb03d828bc96f812b62b9..2bfa34c5609985e4f10e006109f0baffbeb80905 100644 --- a/src/mol-model/structure/export/mmcif.ts +++ b/src/mol-model/structure/export/mmcif.ts @@ -70,9 +70,15 @@ const Categories = [ copy_mmCif_category('entity_poly'), copy_mmCif_category('entity_poly_seq'), + // Branch + copy_mmCif_category('pdbx_entity_branch'), + copy_mmCif_category('pdbx_entity_branch_link'), + copy_mmCif_category('pdbx_branch_scheme'), + // Misc // TODO: filter for actual present residues? copy_mmCif_category('chem_comp'), + copy_mmCif_category('pdbx_chem_comp_identifier'), copy_mmCif_category('atom_sites'), _pdbx_struct_mod_residue,