wip, carbohydrates and branched entity

af192a3b · Alexander Rose · ddbda278 · af192a3b · af192a3b · af192a3b
Commit af192a3b authored 6 years ago by Alexander Rose
--- a/data/mmcif-field-names.csv
+++ b/data/mmcif-field-names.csv
@@ -106,6 +106,16 @@ pdbx_entity_branch_link.atom_stereo_config_2
 pdbx_entity_branch_link.value_order
 pdbx_entity_branch_link.details
+pdbx_branch_scheme.asym_id
+pdbx_branch_scheme.entity_id
+pdbx_branch_scheme.mon_id
+pdbx_branch_scheme.num
+pdbx_branch_scheme.auth_seq_num
+pdbx_branch_scheme.auth_mon_id
+pdbx_branch_scheme.auth_strand_id
+pdbx_branch_scheme.auth_ins_code
+pdbx_branch_scheme.hetero
 pdbx_entity_descriptor.ordinal
 pdbx_entity_descriptor.entity_id
 pdbx_entity_descriptor.descriptor

--- a/src/apps/schema-generator/schema-from-cif-dic.ts
+++ b/src/apps/schema-generator/schema-from-cif-dic.ts
@@ -24,11 +24,17 @@ async function runGenerateSchema(name: string, fieldNamesPath?: string, typescri
    const ihmDic = await CIF.parseText(fs.readFileSync(IHM_DIC_PATH, 'utf8')).run();
    if (ihmDic.isError) throw ihmDic
+    await ensureBranchDicAvailable()
+    const branchDic = await CIF.parseText(fs.readFileSync(BRANCH_DIC_PATH, 'utf8')).run();
+    if (branchDic.isError) throw branchDic
    const mmcifDicVersion = CIF.schema.dic(mmcifDic.result.blocks[0]).dictionary.version.value(0)
    const ihmDicVersion = CIF.schema.dic(ihmDic.result.blocks[0]).dictionary.version.value(0)
-    const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}.`
+    // const branchDicVersion = CIF.schema.dic(branchDic.result.blocks[0]).dictionary.version.value(0)
+    const branchDicVersion = 'draft'
+    const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, entity_branch ${branchDicVersion}.`
-    const frames: CifFrame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames]
+    const frames: CifFrame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames, ...branchDic.result.blocks[0].saveFrames]
    const schema = generateSchema(frames)
    const filter = fieldNamesPath ? await getFieldNamesFilter(fieldNamesPath) : undefined
@@ -70,15 +76,20 @@ async function ensureIhmDicAvailable() {
    await ensureDicAvailable(IHM_DIC_PATH, IHM_DIC_URL)
 }
+async function ensureBranchDicAvailable() {
+    await ensureDicAvailable(BRANCH_DIC_PATH, BRANCH_DIC_URL)
+}
 async function ensureDicAvailable(dicPath: string, dicUrl: string) {
    if (FORCE_DIC_DOWNLOAD || !fs.existsSync(dicPath)) {
-        console.log('downloading mmcif dic...')
+        const name = dicUrl.substr(dicUrl.lastIndexOf('/') + 1)
+        console.log(`downloading ${name}...`)
        const data = await fetch(dicUrl)
        if (!fs.existsSync(DIC_DIR)) {
            fs.mkdirSync(DIC_DIR);
        }
        fs.writeFileSync(dicPath, await data.text())
-        console.log('done downloading mmcif dic')
+        console.log(`done downloading ${name}`)
    }
 }
@@ -87,10 +98,12 @@ const MMCIF_DIC_PATH = `${DIC_DIR}/mmcif_pdbx_v50.dic`
 const MMCIF_DIC_URL = 'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic'
 const IHM_DIC_PATH = `${DIC_DIR}/ihm-extension.dic`
 const IHM_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/IHM-dictionary/master/ihm-extension.dic'
+const BRANCH_DIC_PATH = `${DIC_DIR}/entity_branch-extension.dic`
+const BRANCH_DIC_URL = 'https://raw.githubusercontent.com/wwpdb-dictionaries/mmcif_pdbx/master/extensions/entity_branch-extension.dic'
 const parser = new argparse.ArgumentParser({
  addHelp: true,
-  description: 'Create schema from mmcif dictionary (v50, downloaded from wwPDB)'
+  description: 'Create schema from mmcif dictionary (v50 plus IHM and entity_branch extensions, downloaded from wwPDB)'
 });
 parser.addArgument([ '--name', '-n' ], {
    defaultValue: 'mmCIF',
@@ -123,5 +136,7 @@ const args: Args = parser.parseArgs();
 const FORCE_DIC_DOWNLOAD = args.forceDicDownload
 if (args.name) {
-    runGenerateSchema(args.name, args.fieldNamesPath, args.typescript, args.out)
+    runGenerateSchema(args.name, args.fieldNamesPath, args.typescript, args.out).catch(e => {
+        console.error(e)
+    })
 }
--- a/src/mol-io/reader/cif/schema/mmcif.ts
+++ b/src/mol-io/reader/cif/schema/mmcif.ts
 /**
 * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
- * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.299, IHM 0.134.
+ * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.299, IHM 0.134, entity_branch draft.
 *
 * @author mol-star package (src/apps/schema-generator/generate)
 */
@@ -3746,6 +3746,168 @@ export const mmCIF_Schema = {
         */
        dataset_list_id: int,
    },
+    /**
+     * Data items in the PDBX_ENTITY_BRANCH_LIST category specify the list
+     * of monomers in a branched entity.  Allowance is made for the possibility
+     * of microheterogeneity in a sample by allowing a given sequence
+     * number to be correlated with more than one monomer ID. The
+     * corresponding ATOM_SITE entries should reflect this
+     * heterogeneity.
+     */
+    pdbx_entity_branch_list: {
+        /**
+         * This data item is a pointer to _entity.id in the ENTITY category.
+         */
+        entity_id: str,
+        /**
+         * A flag to indicate whether this monomer in the entity is
+         * heterogeneous in sequence.
+         */
+        hetero: Aliased<'no' | 'n' | 'yes' | 'y'>(str),
+        /**
+         * This data item is a pointer to _chem_comp.id in the CHEM_COMP
+         * category.
+         */
+        comp_id: str,
+        /**
+         * The value pair  _pdbx_entity_branch_list.num and _pdbx_entity_branch_list.comp_id
+         * must uniquely identify a record in the PDBX_ENTITY_BRANCH_LIST list.
+         */
+        num: int,
+    },
+    /**
+     * Data items in the PDBX_ENTITY_BRANCH_LINK category give details about
+     * the linkages between components within branched entities.
+     */
+    pdbx_entity_branch_link: {
+        /**
+         * The value of _pdbx_entity_branch_link.link_id uniquely identifies
+         * linkages within the branched entity.
+         */
+        link_id: int,
+        /**
+         * A description of special aspects of this linkage.
+         */
+        details: str,
+        /**
+         * The entity id for this branched entity.
+         *
+         * This data item is a pointer to _pdbx_entity_branch_list.entity_id
+         * in the PDBX_ENTITY_BRANCH_LIST category.
+         */
+        entity_id: str,
+        /**
+         * The component number for the first component making the linkage.
+         *
+         * This data item is a pointer to _pdbx_entity_branch_list.num
+         * in the PDBX_ENTITY_BRANCH_LIST category.
+         */
+        entity_branch_list_num_1: int,
+        /**
+         * The component number for the second component making the linkage.
+         *
+         * This data item is a pointer to _pdbx_entity_branch_list.num
+         * in the PDBX_ENTITY_BRANCH_LIST category.
+         */
+        entity_branch_list_num_2: int,
+        /**
+         * The component identifier for the first component making the linkage.
+         *
+         * This data item is a pointer to _pdbx_entity_branch_list.comp_id
+         * in the PDBX_ENTITY_BRANCH_LIST category.
+         */
+        comp_id_1: str,
+        /**
+         * The component identifier for the second component making the linkage.
+         *
+         * This data item is a pointer to _pdbx_entity_branch_list.comp_id
+         * in the PDBX_ENTITY_BRANCH_LIST category.
+         */
+        comp_id_2: str,
+        /**
+         * The atom identifier/name for the first atom making the linkage.
+         */
+        atom_id_1: str,
+        /**
+         * The leaving atom identifier/name bonded to the first atom making the linkage.
+         */
+        leaving_atom_id_1: str,
+        /**
+         * The chiral configuration of the first atom making the linkage.
+         */
+        atom_stereo_config_1: Aliased<'R' | 'S' | 'N'>(str),
+        /**
+         * The atom identifier/name for the second atom making the linkage.
+         */
+        atom_id_2: str,
+        /**
+         * The leaving atom identifier/name bonded to the second atom making the linkage.
+         */
+        leaving_atom_id_2: str,
+        /**
+         * The chiral configuration of the second atom making the linkage.
+         */
+        atom_stereo_config_2: Aliased<'R' | 'S' | 'N'>(str),
+        /**
+         * The bond order target for the chemical linkage.
+         */
+        value_order: Aliased<'sing' | 'doub' | 'trip' | 'quad' | 'arom' | 'poly' | 'delo' | 'pi'>(str),
+    },
+    /**
+     * Data items in the PDBX_ENTITY_BRANCH category specify the list
+     * of branched entities and the type.
+     */
+    pdbx_entity_branch: {
+        /**
+         * The entity id for this branched entity.
+         *
+         * This data item is a pointer to _entity.id
+         */
+        entity_id: str,
+        /**
+         * The type of this branched oligosaccharide.
+         */
+        type: Aliased<'oligosaccharide'>(str),
+    },
+    /**
+     * The PDBX_BRANCH_SCHEME category provides residue level nomenclature
+     * mapping for branch chain entitie.
+     */
+    pdbx_branch_scheme: {
+        /**
+         * This data item is a pointer to _entity.id in the ENTITY category.
+         */
+        entity_id: str,
+        /**
+         * A flag to indicate whether this monomer in the entity is
+         * heterogeneous in sequence.
+         */
+        hetero: Aliased<'no' | 'n' | 'yes' | 'y'>(str),
+        /**
+         * Pointer to _atom_site.label_asym_id.
+         */
+        asym_id: str,
+        /**
+         * This data item is a pointer to _atom_site.label_comp_id in the
+         * PDBX_ENTITY_BRANCH_LIST category.
+         */
+        mon_id: str,
+        /**
+         * This data item is a pointer to _pdbx_entity_branch_list.num in the
+         * PDBX_ENTITY_BRANCH_LIST category.
+         */
+        num: int,
+        /**
+         * This data item is a pointer to _atom_site.pdbx_auth_seq_id in the
+         * ATOM_SITE category.
+         */
+        auth_seq_num: str,
+        /**
+         * This data item is a pointer to _atom_site.pdbx_auth_comp_id in the
+         * ATOM_SITE category.
+         */
+        auth_mon_id: str,
+    },
 }
 export type mmCIF_Schema = typeof mmCIF_Schema;

--- a/src/mol-model/structure/export/mmcif.ts
+++ b/src/mol-model/structure/export/mmcif.ts
@@ -70,9 +70,15 @@ const Categories = [
    copy_mmCif_category('entity_poly'),
    copy_mmCif_category('entity_poly_seq'),
+    // Branch
+    copy_mmCif_category('pdbx_entity_branch'),
+    copy_mmCif_category('pdbx_entity_branch_link'),
+    copy_mmCif_category('pdbx_branch_scheme'),
    // Misc
    // TODO: filter for actual present residues?
    copy_mmCif_category('chem_comp'),
+    copy_mmCif_category('pdbx_chem_comp_identifier'),
    copy_mmCif_category('atom_sites'),
    _pdbx_struct_mod_residue,