diff --git a/README.md b/README.md index 6544cb77d7cdc4517cbac92842c2c3452093d095..56b4a5658081517fc00215f598ab4d98e9578421 100644 --- a/README.md +++ b/README.md @@ -100,8 +100,6 @@ Run the image node build/node_modules/apps/schema-generator/schema-from-cif-dic.js -ts -o src/mol-io/reader/cif/schema/bird.ts --fieldNamesPath data/bird-field-names.csv --name BIRD - node --max-old-space-size=8192 build/node_modules/apps/chem-comp-bond/create-table.js build/data/ccb.bcif -b - **GraphQL schemas** node data/rcsb-graphql/codegen.js diff --git a/data/mmcif-field-names.csv b/data/mmcif-field-names.csv index fba153c636ab478f0cdf50ec74a25b7e2245bfa5..cf042045e4dd08ac3965b595057ec935e8484c9d 100644 --- a/data/mmcif-field-names.csv +++ b/data/mmcif-field-names.csv @@ -47,6 +47,15 @@ pdbx_chem_comp_identifier.program pdbx_chem_comp_identifier.program_version pdbx_chem_comp_identifier.identifier +pdbx_chem_comp_related.comp_id +pdbx_chem_comp_related.related_comp_id +pdbx_chem_comp_related.relationship_type +pdbx_chem_comp_related.details + +pdbx_chem_comp_synonyms.comp_id +pdbx_chem_comp_synonyms.name +pdbx_chem_comp_synonyms.provenance + cell.entry_id cell.length_a cell.length_b diff --git a/src/apps/schema-generator/schema-from-cif-dic.ts b/src/apps/schema-generator/schema-from-cif-dic.ts index ecef972dd62070a65ff748f39688db29b95483c4..88bd34a8fbb078fe171f0a0c9957d87772df3498 100644 --- a/src/apps/schema-generator/schema-from-cif-dic.ts +++ b/src/apps/schema-generator/schema-from-cif-dic.ts @@ -24,17 +24,20 @@ async function runGenerateSchema(name: string, fieldNamesPath?: string, typescri const ihmDic = await CIF.parseText(fs.readFileSync(IHM_DIC_PATH, 'utf8')).run(); if (ihmDic.isError) throw ihmDic - await ensureBranchDicAvailable() - const branchDic = await CIF.parseText(fs.readFileSync(BRANCH_DIC_PATH, 'utf8')).run(); - if (branchDic.isError) throw branchDic + await ensureCarbBranchDicAvailable() + const carbBranchDic = await CIF.parseText(fs.readFileSync(CARB_BRANCH_DIC_PATH, 'utf8')).run(); + if (carbBranchDic.isError) throw carbBranchDic + + await ensureCarbCompDicAvailable() + const carbCompDic = await CIF.parseText(fs.readFileSync(CARB_COMP_DIC_PATH, 'utf8')).run(); + if (carbCompDic.isError) throw carbCompDic const mmcifDicVersion = CIF.schema.dic(mmcifDic.result.blocks[0]).dictionary.version.value(0) const ihmDicVersion = CIF.schema.dic(ihmDic.result.blocks[0]).dictionary.version.value(0) - // const branchDicVersion = CIF.schema.dic(branchDic.result.blocks[0]).dictionary.version.value(0) - const branchDicVersion = 'draft' - const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, entity_branch ${branchDicVersion}.` + const carbDicVersion = 'draft' + const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, CARB ${carbDicVersion}.` - const frames: CifFrame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames, ...branchDic.result.blocks[0].saveFrames] + const frames: CifFrame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames, ...carbBranchDic.result.blocks[0].saveFrames, ...carbCompDic.result.blocks[0].saveFrames] const schema = generateSchema(frames) const filter = fieldNamesPath ? await getFieldNamesFilter(fieldNamesPath) : undefined @@ -68,17 +71,10 @@ async function getFieldNamesFilter(fieldNamesPath: string): Promise<Filter> { return filter } -async function ensureMmcifDicAvailable() { - await ensureDicAvailable(MMCIF_DIC_PATH, MMCIF_DIC_URL) -} - -async function ensureIhmDicAvailable() { - await ensureDicAvailable(IHM_DIC_PATH, IHM_DIC_URL) -} - -async function ensureBranchDicAvailable() { - await ensureDicAvailable(BRANCH_DIC_PATH, BRANCH_DIC_URL) -} +async function ensureMmcifDicAvailable() { await ensureDicAvailable(MMCIF_DIC_PATH, MMCIF_DIC_URL) } +async function ensureIhmDicAvailable() { await ensureDicAvailable(IHM_DIC_PATH, IHM_DIC_URL) } +async function ensureCarbBranchDicAvailable() { await ensureDicAvailable(CARB_BRANCH_DIC_PATH, CARB_BRANCH_DIC_URL) } +async function ensureCarbCompDicAvailable() { await ensureDicAvailable(CARB_COMP_DIC_PATH, CARB_COMP_DIC_URL) } async function ensureDicAvailable(dicPath: string, dicUrl: string) { if (FORCE_DIC_DOWNLOAD || !fs.existsSync(dicPath)) { @@ -98,8 +94,10 @@ const MMCIF_DIC_PATH = `${DIC_DIR}/mmcif_pdbx_v50.dic` const MMCIF_DIC_URL = 'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic' const IHM_DIC_PATH = `${DIC_DIR}/ihm-extension.dic` const IHM_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/IHM-dictionary/master/ihm-extension.dic' -const BRANCH_DIC_PATH = `${DIC_DIR}/entity_branch-extension.dic` -const BRANCH_DIC_URL = 'https://raw.githubusercontent.com/wwpdb-dictionaries/mmcif_pdbx/master/extensions/entity_branch-extension.dic' +const CARB_BRANCH_DIC_PATH = `${DIC_DIR}/entity_branch-extension.dic` +const CARB_BRANCH_DIC_URL = 'https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/entity_branch-extension.dic' +const CARB_COMP_DIC_PATH = `${DIC_DIR}/chem_comp-extension.dic` +const CARB_COMP_DIC_URL = 'https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/chem_comp-extension.txt' const parser = new argparse.ArgumentParser({ addHelp: true, diff --git a/src/mol-io/reader/cif/schema/bird.ts b/src/mol-io/reader/cif/schema/bird.ts index 5cf18d4663f9ec48ae251730e3174961f9cb5c1b..a4398188b62edbe7dac06619f83260f744fc6ccb 100644 --- a/src/mol-io/reader/cif/schema/bird.ts +++ b/src/mol-io/reader/cif/schema/bird.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'BIRD' schema file. Dictionary versions: mmCIF 5.299, IHM 0.134. + * Code-generated 'BIRD' schema file. Dictionary versions: mmCIF 5.300, IHM 0.136, CARB draft. * * @author mol-star package (src/apps/schema-generator/generate) */ diff --git a/src/mol-io/reader/cif/schema/ccd.ts b/src/mol-io/reader/cif/schema/ccd.ts index 06a77ba67de4616307baa458397f54a9a9809e5c..d8d61e76b4a783703f0f842da7710b4db9c77914 100644 --- a/src/mol-io/reader/cif/schema/ccd.ts +++ b/src/mol-io/reader/cif/schema/ccd.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'CCD' schema file. Dictionary versions: mmCIF 5.299, IHM 0.134. + * Code-generated 'CCD' schema file. Dictionary versions: mmCIF 5.300, IHM 0.136, CARB draft. * * @author mol-star package (src/apps/schema-generator/generate) */ diff --git a/src/mol-io/reader/cif/schema/mmcif.ts b/src/mol-io/reader/cif/schema/mmcif.ts index d2565a478cdb4bb7a9deebaacf3afec883279d59..8cb54134c190888954e50550a6bdb0571ac75dcf 100644 --- a/src/mol-io/reader/cif/schema/mmcif.ts +++ b/src/mol-io/reader/cif/schema/mmcif.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.299, IHM 0.134, entity_branch draft. + * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.300, IHM 0.136, CARB draft. * * @author mol-star package (src/apps/schema-generator/generate) */ @@ -1470,6 +1470,54 @@ export const mmCIF_Schema = { */ 'space_group_name_H-M': str, }, + /** + * The PDBX_NONPOLY_SCHEME category provides residue level nomenclature + * mapping for non-polymer entities. + */ + pdbx_nonpoly_scheme: { + /** + * Pointer to _atom_site.label_asym_id. + */ + asym_id: str, + /** + * Pointer to _atom_site.label_entity_id. + */ + entity_id: str, + /** + * Pointer to _atom_site.label_comp_id. + */ + mon_id: str, + /** + * PDB strand/chain id. + */ + pdb_strand_id: str, + /** + * NDB/RCSB residue number. + */ + ndb_seq_num: str, + /** + * PDB residue number. + */ + pdb_seq_num: str, + /** + * Author provided residue numbering. This value may differ from the PDB residue + * number and may not correspond to residue numbering within the coordinate records. + */ + auth_seq_num: str, + /** + * PDB residue identifier. + */ + pdb_mon_id: str, + /** + * Author provided residue identifier. This value may differ from the PDB residue + * identifier and may not correspond to residue identification within the coordinate records. + */ + auth_mon_id: str, + /** + * PDB insertion code. + */ + pdb_ins_code: str, + }, /** * Data items in the CHEM_COMP_IDENTIFIER category provide * identifiers for chemical components. @@ -1934,7 +1982,7 @@ export const mmCIF_Schema = { /** * This data item contains the descriptor type. */ - type: Aliased<'LINUCS' | 'IUPAC' | 'IUPAC Abbreviated'>(str), + type: Aliased<'LINUCS'>(str), /** * This data item contains the name of the program * or library used to compute the descriptor. @@ -1950,18 +1998,6 @@ export const mmCIF_Schema = { */ ordinal: int, }, - pdbx_nonpoly_scheme: { - asym_id: str, - entity_id: str, - mon_id: str, - ndb_seq_num: int, - pdb_seq_num: int, - auth_seq_num: int, - pdb_mon_id: str, - auth_mon_id: str, - pdb_strand_id: str, - pdb_ins_code: str - }, /** * Data items in the IHM_STARTING_MODEL_DETAILS category records the * details about structural models used as starting inputs in @@ -1974,7 +2010,7 @@ export const mmCIF_Schema = { starting_model_id: str, /** * A unique identifier for the distinct molecular entities. - * This data item is a pointer to _entity_poly_seq.entity_id in the ENTITY_POLY category. + * This data item is a pointer to _entity.id in the ENTITY category. */ entity_id: str, /** @@ -2165,8 +2201,8 @@ export const mmCIF_Schema = { segment_id: int, /** * A unique identifier distinct molecular entities. - * This data item is a pointer to _entity_poly_seq.entity_id in the - * ENTITY_POLY_SEQ category. + * This data item is a pointer to _entity.id in the + * ENTITY category. */ entity_id: str, /** @@ -2181,10 +2217,12 @@ export const mmCIF_Schema = { entity_asym_id: str, /** * The leading residue index for the sequence segment modeled using this starting model. + * This data item is a pointer to _entity_poly_seq.num in the ENTITY_POLY_SEQ category. */ seq_id_begin: int, /** * The trailing residue index for the sequence segment modeled using this starting model. + * This data item is a pointer to _entity_poly_seq.num in the ENTITY_POLY_SEQ category. */ seq_id_end: int, /** @@ -2244,8 +2282,8 @@ export const mmCIF_Schema = { entity_description: str, /** * A unique identifier for distinct molecular entities. - * This data item is a pointer to _entity_poly_seq.entity_id in the - * ENTITY_POLY_SEQ category. + * This data item is a pointer to _entity.id in the + * ENTITY category. */ entity_id: str, /** @@ -2257,11 +2295,13 @@ export const mmCIF_Schema = { /** * The starting residue index for the sequence segment of the entity instance * that is part of the assembly. + * This data item is a pointer to _entity_poly_seq.num in the ENTITY_POLY_SEQ category. */ seq_id_begin: int, /** * The ending residue index for the sequence segment of the entity instance * that is part of the assembly. + * This data item is a pointer to _entity_poly_seq.num in the ENTITY_POLY_SEQ category. */ seq_id_end: int, }, @@ -2612,7 +2652,7 @@ export const mmCIF_Schema = { /** * The type of data held in the dataset. */ - data_type: Aliased<'NMR data' | '3DEM volume' | '2DEM class average' | 'EM raw micrographs' | 'SAS data' | 'CX-MS data' | 'Mass Spectrometry data' | 'EPR data' | 'H/D exchange data' | 'Single molecule FRET data' | 'Experimental model' | 'Comparative model' | 'Integrative model' | 'De Novo model' | 'Predicted contacts' | 'Mutagenesis data' | 'DNA footprinting data' | 'Yeast two-hybrid screening data' | 'Other'>(str), + data_type: Aliased<'NMR data' | '3DEM volume' | '2DEM class average' | 'EM raw micrographs' | 'SAS data' | 'CX-MS data' | 'Mass Spectrometry data' | 'EPR data' | 'H/D exchange data' | 'Single molecule FRET data' | 'Experimental model' | 'Comparative model' | 'Integrative model' | 'De Novo model' | 'Predicted contacts' | 'Mutagenesis data' | 'DNA footprinting data' | 'Hydroxyl radical footprinting data' | 'Yeast two-hybrid screening data' | 'Other'>(str), /** * A flag that indicates whether the dataset is archived in * an IHM related database or elsewhere. @@ -2821,7 +2861,7 @@ export const mmCIF_Schema = { ensemble_id: int, /** * The entity identifier corresponding to this localization density. - * This data item is a pointer to _entity_poly_seq.entity_id in the ENTITY_POLY category. + * This data item is a pointer to _entity.id in the ENTITY category. */ entity_id: str, /** @@ -3009,7 +3049,7 @@ export const mmCIF_Schema = { /** * The type of crosslinker used. */ - linker_type: Aliased<'EDC' | 'DSS' | 'EGS' | 'BS3' | 'BS2G' | 'DST' | 'sulfo-SDA' | 'sulfo-SMCC' | 'Other'>(str), + linker_type: Aliased<'EDC' | 'DSS' | 'EGS' | 'BS3' | 'BS2G' | 'DST' | 'sulfo-SDA' | 'sulfo-SMCC' | 'DSSO' | 'Other'>(str), /** * Identifier to the crosslinking dataset. * This data item is a pointer to the _ihm_dataset_list.id in the @@ -3423,7 +3463,7 @@ export const mmCIF_Schema = { type_symbol: str, /** * The entity identifier corresponding to this coordinate position. - * This data item is a pointer to _entity_poly_seq.entity_id in the ENTITY_POLY category. + * This data item is a pointer to _entity.id in the ENTITY category. */ entity_id: str, /** @@ -3434,7 +3474,7 @@ export const mmCIF_Schema = { atom_id: str, /** * The component identifier corresponding to this coordinate position. - * This data item is a pointer to _entity_poly_seq.mon_id in the ENTITY_POLY category. + * This data item is a pointer to _chem_comp.id in the CHEM_COMP category. */ comp_id: str, /** @@ -3477,7 +3517,7 @@ export const mmCIF_Schema = { ordinal_id: int, /** * The entity identifier corresponding to this sphere object. - * This data item is a pointer to _entity_poly_seq.entity_id in the ENTITY_POLY category. + * This data item is a pointer to _entity.id in the ENTITY category. */ entity_id: str, /** @@ -3534,7 +3574,7 @@ export const mmCIF_Schema = { ordinal_id: int, /** * The entity identifier corresponding to this gaussian object. - * This data item is a pointer to _entity_poly_seq.entity_id in the ENTITY_POLY category. + * This data item is a pointer to _entity.id in the ENTITY category. */ entity_id: str, /** @@ -3590,7 +3630,7 @@ export const mmCIF_Schema = { ordinal_id: int, /** * The entity identifier corresponding to this gaussian object. - * This data item is a pointer to _entity_poly_seq.entity_id in the ENTITY_POLY category. + * This data item is a pointer to _entity.id in the ENTITY category. */ entity_id: str, /** @@ -3789,7 +3829,7 @@ export const mmCIF_Schema = { }, /** * Data items in the PDBX_ENTITY_BRANCH_LINK category give details about - * the linkages between components within branched entities. + * the linkages between components within a branched entity. */ pdbx_entity_branch_link: { /** @@ -3883,7 +3923,7 @@ export const mmCIF_Schema = { }, /** * The PDBX_BRANCH_SCHEME category provides residue level nomenclature - * mapping for branch chain entitie. + * mapping for branch chain entities. */ pdbx_branch_scheme: { /** @@ -3920,6 +3960,44 @@ export const mmCIF_Schema = { */ auth_mon_id: str, }, + /** + * PDBX_CHEM_COMP_SYNONYMS holds chemical name and synonym correspondences. + */ + pdbx_chem_comp_synonyms: { + /** + * The synonym of this particular chemical component. + */ + name: str, + /** + * The chemical component for which this synonym applies. + */ + comp_id: str, + /** + * The provenance of this synonym. + */ + provenance: Aliased<'AUTHOR' | 'DRUGBANK' | 'CHEBI' | 'CHEMBL' | 'PDB' | 'PUBCHEM'>(str), + }, + /** + * PDBX_CHEM_COMP_RELATED describes the relationship between two chemical components. + */ + pdbx_chem_comp_related: { + /** + * The chemical component for which this relationship applies. + */ + comp_id: str, + /** + * The related chemical component for which this chemical component is based. + */ + related_comp_id: str, + /** + * Describes the type of relationship + */ + relationship_type: Aliased<'Carbohydrate core' | 'Precursor'>(str), + /** + * Describes the type of relationship + */ + details: str, + }, } export type mmCIF_Schema = typeof mmCIF_Schema;