diff --git a/data/cif-field-names/mmcif-field-names.csv b/data/cif-field-names/mmcif-field-names.csv index 825f2b931474b08d233bd113c7c5854f65b58d20..add635b14b6eb41a87392180d37f885f59bf3fd0 100644 --- a/data/cif-field-names/mmcif-field-names.csv +++ b/data/cif-field-names/mmcif-field-names.csv @@ -246,6 +246,14 @@ citation_author.ordinal exptl.entry_id exptl.method +software.classification +software.date +software.description +software.name +software.pdbx_ordinal +software.type +software.version + struct.entry_id struct.title struct.pdbx_descriptor @@ -802,4 +810,58 @@ ihm_multi_state_modeling.population_fraction_sd ihm_multi_state_modeling.state_type ihm_multi_state_modeling.state_name ihm_multi_state_modeling.experiment_type -ihm_multi_state_modeling.details \ No newline at end of file +ihm_multi_state_modeling.details + +ma_data.content_type +ma_data.content_type_other_details +ma_data.id +ma_data.name + +ma_model_list.data_id +ma_model_list.model_group_id +ma_model_list.model_group_name +ma_model_list.model_id +ma_model_list.model_name +ma_model_list.model_type +ma_model_list.ordinal_id + +ma_qa_metric.id +ma_qa_metric.mode +ma_qa_metric.name +ma_qa_metric.software_group_id +ma_qa_metric.type + +ma_qa_metric_global.metric_id +ma_qa_metric_global.metric_value +ma_qa_metric_global.model_id +ma_qa_metric_global.ordinal_id + +ma_qa_metric_local.label_asym_id +ma_qa_metric_local.label_comp_id +ma_qa_metric_local.label_seq_id +ma_qa_metric_local.metric_id +ma_qa_metric_local.metric_value +ma_qa_metric_local.model_id +ma_qa_metric_local.ordinal_id + +ma_software_group.group_id +ma_software_group.ordinal_id +ma_software_group.software_id + +ma_target_entity.data_id +ma_target_entity.entity_id +ma_target_entity.origin + +ma_target_entity_instance.asym_id +ma_target_entity_instance.details +ma_target_entity_instance.entity_id + +ma_target_ref_db_details.db_accession +ma_target_ref_db_details.db_code +ma_target_ref_db_details.db_name 
+ma_target_ref_db_details.ncbi_taxonomy_id +ma_target_ref_db_details.organism_scientific +ma_target_ref_db_details.seq_db_align_begin +ma_target_ref_db_details.seq_db_align_end +ma_target_ref_db_details.seq_db_isoform +ma_target_ref_db_details.target_entity_id \ No newline at end of file diff --git a/src/cli/cifschema/index.ts b/src/cli/cifschema/index.ts index 3b8d96a01ce47682762765f3585d4a8256e9040f..84c5a98972ac326bb58bb8866272485e69073076 100644 --- a/src/cli/cifschema/index.ts +++ b/src/cli/cifschema/index.ts @@ -1,6 +1,6 @@ #!/usr/bin/env node /** - * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info. + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. * * @author Alexander Rose <alexander.rose@weirdbyte.de> */ @@ -35,6 +35,10 @@ async function runGenerateSchemaMmcif(name: string, fieldNamesPath: string, type const ihmDic = await parseCifText(fs.readFileSync(IHM_DIC_PATH, 'utf8')).run(); if (ihmDic.isError) throw ihmDic; + await ensureMaDicAvailable(); + const maDic = await parseCifText(fs.readFileSync(MA_DIC_PATH, 'utf8')).run(); + if (maDic.isError) throw maDic; + await ensureCarbBranchDicAvailable(); const carbBranchDic = await parseCifText(fs.readFileSync(CARB_BRANCH_DIC_PATH, 'utf8')).run(); if (carbBranchDic.isError) throw carbBranchDic; @@ -45,10 +49,11 @@ async function runGenerateSchemaMmcif(name: string, fieldNamesPath: string, type const mmcifDicVersion = getDicVersion(mmcifDic.result.blocks[0]); const ihmDicVersion = getDicVersion(ihmDic.result.blocks[0]); + const maDicVersion = getDicVersion(maDic.result.blocks[0]); const carbDicVersion = 'draft'; - const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, CARB ${carbDicVersion}.`; + const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, MA ${maDicVersion}, CARB ${carbDicVersion}.`; - const frames: CifFrame[] = 
[...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames, ...carbBranchDic.result.blocks[0].saveFrames, ...carbCompDic.result.blocks[0].saveFrames]; + const frames: CifFrame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames, ...maDic.result.blocks[0].saveFrames, ...carbBranchDic.result.blocks[0].saveFrames, ...carbCompDic.result.blocks[0].saveFrames]; const schema = generateSchema(frames); await runGenerateSchema(name, version, schema, fieldNamesPath, typescript, out, moldbImportPath, addAliases); @@ -139,6 +144,7 @@ async function getFieldNamesFilter(fieldNamesPath: string): Promise<Filter> { async function ensureMmcifDicAvailable() { await ensureDicAvailable(MMCIF_DIC_PATH, MMCIF_DIC_URL); } async function ensureIhmDicAvailable() { await ensureDicAvailable(IHM_DIC_PATH, IHM_DIC_URL); } +async function ensureMaDicAvailable() { await ensureDicAvailable(MA_DIC_PATH, MA_DIC_URL); } async function ensureCarbBranchDicAvailable() { await ensureDicAvailable(CARB_BRANCH_DIC_PATH, CARB_BRANCH_DIC_URL); } async function ensureCarbCompDicAvailable() { await ensureDicAvailable(CARB_COMP_DIC_PATH, CARB_COMP_DIC_URL); } async function ensureCifCoreDicAvailable() { @@ -165,6 +171,8 @@ const MMCIF_DIC_PATH = `${DIC_DIR}/mmcif_pdbx_v50.dic`; const MMCIF_DIC_URL = 'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic'; const IHM_DIC_PATH = `${DIC_DIR}/ihm-extension.dic`; const IHM_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/IHM-dictionary/master/ihm-extension.dic'; +const MA_DIC_PATH = `${DIC_DIR}/ma-extension.dic`; +const MA_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/MA-dictionary/master/mmcif_ma.dic'; const CARB_BRANCH_DIC_PATH = `${DIC_DIR}/entity_branch-extension.dic`; const CARB_BRANCH_DIC_URL = 'https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/entity_branch-extension.dic'; const CARB_COMP_DIC_PATH = `${DIC_DIR}/chem_comp-extension.dic`; diff --git 
a/src/mol-io/reader/cif/schema/mmcif.ts b/src/mol-io/reader/cif/schema/mmcif.ts index 0a0455bf5c945e04a5b61f239dbb0a086d3f0c13..6a0c57a31f7668bbfe567f84443d422ab4daa23d 100644 --- a/src/mol-io/reader/cif/schema/mmcif.ts +++ b/src/mol-io/reader/cif/schema/mmcif.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.352, IHM 1.17, CARB draft. + * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.352, IHM 1.17, MA 1.3.3, CARB draft. * * @author molstar/ciftools package */ @@ -942,6 +942,48 @@ export const mmCIF_Schema = { */ method: Aliased<'X-RAY DIFFRACTION' | 'NEUTRON DIFFRACTION' | 'FIBER DIFFRACTION' | 'ELECTRON CRYSTALLOGRAPHY' | 'ELECTRON MICROSCOPY' | 'SOLUTION NMR' | 'SOLID-STATE NMR' | 'SOLUTION SCATTERING' | 'POWDER DIFFRACTION' | 'INFRARED SPECTROSCOPY' | 'EPR' | 'FLUORESCENCE TRANSFER' | 'THEORETICAL MODEL'>(str), }, + /** + * Data items in the SOFTWARE category record details about + * the software used in the structure analysis, which implies + * any software used in the generation of any data items + * associated with the structure determination and + * structure representation. + * + * These data items allow computer programs to be referenced + * in more detail than data items in the COMPUTING category do. + */ + software: { + /** + * The classification of the program according to its + * major function. + */ + classification: str, + /** + * The date the software was released. + */ + date: str, + /** + * Description of the software. + */ + description: str, + /** + * The name of the software. + */ + name: str, + /** + * The classification of the software according to the most + * common types. + */ + type: Aliased<'program' | 'library' | 'package' | 'filter' | 'jiffy' | 'other'>(str), + /** + * The version of the software. 
+ */ + version: str, + /** + * An ordinal index for this category + */ + pdbx_ordinal: int, + }, /** * Data items in the STRUCT category record details about the * description of the crystallographic structure. @@ -4717,6 +4759,289 @@ export const mmCIF_Schema = { */ dataset_list_id: int, }, + /** + * Data items in the MA_MODEL_LIST category record the + * details of the models being deposited. + */ + ma_model_list: { + /** + * A unique identifier for the model / model group combination. + */ + ordinal_id: int, + /** + * A unique identifier for the structural model being deposited. + */ + model_id: int, + /** + * An identifier to group structural models into collections or sets. + * A cluster of models and its representative can either be grouped together + * or can be separate groups in the ma_model_list table. The choice between + * the two options should be decided based on how the modeling was carried out + * and how the representative was chosen. If the representative is a member of + * the ensemble (i.e., best scoring model), then it is recommended that the + * representative and the ensemble belong to the same model group. If the + * representative is calculated from the ensemble (i.e., centroid), then it is + * recommended that the representative be separated into a different group. + * If the models do not need to be grouped into collections, then the + * _ma_model_list.model_group_id is the same as _ma_model_list.model_id. + */ + model_group_id: int, + /** + * A descriptive name for the model. + */ + model_name: str, + /** + * A descriptive name for the model group. + */ + model_group_name: str, + /** + * The type of model. + */ + model_type: Aliased<'Homology model' | 'Ab initio model' | 'Other'>(str), + /** + * The data_id identifier. This data item is a pointer to + * _ma_data.id in the MA_DATA category. + */ + data_id: int, + }, + /** + * Data items in the MA_TARGET_ENTITY category record details about + * the target entities. 
The details are provided for each entity + * being modeled. + */ + ma_target_entity: { + /** + * A unique identifier for the distinct molecular entity of the target. + * This data item is a pointer to _entity.id in the ENTITY category. + */ + entity_id: str, + /** + * The data_id identifier. This data item is a pointer to + * _ma_data.id in the MA_DATA category. + */ + data_id: int, + /** + * The origin of the target entity. + */ + origin: Aliased<'reference database' | 'designed'>(str), + }, + /** + * Data items in the MA_TARGET_ENTITY_INSTANCE category record details about + * the instances of target entities modeled. + */ + ma_target_entity_instance: { + /** + * A unique identifier for the instance of the entity. + */ + asym_id: str, + /** + * A unique identifier for the distinct molecular entity of the target. + * This data item is a pointer to _ma_target_entity.entity_id in the + * MA_TARGET_ENTITY category. + */ + entity_id: str, + /** + * Additional details about the entity instance. + */ + details: str, + }, + /** + * Data items in the MA_TARGET_REF_DB_DETAILS category record details about + * the reference databases for the target sequences. + */ + ma_target_ref_db_details: { + /** + * An identifier for the target entity. + */ + target_entity_id: str, + /** + * The name of the database containing reference information about + * this entity or biological unit. + */ + db_name: Aliased<'UNP' | 'GB' | 'OrthoDB' | 'NCBI' | 'JGI' | 'Other'>(str), + /** + * The code for this entity or biological unit or for a closely + * related entity or biological unit in the named database. + * This can include the version number. + */ + db_code: str, + /** + * Accession code assigned by the reference database. + */ + db_accession: str, + /** + * Database code assigned by the reference database for a sequence isoform. 
An isoform sequence is an + * alternative protein sequence that can be generated from the same gene by a single or by a combination of + * biological events such as: alternative promoter usage, alternative splicing, alternative initiation + * and ribosomal frameshifting. + */ + seq_db_isoform: str, + /** + * Beginning index in the chemical sequence from the + * reference database. + */ + seq_db_align_begin: str, + /** + * Ending index in the chemical sequence from the + * reference database. + */ + seq_db_align_end: str, + /** + * Taxonomy identifier provided by NCBI. + */ + ncbi_taxonomy_id: str, + /** + * Scientific name of the organism. + */ + organism_scientific: str, + }, + /** + * Data items in the MA_DATA category capture the different kinds of + * data used in the modeling. These can be multiple sequence + * alignments, spatial restraints, template structures etc. + */ + ma_data: { + /** + * A unique identifier for the data. + */ + id: int, + /** + * The type of data held in the dataset. + */ + content_type: Aliased<'target' | 'template structure' | 'polymeric template library' | 'spatial restraints' | 'target-template alignment' | 'coevolution MSA' | 'model coordinates' | 'other'>(str), + /** + * Details for other content types. + */ + content_type_other_details: str, + /** + * An author-given name for the content held in the dataset. + */ + name: str, + }, + /** + * Data items in the MA_SOFTWARE_GROUP category describe the + * collection of software into groups so that they can be used + * efficiently in the MA_PROTOCOL_STEP category. + */ + ma_software_group: { + /** + * A unique identifier for the category. + */ + ordinal_id: int, + /** + * An identifier for the group entry. + * If data does not need to be grouped, then _ma_software_group.group_id + * is the same as _ma_software_group.software_id. + */ + group_id: int, + /** + * The identifier for the software. + * This data item is a pointer to _software.pdbx_ordinal + * in the SOFTWARE category. 
+ */ + software_id: int, + }, + /** + * Data items in the MA_QA_METRIC category record the + * details of the metrics used to assess model quality. + */ + ma_qa_metric: { + /** + * An identifier for the QA metric. + */ + id: int, + /** + * Name of the QA metric. + */ + name: str, + /** + * The type of QA metric. + */ + type: Aliased<'zscore' | 'energy' | 'distance' | 'normalized score' | 'pLDDT' | 'PAE' | 'contact probability' | 'other'>(str), + /** + * The mode of calculation of the QA metric. + */ + mode: Aliased<'local' | 'global' | 'local-pairwise'>(str), + /** + * Identifier to the set of software used to calculate the QA metric. + * This data item is a pointer to the _ma_software_group.group_id in the + * MA_SOFTWARE_GROUP category. + */ + software_group_id: int, + }, + /** + * Data items in the MA_QA_METRIC_GLOBAL category capture the + * details of the global QA metrics, calculated at the model-level. + */ + ma_qa_metric_global: { + /** + * A unique identifier for the category. + */ + ordinal_id: int, + /** + * The identifier for the structural model, for which global QA metric is provided. + * This data item is a pointer to _ma_model_list.model_id + * in the MA_MODEL_LIST category. + */ + model_id: int, + /** + * The identifier for the QA metric. + * This data item is a pointer to _ma_qa_metric.id in the + * MA_QA_METRIC category. + */ + metric_id: int, + /** + * The value of the global QA metric. + */ + metric_value: float, + }, + /** + * Data items in the MA_QA_METRIC_LOCAL category capture the + * details of the local QA metrics, calculated at the residue-level. + */ + ma_qa_metric_local: { + /** + * A unique identifier for the category. + */ + ordinal_id: int, + /** + * The identifier for the structural model, for which local QA metric is provided. + * This data item is a pointer to _ma_model_list.model_id + * in the MA_MODEL_LIST category. 
+ */ + model_id: int, + /** + * The identifier for the asym id of the residue in the + * structural model, for which local QA metric is provided. + * This data item is a pointer to _atom_site.label_asym_id + * in the ATOM_SITE category. + */ + label_asym_id: str, + /** + * The identifier for the sequence index of the residue + * in the structural model, for which local QA metric is provided. + * This data item is a pointer to _atom_site.label_seq_id + * in the ATOM_SITE category. + */ + label_seq_id: int, + /** + * The component identifier for the residue in the + * structural model, for which local QA metric is provided. + * This data item is a pointer to _atom_site.label_comp_id + * in the ATOM_SITE category. + */ + label_comp_id: str, + /** + * The identifier for the QA metric. + * This data item is a pointer to _ma_qa_metric.id in the + * MA_QA_METRIC category. + */ + metric_id: int, + /** + * The value of the local QA metric. + */ + metric_value: float, + }, }; export type mmCIF_Schema = typeof mmCIF_Schema;