diff --git a/data/mmcif-field-names.csv b/data/mmcif-field-names.csv index cf042045e4dd08ac3965b595057ec935e8484c9d..f05f299f39864c70572447630a3bf934951ef3cc 100644 --- a/data/mmcif-field-names.csv +++ b/data/mmcif-field-names.csv @@ -91,6 +91,14 @@ entity_poly_seq.num entity_poly_seq.mon_id entity_poly_seq.hetero +entity_src_gen.entity_id +entity_src_gen.pdbx_src_id +entity_src_gen.pdbx_alt_source_flag +entity_src_gen.pdbx_seq_type +entity_src_gen.pdbx_beg_seq_num +entity_src_gen.pdbx_end_seq_num +entity_src_gen.pdbx_gene_src_gene + pdbx_entity_branch.entity_id pdbx_entity_branch.type diff --git a/src/apps/schema-generator/util/cif-dic.ts b/src/apps/schema-generator/util/cif-dic.ts index e800ba3d350fc92f6610d57f3d41ad7079780bf8..befaea5a5ea61c021fd8492b3ea74035ee50fe78 100644 --- a/src/apps/schema-generator/util/cif-dic.ts +++ b/src/apps/schema-generator/util/cif-dic.ts @@ -52,6 +52,8 @@ export function getFieldType (type: string, description: string, values?: string case 'date_dep': case 'url': case 'symop': + case 'exp_data_doi': + case 'asym_id': return StrCol(description) case 'int': case 'non_negative_int': @@ -63,6 +65,8 @@ export function getFieldType (type: string, description: string, values?: string case 'ucode-alphanum-csv': case 'id_list': return ListCol('str', ',', description) + case 'id_list_spc': + return ListCol('str', ' ', description) } console.log(`unknown type '${type}'`) return StrCol(description) @@ -163,6 +167,7 @@ const COMMA_SEPARATED_LIST_FIELDS = [ '_entity.pdbx_description', // Endolysin,Beta-2 adrenergic receptor '_entity.pdbx_ec', '_entity_poly.pdbx_strand_id', // A,B + '_entity_src_gen.pdbx_gene_src_gene', // ADRB2, ADRB2R, B2AR '_pdbx_depui_entry_details.experimental_methods', '_pdbx_depui_entry_details.requested_accession_types', '_pdbx_soln_scatter_model.software_list', // INSIGHT II, HOMOLOGY, DISCOVERY, BIOPOLYMER, DELPHI @@ -196,12 +201,7 @@ const SEMICOLON_SEPARATED_LIST_FIELDS = [ * values are available in the existing 
dictionary. */ const EXTRA_ENUM_VALUES: { [k: string]: string[] } = { - // TODO for carbohydrate extension draft, remove when added to chem_comp dic - '_pdbx_chem_comp_identifier.type': [ - 'CONDENSED IUPAC CARB SYMBOL', - 'IUPAC CARB SYMBOL', - 'SNFG CARB SYMBOL' - ] + } export function generateSchema (frames: CifFrame[]) { diff --git a/src/mol-io/reader/cif/schema/bird.ts b/src/mol-io/reader/cif/schema/bird.ts index d8687247fa51c82a8d4ba95f85db40a5b92f7ca8..a66b4499915dace768239da49d067b5a5c9d6d30 100644 --- a/src/mol-io/reader/cif/schema/bird.ts +++ b/src/mol-io/reader/cif/schema/bird.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'BIRD' schema file. Dictionary versions: mmCIF 5.305, IHM 0.139, CARB draft. + * Code-generated 'BIRD' schema file. Dictionary versions: mmCIF 5.309, IHM 0.141, CARB draft. * * @author mol-star package (src/apps/schema-generator/generate) */ diff --git a/src/mol-io/reader/cif/schema/ccd.ts b/src/mol-io/reader/cif/schema/ccd.ts index d053c6e1160b3034cd2c5328ccbea1fec2a2f27c..86007f71edaaaa565e1f2c9db6994e61861dcdeb 100644 --- a/src/mol-io/reader/cif/schema/ccd.ts +++ b/src/mol-io/reader/cif/schema/ccd.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'CCD' schema file. Dictionary versions: mmCIF 5.305, IHM 0.139, CARB draft. + * Code-generated 'CCD' schema file. Dictionary versions: mmCIF 5.309, IHM 0.141, CARB draft. 
* * @author mol-star package (src/apps/schema-generator/generate) */ diff --git a/src/mol-io/reader/cif/schema/mmcif.ts b/src/mol-io/reader/cif/schema/mmcif.ts index 4da0f48ed23c3b4711710280254c10cf11117726..39d20bcd83d4312a874eb2598fe6132005316e87 100644 --- a/src/mol-io/reader/cif/schema/mmcif.ts +++ b/src/mol-io/reader/cif/schema/mmcif.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.305, IHM 0.139, CARB draft. + * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.309, IHM 0.141, CARB draft. * * @author mol-star package (src/apps/schema-generator/generate) */ @@ -485,7 +485,7 @@ export const mmCIF_Schema = { * Water entities are not expected to have corresponding * entries in the ENTITY category. */ - type: Aliased<'polymer' | 'non-polymer' | 'macrolide' | 'water'>(str), + type: Aliased<'polymer' | 'non-polymer' | 'macrolide' | 'water' | 'branched'>(str), /** * A description of the entity. * @@ -496,7 +496,7 @@ export const mmCIF_Schema = { * A place holder for the number of molecules of the entity in * the entry. */ - pdbx_number_of_molecules: float, + pdbx_number_of_molecules: int, /** * Details about any entity mutation(s). */ @@ -534,7 +534,7 @@ export const mmCIF_Schema = { /** * The type of the polymer. */ - type: Aliased<'polypeptide(D)' | 'polypeptide(L)' | 'polydeoxyribonucleotide' | 'polyribonucleotide' | 'polysaccharide(D)' | 'polysaccharide(L)' | 'polydeoxyribonucleotide/polyribonucleotide hybrid' | 'cyclic-pseudo-peptide' | 'peptide nucleic acid' | 'other'>(str), + type: Aliased<'polypeptide(D)' | 'polypeptide(L)' | 'polydeoxyribonucleotide' | 'polyribonucleotide' | 'polydeoxyribonucleotide/polyribonucleotide hybrid' | 'cyclic-pseudo-peptide' | 'peptide nucleic acid' | 'other'>(str), /** * The PDB strand/chain id(s) corresponding to this polymer entity. 
*/ @@ -1964,6 +1964,52 @@ export const mmCIF_Schema = { */ details: str, }, + /** + * Data items in the ENTITY_SRC_GEN category record details of + * the source from which the entity was obtained in cases + * where the source was genetically manipulated. The + * following are treated separately: items pertaining to the tissue + * from which the gene was obtained, items pertaining to the host + * organism for gene expression and items pertaining to the actual + * producing organism (plasmid). + */ + entity_src_gen: { + /** + * This data item is a pointer to _entity.id in the ENTITY category. + */ + entity_id: str, + /** + * Identifies the gene. + */ + pdbx_gene_src_gene: List(',', x => x), + /** + * This data item is an ordinal identifier for entity_src_gen data records. + */ + pdbx_src_id: int, + /** + * This data item identifies cases in which an alternative source is + * modeled. + */ + pdbx_alt_source_flag: Aliased<'sample' | 'model'>(str), + /** + * This data item provides additional information about the sequence type. + */ + pdbx_seq_type: Aliased<'N-terminal tag' | 'C-terminal tag' | 'Biological sequence' | 'Linker'>(str), + /** + * The beginning polymer sequence position for the polymer section corresponding + * to this source. + * + * A reference to the sequence position in the entity_poly category. + */ + pdbx_beg_seq_num: int, + /** + * The ending polymer sequence position for the polymer section corresponding + * to this source. + * + * A reference to the sequence position in the entity_poly category. + */ + pdbx_end_seq_num: int, + }, /** * Data items in the PDBX_ENTITY_DESCRIPTOR category provide * string descriptors of entity chemical structure. @@ -1982,7 +2028,7 @@ export const mmCIF_Schema = { /** * This data item contains the descriptor type.
*/ - type: Aliased<'LINUCS'>(str), + type: Aliased<'LINUCS' | 'Glycam Condensed Sequence' | 'Glycam Condensed Core Sequence'>(str), /** * This data item contains the name of the program * or library used to compute the descriptor. @@ -3716,7 +3762,7 @@ export const mmCIF_Schema = { */ entity_id: str, /** - * An asym/strand identifier for the residue / residue range. + * An asym/strand identifier for the residue / residue range, if applicable. * This data item is a pointer to _struct_asym.id in the * STRUCT_ASYM category. */ @@ -3794,6 +3840,10 @@ export const mmCIF_Schema = { * Identifier to the input data from which the distance restraint is derived. * This data item is a pointer to the _ihm_dataset_list.id in the * IHM_DATASET_LIST category. + * This data item may not be applicable for all cases. For example, in case of + * ambiguous interface restraints where the interface residues are identified + * from multiple experiments, the reference to the _ihm_dataset_list.id is + * handled in the IHM_INTERFACE_RESIDUE_FEATURE category rather than here. */ dataset_list_id: int, },