From 3400c8e94ad6363071f52d17100546e3d06f3353 Mon Sep 17 00:00:00 2001 From: Alexander Rose <alexander.rose@weirdbyte.de> Date: Sun, 12 Sep 2021 16:19:23 -0700 Subject: [PATCH] update coreCif dictionary --- data/cif-field-names/cif-core-field-names.csv | 39 ++-- src/cli/cifschema/util/cif-dic.ts | 35 ++-- src/cli/cifschema/util/helper.ts | 4 +- src/mol-io/reader/cif/schema/cif-core.ts | 188 +++++++----------- .../structure/_spec/cif-core.spec.ts | 13 +- src/mol-model-formats/structure/cif-core.ts | 25 ++- 6 files changed, 123 insertions(+), 181 deletions(-) diff --git a/data/cif-field-names/cif-core-field-names.csv b/data/cif-field-names/cif-core-field-names.csv index f935944d7..9425fc19f 100644 --- a/data/cif-field-names/cif-core-field-names.csv +++ b/data/cif-field-names/cif-core-field-names.csv @@ -2,11 +2,11 @@ audit.block_doi database_code.depnum_ccdc_archive database_code.depnum_ccdc_fiz -database_code.ICSD -database_code.MDF -database_code.NBS -database_code.CSD -database_code.COD +database_code.icsd +database_code.mdf +database_code.nbs +database_code.csd +database_code.cod chemical.name_systematic chemical.name_common @@ -24,8 +24,8 @@ atom_type_scat.dispersion_imag atom_type_scat.source space_group.crystal_system -space_group.name_H-M_full -space_group.IT_number +space_group.name_h-m_full +space_group.it_number space_group_symop.operation_xyz cell.length_a @@ -35,14 +35,14 @@ cell.angle_alpha cell.angle_beta cell.angle_gamma cell.volume -cell.formula_units_Z +cell.formula_units_z atom_site.label atom_site.type_symbol atom_site.fract_x atom_site.fract_y atom_site.fract_z -atom_site.U_iso_or_equiv +atom_site.u_iso_or_equiv atom_site.adp_type atom_site.occupancy atom_site.calc_flag @@ -52,20 +52,13 @@ atom_site.disorder_group atom_site.site_symmetry_multiplicity atom_site_aniso.label -atom_site_aniso.U -atom_site_aniso.U_11 -atom_site_aniso.U_22 -atom_site_aniso.U_33 -atom_site_aniso.U_23 -atom_site_aniso.U_13 -atom_site_aniso.U_12 -atom_site_aniso.U_su 
-atom_site_aniso.U_11_su -atom_site_aniso.U_22_su -atom_site_aniso.U_33_su -atom_site_aniso.U_23_su -atom_site_aniso.U_13_su -atom_site_aniso.U_12_su +atom_site_aniso.u +atom_site_aniso.u_11 +atom_site_aniso.u_22 +atom_site_aniso.u_33 +atom_site_aniso.u_23 +atom_site_aniso.u_13 +atom_site_aniso.u_12 geom_bond.atom_site_label_1 geom_bond.atom_site_label_2 diff --git a/src/cli/cifschema/util/cif-dic.ts b/src/cli/cifschema/util/cif-dic.ts index 0498f47c1..8e1eadc14 100644 --- a/src/cli/cifschema/util/cif-dic.ts +++ b/src/cli/cifschema/util/cif-dic.ts @@ -81,7 +81,7 @@ export function getFieldType(type: string, description: string, values?: string[ case 'List(Real,Real)': case 'List(Real,Real,Real,Real)': case 'Date': - case 'Datetime': + case 'DateTime': case 'Tag': case 'Implied': return wrapContainer('str', ',', description, container); @@ -234,29 +234,26 @@ const FORCE_INT_FIELDS = [ '_struct_sheet_range.end_auth_seq_id', ]; +/** + * Note that name and mapped name must share a prefix. This is not always the case in + * the cifCore dictionary, but for downstream code to work a container field with the + * same prefix as the member fields must be given here and in the field names filter + * list. 
+ */ const FORCE_MATRIX_FIELDS_MAP: { [k: string]: string } = { - 'atom_site_aniso.U_11': 'U', - 'atom_site_aniso.U_22': 'U', - 'atom_site_aniso.U_33': 'U', - 'atom_site_aniso.U_23': 'U', - 'atom_site_aniso.U_13': 'U', - 'atom_site_aniso.U_12': 'U', - 'atom_site_aniso.U_11_su': 'U_su', - 'atom_site_aniso.U_22_su': 'U_su', - 'atom_site_aniso.U_33_su': 'U_su', - 'atom_site_aniso.U_23_su': 'U_su', - 'atom_site_aniso.U_13_su': 'U_su', - 'atom_site_aniso.U_12_su': 'U_su', + 'atom_site_aniso.u_11': 'u', // is matrix_u in the dic + 'atom_site_aniso.u_22': 'u', + 'atom_site_aniso.u_33': 'u', + 'atom_site_aniso.u_23': 'u', + 'atom_site_aniso.u_13': 'u', + 'atom_site_aniso.u_12': 'u', }; const FORCE_MATRIX_FIELDS = Object.keys(FORCE_MATRIX_FIELDS_MAP); const EXTRA_ALIASES: Database['aliases'] = { - 'atom_site_aniso.U': [ - 'atom_site_anisotrop_U' - ], - 'atom_site_aniso.U_su': [ - 'atom_site_aniso_U_esd', - 'atom_site_anisotrop_U_esd', + 'atom_site_aniso.matrix_u': [ + 'atom_site_anisotrop_U', + 'atom_site_aniso.U' ], }; diff --git a/src/cli/cifschema/util/helper.ts b/src/cli/cifschema/util/helper.ts index d7e9ab4d8..5da37cc68 100644 --- a/src/cli/cifschema/util/helper.ts +++ b/src/cli/cifschema/util/helper.ts @@ -10,8 +10,8 @@ export function parseImportGet(s: string): Import[] { // [{'save':hi_ang_Fox_coeffs 'file':templ_attr.cif} {'save':hi_ang_Fox_c0 'file':templ_enum.cif}] // [{"file":'templ_enum.cif' "save":'H_M_ref'}] return s.trim().substring(2, s.length - 2).split(/}[ \n\t]*{/g).map(s => { - const save = s.match(/('save'|"save"):([^ \t\n]+)/); - const file = s.match(/('file'|"file"):([^ \t\n]+)/); + const save = s.match(/('save'|"save"):([^ \t\n{}]+)/); + const file = s.match(/('file'|"file"):([^ \t\n{}]+)/); return { save: save ? save[0].substr(7).replace(/['"]/g, '') : undefined, file: file ?
file[0].substr(7).replace(/['"]/g, '') : undefined diff --git a/src/mol-io/reader/cif/schema/cif-core.ts b/src/mol-io/reader/cif/schema/cif-core.ts index 711bcd0fd..63ec3b3fc 100644 --- a/src/mol-io/reader/cif/schema/cif-core.ts +++ b/src/mol-io/reader/cif/schema/cif-core.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'CifCore' schema file. Dictionary versions: CifCore 3.0.14. + * Code-generated 'CifCore' schema file. Dictionary versions: CifCore 3.1.0. * * @author molstar/ciftools package */ @@ -10,8 +10,8 @@ import { Database, Column } from '../../../../mol-data/db'; import Schema = Column.Schema; -const int = Schema.int; const float = Schema.float; +const int = Schema.int; const str = Schema.str; const Matrix = Schema.Matrix; @@ -21,16 +21,6 @@ export const CifCore_Schema = { * the crystal unit cell and their measurement. */ cell: { - /** - * The number of the formula units in the unit cell as specified - * by _chemical_formula.structural, _chemical_formula.moiety or - * _chemical_formula.sum. - */ - formula_units_Z: int, - /** - * Volume of the crystal unit cell. - */ - volume: float, /** * The angle between the bounding cell axes. */ @@ -43,6 +33,12 @@ export const CifCore_Schema = { * The angle between the bounding cell axes. */ angle_gamma: float, + /** + * The number of the formula units in the unit cell as specified + * by _chemical_formula.structural, _chemical_formula.moiety or + * _chemical_formula.sum. + */ + formula_units_z: int, /** * The length of each cell axis. */ @@ -55,6 +51,10 @@ export const CifCore_Schema = { * The length of each cell axis. */ length_c: float, + /** + * Volume of the crystal unit cell. 
+ */ + volume: float, }, /** * The CATEGORY of data items which describe the composition and @@ -184,12 +184,12 @@ export const CifCore_Schema = { crystal_system: str, /** * The number as assigned in International Tables for Crystallography - * Vol A, specifying the proper affine class (i.e. the orientation + * Vol. A, specifying the proper affine class (i.e. the orientation * preserving affine class) of space groups (crystallographic space * group type) to which the space group belongs. This number defines * the space group type but not the coordinate system expressed. */ - IT_number: int, + it_number: int, /** * The full international Hermann-Mauguin space-group symbol as * defined in Section 2.2.3 and given as the second item of the @@ -220,7 +220,7 @@ export const CifCore_Schema = { * Space-group symmetry, edited by Th. Hahn, 5th ed. * Dordrecht: Kluwer Academic Publishers. */ - 'name_H-M_full': str, + 'name_h-m_full': str, }, /** * The CATEGORY of data items used to describe symmetry equivalent sites @@ -340,8 +340,8 @@ export const CifCore_Schema = { /** * The digital object identifier (DOI) registered to identify * the data set publication represented by the current - * datablock. This can be used as a unique identifier for - * the datablock so long as the code used is a valid DOI + * data block. This can be used as a unique identifier for + * the data block so long as the code used is a valid DOI * (i.e. begins with a valid publisher prefix assigned by a * Registration Agency and a suffix guaranteed to be unique * by the publisher) and has had its metadata deposited @@ -354,8 +354,8 @@ export const CifCore_Schema = { * structured extensible way. A DOI is an implementation * of the Internet concepts of Uniform Resource Name and * Universal Resource Locator managed according to the - * specifications of the International DOI Foundation (see - * http://www.doi.org). + * specifications of the International DOI Foundation + * (see http://www.doi.org). 
*/ block_doi: str, }, @@ -366,13 +366,13 @@ export const CifCore_Schema = { */ database_code: { /** - * Code assigned by Crystallography Open Database (COD). + * Code assigned by the Crystallography Open Database (COD). */ - COD: str, + cod: str, /** * Code assigned by the Cambridge Structural Database. */ - CSD: str, + csd: str, /** * Deposition numbers assigned by the Cambridge Crystallographic * Data Centre (CCDC) to files containing structural information @@ -388,15 +388,15 @@ export const CifCore_Schema = { /** * Code assigned by the Inorganic Crystal Structure Database. */ - ICSD: str, + icsd: str, /** * Code assigned in the Metals Data File. */ - MDF: str, + mdf: str, /** * Code assigned by the NBS (NIST) Crystal Data Database. */ - NBS: str, + nbs: str, }, /** * The CATEGORY of data items used to describe atom site information @@ -511,7 +511,7 @@ export const CifCore_Schema = { * a* = the reciprocal-space cell lengths * Ref: Fischer, R. X. & Tillmanns, E. (1988). Acta Cryst. C44, 775-776. */ - U_iso_or_equiv: float, + u_iso_or_equiv: float, }, /** * The CATEGORY of data items used to describe the anisotropic @@ -537,7 +537,7 @@ export const CifCore_Schema = { * * The unique elements of the real symmetric matrix are entered by row. */ - U_11: float, + u_11: float, /** * These are the standard anisotropic atomic displacement * components in angstroms squared which appear in the @@ -550,21 +550,7 @@ export const CifCore_Schema = { * * The unique elements of the real symmetric matrix are entered by row. */ - U: Matrix(3, 3), - /** - * These are the standard uncertainty values (SU) for the standard - * form of the Uij anisotropic atomic displacement components (see - * _aniso_UIJ. Because these values are TYPE measurand, the su values - * may in practice be auto generated as part of the Uij calculation. 
- */ - U_11_su: float, - /** - * These are the standard uncertainty values (SU) for the standard - * form of the Uij anisotropic atomic displacement components (see - * _aniso_UIJ. Because these values are TYPE measurand, the su values - * may in practice be auto generated as part of the Uij calculation. - */ - U_su: Matrix(3, 3), + u: Matrix(3, 3), /** * These are the standard anisotropic atomic displacement * components in angstroms squared which appear in the @@ -577,14 +563,7 @@ export const CifCore_Schema = { * * The unique elements of the real symmetric matrix are entered by row. */ - U_12: float, - /** - * These are the standard uncertainty values (SU) for the standard - * form of the Uij anisotropic atomic displacement components (see - * _aniso_UIJ. Because these values are TYPE measurand, the su values - * may in practice be auto generated as part of the Uij calculation. - */ - U_12_su: float, + u_12: float, /** * These are the standard anisotropic atomic displacement * components in angstroms squared which appear in the @@ -597,14 +576,7 @@ export const CifCore_Schema = { * * The unique elements of the real symmetric matrix are entered by row. */ - U_13: float, - /** - * These are the standard uncertainty values (SU) for the standard - * form of the Uij anisotropic atomic displacement components (see - * _aniso_UIJ. Because these values are TYPE measurand, the su values - * may in practice be auto generated as part of the Uij calculation. - */ - U_13_su: float, + u_13: float, /** * These are the standard anisotropic atomic displacement * components in angstroms squared which appear in the @@ -617,14 +589,7 @@ export const CifCore_Schema = { * * The unique elements of the real symmetric matrix are entered by row. */ - U_22: float, - /** - * These are the standard uncertainty values (SU) for the standard - * form of the Uij anisotropic atomic displacement components (see - * _aniso_UIJ. 
Because these values are TYPE measurand, the su values - * may in practice be auto generated as part of the Uij calculation. - */ - U_22_su: float, + u_22: float, /** * These are the standard anisotropic atomic displacement * components in angstroms squared which appear in the @@ -637,14 +602,7 @@ export const CifCore_Schema = { * * The unique elements of the real symmetric matrix are entered by row. */ - U_23: float, - /** - * These are the standard uncertainty values (SU) for the standard - * form of the Uij anisotropic atomic displacement components (see - * _aniso_UIJ. Because these values are TYPE measurand, the su values - * may in practice be auto generated as part of the Uij calculation. - */ - U_23_su: float, + u_23: float, /** * These are the standard anisotropic atomic displacement * components in angstroms squared which appear in the @@ -657,14 +615,7 @@ export const CifCore_Schema = { * * The unique elements of the real symmetric matrix are entered by row. */ - U_33: float, - /** - * These are the standard uncertainty values (SU) for the standard - * form of the Uij anisotropic atomic displacement components (see - * _aniso_UIJ. Because these values are TYPE measurand, the su values - * may in practice be auto generated as part of the Uij calculation. 
- */ - U_33_su: float, + u_33: float, }, /** * The CATEGORY of data items used to describe atomic type information @@ -710,17 +661,14 @@ export const CifCore_Schema = { }; export const CifCore_Aliases = { - 'atom_site_aniso.U': [ - 'atom_site_anisotrop_U', - ], - 'atom_site_aniso.U_su': [ - 'atom_site_aniso_U_esd', - 'atom_site_anisotrop_U_esd', + 'cell.formula_units_z': [ + 'cell_formula_units_Z', ], - 'space_group.IT_number': [ + 'space_group.it_number': [ + 'space_group_IT_number', 'symmetry_Int_Tables_number', ], - 'space_group.name_H-M_full': [ + 'space_group.name_h-m_full': [ 'symmetry_space_group_name_H-M', ], 'space_group_symop.operation_xyz': [ @@ -735,6 +683,21 @@ export const CifCore_Aliases = { 'geom_bond.distance': [ 'geom_bond_dist', ], + 'database_code.cod': [ + 'database_code_COD', + ], + 'database_code.csd': [ + 'database_code_CSD', + ], + 'database_code.icsd': [ + 'database_code_ICSD', + ], + 'database_code.mdf': [ + 'database_code_MDF', + ], + 'database_code.nbs': [ + 'database_code_NBS', + ], 'atom_site.adp_type': [ 'atom_site_thermal_displace_type', ], @@ -744,51 +707,36 @@ export const CifCore_Aliases = { 'atom_site.site_symmetry_multiplicity': [ 'atom_site_symmetry_multiplicity', ], + 'atom_site.u_iso_or_equiv': [ + 'atom_site_U_iso_or_equiv', + ], 'atom_site_aniso.label': [ 'atom_site_anisotrop_id', ], - 'atom_site_aniso.U_11': [ + 'atom_site_aniso.u_11': [ + 'atom_site_aniso_U_11', 'atom_site_anisotrop_U_11', ], - 'atom_site_aniso.U_11_su': [ - 'atom_site_aniso_U_11_esd', - 'atom_site_anisotrop_U_11_esd', - ], - 'atom_site_aniso.U_12': [ + 'atom_site_aniso.u_12': [ + 'atom_site_aniso_U_12', 'atom_site_anisotrop_U_12', ], - 'atom_site_aniso.U_12_su': [ - 'atom_site_aniso_U_12_esd', - 'atom_site_anisotrop_U_12_esd', - ], - 'atom_site_aniso.U_13': [ + 'atom_site_aniso.u_13': [ + 'atom_site_aniso_U_13', 'atom_site_anisotrop_U_13', ], - 'atom_site_aniso.U_13_su': [ - 'atom_site_aniso_U_13_esd', - 'atom_site_anisotrop_U_13_esd', - ], - 
'atom_site_aniso.U_22': [ + 'atom_site_aniso.u_22': [ + 'atom_site_aniso_U_22', 'atom_site_anisotrop_U_22', ], - 'atom_site_aniso.U_22_su': [ - 'atom_site_aniso_U_22_esd', - 'atom_site_anisotrop_U_22_esd', - ], - 'atom_site_aniso.U_23': [ + 'atom_site_aniso.u_23': [ + 'atom_site_aniso_U_23', 'atom_site_anisotrop_U_23', ], - 'atom_site_aniso.U_23_su': [ - 'atom_site_aniso_U_23_esd', - 'atom_site_anisotrop_U_23_esd', - ], - 'atom_site_aniso.U_33': [ + 'atom_site_aniso.u_33': [ + 'atom_site_aniso_U_33', 'atom_site_anisotrop_U_33', ], - 'atom_site_aniso.U_33_su': [ - 'atom_site_aniso_U_33_esd', - 'atom_site_anisotrop_U_33_esd', - ], }; export type CifCore_Schema = typeof CifCore_Schema; diff --git a/src/mol-model-formats/structure/_spec/cif-core.spec.ts b/src/mol-model-formats/structure/_spec/cif-core.spec.ts index 559faac64..373ed08d3 100644 --- a/src/mol-model-formats/structure/_spec/cif-core.spec.ts +++ b/src/mol-model-formats/structure/_spec/cif-core.spec.ts @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info. + * Copyright (c) 2019-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. 
* * @author Alexander Rose <alexander.rose@weirdbyte.de> */ @@ -32,6 +32,9 @@ N N 0.0311 0.0180 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4' O O 0.0492 0.0322 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4' F F 0.0727 0.0534 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4' +_symmetry_cell_setting Triclinic +_symmetry_space_group_name_H-M P-1 + _cell_length_a 11.0829(8) _cell_length_b 14.6829(10) _cell_length_c 16.8532(17) @@ -82,7 +85,8 @@ describe('cif-core read', () => { const block = cifFile.blocks[0]; expect(block.getField('cell_length_a')!.float(0)).toBe(11.0829); - expect.assertions(1); + expect(block.getField('symmetry_space_group_name_H-M')!.str(0)).toBe('P-1'); + expect.assertions(2); }); it('schema', async () => { @@ -93,7 +97,8 @@ describe('cif-core read', () => { const cifCore = CIF.schema.cifCore(block); expect(cifCore.cell.length_a.value(0)).toBe(11.0829); - expect(cifCore.atom_site_aniso.U.value(0)).toEqual(new Float64Array([ 0.0425, 0, 0, 0.00089, 0.0423, 0, 0.01515, 0.00066, 0.0375 ])); - expect.assertions(2); + expect(cifCore.space_group['name_h-m_full'].value(0)).toBe('P-1'); + expect(cifCore.atom_site_aniso.u.value(0)).toEqual(new Float64Array([ 0.0425, 0, 0, 0.00089, 0.0423, 0, 0.01515, 0.00066, 0.0375 ])); + expect.assertions(3); }); }); \ No newline at end of file diff --git a/src/mol-model-formats/structure/cif-core.ts b/src/mol-model-formats/structure/cif-core.ts index 75af4d45f..5afdb6d9e 100644 --- a/src/mol-model-formats/structure/cif-core.ts +++ b/src/mol-model-formats/structure/cif-core.ts @@ -25,10 +25,10 @@ import { Trajectory } from '../../mol-model/structure'; import { cantorPairing } from '../../mol-data/util'; function getSpacegroupNameOrNumber(space_group: CifCore_Database['space_group']) { - const groupNumber = space_group.IT_number.value(0); - const groupName = space_group['name_H-M_full'].value(0); - if (!space_group.IT_number.isDefined) return groupName; - if 
(!space_group['name_H-M_full'].isDefined) return groupNumber; + const groupNumber = space_group.it_number.value(0); + const groupName = space_group['name_h-m_full'].value(0).replace('-', ' '); + if (!space_group.it_number.isDefined) return groupName; + if (!space_group['name_h-m_full'].isDefined) return groupNumber; return groupNumber; } @@ -129,7 +129,7 @@ async function getModels(db: CifCore_Database, format: CifCoreFormat, ctx: Runti pdbx_formal_charge: formalCharge, pdbx_PDB_model_num: Column.ofConst(1, atomCount, Column.Schema.int), - B_iso_or_equiv: db.atom_site.U_iso_or_equiv, + B_iso_or_equiv: db.atom_site.u_iso_or_equiv, }, atomCount); const name = ( @@ -231,15 +231,14 @@ function atomSiteAnisotropFromCifCore(model: Model) { if (!CifCoreFormat.is(model.sourceData)) return; const { atom_site, atom_site_aniso } = model.sourceData.data.db; const data = Table.ofPartialColumns(AtomSiteAnisotrop.Schema, { - U: atom_site_aniso.U, - U_esd: atom_site_aniso.U_su + U: atom_site_aniso.u, }, atom_site_aniso._rowCount); const elementToAnsiotrop = AtomSiteAnisotrop.getElementToAnsiotropFromLabel(atom_site.label, atom_site_aniso.label); return { data, elementToAnsiotrop }; } function atomSiteAnisotropApplicableCifCore(model: Model) { if (!CifCoreFormat.is(model.sourceData)) return false; - return model.sourceData.data.db.atom_site_aniso.U.isDefined; + return model.sourceData.data.db.atom_site_aniso.u.isDefined; } AtomSiteAnisotrop.Provider.formatRegistry.add('cifCore', atomSiteAnisotropFromCifCore, atomSiteAnisotropApplicableCifCore); @@ -261,11 +260,11 @@ namespace CifCoreFormat { const name = ( db.database_code.depnum_ccdc_archive.value(0) || db.database_code.depnum_ccdc_fiz.value(0) || - db.database_code.ICSD.value(0) || - db.database_code.MDF.value(0) || - db.database_code.NBS.value(0) || - db.database_code.CSD.value(0) || - db.database_code.COD.value(0) || + db.database_code.icsd.value(0) || + db.database_code.mdf.value(0) || + db.database_code.nbs.value(0) || + 
db.database_code.csd.value(0) || + db.database_code.cod.value(0) || db._name ); -- GitLab