Skip to content
Snippets Groups Projects
Commit 47c1ac3b authored by Alexander Rose
Browse files

added ihm extension to mmcif schema

parent 5d022ffe
Branches
Tags
No related merge requests found
......@@ -19,6 +19,7 @@ atom_site.auth_comp_id
atom_site.auth_asym_id
atom_site.auth_seq_id
atom_site.pdbx_PDB_model_num
atom_site.ihm_model_id
chem_comp.id
chem_comp.type
......@@ -253,3 +254,337 @@ pdbx_struct_mod_residue.auth_comp_id
pdbx_struct_mod_residue.PDB_ins_code
pdbx_struct_mod_residue.parent_comp_id
pdbx_struct_mod_residue.details
ihm_struct_assembly.ordinal_id
ihm_struct_assembly.assembly_id
ihm_struct_assembly.parent_assembly_id
ihm_struct_assembly.entity_description
ihm_struct_assembly.entity_id
ihm_struct_assembly.asym_id
ihm_struct_assembly.seq_id_begin
ihm_struct_assembly.seq_id_end
ihm_struct_assembly_details.assembly_id
ihm_struct_assembly_details.assembly_name
ihm_struct_assembly_details.assembly_description
ihm_model_representation.ordinal_id
ihm_model_representation.representation_id
ihm_model_representation.segment_id
ihm_model_representation.entity_id
ihm_model_representation.entity_description
ihm_model_representation.entity_asym_id
ihm_model_representation.seq_id_begin
ihm_model_representation.seq_id_end
ihm_model_representation.model_object_primitive
ihm_model_representation.starting_model_id
ihm_model_representation.model_mode
ihm_model_representation.model_granularity
ihm_model_representation.model_object_count
ihm_external_reference_info.reference_id
ihm_external_reference_info.reference_provider
ihm_external_reference_info.reference_type
ihm_external_reference_info.reference
ihm_external_reference_info.refers_to
ihm_external_reference_info.associated_url
ihm_external_files.id
ihm_external_files.reference_id
ihm_external_files.file_path
ihm_external_files.content_type
ihm_external_files.file_size_bytes
ihm_external_files.details
ihm_dataset_list.id
ihm_dataset_list.data_type
ihm_dataset_list.database_hosted
ihm_dataset_group.ordinal_id
ihm_dataset_group.group_id
ihm_dataset_group.dataset_list_id
ihm_dataset_external_reference.id
ihm_dataset_external_reference.dataset_list_id
ihm_dataset_external_reference.file_id
ihm_dataset_related_db_reference.id
ihm_dataset_related_db_reference.dataset_list_id
ihm_dataset_related_db_reference.db_name
ihm_dataset_related_db_reference.accession_code
ihm_dataset_related_db_reference.version
ihm_dataset_related_db_reference.details
ihm_related_datasets.ordinal_id
ihm_related_datasets.dataset_list_id_derived
ihm_related_datasets.dataset_list_id_primary
ihm_poly_residue_feature.ordinal_id
ihm_poly_residue_feature.feature_id
ihm_poly_residue_feature.entity_id
ihm_poly_residue_feature.asym_id
ihm_poly_residue_feature.seq_id_begin
ihm_poly_residue_feature.comp_id_begin
ihm_poly_residue_feature.seq_id_end
ihm_poly_residue_feature.comp_id_end
ihm_feature_list.feature_id
ihm_feature_list.feature_type
ihm_feature_list.entity_type
ihm_cross_link_list.id
ihm_cross_link_list.group_id
ihm_cross_link_list.entity_description_1
ihm_cross_link_list.entity_id_1
ihm_cross_link_list.seq_id_1
ihm_cross_link_list.comp_id_1
ihm_cross_link_list.entity_description_2
ihm_cross_link_list.entity_id_2
ihm_cross_link_list.seq_id_2
ihm_cross_link_list.comp_id_2
ihm_cross_link_list.linker_type
ihm_cross_link_list.dataset_list_id
ihm_cross_link_restraint.id
ihm_cross_link_restraint.group_id
ihm_cross_link_restraint.entity_id_1
ihm_cross_link_restraint.asym_id_1
ihm_cross_link_restraint.seq_id_1
ihm_cross_link_restraint.comp_id_1
ihm_cross_link_restraint.entity_id_2
ihm_cross_link_restraint.asym_id_2
ihm_cross_link_restraint.seq_id_2
ihm_cross_link_restraint.comp_id_2
ihm_cross_link_restraint.restraint_type
ihm_cross_link_restraint.conditional_crosslink_flag
ihm_cross_link_restraint.model_granularity
ihm_cross_link_restraint.distance_threshold
ihm_cross_link_restraint.psi
ihm_cross_link_restraint.sigma_1
ihm_cross_link_restraint.sigma_2
ihm_cross_link_result_parameters.ordinal_id
ihm_cross_link_result_parameters.restraint_id
ihm_cross_link_result_parameters.model_id
ihm_cross_link_result_parameters.psi
ihm_cross_link_result_parameters.sigma_1
ihm_cross_link_result_parameters.sigma_2
ihm_sas_restraint.ordinal_id
ihm_sas_restraint.dataset_list_id
ihm_sas_restraint.model_id
ihm_sas_restraint.struct_assembly_id
ihm_sas_restraint.profile_segment_flag
ihm_sas_restraint.fitting_atom_type
ihm_sas_restraint.fitting_method
ihm_sas_restraint.fitting_state
ihm_sas_restraint.radius_of_gyration
ihm_sas_restraint.chi_value
ihm_sas_restraint.details
ihm_derived_distance_restraint.id
ihm_derived_distance_restraint.group_id
ihm_derived_distance_restraint.feature_id_1
ihm_derived_distance_restraint.feature_id_2
ihm_derived_distance_restraint.group_conditionality
ihm_derived_distance_restraint.restraint_type
ihm_derived_distance_restraint.distance_upper_limit
ihm_derived_distance_restraint.random_exclusion_fraction
ihm_derived_distance_restraint.dataset_list_id
ihm_2dem_class_average_restraint.id
ihm_2dem_class_average_restraint.dataset_list_id
ihm_2dem_class_average_restraint.number_raw_micrographs
ihm_2dem_class_average_restraint.pixel_size_width
ihm_2dem_class_average_restraint.pixel_size_height
ihm_2dem_class_average_restraint.image_resolution
ihm_2dem_class_average_restraint.image_segment_flag
ihm_2dem_class_average_restraint.number_of_projections
ihm_2dem_class_average_restraint.struct_assembly_id
ihm_2dem_class_average_restraint.details
ihm_2dem_class_average_fitting.ordinal_id
ihm_2dem_class_average_fitting.restraint_id
ihm_2dem_class_average_fitting.model_id
ihm_2dem_class_average_fitting.cross_correlation_coefficient
ihm_2dem_class_average_fitting.rot_matrix
ihm_2dem_class_average_fitting.tr_vector
ihm_3dem_restraint.ordinal_id
ihm_3dem_restraint.dataset_list_id
ihm_3dem_restraint.fitting_method
ihm_3dem_restraint.struct_assembly_id
ihm_3dem_restraint.number_of_gaussians
ihm_3dem_restraint.model_id
ihm_3dem_restraint.cross_correlation_coefficient
ihm_predicted_contact_restraint.id
ihm_predicted_contact_restraint.entity_id_1
ihm_predicted_contact_restraint.asym_id_1
ihm_predicted_contact_restraint.seq_id_1
ihm_predicted_contact_restraint.comp_id_1
ihm_predicted_contact_restraint.atom_id_1
ihm_predicted_contact_restraint.entity_id_2
ihm_predicted_contact_restraint.asym_id_2
ihm_predicted_contact_restraint.seq_id_2
ihm_predicted_contact_restraint.comp_id_2
ihm_predicted_contact_restraint.atom_id_2
ihm_predicted_contact_restraint.restraint_type
ihm_predicted_contact_restraint.distance_upper_limit
ihm_predicted_contact_restraint.probability
ihm_predicted_contact_restraint.model_granularity
ihm_predicted_contact_restraint.dataset_list_id
ihm_predicted_contact_restraint.software_id
ihm_starting_model_details.starting_model_id
ihm_starting_model_details.entity_id
ihm_starting_model_details.entity_description
ihm_starting_model_details.asym_id
ihm_starting_model_details.seq_id_begin
ihm_starting_model_details.seq_id_end
ihm_starting_model_details.starting_model_source
ihm_starting_model_details.starting_model_auth_asym_id
ihm_starting_model_details.starting_model_sequence_offset
ihm_starting_model_details.dataset_list_id
ihm_starting_comparative_models.ordinal_id
ihm_starting_comparative_models.starting_model_id
ihm_starting_comparative_models.starting_model_auth_asym_id
ihm_starting_comparative_models.starting_model_seq_id_begin
ihm_starting_comparative_models.starting_model_seq_id_end
ihm_starting_comparative_models.template_auth_asym_id
ihm_starting_comparative_models.template_seq_id_begin
ihm_starting_comparative_models.template_seq_id_end
ihm_starting_comparative_models.template_sequence_identity
ihm_starting_comparative_models.template_sequence_identity_denominator
ihm_starting_comparative_models.template_dataset_list_id
ihm_starting_comparative_models.alignment_file_id
ihm_starting_model_coord.starting_model_id
ihm_starting_model_coord.group_PDB
ihm_starting_model_coord.id
ihm_starting_model_coord.type_symbol
ihm_starting_model_coord.atom_id
ihm_starting_model_coord.comp_id
ihm_starting_model_coord.entity_id
ihm_starting_model_coord.asym_id
ihm_starting_model_coord.seq_id
ihm_starting_model_coord.Cartn_x
ihm_starting_model_coord.Cartn_y
ihm_starting_model_coord.Cartn_z
ihm_starting_model_coord.B_iso_or_equiv
ihm_starting_model_coord.ordinal_id
ihm_starting_model_seq_dif.ordinal_id
ihm_starting_model_seq_dif.entity_id
ihm_starting_model_seq_dif.asym_id
ihm_starting_model_seq_dif.seq_id
ihm_starting_model_seq_dif.comp_id
ihm_starting_model_seq_dif.starting_model_id
ihm_starting_model_seq_dif.db_asym_id
ihm_starting_model_seq_dif.db_seq_id
ihm_starting_model_seq_dif.db_comp_id
ihm_starting_model_seq_dif.details
ihm_modeling_protocol.ordinal_id
ihm_modeling_protocol.protocol_id
ihm_modeling_protocol.step_id
ihm_modeling_protocol.struct_assembly_id
ihm_modeling_protocol.dataset_group_id
ihm_modeling_protocol.struct_assembly_description
ihm_modeling_protocol.protocol_name
ihm_modeling_protocol.step_name
ihm_modeling_protocol.step_method
ihm_modeling_protocol.num_models_begin
ihm_modeling_protocol.num_models_end
ihm_modeling_protocol.multi_scale_flag
ihm_modeling_protocol.multi_state_flag
ihm_modeling_protocol.ordered_flag
ihm_modeling_post_process.id
ihm_modeling_post_process.protocol_id
ihm_modeling_post_process.analysis_id
ihm_modeling_post_process.step_id
ihm_modeling_post_process.type
ihm_modeling_post_process.feature
ihm_modeling_post_process.num_models_begin
ihm_modeling_post_process.num_models_end
ihm_ensemble_info.ensemble_id
ihm_ensemble_info.ensemble_name
ihm_ensemble_info.post_process_id
ihm_ensemble_info.model_group_id
ihm_ensemble_info.ensemble_clustering_method
ihm_ensemble_info.ensemble_clustering_feature
ihm_ensemble_info.num_ensemble_models
ihm_ensemble_info.num_ensemble_models_deposited
ihm_ensemble_info.ensemble_precision_value
ihm_ensemble_info.ensemble_file_id
ihm_localization_density_files.id
ihm_localization_density_files.file_id
ihm_localization_density_files.ensemble_id
ihm_localization_density_files.entity_id
ihm_localization_density_files.asym_id
ihm_localization_density_files.seq_id_begin
ihm_localization_density_files.seq_id_end
ihm_model_list.ordinal_id
ihm_model_list.model_id
ihm_model_list.model_group_id
ihm_model_list.model_name
ihm_model_list.model_group_name
ihm_model_list.assembly_id
ihm_model_list.protocol_id
ihm_model_list.representation_id
ihm_model_representative.id
ihm_model_representative.model_group_id
ihm_model_representative.model_id
ihm_model_representative.selection_criteria
ihm_sphere_obj_site.ordinal_id
ihm_sphere_obj_site.entity_id
ihm_sphere_obj_site.seq_id_begin
ihm_sphere_obj_site.seq_id_end
ihm_sphere_obj_site.asym_id
ihm_sphere_obj_site.Cartn_x
ihm_sphere_obj_site.Cartn_y
ihm_sphere_obj_site.Cartn_z
ihm_sphere_obj_site.object_radius
ihm_sphere_obj_site.rmsf
ihm_sphere_obj_site.model_id
ihm_gaussian_obj_site.ordinal_id
ihm_gaussian_obj_site.entity_id
ihm_gaussian_obj_site.seq_id_begin
ihm_gaussian_obj_site.seq_id_end
ihm_gaussian_obj_site.asym_id
ihm_gaussian_obj_site.mean_Cartn_x
ihm_gaussian_obj_site.mean_Cartn_y
ihm_gaussian_obj_site.mean_Cartn_z
ihm_gaussian_obj_site.weight
ihm_gaussian_obj_site.covariance_matrix
ihm_gaussian_obj_site.model_id
ihm_gaussian_obj_ensemble.ordinal_id
ihm_gaussian_obj_ensemble.entity_id
ihm_gaussian_obj_ensemble.seq_id_begin
ihm_gaussian_obj_ensemble.seq_id_end
ihm_gaussian_obj_ensemble.asym_id
ihm_gaussian_obj_ensemble.mean_Cartn_x
ihm_gaussian_obj_ensemble.mean_Cartn_y
ihm_gaussian_obj_ensemble.mean_Cartn_z
ihm_gaussian_obj_ensemble.weight
ihm_gaussian_obj_ensemble.covariance_matrix
ihm_gaussian_obj_ensemble.ensemble_id
ihm_multi_state_modeling.ordinal_id
ihm_multi_state_modeling.state_id
ihm_multi_state_modeling.state_group_id
ihm_multi_state_modeling.population_fraction
ihm_multi_state_modeling.state_type
ihm_multi_state_modeling.state_name
ihm_multi_state_modeling.model_group_id
ihm_multi_state_modeling.experiment_type
ihm_multi_state_modeling.details
\ No newline at end of file
This diff is collapsed.
......@@ -11,7 +11,7 @@ import { generate } from './util/generate'
function generateSchema (name: string, path: string) {
const str = fs.readFileSync(path, 'utf8')
return generate(name, JSON.parse(str))
return generate(name, '', JSON.parse(str))
}
const parser = new argparse.ArgumentParser({
......
......@@ -10,34 +10,30 @@ import * as fs from 'fs'
import fetch from 'node-fetch'
import Csv from 'mol-io/reader/csv/parser'
import CIF from 'mol-io/reader/cif'
import CIF, { Frame } from 'mol-io/reader/cif'
import { generateSchema } from './util/cif-dic'
import { generate } from './util/generate'
import { Filter, mergeFilters } from './util/json-schema'
import { Run } from 'mol-task';
import { Filter } from './util/json-schema'
import { Run } from 'mol-task'
async function runGenerateSchema(name: string, fieldNamesPath?: string, minCount = 0, typescript = false, out?: string) {
async function runGenerateSchema(name: string, fieldNamesPath?: string, typescript = false, out?: string) {
await ensureMmcifDicAvailable()
const comp = CIF.parseText(fs.readFileSync(MMCIF_DIC_PATH, 'utf8'))
const parsed = await Run(comp);
if (parsed.isError) throw parsed
// console.log(fieldNamesPath, minCount)
let filter: Filter | undefined
if (minCount && fieldNamesPath) {
filter = mergeFilters(
await getUsageCountsFilter(minCount),
await getFieldNamesFilter(fieldNamesPath)
)
} else if (minCount) {
filter = await getUsageCountsFilter(minCount)
} else if (fieldNamesPath) {
filter = await getFieldNamesFilter(fieldNamesPath)
}
const mmcifDic = await Run(CIF.parseText(fs.readFileSync(MMCIF_DIC_PATH, 'utf8')));
if (mmcifDic.isError) throw mmcifDic
await ensureIhmDicAvailable()
const ihmDic = await Run(CIF.parseText(fs.readFileSync(IHM_DIC_PATH, 'utf8')));
if (ihmDic.isError) throw ihmDic
const mmcifDicVersion = CIF.schema.dic(mmcifDic.result.blocks[0]).dictionary.version.value(0)
const ihmDicVersion = CIF.schema.dic(ihmDic.result.blocks[0]).dictionary.version.value(0)
const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}.`
const schema = generateSchema(parsed.result.blocks[0])
const output = typescript ? generate(name, schema, filter) : JSON.stringify(schema, undefined, 4)
const frames: Frame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames]
const schema = generateSchema(frames)
const filter = fieldNamesPath ? await getFieldNamesFilter(fieldNamesPath) : undefined
const output = typescript ? generate(name, version, schema, filter) : JSON.stringify(schema, undefined, 4)
if (out) {
fs.writeFileSync(out, output)
......@@ -67,44 +63,31 @@ async function getFieldNamesFilter(fieldNamesPath: string): Promise<Filter> {
return filter
}
async function getUsageCountsFilter(minCount: number): Promise<Filter> {
const usageCountsStr = fs.readFileSync(MMCIF_USAGE_COUNTS_PATH, 'utf8')
const parsed = await Run(Csv(usageCountsStr, { delimiter: ' ' }));
if (parsed.isError) throw parser.error
const csvFile = parsed.result;
const fieldNamesCol = csvFile.table.getColumn('field_name')
const usageCountsCol = csvFile.table.getColumn('usage_count')
if (!fieldNamesCol || !usageCountsCol) throw 'error getting usage columns'
const fieldNames = fieldNamesCol.toStringArray()
const usageCounts = usageCountsCol.toIntArray()
/**
 * Ensure the mmCIF dictionary is available by delegating to
 * `ensureDicAvailable` with the mmCIF path/URL constants.
 * Resolves once that call has completed.
 */
async function ensureMmcifDicAvailable() {
    const target = { path: MMCIF_DIC_PATH, url: MMCIF_DIC_URL }
    await ensureDicAvailable(target.path, target.url)
}
const filter: Filter = {}
fieldNames.forEach((name, i) => {
if (usageCounts[i] < minCount) return
const [ category, field ] = name.substr(1).split('.')
if (!filter[ category ]) filter[ category ] = {}
filter[ category ][ field ] = true
})
return filter
/**
 * Ensure the IHM extension dictionary is available by delegating to
 * `ensureDicAvailable` with the IHM path/URL constants.
 * Resolves once that call has completed.
 */
async function ensureIhmDicAvailable() {
    const target = { path: IHM_DIC_PATH, url: IHM_DIC_URL }
    await ensureDicAvailable(target.path, target.url)
}
async function ensureMmcifDicAvailable() {
if (FORCE_MMCIF_DOWNLOAD || !fs.existsSync(MMCIF_DIC_PATH)) {
async function ensureDicAvailable(dicPath: string, dicUrl: string) {
if (FORCE_DIC_DOWNLOAD || !fs.existsSync(dicPath)) {
console.log('downloading mmcif dic...')
const data = await fetch(MMCIF_DIC_URL)
if (!fs.existsSync(MMCIF_DIC_DIR)) {
fs.mkdirSync(MMCIF_DIC_DIR);
const data = await fetch(dicUrl)
if (!fs.existsSync(DIC_DIR)) {
fs.mkdirSync(DIC_DIR);
}
fs.writeFileSync(MMCIF_DIC_PATH, await data.text())
fs.writeFileSync(dicPath, await data.text())
console.log('done downloading mmcif dic')
}
}
const MMCIF_USAGE_COUNTS_PATH = './data/mmcif-usage-counts.txt'
const MMCIF_DIC_DIR = './build/dics'
const MMCIF_DIC_PATH = `${MMCIF_DIC_DIR}/mmcif_pdbx_v50.dic`
const DIC_DIR = './build/dics'
const MMCIF_DIC_PATH = `${DIC_DIR}/mmcif_pdbx_v50.dic`
const MMCIF_DIC_URL = 'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic'
const IHM_DIC_PATH = `${DIC_DIR}/ihm-extension.dic`
const IHM_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/IHM-dictionary/master/ihm-extension.dic'
const parser = new argparse.ArgumentParser({
addHelp: true,
......@@ -121,31 +104,25 @@ parser.addArgument([ '--typescript', '-ts' ], {
action: 'storeTrue',
help: 'Output schema as TypeScript instead of as JSON'
});
parser.addArgument([ '--minFieldUsageCount', '-mc' ], {
defaultValue: 0,
type: parseInt,
help: 'Minimum mmcif field usage counts'
});
parser.addArgument([ '--fieldNamesPath', '-fn' ], {
defaultValue: '',
help: 'Field names to include'
});
parser.addArgument([ '--forceMmcifDicDownload', '-f' ], {
parser.addArgument([ '--forceDicDownload', '-f' ], {
action: 'storeTrue',
help: 'Force download of mmcif dictionary'
help: 'Force download of dictionaries'
});
interface Args {
name: string
forceMmcifDicDownload: boolean
forceDicDownload: boolean
fieldNamesPath: string
minFieldUsageCount: number
typescript: boolean
out: string
}
const args: Args = parser.parseArgs();
const FORCE_MMCIF_DOWNLOAD = args.forceMmcifDicDownload
const FORCE_DIC_DOWNLOAD = args.forceDicDownload
if (args.name) {
runGenerateSchema(args.name, args.fieldNamesPath, args.minFieldUsageCount, args.typescript, args.out)
runGenerateSchema(args.name, args.fieldNamesPath, args.typescript, args.out)
}
......@@ -6,6 +6,7 @@
import { Database, ValueColumn, ListColumn } from './json-schema'
import * as Data from 'mol-io/reader/cif/data-model'
import { Frame } from 'mol-io/reader/cif/data-model';
export function getFieldType (type: string, values?: string[]): ValueColumn|ListColumn {
switch (type) {
......@@ -92,7 +93,12 @@ function getField ( category: string, field: string, d: Data.Frame, ctx: FrameDa
return cat.getField(field)
} else {
if (d.header in links) {
return getField(category, field, categories[links[d.header]], ctx)
const linkName = links[d.header]
if (linkName in categories) {
return getField(category, field, categories[linkName], ctx)
} else {
console.log(`link '${linkName}' not found`)
}
} else {
// console.log(`no links found for '${d.header}'`)
}
......@@ -172,14 +178,14 @@ const SPACE_SEPARATED_LIST_FIELDS = [
'_pdbx_soln_scatter.data_analysis_software_list', // SCTPL5 GNOM
];
export function generateSchema (dic: Data.Block) {
export function generateSchema (frames: Frame[]) {
const schema: Database = {}
const categories: FrameCategories = {}
const links: FrameLinks = {}
const ctx = { categories, links }
dic.saveFrames.forEach(d => {
frames.forEach(d => {
if (d.header[0] !== '_') return
categories[d.header] = d
const item_linked = d.categories['item_linked']
......@@ -201,6 +207,10 @@ export function generateSchema (dic: Data.Block) {
Object.keys(categories).forEach(fullName => {
const d = categories[fullName]
if (!d) {
console.log('foo', fullName)
return
}
const categoryName = d.header.substring(1, d.header.indexOf('.'))
const itemName = d.header.substring(d.header.indexOf('.') + 1)
let fields
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment