Commit 1694a8d5 authored by Alexander Rose

removed code factored out into other packages

parent 9dd3a466
pdbx_reference_molecule.prd_id
pdbx_reference_molecule.name
pdbx_reference_molecule.represent_as
pdbx_reference_molecule.type
pdbx_reference_molecule.type_evidence_code
pdbx_reference_molecule.class
pdbx_reference_molecule.class_evidence_code
pdbx_reference_molecule.formula
pdbx_reference_molecule.chem_comp_id
pdbx_reference_molecule.formula_weight
pdbx_reference_molecule.release_status
pdbx_reference_molecule.replaces
pdbx_reference_molecule.replaced_by
pdbx_reference_molecule.compound_detail
pdbx_reference_molecule.description
pdbx_reference_molecule.representative_PDB_id_code
pdbx_reference_entity_list.prd_id
pdbx_reference_entity_list.ref_entity_id
pdbx_reference_entity_list.component_id
pdbx_reference_entity_list.type
pdbx_reference_entity_list.details
pdbx_reference_entity_nonpoly.prd_id
pdbx_reference_entity_nonpoly.ref_entity_id
pdbx_reference_entity_nonpoly.name
pdbx_reference_entity_nonpoly.chem_comp_id
pdbx_reference_entity_link.prd_id
pdbx_reference_entity_link.link_id
pdbx_reference_entity_link.link_class
pdbx_reference_entity_link.ref_entity_id_1
pdbx_reference_entity_link.entity_seq_num_1
pdbx_reference_entity_link.comp_id_1
pdbx_reference_entity_link.atom_id_1
pdbx_reference_entity_link.ref_entity_id_2
pdbx_reference_entity_link.entity_seq_num_2
pdbx_reference_entity_link.comp_id_2
pdbx_reference_entity_link.atom_id_2
pdbx_reference_entity_link.value_order
pdbx_reference_entity_link.component_1
pdbx_reference_entity_link.component_2
pdbx_reference_entity_link.details
pdbx_reference_entity_poly_link.prd_id
pdbx_reference_entity_poly_link.ref_entity_id
pdbx_reference_entity_poly_link.link_id
pdbx_reference_entity_poly_link.atom_id_1
pdbx_reference_entity_poly_link.comp_id_1
pdbx_reference_entity_poly_link.entity_seq_num_1
pdbx_reference_entity_poly_link.atom_id_2
pdbx_reference_entity_poly_link.comp_id_2
pdbx_reference_entity_poly_link.entity_seq_num_2
pdbx_reference_entity_poly_link.value_order
pdbx_reference_entity_poly_link.component_id
pdbx_reference_entity_poly.prd_id
pdbx_reference_entity_poly.ref_entity_id
pdbx_reference_entity_poly.db_code
pdbx_reference_entity_poly.db_name
pdbx_reference_entity_poly.type
pdbx_reference_entity_sequence.prd_id
pdbx_reference_entity_sequence.ref_entity_id
pdbx_reference_entity_sequence.type
pdbx_reference_entity_sequence.NRP_flag
pdbx_reference_entity_sequence.one_letter_codes
pdbx_reference_entity_poly_seq.prd_id
pdbx_reference_entity_poly_seq.ref_entity_id
pdbx_reference_entity_poly_seq.num
pdbx_reference_entity_poly_seq.mon_id
pdbx_reference_entity_poly_seq.parent_mon_id
pdbx_reference_entity_poly_seq.hetero
pdbx_reference_entity_poly_seq.observed
pdbx_reference_entity_src_nat.prd_id
pdbx_reference_entity_src_nat.ref_entity_id
pdbx_reference_entity_src_nat.ordinal
pdbx_reference_entity_src_nat.taxid
pdbx_reference_entity_src_nat.organism_scientific
pdbx_reference_entity_src_nat.db_code
pdbx_reference_entity_src_nat.db_name
pdbx_prd_audit.prd_id
pdbx_prd_audit.date
pdbx_prd_audit.processing_site
pdbx_prd_audit.action_type
\ No newline at end of file
chem_comp.id
chem_comp.name
chem_comp.type
chem_comp.pdbx_type
chem_comp.formula
chem_comp.mon_nstd_parent_comp_id
chem_comp.pdbx_synonyms
chem_comp.pdbx_formal_charge
chem_comp.pdbx_initial_date
chem_comp.pdbx_modified_date
chem_comp.pdbx_ambiguous_flag
chem_comp.pdbx_release_status
chem_comp.pdbx_replaced_by
chem_comp.pdbx_replaces
chem_comp.formula_weight
chem_comp.one_letter_code
chem_comp.three_letter_code
chem_comp.pdbx_model_coordinates_details
chem_comp.pdbx_model_coordinates_missing_flag
chem_comp.pdbx_ideal_coordinates_details
chem_comp.pdbx_ideal_coordinates_missing_flag
chem_comp.pdbx_model_coordinates_db_code
chem_comp.pdbx_processing_site
chem_comp_atom.comp_id
chem_comp_atom.atom_id
chem_comp_atom.alt_atom_id
chem_comp_atom.type_symbol
chem_comp_atom.charge
chem_comp_atom.pdbx_align
chem_comp_atom.pdbx_aromatic_flag
chem_comp_atom.pdbx_leaving_atom_flag
chem_comp_atom.pdbx_stereo_config
chem_comp_atom.model_Cartn_x
chem_comp_atom.model_Cartn_y
chem_comp_atom.model_Cartn_z
chem_comp_atom.pdbx_model_Cartn_x_ideal
chem_comp_atom.pdbx_model_Cartn_y_ideal
chem_comp_atom.pdbx_model_Cartn_z_ideal
chem_comp_atom.pdbx_ordinal
chem_comp_bond.comp_id
chem_comp_bond.atom_id_1
chem_comp_bond.atom_id_2
chem_comp_bond.value_order
chem_comp_bond.pdbx_aromatic_flag
chem_comp_bond.pdbx_stereo_config
chem_comp_bond.pdbx_ordinal
pdbx_chem_comp_descriptor.comp_id
pdbx_chem_comp_descriptor.type
pdbx_chem_comp_descriptor.program
pdbx_chem_comp_descriptor.program_version
pdbx_chem_comp_descriptor.descriptor
pdbx_chem_comp_identifier.comp_id
pdbx_chem_comp_identifier.type
pdbx_chem_comp_identifier.program
pdbx_chem_comp_identifier.program_version
pdbx_chem_comp_identifier.identifier
\ No newline at end of file
/**
* Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { CIF, CifCategory, getCifFieldType, CifField } from '../../mol-io/reader/cif'
import { CifWriter } from '../../mol-io/writer/cif'
import * as fs from 'fs'
import { Progress, Task, RuntimeContext } from '../../mol-task';
import { classifyFloatArray, classifyIntArray } from '../../mol-io/common/binary-cif';
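// Prints task progress on a single console line, overwriting the previous output.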
function showProgress(p: Progress) {
process.stdout.write(`\r${new Array(80).join(' ')}`);
process.stdout.write(`\r${Progress.format(p)}`);
}
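// Reads a CIF file from disk and parses it, throwing on parse errors.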
async function getCIF(ctx: RuntimeContext, path: string) {
const str = fs.readFileSync(path, 'utf8');
const parsed = await CIF.parseText(str).runInContext(ctx);
if (parsed.isError) {
throw new Error(parsed.toString());
}
return parsed.result;
}
function getCategoryInstanceProvider(cat: CifCategory, fields: CifWriter.Field[]): CifWriter.Category {
return {
name: cat.name,
instance: () => CifWriter.categoryInstance(fields, { data: cat, rowCount: cat.rowCount })
};
}
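// Inspects a parsed CIF field and returns a writer field of the matching type (str, float or int) with a content-classified binary encoder.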
function classify(name: string, field: CifField): CifWriter.Field {
const type = getCifFieldType(field);
if (type['@type'] === 'str') {
return { name, type: CifWriter.Field.Type.Str, value: field.str, valueKind: field.valueKind };
} else if (type['@type'] === 'float') {
const encoder = classifyFloatArray(field.toFloatArray({ array: Float64Array }));
return CifWriter.Field.float(name, field.float, { valueKind: field.valueKind, encoder, typedArray: Float64Array });
} else {
const encoder = classifyIntArray(field.toIntArray({ array: Int32Array }));
return CifWriter.Field.int(name, field.int, { valueKind: field.valueKind, encoder, typedArray: Int32Array });
}
}
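// Converts a CIF file to BinaryCIF (or re-encodes it as text when asText is true), reporting progress along the way.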
export default function convert(path: string, asText = false) {
return Task.create<Uint8Array>('BinaryCIF', async ctx => {
const cif = await getCIF(ctx, path);
const encoder = CifWriter.createEncoder({ binary: !asText, encoderName: 'mol* cif2bcif' });
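// First pass: count all categories and fields to establish the progress maximum.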
let maxProgress = 0;
for (const b of cif.blocks) {
maxProgress += b.categoryNames.length;
for (const c of b.categoryNames) maxProgress += b.categories[c].fieldNames.length;
}
let current = 0;
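// Second pass: classify each field and write the encoded categories.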
for (const b of cif.blocks) {
encoder.startDataBlock(b.header);
for (const c of b.categoryNames) {
const cat = b.categories[c];
const fields: CifWriter.Field[] = [];
for (const f of cat.fieldNames) {
fields.push(classify(f, cat.getField(f)!));
current++;
if (ctx.shouldUpdate) await ctx.update({ message: 'Encoding...', current, max: maxProgress });
}
encoder.writeCategory(getCategoryInstanceProvider(b.categories[c], fields));
current++;
if (ctx.shouldUpdate) await ctx.update({ message: 'Encoding...', current, max: maxProgress });
}
}
await ctx.update('Exporting...');
const ret = encoder.getData() as Uint8Array;
await ctx.update('Done.');
return ret;
}).run(showProgress, 250);
}
\ No newline at end of file
/**
* Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import * as fs from 'fs'
import convert from './converter'
(async function () {
if (process.argv.length !== 4) {
console.log('Usage:\nnode cif2bcif input.cif output.bcif');
return;
}
const src = process.argv[2];
const out = process.argv[3];
const res = await convert(src);
fs.writeFileSync(out, res);
}());
\ No newline at end of file
/**
* Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import * as argparse from 'argparse'
import createContext = require('gl')
import fs = require('fs')
import { PNG } from 'pngjs'
import { Canvas3D, Canvas3DParams } from '../../mol-canvas3d/canvas3d';
import InputObserver from '../../mol-util/input/input-observer';
import { ColorTheme } from '../../mol-theme/color';
import { SizeTheme } from '../../mol-theme/size';
import { CartoonRepresentationProvider } from '../../mol-repr/structure/representation/cartoon';
import { CIF, CifFrame } from '../../mol-io/reader/cif'
import { trajectoryFromMmCIF } from '../../mol-model-formats/structure/mmcif';
import { Model, Structure } from '../../mol-model/structure';
import { ajaxGet } from '../../mol-util/data-source';
import { ColorNames } from '../../mol-util/color/tables';
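// Renders offscreen at a fixed resolution via a headless WebGL context provided by the 'gl' package.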
const width = 2048
const height = 1536
const gl = createContext(width, height, {
alpha: false,
antialias: true,
depth: true,
preserveDrawingBuffer: true
})
const input = InputObserver.create()
const canvas3d = Canvas3D.create(gl, input, {
multiSample: {
mode: 'on',
sampleLevel: 3
},
renderer: {
...Canvas3DParams.renderer.defaultValue,
lightIntensity: 0,
ambientIntensity: 1,
backgroundColor: ColorNames.white
},
postprocessing: {
...Canvas3DParams.postprocessing.defaultValue,
occlusionEnable: true,
outlineEnable: true
}
})
canvas3d.animate()
const reprCtx = {
webgl: canvas3d.webgl,
colorThemeRegistry: ColorTheme.createRegistry(),
sizeThemeRegistry: SizeTheme.createRegistry()
}
function getCartoonRepr() {
return CartoonRepresentationProvider.factory(reprCtx, CartoonRepresentationProvider.getParams)
}
async function parseCif(data: string|Uint8Array) {
const comp = CIF.parse(data);
const parsed = await comp.run();
if (parsed.isError) throw parsed;
return parsed.result;
}
async function downloadCif(url: string, isBinary: boolean) {
const data = await ajaxGet({ url, type: isBinary ? 'binary' : 'string' }).run();
return parseCif(data);
}
async function downloadFromPdb(pdb: string) {
const parsed = await downloadCif(`https://files.rcsb.org/download/${pdb}.cif`, false);
// const parsed = await downloadCif(`https://webchem.ncbr.muni.cz/ModelServer/static/bcif/${pdb}`, true);
return parsed.blocks[0];
}
async function getModels(frame: CifFrame) {
return await trajectoryFromMmCIF(frame).run();
}
async function getStructure(model: Model) {
return Structure.ofModel(model);
}
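// Downloads a structure by PDB ID, builds a cartoon representation, renders it and writes the result as a PNG.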
async function run(id: string, out: string) {
try {
const cif = await downloadFromPdb(id)
const models = await getModels(cif)
const structure = await getStructure(models[0])
const cartoonRepr = getCartoonRepr()
cartoonRepr.setTheme({
color: reprCtx.colorThemeRegistry.create('sequence-id', { structure }),
size: reprCtx.sizeThemeRegistry.create('uniform', { structure })
})
await cartoonRepr.createOrUpdate({ ...CartoonRepresentationProvider.defaultValues, quality: 'auto' }, structure).run()
canvas3d.add(cartoonRepr)
canvas3d.resetCamera()
} catch (e) {
console.log(e)
process.exit(1)
}
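// Give the animation loop time to draw the scene before reading back the pixel data.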
setTimeout(() => {
const pixelData = canvas3d.getPixelData('color')
const png = new PNG({ width, height })
png.data = Buffer.from(pixelData.array)
png.pack().pipe(fs.createWriteStream(out)).on('finish', () => {
process.exit()
})
}, 2000)
}
//
const parser = new argparse.ArgumentParser({
addHelp: true,
description: 'render image as PNG (work in progress)'
});
parser.addArgument([ '--id', '-i' ], {
required: true,
help: 'PDB ID'
});
parser.addArgument([ '--out', '-o' ], {
required: true,
help: 'image output path'
});
interface Args {
id: string
out: string
}
const args: Args = parser.parseArgs();
run(args.id, args.out)
\ No newline at end of file
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import * as argparse from 'argparse'
// import * as util from 'util'
import * as fs from 'fs'
import fetch from 'node-fetch'
import { parseCsv } from '../../mol-io/reader/csv/parser'
import { CIF, CifFrame } from '../../mol-io/reader/cif'
import { generateSchema } from './util/cif-dic'
import { generate } from './util/generate'
import { Filter } from './util/schema'
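// Ensures the mmCIF, IHM and carbohydrate dictionaries are available locally (downloading into ./build/dics if needed), generates the schema from their save frames and writes it out as TypeScript or JSON.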
async function runGenerateSchema(name: string, fieldNamesPath?: string, typescript = false, out?: string) {
await ensureMmcifDicAvailable()
const mmcifDic = await CIF.parseText(fs.readFileSync(MMCIF_DIC_PATH, 'utf8')).run();
if (mmcifDic.isError) throw mmcifDic
await ensureIhmDicAvailable()
const ihmDic = await CIF.parseText(fs.readFileSync(IHM_DIC_PATH, 'utf8')).run();
if (ihmDic.isError) throw ihmDic
await ensureCarbBranchDicAvailable()
const carbBranchDic = await CIF.parseText(fs.readFileSync(CARB_BRANCH_DIC_PATH, 'utf8')).run();
if (carbBranchDic.isError) throw carbBranchDic
await ensureCarbCompDicAvailable()
const carbCompDic = await CIF.parseText(fs.readFileSync(CARB_COMP_DIC_PATH, 'utf8')).run();
if (carbCompDic.isError) throw carbCompDic
const mmcifDicVersion = CIF.schema.dic(mmcifDic.result.blocks[0]).dictionary.version.value(0)
const ihmDicVersion = CIF.schema.dic(ihmDic.result.blocks[0]).dictionary.version.value(0)
const carbDicVersion = 'draft'
const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, CARB ${carbDicVersion}.`
const frames: CifFrame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames, ...carbBranchDic.result.blocks[0].saveFrames, ...carbCompDic.result.blocks[0].saveFrames]
const schema = generateSchema(frames)
const filter = fieldNamesPath ? await getFieldNamesFilter(fieldNamesPath) : undefined
const output = typescript ? generate(name, version, schema, filter) : JSON.stringify(schema, undefined, 4)
if (out) {
fs.writeFileSync(out, output)
} else {
console.log(output)
}
}
async function getFieldNamesFilter(fieldNamesPath: string): Promise<Filter> {
const fieldNamesStr = fs.readFileSync(fieldNamesPath, 'utf8')
const parsed = await parseCsv(fieldNamesStr, { noColumnNames: true }).run();
if (parsed.isError) throw parsed
const csvFile = parsed.result;
const fieldNamesCol = csvFile.table.getColumn('0')
if (!fieldNamesCol) throw new Error('could not get field names column')
const fieldNames = fieldNamesCol.toStringArray()
const filter: Filter = {}
fieldNames.forEach(name => {
const [ category, field ] = name.split('.')
// console.log(category, field)
if (!filter[ category ]) filter[ category ] = {}
filter[ category ][ field ] = true
})
// console.log(filter)
return filter
}
async function ensureMmcifDicAvailable() { await ensureDicAvailable(MMCIF_DIC_PATH, MMCIF_DIC_URL) }
async function ensureIhmDicAvailable() { await ensureDicAvailable(IHM_DIC_PATH, IHM_DIC_URL) }
async function ensureCarbBranchDicAvailable() { await ensureDicAvailable(CARB_BRANCH_DIC_PATH, CARB_BRANCH_DIC_URL) }
async function ensureCarbCompDicAvailable() { await ensureDicAvailable(CARB_COMP_DIC_PATH, CARB_COMP_DIC_URL) }
async function ensureDicAvailable(dicPath: string, dicUrl: string) {
if (FORCE_DIC_DOWNLOAD || !fs.existsSync(dicPath)) {
const name = dicUrl.substr(dicUrl.lastIndexOf('/') + 1)
console.log(`downloading ${name}...`)
const data = await fetch(dicUrl)
if (!fs.existsSync(DIC_DIR)) {
fs.mkdirSync(DIC_DIR);
}
fs.writeFileSync(dicPath, await data.text())
console.log(`done downloading ${name}`)
}
}
const DIC_DIR = './build/dics'
const MMCIF_DIC_PATH = `${DIC_DIR}/mmcif_pdbx_v50.dic`
const MMCIF_DIC_URL = 'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic'
const IHM_DIC_PATH = `${DIC_DIR}/ihm-extension.dic`
const IHM_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/IHM-dictionary/master/ihm-extension.dic'
const CARB_BRANCH_DIC_PATH = `${DIC_DIR}/entity_branch-extension.dic`
const CARB_BRANCH_DIC_URL = 'https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/entity_branch-extension.dic'
const CARB_COMP_DIC_PATH = `${DIC_DIR}/chem_comp-extension.dic`
const CARB_COMP_DIC_URL = 'https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/chem_comp-extension.dic'
const parser = new argparse.ArgumentParser({
addHelp: true,
description: 'Create schema from mmcif dictionary (v50 plus IHM and entity_branch extensions, downloaded from wwPDB)'
});
parser.addArgument([ '--name', '-n' ], {
defaultValue: 'mmCIF',
help: 'Schema name'
});
parser.addArgument([ '--out', '-o' ], {
help: 'Generated schema output path, if not given printed to stdout'
});
parser.addArgument([ '--typescript', '-ts' ], {
action: 'storeTrue',
help: 'Output schema as TypeScript instead of as JSON'
});
parser.addArgument([ '--fieldNamesPath', '-fn' ], {
defaultValue: '',
help: 'Field names to include'
});
parser.addArgument([ '--forceDicDownload', '-f' ], {
action: 'storeTrue',
help: 'Force download of dictionaries'
});
interface Args {
name: string
forceDicDownload: boolean
fieldNamesPath: string
typescript: boolean
out: string
}
const args: Args = parser.parseArgs();
const FORCE_DIC_DOWNLOAD = args.forceDicDownload
if (args.name) {
runGenerateSchema(args.name, args.fieldNamesPath, args.typescript, args.out).catch(e => {
console.error(e)
})
}
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { Database, Column, EnumCol, StrCol, IntCol, ListCol, FloatCol, CoordCol, MatrixCol, VectorCol } from './schema'
import * as Data from '../../../mol-io/reader/cif/data-model'
import { CifFrame } from '../../../mol-io/reader/cif/data-model';
export function getFieldType (type: string, description: string, values?: string[]): Column {
switch (type) {
case 'code':
case 'ucode':
case 'line':
case 'uline':
case 'text':
case 'char':
case 'uchar3':
case 'uchar1':
case 'boolean':
return values && values.length ? EnumCol(values, 'str', description) : StrCol(description)
case 'aliasname':
case 'name':
case 'idname':
case 'any':
case 'atcode':
case 'fax':
case 'phone':
case 'email':
case 'code30':
case 'seq-one-letter-code':
case 'author':
case 'orcid_id':
case 'sequence_dep':
case 'pdb_id':
case 'emd_id':
// todo, consider adding specialised fields
case 'yyyy-mm-dd':
case 'yyyy-mm-dd:hh:mm':
case 'yyyy-mm-dd:hh:mm-flex':
case 'int-range':
case 'float-range':
case 'binary':
case 'operation_expression':
case 'point_symmetry':
case '4x3_matrix':
case '3x4_matrices':
case 'point_group':
case 'point_group_helical':
case 'symmetry_operation':
case 'date_dep':
case 'url':
case 'symop':
case 'exp_data_doi':
case 'asym_id':
return StrCol(description)
case 'int':
case 'non_negative_int':
case 'positive_int':
return values && values.length ? EnumCol(values, 'int', description) : IntCol(description)
case 'float':
return FloatCol(description)
case 'ec-type':
case 'ucode-alphanum-csv':
case 'id_list':
return ListCol('str', ',', description)
case 'id_list_spc':
return ListCol('str', ' ', description)
}
console.log(`unknown type '${type}'`)
return StrCol(description)
}
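// e.g. getFieldType('ucode-alphanum-csv', 'desc') yields ListCol('str', ',', 'desc'), while getFieldType('float', 'desc') yields FloatCol('desc').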
type FrameCategories = { [category: string]: Data.CifFrame }
type FrameLinks = { [k: string]: string }
interface FrameData {
categories: FrameCategories
links: FrameLinks
}
// get field from given or linked category
function getField (category: string, field: string, d: Data.CifFrame, ctx: FrameData): Data.CifField|undefined {
const { categories, links } = ctx
const cat = d.categories[category]
if (cat) {
return cat.getField(field)
} else {
if (d.header in links) {
const linkName = links[d.header]
if (linkName in categories) {
return getField(category, field, categories[linkName], ctx)
} else {
console.log(`link '${linkName}' not found`)
}
} else {
// console.log(`no links found for '${d.header}'`)
}
}
}
function getEnums (d: Data.CifFrame, ctx: FrameData) {
const value = getField('item_enumeration', 'value', d, ctx)
const enums: string[] = []
if (value) {
for (let i = 0; i < value.rowCount; ++i) {
enums.push(value.str(i))
// console.log(value.str(i))
}
return enums
} else {
// console.log(`item_enumeration.value not found for '${d.header}'`)
}
}
function getCode (d: Data.CifFrame, ctx: FrameData): [string, string[]|undefined]|undefined {
const code = getField('item_type', 'code', d, ctx)
if (code) {
return [ code.str(0), getEnums(d, ctx) ]
} else {
console.log(`item_type.code not found for '${d.header}'`)
}
}
function getSubCategory (d: Data.CifFrame, ctx: FrameData): string|undefined {
const value = getField('item_sub_category', 'id', d, ctx)
if (value) {
return value.str(0)
}
}
function getDescription (d: Data.CifFrame, ctx: FrameData): string|undefined {
const value = getField('item_description', 'description', d, ctx)
if (value) {
// trim (after newlines) and remove references to square brackets
return value.str(0).trim()
.replace(/(\r\n|\r|\n)([ \t]+)/g, '\n')
.replace(/(\[[1-3]\])+ element/, 'elements')
.replace(/(\[[1-3]\])+/, '')
}
}
const reMatrixField = /\[[1-3]\]\[[1-3]\]/
const reVectorField = /\[[1-3]\]/
const FORCE_INT_FIELDS = [
'_atom_site.id',
'_atom_site.auth_seq_id',
'_pdbx_struct_mod_residue.auth_seq_id',
'_struct_conf.beg_auth_seq_id',
'_struct_conf.end_auth_seq_id',
'_struct_conn.ptnr1_auth_seq_id',
'_struct_conn.ptnr2_auth_seq_id',
'_struct_sheet_range.beg_auth_seq_id',
'_struct_sheet_range.end_auth_seq_id',
];
const COMMA_SEPARATED_LIST_FIELDS = [
'_atom_site.pdbx_struct_group_id',
'_chem_comp.mon_nstd_parent_comp_id',
'_diffrn_radiation.pdbx_wavelength_list',
'_diffrn_source.pdbx_wavelength_list',
'_em_diffraction.tilt_angle_list', // 20,40,50,55
'_em_entity_assembly.entity_id_list',
'_entity.pdbx_description', // Endolysin,Beta-2 adrenergic receptor
'_entity.pdbx_ec',
'_entity_poly.pdbx_strand_id', // A,B
'_entity_src_gen.pdbx_gene_src_gene', // ADRB2, ADRB2R, B2AR
'_pdbx_depui_entry_details.experimental_methods',
'_pdbx_depui_entry_details.requested_accession_types',
'_pdbx_soln_scatter_model.software_list', // INSIGHT II, HOMOLOGY, DISCOVERY, BIOPOLYMER, DELPHI
'_pdbx_soln_scatter_model.software_author_list', // MSI
'_pdbx_soln_scatter_model.entry_fitting_list', // Odd example: 'PDB CODE 1HFI, 1HCC, 1HFH, 1VCC'
'_pdbx_struct_assembly_gen.entity_inst_id',
'_pdbx_struct_assembly_gen.asym_id_list',
'_pdbx_struct_assembly_gen.auth_asym_id_list',
'_pdbx_struct_assembly_gen_depositor_info.asym_id_list',
'_pdbx_struct_assembly_gen_depositor_info.chain_id_list',
'_pdbx_struct_group_list.group_enumeration_type',
'_reflns.pdbx_diffrn_id',
'_refine.pdbx_diffrn_id',
'_reflns_shell.pdbx_diffrn_id',
'_struct_keywords.text',
];
const SPACE_SEPARATED_LIST_FIELDS = [
'_chem_comp.pdbx_subcomponent_list', // TSM DPH HIS CHF EMR
'_pdbx_soln_scatter.data_reduction_software_list', // OTOKO
'_pdbx_soln_scatter.data_analysis_software_list', // SCTPL5 GNOM
];
const SEMICOLON_SEPARATED_LIST_FIELDS = [
'_chem_comp.pdbx_synonyms' // GLYCERIN; PROPANE-1,2,3-TRIOL
]
/**
* Useful when a dictionary extension will add enum values to an existing dictionary.
* By adding them here, the dictionary extension can be tested before the added enum
* values are available in the existing dictionary.
*/
const EXTRA_ENUM_VALUES: { [k: string]: string[] } = {
}
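// Hypothetical example entry (not from any shipped dictionary): { '_entity.type': ['branched'] } would accept 'branched' before the base dictionary lists it.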
export function generateSchema (frames: CifFrame[]) {
const schema: Database = {}
const categories: FrameCategories = {}
const links: FrameLinks = {}
const ctx = { categories, links }
// get category metadata
frames.forEach(d => {
if (d.header[0] === '_') return
const categoryKeyNames = new Set<string>()
const categoryKey = d.categories['category_key']
if (categoryKey) {
const categoryKey_names = categoryKey.getField('name')
if (categoryKey_names) {
for (let i = 0, il = categoryKey_names.rowCount; i < il; ++i) {
categoryKeyNames.add(categoryKey_names.str(i))
}
}
}
let description = ''
const category = d.categories['category']
if (category) {
const category_description = category.getField('description')
if (category_description) {
description = category_description.str(0).trim()
.replace(/(\r\n|\r|\n)([ \t]+)/g, '\n') // remove padding after newlines
} else {
console.log(`no description given for category '${d.header}'`)
}
}
if (categoryKeyNames.size === 0) {
console.log(`no key given for category '${d.header}'`)
}
schema[d.header] = { description, key: categoryKeyNames, columns: {} }
// console.log('++++++++++++++++++++++++++++++++++++++++++')
// console.log('name', d.header)
// console.log('desc', description)
// console.log('key', categoryKeyNames)
})
// build list of links between categories
frames.forEach(d => {
if (d.header[0] !== '_') return
categories[d.header] = d
const item_linked = d.categories['item_linked']
if (item_linked) {
const child_name = item_linked.getField('child_name')
const parent_name = item_linked.getField('parent_name')
if (child_name && parent_name) {
for (let i = 0; i < item_linked.rowCount; ++i) {
const childName = child_name.str(i)
const parentName = parent_name.str(i)
if (childName in links && links[childName] !== parentName) {
console.log(`${childName} linked to ${links[childName]}, ignoring link to ${parentName}`)
}
links[childName] = parentName
}
}
}
})
// get field data
Object.keys(categories).forEach(fullName => {
const d = categories[fullName]
if (!d) {
console.log(`${fullName} not found, moving on`)
return
}
const categoryName = d.header.substring(1, d.header.indexOf('.'))
const itemName = d.header.substring(d.header.indexOf('.') + 1)
let fields: { [k: string]: Column }
if (categoryName in schema) {
fields = schema[categoryName].columns
} else {
console.log(`category '${categoryName}' has no metadata`)
fields = {}
schema[categoryName] = {
description: '',
key: new Set(),
columns: fields
}
}
const description = getDescription(d, ctx) || ''
// need to use regex to check for matrix or vector items
// as sub_category assignment is missing for some entries
const subCategory = getSubCategory(d, ctx)
if (subCategory === 'cartesian_coordinate' || subCategory === 'fractional_coordinate') {
fields[itemName] = CoordCol(description)
} else if (FORCE_INT_FIELDS.includes(d.header)) {
fields[itemName] = IntCol(description)
console.log(`forcing int: ${d.header}`)
} else if (subCategory === 'matrix') {
fields[itemName.replace(reMatrixField, '')] = MatrixCol(3, 3, description)
} else if (subCategory === 'vector') {
fields[itemName.replace(reVectorField, '')] = VectorCol(3, description)
} else {
if (itemName.match(reMatrixField)) {
fields[itemName.replace(reMatrixField, '')] = MatrixCol(3, 3, description)
console.log(`${d.header} should have 'matrix' _item_sub_category.id`)
} else if (itemName.match(reVectorField)) {
fields[itemName.replace(reVectorField, '')] = VectorCol(3, description)
console.log(`${d.header} should have 'vector' _item_sub_category.id`)
} else {
const code = getCode(d, ctx)
if (code) {
let fieldType = getFieldType(code[0], description, code[1]);
if (fieldType.type === 'str') {
if (COMMA_SEPARATED_LIST_FIELDS.includes(d.header)) {
fieldType = ListCol('str', ',', description)
console.log(`forcing comma separated: ${d.header}`)
} else if (SPACE_SEPARATED_LIST_FIELDS.includes(d.header)) {
fieldType = ListCol('str', ' ', description)
console.log(`forcing space separated: ${d.header}`)
} else if (SEMICOLON_SEPARATED_LIST_FIELDS.includes(d.header)) {
fieldType = ListCol('str', ';', description)
console.log(`forcing semicolon separated: ${d.header}`)
}
}
if (d.header in EXTRA_ENUM_VALUES) {
if (fieldType.type === 'enum') {
fieldType.values.push(...EXTRA_ENUM_VALUES[d.header])
} else {
console.warn(`expected enum: ${d.header}`)
}
}
fields[itemName] = fieldType
} else {
console.log(`could not determine code for '${d.header}'`)
}
}
}
})
return schema
}
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { Database, Filter, Column } from './schema'
import { indentString } from '../../../mol-util/string';
function header (name: string, info: string, importDatabasePath = 'mol-data/db') {
return `/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* Code-generated '${name}' schema file. ${info}
*
* @author mol-star package (src/apps/schema-generator/generate)
*/
import { Database, Column } from '${importDatabasePath}'
import Schema = Column.Schema`
}
function footer (name: string) {
return `
export type ${name}_Schema = typeof ${name}_Schema;
export interface ${name}_Database extends Database<${name}_Schema> {}`
}
function getTypeShorthands(schema: Database, fields?: Filter) {
const types = new Set<string>()
Object.keys(schema).forEach(table => {
if (fields && !fields[table]) return
const { columns } = schema[table]
Object.keys(columns).forEach(columnName => {
if (fields && !fields[table][columnName]) return
types.add(schema[table].columns[columnName].type)
})
})
const shorthands: string[] = []
types.forEach(type => {
switch (type) {
case 'str': shorthands.push('const str = Schema.str;'); break
case 'int': shorthands.push('const int = Schema.int;'); break
case 'float': shorthands.push('const float = Schema.float;'); break
case 'coord': shorthands.push('const coord = Schema.coord;'); break
case 'enum': shorthands.push('const Aliased = Schema.Aliased;'); break
case 'matrix': shorthands.push('const Matrix = Schema.Matrix;'); break
case 'vector': shorthands.push('const Vector = Schema.Vector;'); break
case 'list': shorthands.push('const List = Schema.List;'); break
}
})
return shorthands.join('\n')
}
function getTypeDef(c: Column): string {
switch (c.type) {
case 'str': return 'str'
case 'int': return 'int'
case 'float': return 'float'
case 'coord': return 'coord'
case 'enum':
return `Aliased<'${c.values.map(v => v.replace(/'/g, '\\\'')).join(`' | '`)}'>(${c.subType})`
case 'matrix':
return `Matrix(${c.rows}, ${c.columns})`
case 'vector':
return `Vector(${c.length})`
case 'list':
if (c.subType === 'int') {
return `List('${c.separator}', x => parseInt(x, 10))`
} else if (c.subType === 'float' || c.subType === 'coord') {
return `List('${c.separator}', x => parseFloat(x))`
} else {
return `List('${c.separator}', x => x)`
}
}
}
const reSafePropertyName = /^[a-zA-Z_$][0-9a-zA-Z_$]*$/
function safePropertyString(name: string) { return name.match(reSafePropertyName) ? name : `'${name}'` }
function doc(description: string, spacesCount: number) {
const spaces = ' '.repeat(spacesCount)
return [
`${spaces}/**`,
`${indentString(description, 1, `${spaces} * `)}`.replace(/ +\n/g, '\n'),
`${spaces} */`
].join('\n')
}
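// Emits the TypeScript source for a schema, restricted to the tables and columns in 'fields' when a filter is given.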
export function generate (name: string, info: string, schema: Database, fields?: Filter, importDatabasePath?: string) {
const codeLines: string[] = []
codeLines.push(`export const ${name}_Schema = {`)
Object.keys(schema).forEach(table => {
if (fields && !fields[table]) return
const { description, columns } = schema[table]
if (description) codeLines.push(doc(description, 4))
codeLines.push(` ${safePropertyString(table)}: {`)
Object.keys(columns).forEach(columnName => {
if (fields && !fields[table][columnName]) return
const c = columns[columnName]
const typeDef = getTypeDef(c)
if (c.description) codeLines.push(doc(c.description, 8))
codeLines.push(` ${safePropertyString(columnName)}: ${typeDef},`)
})
codeLines.push(' },')
})
codeLines.push('}')
return `${header(name, info, importDatabasePath)}\n\n${getTypeShorthands(schema, fields)}\n\n${codeLines.join('\n')}\n${footer(name)}`
}
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
export interface Database { [ tableName: string ]: Table }
export interface Table {
description: string
key: Set<string>
columns: { [ columnName: string ]: Column }
}
export type Column = IntCol | StrCol | FloatCol | CoordCol | EnumCol | VectorCol | MatrixCol | ListCol
type BaseCol = { description: string }
export type IntCol = { type: 'int' } & BaseCol
export function IntCol(description: string): IntCol { return { type: 'int', description } }
export type StrCol = { type: 'str' } & BaseCol
export function StrCol(description: string): StrCol { return { type: 'str', description } }
export type FloatCol = { type: 'float' } & BaseCol
export function FloatCol(description: string): FloatCol { return { type: 'float', description } }
export type CoordCol = { type: 'coord' } & BaseCol
export function CoordCol(description: string): CoordCol { return { type: 'coord', description } }
export type EnumCol = { type: 'enum', subType: 'int' | 'str', values: string[] } & BaseCol
export function EnumCol(values: string[], subType: 'int' | 'str', description: string): EnumCol {
return { type: 'enum', description, values, subType }
}
export type VectorCol = { type: 'vector', length: number } & BaseCol
export function VectorCol(length: number, description: string): VectorCol {
return { type: 'vector', description, length }
}
export type MatrixCol = { type: 'matrix', rows: number, columns: number } & BaseCol
export function MatrixCol(columns: number, rows: number, description: string): MatrixCol {
return { type: 'matrix', description, columns, rows }
}
export type ListCol = { type: 'list', subType: 'int' | 'str' | 'float' | 'coord', separator: string } & BaseCol
export function ListCol(subType: 'int' | 'str' | 'float' | 'coord', separator: string, description: string): ListCol {
return { type: 'list', description, separator, subType }
}
export type Filter = { [ table: string ]: { [ column: string ]: true } }
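// e.g. { atom_site: { id: true, type_symbol: true } } allows only those two columns of atom_site.
// mergeFilters intersects the given filters: a category.field pair is kept only if it occurs in every input filter.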
export function mergeFilters (...filters: Filter[]) {
const n = filters.length
const mergedFilter: Filter = {}
const fields: Map<string, number> = new Map()
filters.forEach(filter => {
Object.keys(filter).forEach(category => {
Object.keys(filter[ category ]).forEach(field => {
const key = `${category}.${field}`
const value = fields.get(key) || 0
fields.set(key, value + 1)
})
})
})
fields.forEach((v, k) => {
if (v !== n) return
const [categoryName, fieldName] = k.split('.')
if (categoryName in mergedFilter) {
mergedFilter[categoryName][fieldName] = true
} else {
mergedFilter[categoryName] = { [fieldName]: true }
}
})
return mergedFilter
}