Skip to content
Snippets Groups Projects
Commit 01a69004 authored by Alexander Rose's avatar Alexander Rose
Browse files

added description from dictionary to mmcif schema

parent 4edb0a6e
Branches
Tags
No related merge requests found
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import * as argparse from 'argparse'
import * as fs from 'fs'
import { generate } from './util/generate'
/**
 * Read the json schema file at `path`, parse it and hand the result to `generate`
 * under the given schema `name` (with an empty info string).
 *
 * @param name schema name forwarded to `generate`
 * @param path location of the json schema file, read as utf8
 * @returns whatever `generate` returns for the parsed schema
 */
function generateSchema (name: string, path: string) {
    const parsedSchema = JSON.parse(fs.readFileSync(path, 'utf8'))
    return generate(name, '', parsedSchema)
}
// Command line interface: positional `name` and `path`, optional `--out`/`-o`.
const parser = new argparse.ArgumentParser({
    addHelp: true,
    // describes what this script does (shown in the generated help output)
    description: 'Generate typescript schema from json schema'
});
parser.addArgument([ 'name' ], {
    help: 'schema name'
});
parser.addArgument([ 'path' ], {
    help: 'json schema file path'
});
parser.addArgument([ '--out', '-o' ], {
    help: 'generated typescript output path'
});
const args = parser.parseArgs();

if (args.name && args.path) {
    const schema = generateSchema(args.name, args.path)
    if (args.out) {
        // an output path was given: write the generated code to that file
        fs.writeFileSync(args.out, schema)
    } else {
        // no output path: print the generated code to stdout
        console.log(schema)
    }
}
...@@ -13,7 +13,7 @@ import Csv from 'mol-io/reader/csv/parser' ...@@ -13,7 +13,7 @@ import Csv from 'mol-io/reader/csv/parser'
import CIF, { CifFrame } from 'mol-io/reader/cif' import CIF, { CifFrame } from 'mol-io/reader/cif'
import { generateSchema } from './util/cif-dic' import { generateSchema } from './util/cif-dic'
import { generate } from './util/generate' import { generate } from './util/generate'
import { Filter } from './util/json-schema' import { Filter } from './util/schema'
async function runGenerateSchema(name: string, fieldNamesPath?: string, typescript = false, out?: string) { async function runGenerateSchema(name: string, fieldNamesPath?: string, typescript = false, out?: string) {
await ensureMmcifDicAvailable() await ensureMmcifDicAvailable()
......
...@@ -4,11 +4,11 @@ ...@@ -4,11 +4,11 @@
* @author Alexander Rose <alexander.rose@weirdbyte.de> * @author Alexander Rose <alexander.rose@weirdbyte.de>
*/ */
import { Database, ValueColumn, ListColumn } from './json-schema' import { Database, Column, EnumCol, StrCol, IntCol, ListCol, FloatCol, CoordCol, MatrixCol, VectorCol } from './schema'
import * as Data from 'mol-io/reader/cif/data-model' import * as Data from 'mol-io/reader/cif/data-model'
import { CifFrame } from 'mol-io/reader/cif/data-model'; import { CifFrame } from 'mol-io/reader/cif/data-model';
export function getFieldType (type: string, values?: string[]): ValueColumn|ListColumn { export function getFieldType (type: string, description: string, values?: string[]): Column {
switch (type) { switch (type) {
case 'code': case 'code':
case 'ucode': case 'ucode':
...@@ -19,11 +19,7 @@ export function getFieldType (type: string, values?: string[]): ValueColumn|List ...@@ -19,11 +19,7 @@ export function getFieldType (type: string, values?: string[]): ValueColumn|List
case 'uchar3': case 'uchar3':
case 'uchar1': case 'uchar1':
case 'boolean': case 'boolean':
if (values && values.length) { return values && values.length ? EnumCol(values, 'str', description) : StrCol(description)
return { enum: [ 'str', values ] }
} else {
return 'str'
}
case 'aliasname': case 'aliasname':
case 'name': case 'name':
case 'idname': case 'idname':
...@@ -56,24 +52,20 @@ export function getFieldType (type: string, values?: string[]): ValueColumn|List ...@@ -56,24 +52,20 @@ export function getFieldType (type: string, values?: string[]): ValueColumn|List
case 'date_dep': case 'date_dep':
case 'url': case 'url':
case 'symop': case 'symop':
return 'str' return StrCol(description)
case 'int': case 'int':
case 'non_negative_int': case 'non_negative_int':
case 'positive_int': case 'positive_int':
if (values && values.length) { return values && values.length ? EnumCol(values, 'int', description) : IntCol(description)
return { enum: [ 'int', values ] }
} else {
return 'int'
}
case 'float': case 'float':
return 'float' return FloatCol(description)
case 'ec-type': case 'ec-type':
case 'ucode-alphanum-csv': case 'ucode-alphanum-csv':
case 'id_list': case 'id_list':
return { list: [ 'str', ',' ] } return ListCol('str', ',', description)
} }
console.log(`unknown type '${type}'`) console.log(`unknown type '${type}'`)
return 'str' return StrCol(description)
} }
type FrameCategories = { [category: string]: Data.CifFrame } type FrameCategories = { [category: string]: Data.CifFrame }
...@@ -135,6 +127,19 @@ function getSubCategory (d: Data.CifFrame, ctx: FrameData): string|undefined { ...@@ -135,6 +127,19 @@ function getSubCategory (d: Data.CifFrame, ctx: FrameData): string|undefined {
} }
} }
function getDescription (d: Data.CifFrame, ctx: FrameData): string|undefined {
const value = getField('item_description', 'description', d, ctx)
if (value) {
return value.str(0).trim()
.replace(/(\r\n|\r|\n)([ \t]+)/g, '\n')
.replace(/(\[[1-3]\])+ element/, 'elements')
.replace(/(\[[1-3]\])+/, '')
}
}
const reMatrixField = /\[[1-3]\]\[[1-3]\]/
const reVectorField = /\[[1-3]\]/
const FORCE_INT_FIELDS = [ const FORCE_INT_FIELDS = [
'_atom_site.id', '_atom_site.id',
'_atom_site.auth_seq_id', '_atom_site.auth_seq_id',
...@@ -190,6 +195,7 @@ export function generateSchema (frames: CifFrame[]) { ...@@ -190,6 +195,7 @@ export function generateSchema (frames: CifFrame[]) {
const links: FrameLinks = {} const links: FrameLinks = {}
const ctx = { categories, links } const ctx = { categories, links }
// build list of links between categories
frames.forEach(d => { frames.forEach(d => {
if (d.header[0] !== '_') return if (d.header[0] !== '_') return
categories[d.header] = d categories[d.header] = d
...@@ -213,12 +219,12 @@ export function generateSchema (frames: CifFrame[]) { ...@@ -213,12 +219,12 @@ export function generateSchema (frames: CifFrame[]) {
Object.keys(categories).forEach(fullName => { Object.keys(categories).forEach(fullName => {
const d = categories[fullName] const d = categories[fullName]
if (!d) { if (!d) {
console.log('foo', fullName) console.log(`${fullName} not found, moving on`)
return return
} }
const categoryName = d.header.substring(1, d.header.indexOf('.')) const categoryName = d.header.substring(1, d.header.indexOf('.'))
const itemName = d.header.substring(d.header.indexOf('.') + 1) const itemName = d.header.substring(d.header.indexOf('.') + 1)
let fields let fields: { [k: string]: Column }
if (categoryName in schema) { if (categoryName in schema) {
fields = schema[categoryName] fields = schema[categoryName]
} else { } else {
...@@ -226,38 +232,41 @@ export function generateSchema (frames: CifFrame[]) { ...@@ -226,38 +232,41 @@ export function generateSchema (frames: CifFrame[]) {
schema[categoryName] = fields schema[categoryName] = fields
} }
const description = getDescription(d, ctx) || ''
// need to use regex to check for matrix or vector items // need to use regex to check for matrix or vector items
// as sub_category assignment is missing for some entries // as sub_category assignment is missing for some entries
const subCategory = getSubCategory(d, ctx) const subCategory = getSubCategory(d, ctx)
if (subCategory === 'cartesian_coordinate' || subCategory === 'fractional_coordinate') { if (subCategory === 'cartesian_coordinate' || subCategory === 'fractional_coordinate') {
fields[itemName] = 'coord' fields[itemName] = CoordCol(description)
} else if (FORCE_INT_FIELDS.includes(d.header)) { } else if (FORCE_INT_FIELDS.includes(d.header)) {
fields[itemName] = 'int' fields[itemName] = IntCol(description)
console.log(`forcing int: ${d.header}`)
} else if (subCategory === 'matrix') { } else if (subCategory === 'matrix') {
fields[itemName.replace(/\[[1-3]\]\[[1-3]\]/, '')] = { 'matrix': [ 3, 3 ] } fields[itemName.replace(reMatrixField, '')] = MatrixCol(3, 3, description)
} else if (subCategory === 'vector') { } else if (subCategory === 'vector') {
fields[itemName.replace(/\[[1-3]\]/, '')] = { 'vector': [ 3 ] } fields[itemName.replace(reVectorField, '')] = VectorCol(3, description)
} else { } else {
if (itemName.match(/\[[1-3]\]\[[1-3]\]/)) { if (itemName.match(reMatrixField)) {
fields[itemName.replace(/\[[1-3]\]\[[1-3]\]/, '')] = { 'matrix': [ 3, 3 ] } fields[itemName.replace(reMatrixField, '')] = MatrixCol(3, 3, description)
console.log(`${d.header} should have 'matrix' _item_sub_category.id`) console.log(`${d.header} should have 'matrix' _item_sub_category.id`)
} else if (itemName.match(/\[[1-3]\]/)) { } else if (itemName.match(reVectorField)) {
fields[itemName.replace(/\[[1-3]\]/, '')] = { 'vector': [ 3 ] } fields[itemName.replace(reVectorField, '')] = VectorCol(3, description)
console.log(`${d.header} should have 'vector' _item_sub_category.id`) console.log(`${d.header} should have 'vector' _item_sub_category.id`)
} else { } else {
const code = getCode(d, ctx) const code = getCode(d, ctx)
if (code) { if (code) {
let fieldType = getFieldType(code[0], code[1]); let fieldType = getFieldType(code[0], description, code[1]);
if (typeof fieldType === 'string') { if (fieldType.type === 'str') {
if (COMMA_SEPARATED_LIST_FIELDS.includes(d.header)) { if (COMMA_SEPARATED_LIST_FIELDS.includes(d.header)) {
fieldType = { 'list': [ 'str', ',' ] }; fieldType = ListCol('str', ',', description)
console.log(`comma separated: ${d.header}`) console.log(`forcing comma separated: ${d.header}`)
} else if (SPACE_SEPARATED_LIST_FIELDS.includes(d.header)) { } else if (SPACE_SEPARATED_LIST_FIELDS.includes(d.header)) {
fieldType = { 'list': [ 'str', ' ' ] }; fieldType = ListCol('str', ' ', description)
console.log(`space separated: ${d.header}`) console.log(`forcing space separated: ${d.header}`)
} else if (SEMICOLON_SEPARATED_LIST_FIELDS.includes(d.header)) { } else if (SEMICOLON_SEPARATED_LIST_FIELDS.includes(d.header)) {
fieldType = { 'list': [ 'str', ';' ] }; fieldType = ListCol('str', ';', description)
console.log(`space separated: ${d.header}`) console.log(`forcing space separated: ${d.header}`)
} }
} }
fields[itemName] = fieldType fields[itemName] = fieldType
......
...@@ -4,8 +4,8 @@ ...@@ -4,8 +4,8 @@
* @author Alexander Rose <alexander.rose@weirdbyte.de> * @author Alexander Rose <alexander.rose@weirdbyte.de>
*/ */
import { validate } from './validate' import { Database, Filter, Column } from './schema'
import { Database, getTypeAndArgs, Filter } from './json-schema' import { indentString } from 'mol-util';
function header (name: string, info: string, importDatabasePath = 'mol-data/db') { function header (name: string, info: string, importDatabasePath = 'mol-data/db') {
return `/** return `/**
...@@ -37,38 +37,33 @@ export type ${name}_Schema = typeof ${name}_Schema; ...@@ -37,38 +37,33 @@ export type ${name}_Schema = typeof ${name}_Schema;
export interface ${name}_Database extends Database<${name}_Schema> {}` export interface ${name}_Database extends Database<${name}_Schema> {}`
} }
const value: { [k: string]: (...args: any[]) => string } = { function getTypeDef(c: Column): string {
enum: function (type: string, values: string[]) { switch (c.type) {
return `Aliased<'${values.map(v => v.replace(/'/g, '\\\'')).join(`' | '`)}'>(${type})` case 'str': return 'str'
}, case 'int': return 'int'
matrix: function (rows: number, cols: number) { case 'float': return 'float'
return `Matrix(${rows}, ${cols})` case 'coord': return 'coord'
}, case 'enum':
vector: function (dim: number) { return `Aliased<'${c.values.map(v => v.replace(/'/g, '\\\'')).join(`' | '`)}'>(${c.subType})`
return `Vector(${dim})` case 'matrix':
}, return `Matrix(${c.rows}, ${c.columns})`
list: function (type: 'str'|'int'|'float', separator: string) { case 'vector':
if (type === 'int') { return `Vector(${c.length})`
return `List('${separator}', x => parseInt(x, 10))` case 'list':
} else if (type === 'float') { if (c.subType === 'int') {
return `List('${separator}', x => parseFloat(x))` return `List('${c.separator}', x => parseInt(x, 10))`
} else if (c.subType === 'float' || c.subType === 'coord') {
return `List('${c.separator}', x => parseFloat(x))`
} else { } else {
return `List('${separator}', x => x)` return `List('${c.separator}', x => x)`
} }
} }
} }
const reSafePropertyName = /^[a-zA-Z_$][0-9a-zA-Z_$]*$/ const reSafePropertyName = /^[a-zA-Z_$][0-9a-zA-Z_$]*$/
function safePropertyString(name: string) { function safePropertyString(name: string) { return name.match(reSafePropertyName) ? name : `'${name}'` }
return name.match(reSafePropertyName) ? name : `'${name}'`
}
export function generate (name: string, info: string, schema: Database, fields?: Filter, importDatabasePath?: string) { export function generate (name: string, info: string, schema: Database, fields?: Filter, importDatabasePath?: string) {
const validationResult = validate(schema)
if (validationResult !== true) {
throw validationResult
}
const codeLines: string[] = [] const codeLines: string[] = []
codeLines.push(`export const ${name}_Schema = {`) codeLines.push(`export const ${name}_Schema = {`)
...@@ -78,13 +73,11 @@ export function generate (name: string, info: string, schema: Database, fields?: ...@@ -78,13 +73,11 @@ export function generate (name: string, info: string, schema: Database, fields?:
const columns = schema[table] const columns = schema[table]
Object.keys(columns).forEach(columnName => { Object.keys(columns).forEach(columnName => {
if (fields && !fields[table][columnName]) return if (fields && !fields[table][columnName]) return
let typeDef const typeDef = getTypeDef(columns[columnName])
const fieldType = columns[ columnName ] if (columns[columnName].description) {
if (typeof fieldType === 'object') { codeLines.push(` /**`)
const { type, args } = getTypeAndArgs(fieldType) codeLines.push(`${indentString(columns[columnName].description, 1, ' * ')}`)
typeDef = value[ type ](...args) codeLines.push(` */`)
} else {
typeDef = fieldType
} }
codeLines.push(` ${safePropertyString(columnName)}: ${typeDef},`) codeLines.push(` ${safePropertyString(columnName)}: ${typeDef},`)
}) })
......
...@@ -4,46 +4,42 @@ ...@@ -4,46 +4,42 @@
* @author Alexander Rose <alexander.rose@weirdbyte.de> * @author Alexander Rose <alexander.rose@weirdbyte.de>
*/ */
export interface Database { export interface Database { [ tableName: string ]: Table }
[ tableName: string ]: Table export interface Table { [ columnName: string ]: Column }
} export type Column = IntCol | StrCol | FloatCol | CoordCol | EnumCol | VectorCol | MatrixCol | ListCol
export interface Table { type BaseCol = { description: string }
[ columnName: string ]: Column
}
export type ValueColumn = IntCol | StrCol | FloatCol | CoordCol | EnumCol export type IntCol = { type: 'int' } & BaseCol
export type Column = ValueColumn | VectorCol | MatrixCol | ListColumn export function IntCol(description: string): IntCol { return { type: 'int', description } }
type IntCol = 'int' export type StrCol = { type: 'str' } & BaseCol
type StrCol = 'str' export function StrCol(description: string): StrCol { return { type: 'str', description } }
type FloatCol = 'float'
type CoordCol = 'coord'
interface ComplexColumn { export type FloatCol = { type: 'float' } & BaseCol
[ fieldType: string ]: any[] export function FloatCol(description: string): FloatCol { return { type: 'float', description } }
}
interface EnumCol extends ComplexColumn { export type CoordCol = { type: 'coord' } & BaseCol
enum: [ IntCol | StrCol, string[] ] export function CoordCol(description: string): CoordCol { return { type: 'coord', description } }
}
interface VectorCol extends ComplexColumn { export type EnumCol = { type: 'enum', subType: 'int' | 'str', values: string[] } & BaseCol
vector: [ number ] export function EnumCol(values: string[], subType: 'int' | 'str', description: string): EnumCol {
return { type: 'enum', description, values, subType }
} }
interface MatrixCol extends ComplexColumn { export type VectorCol = { type: 'vector', length: number } & BaseCol
matrix: [ number, number ] export function VectorCol(length: number, description: string): VectorCol {
return { type: 'vector', description, length }
} }
export interface ListColumn extends ComplexColumn { export type MatrixCol = { type: 'matrix', rows: number, columns: number } & BaseCol
list: [ ValueColumn, string ] export function MatrixCol(columns: number, rows: number, description: string): MatrixCol {
return { type: 'matrix', description, columns, rows }
} }
export function getTypeAndArgs (column: ComplexColumn) { export type ListCol = { type: 'list', subType: 'int' | 'str' | 'float' | 'coord', separator: string } & BaseCol
const type = Object.keys(column)[0] as string export function ListCol(subType: 'int' | 'str' | 'float' | 'coord', separator: string, description: string): ListCol {
const args = column[ type ] return { type: 'list', description, separator, subType }
return { type, args }
} }
export type Filter = { [ table: string ]: { [ column: string ]: true } } export type Filter = { [ table: string ]: { [ column: string ]: true } }
......
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { Database, Table, Column } from './json-schema'
const SimpleColumnTypes = [ 'str', 'int', 'float', 'coord' ]
const ComplexColumnTypes = [ 'enum', 'vector', 'matrix', 'list' ]
/**
 * Check that the predicate `fn` holds for every element of `list`.
 * An empty list yields `true`.
 */
function allTrue<T> (list: T[], fn: (e: T) => boolean) {
    return list.every(item => fn(item))
}
/**
 * Check that every element of `list` is a string at runtime.
 * An empty list yields `true`.
 */
function allString (list: string[]) {
    return list.every(entry => typeof entry === 'string')
}
/**
 * Validate a single column definition.
 *
 * A column is either one of the simple type names listed in `SimpleColumnTypes`,
 * or an object with exactly one key (one of `ComplexColumnTypes`) whose value is
 * the argument array of that type:
 * - `enum`: two args, the second one an array of all strings or all integers
 * - `vector`: one integer arg
 * - `matrix`: two integer args
 * - `list`: two string args
 *
 * @param column the column definition to check
 * @returns `true` when the column is valid, otherwise an `Error` describing the first problem found
 */
function validateColumn (column: Column): true|Error {
    // `typeof null` is 'object', so name it explicitly for the checks and messages below
    const kind = column === null ? 'null' : typeof column
    if (typeof column === 'string') {
        if (!SimpleColumnTypes.includes(column)) {
            return new Error(`simple column types must be one of '${SimpleColumnTypes.join(', ')}' not '${column}'`)
        }
        return true
    } else if (typeof column === 'object' && column !== null) {
        const keys = Object.keys(column)
        if (keys.length !== 1) {
            return new Error(`complex column object must have a single key`)
        }
        const type = keys[0]
        const args = column[ type ]
        if (!Array.isArray(args)) {
            return new Error(`complex column args must be an array`)
        }
        switch (type) {
            case 'enum':
                // invalid if the arg count is wrong OR the values are not a homogeneous
                // array; the array check also guards the helpers against non-array input
                if (args.length !== 2 || !Array.isArray(args[1]) || (!allString(args[1]) && !allTrue(args[1], Number.isInteger))) {
                    return new Error(`enum column must have all string or all integer args ${args}`)
                }
                break;
            case 'vector':
                if (args.length !== 1 || !allTrue(args, Number.isInteger)) {
                    return new Error(`vector column must have one integer arg`)
                }
                break;
            case 'matrix':
                if (args.length !== 2 || !allTrue(args, Number.isInteger)) {
                    return new Error(`matrix column must have two integer args`)
                }
                break;
            case 'list':
                if (args.length !== 2 || !allString(args)) {
                    return new Error(`list column must have two string args`)
                }
                break;
            default:
                return new Error(`complex column types must be one of '${ComplexColumnTypes.join(', ')}' not '${type}'`)
        }
        return true
    }
    return new Error(`columns must be of type 'object' or 'string' not '${kind}'`)
}
/**
 * Validate a table, i.e. a plain object mapping column names to column definitions.
 *
 * @param table the table to check
 * @returns `true` when the table and all its columns are valid, otherwise an `Error`
 *          for the first problem found; column errors are prefixed with `[columnName]`
 */
function validateTable (table: Table): true|Error {
    // `typeof` reports 'object' for null and arrays as well, neither of which is a table
    if (typeof table !== 'object' || table === null || Array.isArray(table)) {
        const actual = table === null ? 'null' : Array.isArray(table) ? 'array' : typeof table
        return new Error(`table must be of type 'object' not '${actual}'`)
    }
    // own keys only, inherited properties are not columns
    for (const columnName of Object.keys(table)) {
        // could check columnName with regex
        const r = validateColumn(table[columnName])
        if (r !== true) {
            return new Error(`[${columnName}] ${r.message}`)
        }
    }
    return true
}
/**
 * Validate a database, i.e. a plain object mapping table names to tables.
 *
 * @param database the database to check
 * @returns `true` when the database and all its tables are valid, otherwise an `Error`
 *          for the first problem found; table errors are prefixed with `[tableName]`
 */
function validateDatabase (database: Database): true|Error {
    // `typeof` reports 'object' for null and arrays as well, neither of which is a database
    if (typeof database !== 'object' || database === null || Array.isArray(database)) {
        const actual = database === null ? 'null' : Array.isArray(database) ? 'array' : typeof database
        return new Error(`database must be of type 'object' not '${actual}'`)
    }
    // own keys only, inherited properties are not tables
    for (const tableName of Object.keys(database)) {
        // could check tableName with regex
        const r = validateTable(database[tableName])
        if (r !== true) {
            return new Error(`[${tableName}] ${r.message}`)
        }
    }
    return true
}
/**
 * Public entry point: validate `schema` as a database.
 *
 * @returns `true` for a valid schema, otherwise the `Error` produced by `validateDatabase`
 */
export function validate (schema: any): true|Error {
    const outcome = validateDatabase(schema)
    return outcome
}
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import * as argparse from 'argparse'
import * as fs from 'fs'
import { validate } from './util/validate'
/**
 * Read the json file at `path`, run `validate` on its parsed content and
 * print the outcome to the console.
 *
 * @param path location of the json schema file, read as utf8
 */
function runValidateSchema (path: string) {
    const schema = JSON.parse(fs.readFileSync(path, 'utf8'))
    const result = validate(schema)
    if (result === true) {
        console.log('valid json schema')
    } else {
        console.log(`invalid json schema: "${result}"`)
    }
}
// Command line interface: a single positional `path` argument.
const parser = new argparse.ArgumentParser({ addHelp: true, description: 'Validate json schema' });
parser.addArgument([ 'path' ], { help: 'path to json schema' });

const args = parser.parseArgs();
// only run the validation when a path was supplied
if (args.path) runValidateSchema(args.path)
This diff is collapsed.
...@@ -73,7 +73,7 @@ function getConformation(atom_site: AtomSite): AtomicConformation { ...@@ -73,7 +73,7 @@ function getConformation(atom_site: AtomSite): AtomicConformation {
} }
function isHierarchyDataEqual(a: AtomicData, b: AtomicData) { function isHierarchyDataEqual(a: AtomicData, b: AtomicData) {
// need to cast because of how TS handles type resolution for interfaces https://github.com/Microsoft/TypeScript/issues/15300 // TODO need to cast because of how TS handles type resolution for interfaces https://github.com/Microsoft/TypeScript/issues/15300
return Table.areEqual(a.chains as Table<ChainsSchema>, b.chains as Table<ChainsSchema>) return Table.areEqual(a.chains as Table<ChainsSchema>, b.chains as Table<ChainsSchema>)
&& Table.areEqual(a.residues as Table<ResiduesSchema>, b.residues as Table<ResiduesSchema>) && Table.areEqual(a.residues as Table<ResiduesSchema>, b.residues as Table<ResiduesSchema>)
&& Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>) && Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>)
......
...@@ -184,3 +184,8 @@ export function formatProgress(p: Progress) { ...@@ -184,3 +184,8 @@ export function formatProgress(p: Progress) {
const x = (100 * tp.current / tp.max).toFixed(2); const x = (100 * tp.current / tp.max).toFixed(2);
return `${tp.message} ${x}%`; return `${tp.message} ${x}%`;
} }
const reLine = /^/mg
export function indentString(str: string, count: number, indent: string) {
return count === 0 ? str : str.replace(reLine, indent.repeat(count))
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment