From 422dd10aa56a54933f3485af38583e10af7b3f6a Mon Sep 17 00:00:00 2001
From: David Sehnal <david.sehnal@gmail.com>
Date: Wed, 30 May 2018 23:46:31 +0200
Subject: [PATCH] Mol-script parser

---
 .../structure/model/formats/mmcif/bonds.ts    |   2 +-
 .../structure/unit/bonds/intra-compute.ts     |   2 +-
 src/mol-script/builder.ts                     |  40 +++
 src/mol-script/compiler.ts                    |   2 +-
 src/mol-script/parsers/mol-script/examples.ts | 104 ++++++
 src/mol-script/parsers/mol-script/macro.ts    |  39 +++
 src/mol-script/parsers/mol-script/parser.ts   | 132 ++++++++
 src/mol-script/parsers/mol-script/symbols.ts  | 300 ++++++++++++++++++
 src/mol-script/parsers/parser.ts              |  12 +
 src/mol-script/symbol-table.ts                |  24 ++
 src/mol-util/mask.ts                          |   4 +-
 src/perf-tests/mol-script.ts                  |   5 +
 12 files changed, 661 insertions(+), 5 deletions(-)
 create mode 100644 src/mol-script/builder.ts
 create mode 100644 src/mol-script/parsers/mol-script/examples.ts
 create mode 100644 src/mol-script/parsers/mol-script/macro.ts
 create mode 100644 src/mol-script/parsers/mol-script/parser.ts
 create mode 100644 src/mol-script/parsers/mol-script/symbols.ts
 create mode 100644 src/mol-script/parsers/parser.ts
 create mode 100644 src/mol-script/symbol-table.ts
 create mode 100644 src/perf-tests/mol-script.ts

diff --git a/src/mol-model/structure/model/formats/mmcif/bonds.ts b/src/mol-model/structure/model/formats/mmcif/bonds.ts
index 95ac32d4d..89abf70d4 100644
--- a/src/mol-model/structure/model/formats/mmcif/bonds.ts
+++ b/src/mol-model/structure/model/formats/mmcif/bonds.ts
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2017-2018 MolQL contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017-2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  * @author Alexander Rose <alexander.rose@weirdbyte.de>
diff --git a/src/mol-model/structure/structure/unit/bonds/intra-compute.ts b/src/mol-model/structure/structure/unit/bonds/intra-compute.ts
index 9cd4a9ed1..816d72cb7 100644
--- a/src/mol-model/structure/structure/unit/bonds/intra-compute.ts
+++ b/src/mol-model/structure/structure/unit/bonds/intra-compute.ts
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2017 MolQL contributors, licensed under MIT, See LICENSE file for more info.
+ * Copyright (c) 2017 Mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  */
diff --git a/src/mol-script/builder.ts b/src/mol-script/builder.ts
new file mode 100644
index 000000000..a79cd1643
--- /dev/null
+++ b/src/mol-script/builder.ts
@@ -0,0 +1,40 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import Expression from './expression'
+import Symbol from './symbol'
+import SymbolTable from './symbol-table'
+
+namespace Builder {
+    export const core = SymbolTable.core;
+    export const struct = SymbolTable.structureQuery;
+
+    // symbol groups backing the atom property helpers below
+    const _acp = struct.atomProperty.core;
+    const _atp = struct.atomProperty.topology;
+    const _ammp = struct.atomProperty.macromolecular;
+    const _aps = struct.atomSet.propertySet;
+
+    // value constructors: each wraps its operand(s) into the argument array of the symbol
+    export function atomName(s: string) { return struct.type.atomName([s]); }
+    export function es(s: string) { return struct.type.elementSymbol([s]); }
+    export function list(...xs: Expression[]) { return core.type.list(xs); }
+    export function set(...xs: Expression[]) { return core.type.set(xs); }
+    export function fn(x: Expression) { return core.ctrl.fn([x]); }
+    export function evaluate(x: Expression) { return core.ctrl.eval([x]); }
+
+    // atom property lookups (core / topology / macromolecular): apply the named symbol with no arguments
+    export function acp(p: keyof typeof _acp) { return (_acp[p] as Symbol<any>)(); }
+    export function atp(p: keyof typeof _atp) { return (_atp[p] as Symbol<any>)(); }
+    export function ammp(p: keyof typeof _ammp) { return (_ammp[p] as Symbol<any>)(); }
+
+    // the same lookups, each wrapped as a single-property atom set property set
+    export function acpSet(p: keyof typeof _acp) { return _aps([acp(p)]); }
+    export function atpSet(p: keyof typeof _atp) { return _aps([atp(p)]); }
+    export function ammpSet(p: keyof typeof _ammp) { return _aps([ammp(p)]); }
+}
+
+export default Builder
\ No newline at end of file
diff --git a/src/mol-script/compiler.ts b/src/mol-script/compiler.ts
index 758322a63..65af9dcfa 100644
--- a/src/mol-script/compiler.ts
+++ b/src/mol-script/compiler.ts
@@ -1,4 +1,4 @@
-/*
+/**
  * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
diff --git a/src/mol-script/parsers/mol-script/examples.ts b/src/mol-script/parsers/mol-script/examples.ts
new file mode 100644
index 000000000..7df280f8b
--- /dev/null
+++ b/src/mol-script/parsers/mol-script/examples.ts
@@ -0,0 +1,104 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+export default [{
+    name: 'Residues connected to HEM',
+    value: `(sel.atom.is-connected-to
+  sel.atom.res
+  :target (sel.atom.res (= atom.label_comp_id HEM))
+  ;; default bond test allows only covalent bonds
+  :bond-test true
+  :disjunct true)`
+}, {
+    name: 'All C or N atoms in ALA residues',
+    value: `(sel.atom.atom-groups
+  :residue-test (= atom.auth_comp_id ALA)
+  :atom-test (set.has (set _C _N) atom.el))`
+}, {
+    name: 'Residues 130 to 180',
+    value: `(sel.atom.res (in-range atom.resno 130 180))`
+}, {
+    name: 'All residues within 5 ang from Fe atom',
+    value: `(sel.atom.include-surroundings
+  (sel.atom.atoms (= atom.el _Fe))
+  :radius 5
+  :as-whole-residues true)`
+}, {
+    name: 'Cluster LYS residues within 5 ang',
+    value: `(sel.atom.cluster
+  (sel.atom.res (= atom.label_comp_id LYS))
+  :max-distance 5)`
+}, {
+    name: 'Residues with max b-factor < 45',
+    value: `(sel.atom.pick sel.atom.res
+  :test (< (atom.set.max atom.B_iso_or_equiv) 45))`
+}, {
+    name: 'Atoms between 10 and 15 ang from Fe',
+    value: `(sel.atom.within sel.atom.atoms
+  :target (sel.atom.atoms (= atom.el _Fe))
+  :min-radius 10
+  :max-radius 15)`
+}, {
+    name: 'HEM and 2 layers of connected residues',
+    value: `(sel.atom.include-connected
+  (sel.atom.res (= atom.label_comp_id HEM))
+  ;; default bond test allows only covalent bonds
+  ;; another option is to use :bond-test true to allow any connection
+  :bond-test (bond.is metallic covalent)
+  :layer-count 2
+  :as-whole-residues true)`
+}, {
+    name: 'All rings',
+    value: `(sel.atom.rings)`
+}, {
+    name: 'CCCCN and CCNCN rings',
+    value: `(sel.atom.rings
+  (ringfp _C _N _C _N _C)
+  ;; the "rotation" of element symbols has no effect
+  ;; the following is the same as (ringfp _C _C _C _C _N)
+  (ringfp _C _C _C _N _C))`
+}, {
+    name: 'Sheets',
+    value: `(sel.atom.atom-groups
+  :residue-test (atom.sec-struct.is sheet)
+  :group-by (atom.key.sec-struct))`
+}, {
+    name: 'Helices formed by at least 30 residues',
+    value: `(sel.atom.pick
+  (sel.atom.atom-groups
+    :residue-test (atom.sec-struct.is helix)
+    :group-by atom.key.sec-struct)
+  :test (<= 30 (atom.set.count-query sel.atom.res)))`
+}, {
+    name: 'Modified residues',
+    value: `(sel.atom.res atom.is-modified)`
+}, {
+    name: 'Atoms participating in metallic coordination',
+    value: `(sel.atom.atoms
+  (> (atom.bond-count :flags (bond-flags metallic)) 0))`
+}, {
+    name: 'LYS and ALA residues that are between 2 and 5 ang apart',
+    value: `(sel.atom.dist-cluster
+  ;; upper triangular matrix are maximum distances of corresponding selections
+  ;; lower triangular matrix are minumum distances of corresponding selections
+  :matrix [
+    [0 5]
+    [2 0]
+  ]
+  :selections [
+    (sel.atom.res (= atom.resname LYS))
+    (sel.atom.res (= atom.resname ALA))
+  ])`
+}, {
+    name: 'Clusters of 3 LYS residues that are mutually no more than 5 ang apart',
+    value: `(sel.atom.dist-cluster
+  :matrix [[0 5 5] [0 0 5] [0 0 0]]
+  :selections [
+    (sel.atom.res (= atom.resname LYS))
+    (sel.atom.res (= atom.resname LYS))
+    (sel.atom.res (= atom.resname LYS))
+  ])`
+}]
\ No newline at end of file
diff --git a/src/mol-script/parsers/mol-script/macro.ts b/src/mol-script/parsers/mol-script/macro.ts
new file mode 100644
index 000000000..658e9ce98
--- /dev/null
+++ b/src/mol-script/parsers/mol-script/macro.ts
@@ -0,0 +1,39 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import B from '../../builder'
+/** Values stored under the numeric keys of args, ordered by index; empty when args is missing. */
+export function getPositionalArgs(args: any) {
+    return Object.keys(args || {}) // a macro referenced as a bare symbol is translated with args === undefined
+        .filter(k => !isNaN(k as any))
+        .map(k => +k)
+        .sort((a, b) => a - b)
+        .map(k => args[k]);
+}
+/** Returns args[name] when args exists and the entry is not undefined; otherwise defaultValue. */
+export function tryGetArg(args: any, name: string | number, defaultValue?: any) {
+    return (args && args[name] !== void 0) ? args[name] : defaultValue;
+}
+/** Copies the listed keys found in args into a prototype-less object; undefined when none match or args is missing. */
+export function pickArgs(args: any, ...names: string[]) {
+    const ret = Object.create(null);
+    let count = 0;
+    for (const k of Object.keys(args || {})) {
+        if (names.indexOf(k) >= 0) {
+            ret[k] = args[k];
+            count++;
+        }
+    }
+    return count ? ret : void 0;
+}
+/** Builds an atomSet.reduce expression that folds property with fn; initial falls back to the property itself. */
+export function aggregate(property: any, fn: any, initial?: any){
+    return B.struct.atomSet.reduce({
+        initial: initial !== void 0 ? initial : property,
+        value: fn([ B.struct.slot.elementSetReduce(), property ])
+    });
+}
\ No newline at end of file
diff --git a/src/mol-script/parsers/mol-script/parser.ts b/src/mol-script/parsers/mol-script/parser.ts
new file mode 100644
index 000000000..329497414
--- /dev/null
+++ b/src/mol-script/parsers/mol-script/parser.ts
@@ -0,0 +1,132 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { MonadicParser as P } from 'mol-util/monadic-parser'
+
+import Parser from '../parser'
+import Expression from '../../expression'
+import { SymbolMap, MolScriptSymbol } from './symbols'
+import B from '../../builder'
+
+const ws = P.regexp(/[\n\r\s]*/)
+/** Expression for a resolved symbol: an alias becomes Expression.Apply on the symbol id, a macro is expanded by its translate. */
+function getSymbolExpression(s: MolScriptSymbol, args?: any) {
+    switch (s.kind) {
+        case 'alias': return args ? Expression.Apply(s.symbol.id, args) : Expression.Apply(s.symbol.id);
+        case 'macro': return s.translate(args);
+    }
+}
+/** Grammar of the parenthesised script language; Query is the only exported rule. */
+namespace Language {
+    const Expr = P.lazy(() => P.seq(Symb, ArgList, NamedArgList));
+
+    const Arg: P<Expression> = P.lazy(() => P.seq(
+        P.lookahead(P.regexp(/[^:]/)),
+        P.alt(
+            // order matters
+            AtomName,
+            ElementSymbol,
+            Bool,
+            Num,
+            Str,
+            QuotedStr,
+            ListSymbol,
+            SetSymbol,
+            List
+        )
+    ).map((x: any) => x[1]).trim(ws));
+
+    const ArgList = Arg.many();
+    const ArgName = P.regexp(/:([a-zA-Z0-9_.-]+)/, 1).trim(ws).desc('arg-name');
+
+    const NamedArg = P.seq(ArgName, Arg).trim(ws);
+
+    const NamedArgList = NamedArg.many().map(xs => {
+        const namedArgs: { [key: string]: any } = {}
+        xs.forEach((a: any) => { namedArgs[a[0]] = a[1] })
+        return namedArgs
+    });
+
+    const Symb = P.regexp(/[^\s'`,@()\[\]{}';:]+/) // /[a-zA-Z_-][a-zA-Z0-9_.-]+/)
+        .map(x => {
+            const s = SymbolMap[x];
+            if (!s) {
+                throw new Error(`'${x}': unknown symbol.`);
+            }
+            return s;
+        })
+        .desc('symbol');
+
+    const Str = P.regexp(/[a-zA-Z_-]+[a-zA-Z0-9_.-]*/).map(x => {
+        const s = SymbolMap[x];
+        if (s) return getSymbolExpression(s);
+        return x;
+    }).desc('string');
+
+    const QuotedStr = P.string('`')
+        .then(P.regexp(/[^`]*/))
+        .skip(P.string('`'))
+        .desc('quoted-string');
+
+    const Num = P.regexp(/-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?/)
+        .map(v => +v)
+        .desc('number');
+
+    const Bool = P.alt(
+        P.regexp(/true/i).result(true),
+        P.regexp(/false/i).result(false)
+    ).desc('boolean');
+
+    // '[a, b, c]' => core.list([a, b, c])
+    const ListSymbol = ArgList
+        .wrap(P.string('['), P.string(']'))
+        .map(B.core.type.list)
+        .desc('list-symbol');
+
+    // '{a, b, c}' => core.set([a, b, c])
+    const SetSymbol = ArgList
+        .wrap(P.string('{'), P.string('}'))
+        .map(B.core.type.set)
+        .desc('set-symbol');
+
+    // _XYZ -> type.elementSymbol XYZ
+    const ElementSymbol = P.string('_')
+        .then(P.regexp(/[0-9a-zA-Z]+/))
+        .map(x => B.struct.type.elementSymbol([x]))
+        .desc('element-symbol');
+
+    // '.e' => struct.type.atomName(e)
+    const AtomName = P.string('.')
+        .then(P.alt(Str, QuotedStr, Num))
+        .map(v => B.atomName('' + v))
+        .desc('identifier');
+
+    const List = Expr
+        .wrap(P.string('('), P.string(')'))
+        .map(x => {
+            const array: any[] = x[1];
+            const named: any = x[2];
+
+            if (named && Object.keys(named).length) {
+                if (array) {
+                    for (let i = 0; i < array.length; i++) named[i] = array[i];
+                }
+                return getSymbolExpression(x[0], named);
+            } else if (array && array.length) {
+                return getSymbolExpression(x[0], x[1]);
+            } else {
+                return getSymbolExpression(x[0])
+            }
+        })
+        .desc('list');
+
+    export const Query = List.trim(ws)
+}
+// A comment runs from ';' to the end of its line or of the input; the line break is kept. NOTE: ';' inside a back-quoted string is stripped as well.
+const reComment = /;[^\n\r]*/g
+const transpiler: Parser = str => Language.Query.tryParse(str.replace(reComment, ''))
+export default transpiler
diff --git a/src/mol-script/parsers/mol-script/symbols.ts b/src/mol-script/parsers/mol-script/symbols.ts
new file mode 100644
index 000000000..66d41365f
--- /dev/null
+++ b/src/mol-script/parsers/mol-script/symbols.ts
@@ -0,0 +1,300 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import Symbol, { Arguments, Argument } from '../../symbol'
+import B from '../../builder'
+import * as M from './macro'
+import MolScript from '../../symbol-table'
+import Type from '../../type'
+import * as Struct from '../../symbol-table/structure-query'
+import Expression from '../../expression'
+import { UniqueArray } from 'mol-data/generic'
+
+export type MolScriptSymbol =
+    | { kind: 'alias', aliases: string[], symbol: Symbol }
+    | { kind: 'macro', aliases: string[], symbol: Symbol, translate: (args: any) => Expression }
+/** Alias exposes an existing symbol under script names; Macro registers a script-only symbol expanded through translate. */
+function Alias(symbol: Symbol<any>, ...aliases: string[]): MolScriptSymbol { return { kind: 'alias', aliases, symbol }; }
+function Macro(symbol: Symbol<any>, translate: (args: any) => Expression, ...aliases: string[]): MolScriptSymbol {
+    symbol.info.namespace = 'molscript-macro'; // mutates the passed symbol: it is re-homed under the macro namespace
+    symbol.id = `molscript-macro.${symbol.info.name}`;
+    return { kind: 'macro', symbol, translate, aliases: [symbol.info.name, ...aliases] };
+}
+/** Checks the 'kind' tag; used to tell symbol entries from category labels and nested groups. */
+export function isMolScriptSymbol(x: any): x is MolScriptSymbol {
+    return x.kind === 'alias' || x.kind === 'macro';
+}
+
+export const SymbolTable = [
+    [
+        'Core symbols',
+        Alias(MolScript.core.type.bool, 'bool'),
+        Alias(MolScript.core.type.num, 'num'),
+        Alias(MolScript.core.type.str, 'str'),
+        Alias(MolScript.core.type.regex, 'regex'),
+        Alias(MolScript.core.type.list, 'list'),
+        Alias(MolScript.core.type.set, 'set'),
+
+        Alias(MolScript.core.type.compositeKey, 'composite-key'),
+        Alias(MolScript.core.logic.not, 'not'),
+        Alias(MolScript.core.logic.and, 'and'),
+        Alias(MolScript.core.logic.or, 'or'),
+        Alias(MolScript.core.ctrl.if, 'if'),
+        Alias(MolScript.core.ctrl.fn, 'fn'),
+        Alias(MolScript.core.ctrl.eval, 'eval'),
+        Alias(MolScript.core.math.add, 'add', '+'),
+        Alias(MolScript.core.math.sub, 'sub', '-'),
+        Alias(MolScript.core.math.mult, 'mult', '*'),
+        Alias(MolScript.core.math.div, 'div', '/'),
+        Alias(MolScript.core.math.pow, 'pow', '**'),
+        Alias(MolScript.core.math.mod, 'mod'),
+        Alias(MolScript.core.math.min, 'min'),
+        Alias(MolScript.core.math.max, 'max'),
+        Alias(MolScript.core.math.floor, 'floor'),
+        Alias(MolScript.core.math.ceil, 'ceil'),
+        Alias(MolScript.core.math.roundInt, 'round'),
+        Alias(MolScript.core.math.abs, 'abs'),
+        Alias(MolScript.core.math.sqrt, 'sqrt'),
+        Alias(MolScript.core.math.sin, 'sin'),
+        Alias(MolScript.core.math.cos, 'cos'),
+        Alias(MolScript.core.math.tan, 'tan'),
+        Alias(MolScript.core.math.asin, 'asin'),
+        Alias(MolScript.core.math.acos, 'acos'),
+        Alias(MolScript.core.math.atan, 'atan'),
+        Alias(MolScript.core.math.sinh, 'sinh'),
+        Alias(MolScript.core.math.cosh, 'cosh'),
+        Alias(MolScript.core.math.tanh, 'tanh'),
+        Alias(MolScript.core.math.exp, 'exp'),
+        Alias(MolScript.core.math.log, 'log'),
+        Alias(MolScript.core.math.log10, 'log10'),
+        Alias(MolScript.core.math.atan2, 'atan2'),
+        Alias(MolScript.core.rel.eq, 'eq', '='),
+        Alias(MolScript.core.rel.neq, 'neq', '!='),
+        Alias(MolScript.core.rel.lt, 'lt', '<'),
+        Alias(MolScript.core.rel.lte, 'lte', '<='),
+        Alias(MolScript.core.rel.gr, 'gr', '>'),
+        Alias(MolScript.core.rel.gre, 'gre', '>='),
+        Alias(MolScript.core.rel.inRange, 'in-range'),
+        Alias(MolScript.core.str.concat, 'concat'),
+        Alias(MolScript.core.str.match, 'regex.match'),
+        Alias(MolScript.core.list.getAt, 'list.get'),
+        Alias(MolScript.core.set.has, 'set.has'),
+        Alias(MolScript.core.set.isSubset, 'set.subset'),
+    ],
+    [
+        'Structure',
+        [
+            'Types',
+            Alias(MolScript.structureQuery.type.entityType, 'ent-type'),
+            Alias(MolScript.structureQuery.type.authResidueId, 'auth-resid'),
+            Alias(MolScript.structureQuery.type.labelResidueId, 'label-resid'),
+            Alias(MolScript.structureQuery.type.ringFingerprint, 'ringfp'),
+            Alias(MolScript.structureQuery.type.bondFlags, 'bond-flags'),
+        ],
+        [
+            'Slots',
+            Alias(MolScript.structureQuery.slot.elementSetReduce, 'atom.set.reduce.value'),
+        ],
+        [
+            'Generators',
+            Alias(MolScript.structureQuery.generator.atomGroups, 'sel.atom.atom-groups'),
+            Alias(MolScript.structureQuery.generator.queryInSelection, 'sel.atom.query-in-selection'),
+            Alias(MolScript.structureQuery.generator.rings, 'sel.atom.rings'),
+            Alias(MolScript.structureQuery.generator.empty, 'sel.atom.empty'),
+
+            Macro(Symbol('sel.atom.atoms', Arguments.Dictionary({
+                0: Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test applied to each atom.' })
+            }), Struct.Types.ElementSelection, 'A selection of singleton atom sets.'),
+            args => B.struct.generator.atomGroups({ 'atom-test': M.tryGetArg(args, 0, true) })),
+
+            Macro(Symbol('sel.atom.res', Arguments.Dictionary({
+                0: Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test applied to the 1st atom of each residue.' })
+            }), Struct.Types.ElementSelection, 'A selection of atom sets grouped by residue.'),
+            args => B.struct.generator.atomGroups({
+                'residue-test': M.tryGetArg(args, 0, true),
+                'group-by': B.ammp('residueKey')
+            })),
+
+            Macro(Symbol('sel.atom.chains', Arguments.Dictionary({
+                0: Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test applied to the 1st atom of each chain.' })
+            }), Struct.Types.ElementSelection, 'A selection of atom sets grouped by chain.'),
+            args => B.struct.generator.atomGroups({
+                'chain-test': M.tryGetArg(args, 0, true),
+                'group-by': B.ammp('chainKey')
+            })),
+        ],
+        [
+            'Modifiers',
+            Alias(MolScript.structureQuery.modifier.queryEach, 'sel.atom.query-each'),
+            Alias(MolScript.structureQuery.modifier.intersectBy, 'sel.atom.intersect-by'),
+            Alias(MolScript.structureQuery.modifier.exceptBy, 'sel.atom.except-by'),
+            Alias(MolScript.structureQuery.modifier.unionBy, 'sel.atom.union-by'),
+            Alias(MolScript.structureQuery.modifier.union, 'sel.atom.union'),
+            Alias(MolScript.structureQuery.modifier.cluster, 'sel.atom.cluster'),
+            Alias(MolScript.structureQuery.modifier.includeSurroundings, 'sel.atom.include-surroundings'),
+            Alias(MolScript.structureQuery.modifier.includeConnected, 'sel.atom.include-connected'),
+            Alias(MolScript.structureQuery.modifier.expandProperty, 'sel.atom.expand-property'),
+
+            Macro(Symbol('sel.atom.around', Arguments.Dictionary({
+                0: Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test applied to the 1st atom of each chain.' }) // NOTE(review): translate reads arg 0 as a selection and also reads 'radius', which is not declared here - confirm
+            }), Struct.Types.ElementSelection, 'A selection of singleton atom sets with centers within "radius" of the center of any atom in the given selection.'),
+            args => B.struct.modifier.exceptBy({
+                '0': B.struct.filter.within({
+                    '0': B.struct.generator.atomGroups(), target: M.tryGetArg(args, 0), 'max-radius': M.tryGetArg(args, 'radius')
+                }),
+                by: M.tryGetArg(args, 0)
+            }))
+        ],
+        [
+            'Filters',
+            Alias(MolScript.structureQuery.filter.pick, 'sel.atom.pick'),
+            Alias(MolScript.structureQuery.filter.withSameAtomProperties, 'sel.atom.with-same-atom-properties'),
+            Alias(MolScript.structureQuery.filter.intersectedBy, 'sel.atom.intersected-by'),
+            Alias(MolScript.structureQuery.filter.within, 'sel.atom.within'),
+            Alias(MolScript.structureQuery.filter.isConnectedTo, 'sel.atom.is-connected-to'),
+        ],
+        [
+            'Combinators',
+            Alias(MolScript.structureQuery.combinator.intersect, 'sel.atom.intersect'),
+            Alias(MolScript.structureQuery.combinator.merge, 'sel.atom.merge'),
+            Alias(MolScript.structureQuery.combinator.distanceCluster, 'sel.atom.dist-cluster'),
+        ],
+        [
+            'Atom Set Properties',
+            Alias(MolScript.structureQuery.atomSet.atomCount, 'atom.set.atom-count'),
+            Alias(MolScript.structureQuery.atomSet.countQuery, 'atom.set.count-query'),
+            Alias(MolScript.structureQuery.atomSet.reduce, 'atom.set.reduce'),
+            Alias(MolScript.structureQuery.atomSet.propertySet, 'atom.set.property'),
+
+            Macro(Symbol('atom.set.max', Arguments.Dictionary({
+                0: Argument(Type.Num, { description: 'Numeric atom property.'})
+            }), Type.Num, 'Maximum of the given property in the current atom set.'),
+            args => M.aggregate(M.tryGetArg(args, 0), B.core.math.max)),
+
+            Macro(Symbol('atom.set.sum', Arguments.Dictionary({
+                0: Argument(Type.Num, { description: 'Numeric atom property.'})
+            }), Type.Num, 'Sum of the given property in the current atom set.'),
+            args => M.aggregate(M.tryGetArg(args, 0), B.core.math.add, 0)),
+
+            Macro(Symbol('atom.set.avg', Arguments.Dictionary({
+                0: Argument(Type.Num, { description: 'Numeric atom property.'})
+            }), Type.Num, 'Average of the given property in the current atom set.'),
+            args => B.core.math.div([ M.aggregate(M.tryGetArg(args, 0), B.core.math.add, 0), B.struct.atomSet.atomCount() ])),
+
+            Macro(Symbol('atom.set.min', Arguments.Dictionary({
+                0: Argument(Type.Num, { description: 'Numeric atom property.'})
+            }), Type.Num, 'Minimum of the given property in the current atom set.'),
+            args => M.aggregate(M.tryGetArg(args, 0), B.core.math.min))
+        ],
+        [
+            'Atom Properties',
+            Alias(MolScript.structureQuery.atomProperty.core.elementSymbol, 'atom.el'),
+            Alias(MolScript.structureQuery.atomProperty.core.vdw, 'atom.vdw'),
+            Alias(MolScript.structureQuery.atomProperty.core.mass, 'atom.mass'),
+            Alias(MolScript.structureQuery.atomProperty.core.atomicNumber, 'atom.atomic-number'),
+            Alias(MolScript.structureQuery.atomProperty.core.x, 'atom.x'),
+            Alias(MolScript.structureQuery.atomProperty.core.y, 'atom.y'),
+            Alias(MolScript.structureQuery.atomProperty.core.z, 'atom.z'),
+            Alias(MolScript.structureQuery.atomProperty.core.atomKey, 'atom.key'),
+            Alias(MolScript.structureQuery.atomProperty.core.bondCount, 'atom.bond-count'),
+
+            Alias(MolScript.structureQuery.atomProperty.topology.connectedComponentKey, 'atom.key.molecule'),
+
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.authResidueId, 'atom.auth-resid'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.labelResidueId, 'atom.label-resid'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.residueKey, 'atom.key.res'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.chainKey, 'atom.key.chain'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.entityKey, 'atom.key.entity'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.isHet, 'atom.is-het'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.id, 'atom.id'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.label_atom_id, 'atom.label_atom_id'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.label_alt_id, 'atom.label_alt_id', 'atom.altloc'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.label_comp_id, 'atom.label_comp_id'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.label_asym_id, 'atom.label_asym_id'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.label_entity_id, 'atom.label_entity_id'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.label_seq_id, 'atom.label_seq_id'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.auth_atom_id, 'atom.auth_atom_id', 'atom.name'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.auth_comp_id, 'atom.auth_comp_id', 'atom.resname'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.auth_asym_id, 'atom.auth_asym_id', 'atom.chain'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.auth_seq_id, 'atom.auth_seq_id', 'atom.resno'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.pdbx_PDB_ins_code, 'atom.pdbx_PDB_ins_code', 'atom.inscode'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.pdbx_formal_charge, 'atom.pdbx_formal_charge'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.occupancy, 'atom.occupancy'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.B_iso_or_equiv, 'atom.B_iso_or_equiv', 'atom.bfactor'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.entityType, 'atom.entity-type'),
+
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.secondaryStructureKey, 'atom.key.sec-struct'),
+
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.isModified, 'atom.is-modified'),
+            Alias(MolScript.structureQuery.atomProperty.macromolecular.modifiedParentName, 'atom.modified-parent'),
+
+            Macro(Symbol('atom.sec-struct.is', Arguments.List(Struct.Types.SecondaryStructureFlag), Type.Bool,
+                `Test if the current atom is part of an secondary structure. Optionally specify allowed sec. struct. types: ${Type.oneOfValues(Struct.Types.SecondaryStructureFlag).join(', ')}`),
+            args => B.core.flags.hasAny([B.struct.atomProperty.macromolecular.secondaryStructureFlags(), B.struct.type.secondaryStructureFlags(args)])),
+        ],
+        [
+            'Bond Properties',
+            Alias(MolScript.structureQuery.bondProperty.order, 'bond.order'),
+            Macro(Symbol('bond.is', Arguments.List(Struct.Types.BondFlag), Type.Bool,
+                `Test if the current bond has at least one (or all if partial = false) of the specified flags: ${Type.oneOfValues(Struct.Types.BondFlag).join(', ')}`),
+            args => B.core.flags.hasAny([B.struct.bondProperty.flags(), B.struct.type.bondFlags(M.getPositionalArgs(args))])),
+        ]
+    ]
+];
+
+const list: MolScriptSymbol[] = [];
+/** Depth-first walk that appends every symbol entry to list; category label strings are skipped. */
+function makeList(xs: any[]) {
+    for (const x of xs) {
+        if (isMolScriptSymbol(x)) list.push(x);
+        else if (x instanceof Array) makeList(x);
+    }
+}
+
+makeList(SymbolTable);
+/** Derived lookups: alias list and map, non-numeric dictionary argument names, and the values of one-of argument types. */
+const normalized = (function () {
+    const symbolList: [string, MolScriptSymbol][] = [];
+    const symbolMap: { [id: string]: MolScriptSymbol | undefined } = Object.create(null);
+    const namedArgs = UniqueArray.create<string, string>();
+    const constants = UniqueArray.create<string, string>();
+
+    for (const s of list) {
+        for (const a of s.aliases) {
+            symbolList.push([a, s]);
+            if (symbolMap[a]) throw new Error(`Alias '${a}' already in use.`);
+            symbolMap[a] = s;
+        }
+        const args = s.symbol.args;
+        if (args.kind !== 'dictionary') {
+            if (args.type.kind === 'oneof') {
+                Type.oneOfValues(args.type).forEach(v => UniqueArray.add(constants, v, v));
+            }
+            continue;
+        }
+        for (const a of Object.keys(args.map)) {
+            if (isNaN(a as any)) UniqueArray.add(namedArgs, a, a);
+            const arg = ((args.map as any)[a]) as Argument;
+            if (arg.type.kind === 'oneof') {
+                Type.oneOfValues(arg.type).forEach(v => UniqueArray.add(constants, v, v));
+            }
+        }
+    }
+
+    return { symbolList, symbolMap, namedArgs: namedArgs.array, constants: constants.array }
+})();
+
+export const MolScriptSymbols = list;
+export const Constants = normalized.constants;
+export const NamedArgs = normalized.namedArgs;
+export const SymbolMap = normalized.symbolMap;
+export const SymbolList = normalized.symbolList;
+// Default export: aliases ordered by length (ties alphabetically), then ':'-prefixed named argument names, then constants.
+const sortedSymbols = SymbolList.map(s => s[0]).sort((a, b) => {
+    if (a.length === b.length) return a < b ? -1 : a > b ? 1 : 0; // a comparator must return a number, not a boolean
+    return a.length - b.length;
+});
+export default [...sortedSymbols, ...NamedArgs.map(a => ':' + a), ...Constants];
\ No newline at end of file
diff --git a/src/mol-script/parsers/parser.ts b/src/mol-script/parsers/parser.ts
new file mode 100644
index 000000000..122b2e844
--- /dev/null
+++ b/src/mol-script/parsers/parser.ts
@@ -0,0 +1,12 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import Expression from '../expression'
+/** A parser maps script source text to an Expression. */
+type Parser = (source: string) => Expression
+
+export default Parser
\ No newline at end of file
diff --git a/src/mol-script/symbol-table.ts b/src/mol-script/symbol-table.ts
new file mode 100644
index 000000000..7a12afc44
--- /dev/null
+++ b/src/mol-script/symbol-table.ts
@@ -0,0 +1,24 @@
+/**
+ * Copyright (c) 2017 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import core from './symbol-table/core'
+import structureQuery from './symbol-table/structure-query'
+import { normalizeTable, symbolList } from './helpers'
+import Symbol from './symbol'
+
+const table = { core, structureQuery };
+
+normalizeTable(table);
+
+export const SymbolList = symbolList(table);
+/** Every symbol of SymbolList keyed by its id, stored in a prototype-less object. */
+export const SymbolMap = (() => {
+    const byId: { [id: string]: Symbol | undefined } = Object.create(null);
+    for (const symbol of SymbolList) byId[symbol.id] = symbol;
+    return byId;
+})();
+
+export default table
\ No newline at end of file
diff --git a/src/mol-util/mask.ts b/src/mol-util/mask.ts
index 8b333231e..ff5c4a0c1 100644
--- a/src/mol-util/mask.ts
+++ b/src/mol-util/mask.ts
@@ -1,5 +1,5 @@
-/*
- * Copyright (c) 2017 MolQL contributors, licensed under MIT, See LICENSE file for more info.
+/**
+ * Copyright (c) 2017 Mol* contributors, licensed under MIT, See LICENSE file for more info.
  *
  * @author David Sehnal <david.sehnal@gmail.com>
  */
diff --git a/src/perf-tests/mol-script.ts b/src/perf-tests/mol-script.ts
new file mode 100644
index 000000000..91bf4fa9d
--- /dev/null
+++ b/src/perf-tests/mol-script.ts
@@ -0,0 +1,5 @@
+import Examples from 'mol-script/parsers/mol-script/examples'
+import transpile from 'mol-script/parsers/mol-script/parser'
+// smoke run: transpile the last bundled example and print the resulting expression
+const lastExample = Examples[Examples.length - 1];
+console.log(transpile(lastExample.value));
\ No newline at end of file
-- 
GitLab