From 6e17f5bb30caada83a847f435f6051cd7931d40d Mon Sep 17 00:00:00 2001
From: David Sehnal <david.sehnal@gmail.com>
Date: Thu, 9 Aug 2018 11:02:03 +0200
Subject: [PATCH] wip mol-script

---
 src/mol-script/language/parser.ts        | 197 +++++++++++++++++++++++
 src/mol-script/runtime/query/compiler.ts |   4 +-
 src/perf-tests/mol-script.ts             |  21 +--
 3 files changed, 210 insertions(+), 12 deletions(-)
 create mode 100644 src/mol-script/language/parser.ts

diff --git a/src/mol-script/language/parser.ts b/src/mol-script/language/parser.ts
new file mode 100644
index 000000000..906810e9d
--- /dev/null
+++ b/src/mol-script/language/parser.ts
@@ -0,0 +1,197 @@
+/**
+ * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import { MonadicParser as P } from 'mol-util/monadic-parser'
+import Expression from './expression'
+import { MolScriptBuilder as B } from './builder'
+
+/**
+ * Parses MolScript source text and converts every top-level form into an
+ * Expression. Top-level comments are skipped.
+ */
+export function parseMolScript(input: string) {
+    return Language.parse(input);
+}
+
+namespace Language {
+    type AST = ASTNode.Expression[]
+
+    /** Syntax tree nodes produced by the parser, before conversion to Expression. */
+    namespace ASTNode {
+        export type Expression = Str | Symb | List | Comment
+
+        export interface Str {
+            kind: 'string',
+            value: string
+        }
+
+        export interface Symb {
+            kind: 'symbol',
+            value: string
+        }
+
+        export interface List {
+            kind: 'list',
+            bracket: '(' | '[' | '{',
+            nodes: Expression[]
+        }
+
+        export interface Comment {
+            kind: 'comment',
+            value: string
+        }
+
+        export function str(value: string): Str { return { kind: 'string', value }; }
+        export function symb(value: string): Symb { return { kind: 'symbol', value }; }
+        export function list(bracket: '(' | '[' | '{', nodes: Expression[]): List { return { kind: 'list', bracket, nodes }; }
+        export function comment(value: string): Comment { return { kind: 'comment', value } }
+    }
+
+    // Anchored with ^ and $ so that only a complete numeric literal matches. Unanchored,
+    // any symbol containing a digit (e.g. "atom1") passed the test and was turned into NaN.
+    const numberRegex = /^-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?$/;
+
+    const ws = P.regexp(/[\n\r\s]*/);
+    // Lazy because Str, List, Symb and Comment are declared after Expr.
+    const Expr: P<ASTNode.Expression> = P.lazy(() => (P.alt(Str, List, Symb, Comment).trim(ws)));
+    // Text between backticks; takeWhile stops at the first backtick, so there is no escape sequence.
+    const Str = P.takeWhile(c => c !== '`').trim('`').map(ASTNode.str);
+    // A run of characters other than brackets, ';', '`', ',' and whitespace.
+    const Symb = P.regexp(/[^()\[\]{};`,\n\r\s]+/).map(ASTNode.symb);
+    // One or more ';' up to the end of the line; capture group 1 is the comment text.
+    // The line may end with LF, CRLF, a lone CR or the end of the input.
+    const Comment = P.regexp(/\s*;+([^\n\r]*)(?:\r?\n|\r|$)/, 1).map(ASTNode.comment);
+    const Args = Expr.many();
+    const List1 = Args.wrap('(', ')').map(args => ASTNode.list('(', args));
+    const List2 = Args.wrap('[', ']').map(args => ASTNode.list('[', args));
+    const List3 = Args.wrap('{', '}').map(args => ASTNode.list('{', args));
+    const List = P.alt(List1, List2, List3);
+
+    const Expressions: P<AST> = Expr.many();
+
+    function getAST(input: string) { return Expressions.tryParse(input); }
+
+    /** Converts one syntax tree node into an Expression; throws for comment nodes. */
+    function visitExpr(expr: ASTNode.Expression): Expression {
+        switch (expr.kind) {
+            case 'string': return expr.value;
+            case 'symbol': return visitSymbol(expr.value);
+            case 'list': return visitList(expr);
+            default: throw new Error('should not happen');
+        }
+    }
+
+    /**
+     * Converts a symbol token. A '.x' token is passed to B.atomName and a '_x' token to
+     * B.struct.type.elementSymbol; 'true', 'false' and numeric literals become constants;
+     * everything else becomes Expression.Symbol.
+     */
+    function visitSymbol(value: string): Expression {
+        if (value.length > 1) {
+            switch (value.charAt(0)) {
+                case '.': return B.atomName(value.substr(1));
+                case '_': return B.struct.type.elementSymbol([value.substr(1)]);
+            }
+        }
+        if (value === 'true') return true;
+        if (value === 'false') return false;
+        if (isNumber(value)) return +value;
+        return Expression.Symbol(value);
+    }
+
+    /**
+     * Converts a bracketed list: [...] goes to B.core.type.list, {...} to B.core.type.set
+     * and (...) applies its first node to the remaining ones. Comments are removed up
+     * front so that a comment is never used as the head of an application.
+     */
+    function visitList(list: ASTNode.List): Expression {
+        const nodes = withoutComments(list.nodes);
+        switch (list.bracket) {
+            case '[': return B.core.type.list(nodes.map(visitExpr));
+            case '{': return B.core.type.set(nodes.map(visitExpr));
+            case '(': {
+                if (nodes.length === 0) throw new Error('Cannot apply an empty list: a head expression is required after the opening bracket.');
+                return Expression.Apply(visitExpr(nodes[0]), getArgs(nodes));
+            }
+            default: throw new Error('should not happen');
+        }
+    }
+
+    /**
+     * Builds the arguments of an application from nodes[1..]; nodes[0] is the head.
+     * Without any ':name' symbol the result is an array of positional arguments.
+     * Otherwise every ':name' symbol is paired with the next non-comment node and the
+     * other nodes are stored under the numeric keys 0, 1, 2, ...
+     * Returns undefined when only the head is present.
+     */
+    function getArgs(nodes: ASTNode.Expression[]): Expression.Arguments | undefined {
+        if (nodes.length <= 1) return void 0;
+        if (!hasNamedArgs(nodes)) {
+            const args: Expression[] = [];
+            for (let i = 1, _i = nodes.length; i < _i; i++) {
+                const n = nodes[i];
+                if (n.kind === 'comment') continue;
+                args[args.length] = visitExpr(n);
+            }
+            return args;
+        }
+        const args: { [name: string]: Expression } = {};
+        let allNumeric = true;
+        let pos = 0;
+        for (let i = 1, _i = nodes.length; i < _i; i++) {
+            const n = nodes[i];
+            if (n.kind === 'comment') continue;
+            if (n.kind === 'symbol' && n.value.length > 1 && n.value.charAt(0) === ':') {
+                const name = n.value.substr(1);
+                ++i;
+                // Comments may sit between a name and its value.
+                while (i < _i && nodes[i].kind === 'comment') { i++; }
+                if (i >= _i) throw new Error(`There must be a value following the named arg ':${name}'.`);
+                args[name] = visitExpr(nodes[i]);
+                if (isNaN(+name)) allNumeric = false;
+            } else {
+                args[pos++] = visitExpr(n);
+            }
+        }
+        if (allNumeric) {
+            // Keys 0..n-1 without gaps are returned as a plain positional array.
+            const keys = Object.keys(args).map(a => +a).sort((a, b) => a - b);
+            if (keys.every((k, i) => k === i)) return keys.map(k => args[k]);
+        }
+        return args;
+    }
+
+    /** True when any node after the head is a ':name' symbol. */
+    function hasNamedArgs(nodes: ASTNode.Expression[]) {
+        for (let i = 1, _i = nodes.length; i < _i; i++) {
+            const n = nodes[i];
+            if (n.kind === 'symbol' && n.value.length > 1 && n.value.charAt(0) === ':') return true;
+        }
+        return false;
+    }
+
+    /** Returns the nodes without comments; the same array is returned when it has none. */
+    function withoutComments(nodes: ASTNode.Expression[]) {
+        if (!nodes.some(n => n.kind === 'comment')) return nodes;
+        return nodes.filter(n => n.kind !== 'comment');
+    }
+
+    /** True when the whole string is a number literal such as -1, 0.5 or 2e-3. */
+    function isNumber(value: string) {
+        return numberRegex.test(value);
+    }
+
+    /** Parses the input and converts each top-level form, skipping top-level comments. */
+    export function parse(input: string): Expression[] {
+        const ast = getAST(input);
+        const ret: Expression[] = [];
+        for (const expr of ast) {
+            if (expr.kind === 'comment') continue;
+            ret[ret.length] = visitExpr(expr);
+        }
+        return ret;
+    }
+}
\ No newline at end of file
diff --git a/src/mol-script/runtime/query/compiler.ts b/src/mol-script/runtime/query/compiler.ts
index aa3c6b652..14eb9fb7b 100644
--- 
a/src/mol-script/runtime/query/compiler.ts +++ b/src/mol-script/runtime/query/compiler.ts @@ -134,8 +134,8 @@ function _compile(ctx: QueryCompilerCtx, expression: Expression): CompiledQueryF } if (Expression.isSymbol(expression)) { - // TODO: is this ok in case of constants? - throw new Error('Cannot compile a symbol that is not applied.'); + // TODO: check for "nullary symbols" and automatically apply them? + return CompiledQueryFn.Const(expression.name); } if (!Expression.isSymbol(expression.head)) { diff --git a/src/perf-tests/mol-script.ts b/src/perf-tests/mol-script.ts index 5f6cfcc9e..dd0fcdbe0 100644 --- a/src/perf-tests/mol-script.ts +++ b/src/perf-tests/mol-script.ts @@ -4,24 +4,25 @@ import { QueryContext, Structure, StructureQuery, ModelPropertyDescriptor } from import { readCifFile, getModelsAndStructure } from '../apps/structure-info/model'; import { CustomPropSymbol } from 'mol-script/language/symbol'; import Type from 'mol-script/language/type'; +import { parseMolScript } from 'mol-script/language/parser'; +import * as util from 'util' // import Examples from 'mol-script/script/mol-script/examples' // import { parseMolScript } from 'mol-script/script/mol-script/parser' -// import * as util from 'util' // //import { compileAST } from 'mol-script/script/mol-script/compile'; // for (const e of Examples) { // const expr = parseMolScript(e.value)[0]; // console.log(e.name, util.inspect(expr, true, 10, true)); // } -// const exprs = parseMolScript(`(sel.atom.atom-groups -// :residue-test (= atom.auth_comp_id ALA) -// ;; ho ho ho -// :atom-test (set.has { _C _N } atom.el)) ; comm -// ;; this is a comment -// ((hi) (ho))`); +const exprs = parseMolScript(`(sel.atom.atom-groups + :residue-test (= atom.auth_comp_id ALA) + ;; ho ho ho + :atom-test (set.has { _C _N } atom.el)) ; comm + ;; this is a comment + ((hi) (ho))`); -// console.log(util.inspect(exprs, true, 10, true)); +console.log(util.inspect(exprs, true, 10, true)); // //console.log(expr); const expr = 
MolScriptBuilder.core.math.add([1, 2, 3]); @@ -44,7 +45,7 @@ const CustomProp = ModelPropertyDescriptor({ DefaultQueryRuntimeTable.addCustomProp(CustomProp); -async function testQ() { +export async function testQ() { const frame = await readCifFile('e:/test/quick/1cbs_updated.cif'); const { structure } = await getModelsAndStructure(frame); @@ -66,4 +67,4 @@ async function testQ() { console.log(result); } -testQ(); \ No newline at end of file +// testQ(); \ No newline at end of file -- GitLab