diff --git a/src/mol-script/compiler.ts b/src/mol-script/compiler.ts index 65af9dcfa9f87cb07fa7434e44db1b57903f29f2..fb62178c72fcd952fad80211949f6ab938536099 100644 --- a/src/mol-script/compiler.ts +++ b/src/mol-script/compiler.ts @@ -61,6 +61,11 @@ function compile(env: Environment, expression: Expression): CompileResult { return CompileResult.Const(expression); } + if (Expression.isSymbol(expression)) { + // TODO: this needs to look up in the symbol table. + return 0 as any; + } + const head = compile(env, expression.head); if (!expression.args) { return apply(env, head, [], true); diff --git a/src/mol-script/expression-formatter.ts b/src/mol-script/expression-formatter.ts index 71e5ca6bfebdc638ff0210f0ab8d33ddf275826e..fda92b8c24a12a8c1fe715941f60960ee250db0b 100644 --- a/src/mol-script/expression-formatter.ts +++ b/src/mol-script/expression-formatter.ts @@ -6,7 +6,7 @@ import Expression from './expression' -const { isLiteral, isArgumentsArray } = Expression; +const { isLiteral, isSymbol, isArgumentsArray } = Expression; export default function format(e: Expression) { const writer = new Writer(); @@ -76,6 +76,10 @@ function _format(e: Expression, writer: Writer) { else writer.append(`${e}`); return; } + if (isSymbol(e)) { + writer.append(`${e.name}`); + return; + } writer.push(); _format(e.head, writer); diff --git a/src/mol-script/expression.ts b/src/mol-script/expression.ts index c9ffdcb5b79b770be5b142be2a5a1adac3aeaaa2..5a5d41e4b1376103b415a1e6332f54cfa1183514 100644 --- a/src/mol-script/expression.ts +++ b/src/mol-script/expression.ts @@ -6,19 +6,23 @@ type Expression = | Expression.Literal + | Expression.Symbol | Expression.Apply namespace Expression { export type Literal = string | number | boolean + export type Symbol = { kind: 'symbol', name: string } export type Arguments = Expression[] | { [name: string]: Expression } export interface Apply { readonly head: Expression, readonly args?: Arguments } + export function Symbol(name: string): Symbol { return 
{ kind: 'symbol', name }; } export function Apply(head: Expression, args?: Arguments): Apply { return args ? { head, args } : { head }; } export function isArgumentsArray(e: Arguments): e is Expression[] { return e instanceof Array; } export function isArgumentsMap(e: Arguments): e is { [name: string]: Expression } { return !(e instanceof Array); } export function isLiteral(e: Expression): e is Expression.Literal { return !isApply(e); } - export function isApply(e: Expression): e is Expression.Apply { return !! e && !!(e as Expression.Apply).head && typeof e === 'object'; } + export function isApply(e: Expression): e is Expression.Apply { return !!e && !!(e as Expression.Apply).head && typeof e === 'object'; } + export function isSymbol(e: Expression): e is Expression.Symbol { return !!e && (e as any).kind === 'symbol' } } export default Expression \ No newline at end of file diff --git a/src/mol-script/script/mol-script/compile.ts b/src/mol-script/script/mol-script/compile.ts deleted file mode 100644 index 910ee70a24988fab6fe9edf060fb5fa4628e3053..0000000000000000000000000000000000000000 --- a/src/mol-script/script/mol-script/compile.ts +++ /dev/null @@ -1 +0,0 @@ -// TODO: compilation step from lisp AST \ No newline at end of file diff --git a/src/mol-script/script/mol-script/parser.ts b/src/mol-script/script/mol-script/parser.ts index 3ed9e3a94421265d71694fc0a1a24b0ec01a8fe2..03a494f6391fb64dcb1598dee0eeae237fa19356 100644 --- a/src/mol-script/script/mol-script/parser.ts +++ b/src/mol-script/script/mol-script/parser.ts @@ -1,122 +1,178 @@ /** * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info. 
* - * @author Alexander Rose <alexanderose@weirdbyte.de> * @author David Sehnal <david.sehnal@gmail.com> */ -// TODO: add "lisp AST" which is then compiled to mol-script - import { MonadicParser as P } from 'mol-util/monadic-parser' - -import Parser from '../parser' import Expression from '../../expression' -// import { MolScriptSymbol } from './symbols' import B from '../../builder' -const ws = P.regexp(/[\n\r\s]*/) - -// function getSymbolExpression(s: MolScriptSymbol, args?: any) { -// switch (s.kind) { -// case 'alias': return args ? Expression.Apply(s.symbol.id, args) : Expression.Apply(s.symbol.id); -// case 'macro': return s.translate(args); -// } -// } - -namespace Language { - const Expr = P.lazy(() => P.seq(Identifier, ArgList, NamedArgList)); - - const Arg: P<Expression> = P.lazy(() => P.seq( - P.lookahead(P.test(ch => ch !== ':')), - P.alt( - // order matters - AtomName, - ElementSymbol, - Bool, - Num, - Identifier, - QuotedStr, - ListSymbol, - SetSymbol, - List - ) - ).map((x: any) => x[1]).trim(ws)); - - const ArgList = Arg.many(); - const ArgName = P.regexp(/:([a-zA-Z0-9_.-]+)/, 1).trim(ws).desc('arg-name'); - - const NamedArg = P.seq(ArgName, Arg).trim(ws); - - const NamedArgList = NamedArg.many().map(xs => { - const namedArgs: { [key: string]: any } = {} - xs.forEach((a: any) => { namedArgs[a[0]] = a[1] }) - return namedArgs - }); - - const Identifier = P.regexp(/[^\s'`,@()\[\]{}';:]+/) - //.map(id => Expression.Apply(B.core.type.identifier.id, void 0)) - .desc('identifier') - - const QuotedStr = P.regexp(/[^`]*/).trim(P.string('`')) - .desc('quoted-string'); - - const Num = P.regexp(/-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?/) - .map(v => +v) - .desc('number'); - - const Bool = P.alt( - P.regexp(/true/i).result(true), - P.regexp(/false/i).result(false) - ).desc('boolean'); - - // '[a, b, c]' => core.list([a, b, c]) - const ListSymbol = ArgList - .wrap(P.string('['), P.string(']')) - .map(B.core.type.list) - .desc('list-symbol'); - - // '{a, 
b, c}' => core.set([a, b, c]) - const SetSymbol = ArgList - .wrap(P.string('{'), P.string('}')) - .map(B.core.type.set) - .desc('set-symbol'); - - // _XYZ -> type.elementSymbol XYZ - const ElementSymbol = P.string('_') - .then(P.regexp(/[0-9a-zA-Z]+/)) - .map(x => B.struct.type.elementSymbol([x])) - .desc('element-symbol'); - - // '.e' => struct.type.atomName(e) - const AtomName = P.string('.') - .then(P.alt(Identifier, QuotedStr, Num)) - .map(v => B.atomName('' + v)) - .desc('identifier'); - - const List = Expr - .wrap(P.string('('), P.string(')')) - .map(x => { - // const array: any[] = x[1]; - // const named: any = x[2]; - - return 0 as any; - - // if (named && Object.keys(named).length) { - // if (array) { - // for (let i = 0; i < array.length; i++) named[i] = array[i]; - // } - // return getSymbolExpression(x[0], named); - // } else if (array && array.length) { - // return getSymbolExpression(x[0], x[1]); - // } else { - // return getSymbolExpression(x[0]) - // } - }) - .desc('list'); - - export const Query = List.trim(ws) +export function parseMolScript(input: string) { + return Language.parse(input); } -const reComment = /;[^\n\r]*[\n\r]/g -const transpiler: Parser = str => Language.Query.tryParse(str.replace(reComment, '\n')) -export default transpiler +namespace Language { + type AST = ASTNode.Expression[] + + namespace ASTNode { + export type Expression = Str | Symb | List | Comment + + export interface Str { + kind: 'string', + value: string + } + + export interface Symb { + kind: 'symbol', + value: string + } + + export interface List { + kind: 'list', + bracket: '(' | '[' | '{', + nodes: Expression[] + } + + export interface Comment { + kind: 'comment', + value: string + } + + export function str(value: string): Str { return { kind: 'string', value }; } + export function symb(value: string): Symb { return { kind: 'symbol', value }; } + export function list(bracket: '(' | '[' | '{', nodes: Expression[]): List { return { kind: 'list', bracket, nodes }; } 
+ export function comment(value: string): Comment { return { kind: 'comment', value } } + } + + const ws = P.regexp(/[\n\r\s]*/); + const Expr: P<ASTNode.Expression> = P.lazy(() => (P.alt(Str, List, Symb, Comment).trim(ws))); + const Str = P.takeWhile(c => c !== '`').trim('`').map(ASTNode.str); + const Symb = P.regexp(/[^()\[\]{};`,\n\r\s]+/).map(ASTNode.symb); + const Comment = P.regexp(/\s*;+([^\n\r]*)\n/, 1).map(ASTNode.comment); + const Args = Expr.many(); + const List1 = Args.wrap('(', ')').map(args => ASTNode.list('(', args)); + const List2 = Args.wrap('[', ']').map(args => ASTNode.list('[', args)); + const List3 = Args.wrap('{', '}').map(args => ASTNode.list('{', args)); + const List = P.alt(List1, List2, List3); + + const Expressions: P<AST> = Expr.many(); + + function getAST(input: string) { return Expressions.tryParse(input); } + + function visitExpr(expr: ASTNode.Expression): Expression { + switch (expr.kind) { + case 'string': return expr.value; + case 'symbol': { + const value = expr.value; + if (value.length > 1) { + const fst = value.charAt(0); + switch (fst) { + case '.': return B.atomName(value.substr(1)); + case '_': return B.struct.type.elementSymbol([value.substr(1)]); + } + } + if (value === 'true') return true; + if (value === 'false') return false; + if (isNumber(value)) return +value; + return Expression.Symbol(value); + } + case 'list': { + switch (expr.bracket) { + case '[': return B.core.type.list(withoutComments(expr.nodes).map(visitExpr)); + case '{': return B.core.type.set(withoutComments(expr.nodes).map(visitExpr)); + case '(': { + const head = visitExpr(expr.nodes[0]); + return Expression.Apply(head, getArgs(expr.nodes)); + } + } + return 0 as any; + } + default: { + throw new Error('should not happen'); + } + } + } + + function getArgs(nodes: ASTNode.Expression[]): Expression.Arguments | undefined { + if (nodes.length <= 1) return void 0; + if (!hasNamedArgs(nodes)) { + const args: Expression[] = []; + for (let i = 1, _i = 
nodes.length; i < _i; i++) { + const n = nodes[i]; + if (n.kind === 'comment') continue; + args[args.length] = visitExpr(n); + } + return args; + } + const args: { [name: string]: Expression } = {}; + let allNumeric = true; + let pos = 0; + for (let i = 1, _i = nodes.length; i < _i; i++) { + const n = nodes[i]; + if (n.kind === 'comment') continue; + if (n.kind === 'symbol' && n.value.length > 1 && n.value.charAt(0) === ':') { + const name = n.value.substr(1); + ++i; + while (i < _i && nodes[i].kind === 'comment') { i++; } + if (i >= _i) throw new Error(`There must be a value foolowed a named arg ':${name}'.`); + args[name] = visitExpr(nodes[i]); + if (isNaN(+name)) allNumeric = false; + } else { + args[pos++] = visitExpr(n); + } + } + if (allNumeric) { + const keys = Object.keys(args).map(a => +a).sort((a, b) => a - b); + let isArray = true; + for (let i = 0, _i = keys.length; i < _i; i++) { + if (keys[i] !== i) { + isArray = false; + break; + } + } + if (isArray) { + const arrayArgs: Expression[] = []; + for (let i = 0, _i = keys.length; i < _i; i++) { + arrayArgs[i] = args[i]; + } + return arrayArgs; + } + } + return args; + } + + function hasNamedArgs(nodes: ASTNode.Expression[]) { + for (let i = 1, _i = nodes.length; i < _i; i++) { + const n = nodes[i]; + if (n.kind === 'symbol' && n.value.length > 1 && n.value.charAt(0) === ':') return true; + } + return false; + } + + function withoutComments(nodes: ASTNode.Expression[]) { + let hasComment = false; + for (let i = 0, _i = nodes.length; i < _i; i++) { + if (nodes[i].kind === 'comment') { + hasComment = true; + break; + } + } + if (!hasComment) return nodes; + return nodes.filter(n => n.kind !== 'comment'); + } + + function isNumber(value: string) { + return /-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?/.test(value); + } + + export function parse(input: string): Expression[] { + const ast = getAST(input); + const ret: Expression[] = []; + for (const expr of ast) { + if (expr.kind === 'comment') continue; + 
ret[ret.length] = visitExpr(expr); + } + return ret; + } +} \ No newline at end of file diff --git a/src/mol-script/symbol.ts b/src/mol-script/symbol.ts index 34630be3fa2e936aef935d3ac68e19c024628c0b..d79b6764b64a2045c4aae8d8bf8616f1405ef794 100644 --- a/src/mol-script/symbol.ts +++ b/src/mol-script/symbol.ts @@ -64,7 +64,7 @@ interface Symbol<A extends Arguments = Arguments, T extends Type = Type> { function Symbol<A extends Arguments, T extends Type>(name: string, args: A, type: T, description?: string) { const symbol: Symbol<A, T> = function(args: ExpressionArguments<A['@type']>) { - return Expression.Apply(symbol.id, args as any); + return Expression.Apply(Expression.Symbol(symbol.id), args as any); } as any; symbol.info = { namespace: '', name, description }; symbol.id = ''; diff --git a/src/mol-util/monadic-parser.ts b/src/mol-util/monadic-parser.ts index e5eba1fd56c664dbaefc963df5eee38db8033993..9438c0782b6713496db8058ef83e0c45f42f85e6 100644 --- a/src/mol-util/monadic-parser.ts +++ b/src/mol-util/monadic-parser.ts @@ -35,12 +35,15 @@ export class MonadicParser<A> { return MonadicParser.alt(this, alternative); } - trim<B>(parser: MonadicParser<B>): MonadicParser<A> { + trim<B>(parser: MonadicParser<B> | string): MonadicParser<A> { return this.wrap(parser, parser); } - wrap<L, R>(leftParser: MonadicParser<L>, rightParser: MonadicParser<R>): MonadicParser<A> { - return seqPick(1, leftParser, this, rightParser); + wrap<L, R>(leftParser: MonadicParser<L> | string, rightParser: MonadicParser<R> | string): MonadicParser<A> { + return seqPick(1, + typeof leftParser === 'string' ? MonadicParser.string(leftParser) : leftParser, + this, + typeof rightParser === 'string' ? 
MonadicParser.string(rightParser) : rightParser); } thru<B>(wrapper: (p: MonadicParser<A>) => MonadicParser<B>) { diff --git a/src/perf-tests/mol-script.ts b/src/perf-tests/mol-script.ts index 0f55c7883004a2e9e1d4afaa33f97f1c425af6b3..3b488f5f59bd9bc46e52b174c75213dfaa6ebae5 100644 --- a/src/perf-tests/mol-script.ts +++ b/src/perf-tests/mol-script.ts @@ -1,5 +1,18 @@ import Examples from 'mol-script/script/mol-script/examples' -import parse from 'mol-script/script/mol-script/parser' +import { parseMolScript } from 'mol-script/script/mol-script/parser' +import * as util from 'util' +//import { compileAST } from 'mol-script/script/mol-script/compile'; -const expr = parse(Examples[Examples.length - 1].value); -console.log(expr); \ No newline at end of file +for (const e of Examples) { + const expr = parseMolScript(e.value)[0]; + console.log(e.name, util.inspect(expr, true, 10, true)); +} +// const exprs = parseMolScript(`(sel.atom.atom-groups +// :residue-test (= atom.auth_comp_id ALA) +// ;; ho ho ho +// :atom-test (set.has { _C _N } atom.el)) ; comm +// ;; this is a comment +// ((hi) (ho))`); + +// console.log(util.inspect(exprs, true, 10, true)); +// //console.log(expr); \ No newline at end of file