diff --git a/src/mol-script/core-symbols.ts b/src/mol-script/symbol-table/core.ts similarity index 97% rename from src/mol-script/core-symbols.ts rename to src/mol-script/symbol-table/core.ts index 67dd55ca4c4c077738192a299eefefa5aab20b79..6c20d0c00887fd5be338aa2a485cad9c7f9e1a59 100644 --- a/src/mol-script/core-symbols.ts +++ b/src/mol-script/symbol-table/core.ts @@ -1,12 +1,12 @@ -/* +/** * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info. * * @author David Sehnal <david.sehnal@gmail.com> */ -import Type from './type' -import Symbol, { Arguments, Argument } from './symbol' -import { symbol, normalizeTable, symbolList } from './helpers' +import Type from '../type' +import Symbol, { Arguments, Argument } from '../symbol' +import { symbol, normalizeTable, symbolList } from '../helpers' export namespace Types { export type List<T = any> = ArrayLike<T> diff --git a/src/mol-script/symbol-table/structure-query.ts b/src/mol-script/symbol-table/structure-query.ts new file mode 100644 index 0000000000000000000000000000000000000000..5ce9110e49611b5518af323c6e5896c340fe2b06 --- /dev/null +++ b/src/mol-script/symbol-table/structure-query.ts @@ -0,0 +1,320 @@ +/** + * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info. 
/**
 * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

import Type from '../type'
import * as Core from './core'
import { Arguments, Argument } from '../symbol'
import { symbol } from '../helpers'

/** Value, enumeration and flag types used by the structure query symbols below. */
export namespace Types {
    export const ElementSymbol = Type.Value('Structure', 'ElementSymbol');
    export const AtomName = Type.Value('Structure', 'AtomName');

    export const BondFlag = Type.OneOf('Structure', 'BondFlag', Type.Str, ['covalent', 'metallic', 'ion', 'hydrogen', 'sulfide', 'computed', 'aromatic']);
    export const BondFlags = Core.Types.Flags(BondFlag, 'BondFlags');

    export const SecondaryStructureFlag = Type.OneOf('Structure', 'SecondaryStructureFlag', Type.Str, ['alpha', 'beta', '3-10', 'pi', 'sheet', 'strand', 'helix', 'turn', 'none']);
    // Named in the plural, mirroring BondFlag/BondFlags, so the flags type does
    // not share a name with the single-flag enumeration above.
    export const SecondaryStructureFlags = Core.Types.Flags(SecondaryStructureFlag, 'SecondaryStructureFlags');

    export const RingFingerprint = Type.Value('Structure', 'RingFingerprint');
    export const EntityType = Type.OneOf('Structure', 'EntityType', Type.Str, ['polymer', 'non-polymer', 'water', 'unknown']);
    export const ResidueId = Type.Value('Structure', 'ResidueId');

    export const ElementSet = Type.Value('Structure', 'ElementSet');
    export const ElementSelection = Type.Value('Structure', 'ElementSelection');
    export const ElementReference = Type.Value('Structure', 'ElementReference');

    export const ElementSelectionQuery = Core.Types.Fn(ElementSelection, 'ElementSelectionQuery');
}

/** Symbols that construct values of the types declared in `Types`. */
const type = {
    '@header': 'Types',
    elementSymbol: symbol(
        Arguments.Dictionary({ 0: Argument(Type.Str) }),
        Types.ElementSymbol, 'Create element symbol representation from a string value.'),

    atomName: symbol(
        Arguments.Dictionary({ 0: Argument(Type.AnyValue) }), Types.AtomName, 'Convert a value to an atom name.'),

    entityType: symbol(
        Arguments.Dictionary({ 0: Argument(Types.EntityType) }),
        Types.EntityType,
        `Create normalized representation of entity type: ${Type.oneOfValues(Types.EntityType).join(', ')}.`),

    bondFlags: symbol(
        Arguments.List(Types.BondFlag),
        Types.BondFlags,
        `Create bond flags representation from a list of strings. Allowed flags: ${Type.oneOfValues(Types.BondFlag).join(', ')}.`),

    ringFingerprint: symbol(
        Arguments.List(Types.ElementSymbol, { nonEmpty: true }),
        Types.RingFingerprint,
        'Create ring fingerprint from the supplied atom element list.'),

    secondaryStructureFlags: symbol(
        Arguments.List(Types.SecondaryStructureFlag),
        Types.SecondaryStructureFlags,
        `Create secondary structure flags representation from a list of strings. Allowed flags: ${Type.oneOfValues(Types.SecondaryStructureFlag).join(', ')}.`),

    authResidueId: symbol(Arguments.Dictionary({
        0: Argument(Type.Str, { description: 'auth_asym_id' }),
        1: Argument(Type.Num, { description: 'auth_seq_id' }),
        2: Argument(Type.Str, { description: 'pdbx_PDB_ins_code', isOptional: true })
    }), Types.ResidueId, `Residue identifier based on "auth_" annotation.`),
    labelResidueId: symbol(Arguments.Dictionary({
        0: Argument(Type.Str, { description: 'label_entity_id' }),
        1: Argument(Type.Str, { description: 'label_asym_id' }),
        2: Argument(Type.Num, { description: 'label_seq_id' }),
        3: Argument(Type.Str, { description: 'pdbx_PDB_ins_code', isOptional: true })
    }), Types.ResidueId, `Residue identifier based on mmCIF's "label_" annotation.`)
};

/** Argument-less symbols that expose the current iteration state. */
const slot = {
    '@header': 'Iteration Slots',
    element: symbol(Arguments.None, Types.ElementReference, 'A reference to the current element.'),
    elementSetReduce: symbol(Arguments.None, Type.Variable('a', Type.AnyValue, true), 'Current value of the element set reducer.')
}

/** Symbols that produce an element selection query. */
const generator = {
    '@header': 'Generators',
    all: symbol(Arguments.None, Types.ElementSelectionQuery, 'The entire structure.'),

    atomGroups: symbol(Arguments.Dictionary({
        'entity-test': Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test for the 1st atom of every entity' }),
        'chain-test': Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test for the 1st atom of every chain' }),
        'residue-test': Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test for the 1st atom of every residue' }),
        'atom-test': Argument(Type.Bool, { isOptional: true, defaultValue: true }),
        'group-by': Argument(Type.Any, { isOptional: true, defaultValue: `atom-key`, description: 'Group atoms to sets based on this property. Default: each atom has its own set' }),
    }), Types.ElementSelectionQuery, 'Return all atoms for which the tests are satisfied, grouped into sets.'),

    rings: symbol(Arguments.List(Types.RingFingerprint), Types.ElementSelectionQuery, 'Return rings with the specified fingerprint(s). If no fingerprints are given, return all rings.'),

    queryInSelection: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        query: Argument(Types.ElementSelectionQuery),
        'in-complement': Argument(Type.Bool, { isOptional: true, defaultValue: false })
    }), Types.ElementSelectionQuery, 'Executes query only on atoms that are in the source selection.'),

    empty: symbol(Arguments.None, Types.ElementSelectionQuery, 'Nada.'),
}

/** Symbols that take a selection query (argument `0`) and return a modified selection query. */
const modifier = {
    '@header': 'Selection Modifications',

    queryEach: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        query: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, 'Query every atom set in the input selection separately.'),

    intersectBy: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        by: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, 'Intersect each atom set from the first sequence from atoms in the second one.'),

    exceptBy: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        by: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, `Remove all atoms from 'selection' that occur in 'by'.`),

    unionBy: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        by: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, 'For each atom set A in the original sequence, combine all atom sets in the target selection that intersect with A.'),

    union: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, 'Collects all atom sets in the sequence into a single atom set.'),

    cluster: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        'min-distance': Argument(Type.Num, { isOptional: true, defaultValue: 0 }),
        'max-distance': Argument(Type.Num),
        'min-size': Argument(Type.Num, { description: 'Minimal number of sets to merge, must be at least 2', isOptional: true, defaultValue: 2 }),
        'max-size': Argument(Type.Num, { description: 'Maximal number of sets to merge, if not set, no limit', isOptional: true }),
    }), Types.ElementSelectionQuery, 'Combines atom sets that have mutual distance in the interval [min-distance, max-distance]. Minimum/maximum size determines how many atom sets can be combined.'),

    includeSurroundings: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        radius: Argument(Type.Num),
        'atom-radius': Argument(Type.Num, { isOptional: true, defaultValue: 0, description: 'Value added to each atom before the distance check, for example VDW radius. Using this argument is computationally demanding.' }),
        'as-whole-residues': Argument(Type.Bool, { isOptional: true })
    }), Types.ElementSelectionQuery, 'For each atom set in the selection, include all surrounding atoms/residues that are within the specified radius.'),

    includeConnected: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        // The default is a textual placeholder rather than a Bool value, hence the cast.
        'bond-test': Argument(Type.Bool, { isOptional: true, defaultValue: 'true for covalent bonds' as any }),
        'layer-count': Argument(Type.Num, { isOptional: true, defaultValue: 1, description: 'Number of bonded layers to include.' }),
        'as-whole-residues': Argument(Type.Bool, { isOptional: true })
    }), Types.ElementSelectionQuery, 'Pick all atom sets that are connected to the target.'),

    expandProperty: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        property: Argument(Type.AnyValue)
    }), Types.ElementSelectionQuery, 'To each atom set in the selection, add all atoms that have the same property value that was already present in the set.')
}

/** Symbols that keep only the atom sets of a selection (argument `0`) that pass a condition. */
const filter = {
    '@header': 'Selection Filters',
    pick: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        test: Argument(Type.Bool)
    }), Types.ElementSelectionQuery, 'Pick all atom sets that satisfy the test.'),

    withSameAtomProperties: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        source: Argument(Types.ElementSelectionQuery),
        property: Argument(Type.Any)
    }), Types.ElementSelectionQuery, 'Pick all atom sets for which the set of given atom properties is a subset of the source properties.'),

    intersectedBy: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        by: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, 'Pick all atom sets that have non-zero intersection with the target.'),

    within: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        target: Argument(Types.ElementSelectionQuery),
        'min-radius': Argument(Type.Num, { isOptional: true, defaultValue: 0 }),
        'max-radius': Argument(Type.Num),
        'atom-radius': Argument(Type.Num, { isOptional: true, defaultValue: 0, description: 'Value added to each atom before the distance check, for example VDW radius. Using this argument is computationally demanding.' }),
        invert: Argument(Type.Bool, { isOptional: true, defaultValue: false, description: 'If true, pick only atom sets that are further than the specified radius.' }),
    }), Types.ElementSelectionQuery, 'Pick all atom sets from selection that have any atom within the radius of any atom from target.'),

    isConnectedTo: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        target: Argument(Types.ElementSelectionQuery),
        // The default is a textual placeholder rather than a Bool value, hence the cast.
        'bond-test': Argument(Type.Bool, { isOptional: true, defaultValue: 'true for covalent bonds' as any }),
        disjunct: Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'If true, there must exist a bond to an atom that lies outside the given atom set to pass test.' }),
        invert: Argument(Type.Bool, { isOptional: true, defaultValue: false, description: 'If true, return atom sets that are not connected.' })
    }), Types.ElementSelectionQuery, 'Pick all atom sets that are connected to the target.'),
}

/** Symbols that combine several selection queries into one. */
const combinator = {
    '@header': 'Selection Combinators',
    intersect: symbol(Arguments.List(Types.ElementSelectionQuery), Types.ElementSelectionQuery, 'Return all unique atom sets that appear in all of the source selections.'),
    merge: symbol(Arguments.List(Types.ElementSelectionQuery), Types.ElementSelectionQuery, 'Merges multiple selections into a single one. Only unique atom sets are kept.'),
    distanceCluster: symbol(Arguments.Dictionary({
        matrix: Argument(Core.Types.List(Core.Types.List(Type.Num)), { description: 'Distance matrix, represented as list of rows (num[][]). Lower triangle is min distance, upper triangle is max distance.' }),
        selections: Argument(Core.Types.List(Types.ElementSelectionQuery), { description: 'A list of held selections.' })
    }), Types.ElementSelectionQuery, 'Pick combinations of atom sets from the source sequences that are mutually within distances specified by a matrix.')
}

/** Symbols evaluated against the current atom set. */
const atomSet = {
    '@header': 'Atom Sets',

    atomCount: symbol(Arguments.None, Type.Num),

    countQuery: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery)
    }), Type.Num, 'Counts the number of occurrences of a specific query inside the current atom set.'),

    // NOTE(review): the description below mentions "slot.atom-set-reduce" while the
    // slot declared in this file is `elementSetReduce` — confirm the published name.
    reduce: symbol(Arguments.Dictionary({
        initial: Argument(Type.Variable('a', Type.AnyValue, true), { description: 'Initial value assigned to slot.atom-set-reduce. Current atom is set to the 1st atom of the current set for this.' }),
        value: Argument(Type.Variable('a', Type.AnyValue, true), { description: 'Expression executed for each atom in the set' })
    }), Type.Variable('a', Type.AnyValue, true), 'Execute the value expression for each atom in the current atom set and return the result. Works the same way as Array.reduce in JavaScript (``result = value(value(...value(initial)))``)'),

    propertySet: symbol(Arguments.Dictionary({
        0: Argument(Core.Types.ConstrainedVar),
    }), Core.Types.Set(Core.Types.ConstrainedVar), 'Returns a set with all values of the given property in the current atom set.'),
}

/** Per-atom property symbols; most are built with `atomProp` and take an optional element reference. */
const atomProperty = {
    '@header': 'Atom Properties',

    core: {
        '@header': 'Core Properties',

        elementSymbol: atomProp(Types.ElementSymbol),

        vdw: atomProp(Type.Num, 'Van der Waals radius'),
        mass: atomProp(Type.Num, 'Atomic weight'),
        atomicNumber: atomProp(Type.Num, 'Atomic number'),

        x: atomProp(Type.Num, 'Cartesian X coordinate'),
        y: atomProp(Type.Num, 'Cartesian Y coordinate'),
        z: atomProp(Type.Num, 'Cartesian Z coordinate'),

        atomKey: atomProp(Type.AnyValue, 'Unique value for each atom. Main use case is grouping of atoms.'),

        // NOTE(review): the default 'slot.current-atom' does not match the `element`
        // slot declared in this file — confirm the intended slot name.
        bondCount: symbol(Arguments.Dictionary({
            0: Argument(Types.ElementReference, { isOptional: true, defaultValue: 'slot.current-atom' }),
            flags: Argument(Types.BondFlags, { isOptional: true, defaultValue: 'covalent' as any }),
        }), Type.Num, 'Number of bonds (by default only covalent bonds are counted).')
    },

    topology: {
        connectedComponentKey: atomProp(Type.AnyValue, 'Unique value for each connected component.')
    },

    macromolecular: {
        '@header': 'Macromolecular Properties (derived from the mmCIF format)',

        authResidueId: atomProp(Types.ResidueId, `type.auth-residue-id symbol executed on current atom's residue`),
        labelResidueId: atomProp(Types.ResidueId, `type.label-residue-id symbol executed on current atom's residue`),

        residueKey: atomProp(Type.AnyValue, 'Unique value for each tuple ``(label_entity_id,auth_asym_id,auth_seq_id,pdbx_PDB_ins_code)``, main use case is grouping of atoms'),
        chainKey: atomProp(Type.AnyValue, 'Unique value for each tuple ``(label_entity_id,auth_asym_id)``, main use case is grouping of atoms'),
        entityKey: atomProp(Type.AnyValue, 'Unique value for each tuple ``label_entity_id``, main use case is grouping of atoms'),

        isHet: atomProp(Type.Bool, 'Equivalent to atom_site.group_PDB !== ATOM'),

        id: atomProp(Type.Num, '_atom_site.id'),

        label_atom_id: atomProp(Types.AtomName),
        label_alt_id: atomProp(Type.Str),
        label_comp_id: atomProp(Type.Str),
        label_asym_id: atomProp(Type.Str),
        label_entity_id: atomProp(Type.Str),
        label_seq_id: atomProp(Type.Num),

        auth_atom_id: atomProp(Types.AtomName),
        auth_comp_id: atomProp(Type.Str),
        auth_asym_id: atomProp(Type.Str),
        auth_seq_id: atomProp(Type.Num),

        pdbx_PDB_ins_code: atomProp(Type.Str),
        pdbx_formal_charge: atomProp(Type.Num),

        occupancy: atomProp(Type.Num),
        B_iso_or_equiv: atomProp(Type.Num),

        entityType: atomProp(Types.EntityType, 'Type of the entity as defined in mmCIF (polymer, non-polymer, water, unknown)'),

        secondaryStructureKey: atomProp(Type.AnyValue, 'Unique value for each secondary structure element.'),
        secondaryStructureFlags: atomProp(Types.SecondaryStructureFlags),

        isModified: atomProp(Type.Bool, 'True if the atom belongs to modification of a standard residue.'),
        modifiedParentName: atomProp(Type.Str, `'3-letter' code of the modified parent residue.`),
    }
}

/** Per-bond property symbols, built with `bondProp` (no arguments). */
const bondProperty = {
    '@header': 'Bond Properties',

    flags: bondProp(Types.BondFlags),
    order: bondProp(Type.Num)
}

/**
 * Declare an atom property symbol of the given result type.
 * The symbol takes one optional positional argument: an element reference.
 */
function atomProp(type: Type, description?: string) {
    // NOTE(review): default 'slot.current-atom' does not match the `element` slot
    // declared in this file — confirm the intended slot name.
    return symbol(Arguments.Dictionary({ 0: Argument(Types.ElementReference, { isOptional: true, defaultValue: 'slot.current-atom' }) }), type, description);
}

/** Declare a bond property symbol of the given result type; it takes no arguments. */
function bondProp(type: Type, description?: string) {
    return symbol(Arguments.None, type, description);
}

/** The complete structure query symbol table, grouped by section. */
export default {
    '@header': 'Structure Queries',
    type,
    slot,
    generator,
    modifier,
    filter,
    combinator,
    atomSet,
    atomProperty,
    bondProperty
}
/**
 * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

import { MonadicParser as P } from '../monadic-parser'

// Smoke tests for the basic MonadicParser combinators.
describe('parser', () => {
    it('string', () => {
        const abc = P.string('abc');
        const exact = abc.parse('abc');
        const prefixed = abc.parse('cabc');
        expect(exact.success).toBe(true);
        expect(prefixed.success).toBe(false);
    });

    it('alt', () => {
        const abcOr123 = P.alt(P.string('abc'), P.string('123'));
        const first = abcOr123.parse('abc');
        const second = abcOr123.parse('123');
        // Trailing input must make the whole parse fail.
        const trailing = abcOr123.parse('123a');
        expect(first.success).toBe(true);
        expect(second.success).toBe(true);
        expect(trailing.success).toBe(false);
    });

    it('trim', () => {
        const trimmed = P.string('abc').trim(P.whitespace);
        const value = trimmed.tryParse(' abc ');
        expect(value).toBe('abc');
    });

    it('wrap', () => {
        const open = P.string('(');
        const close = P.string(')');
        const wrapped = P.string('abc').wrap(open, close);
        expect(wrapped.tryParse('(abc)')).toBe('abc');
    });

    it('then', () => {
        const sequence = P.string('abc').then(P.string('123'));
        // `then` keeps the value of the second parser.
        expect(sequence.tryParse('abc123')).toBe('123');
    });

    it('many', () => {
        const ones = P.string('1').many();
        const values = ones.tryParse('111');
        expect(values).toEqual(['1', '1', '1']);
    });

    it('times', () => {
        const twoOnes = P.string('1').times(2);
        const values = twoOnes.tryParse('11');
        expect(values).toEqual(['1', '1']);
    });

    it('sepBy', () => {
        const comma = P.string(',');
        const numbers = P.sepBy(P.digits, comma).map(xs => xs.map(x => +x));
        expect(numbers.tryParse('1,2,3,4')).toEqual([1, 2, 3, 4]);
    });
});
/**
 * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

/**
 * Adapted from Parsimmon (https://github.com/jneen/parsimmon)
 * Copyright (c) 2011-present J. Adkisson (http://jneen.net).
 */

/**
 * A parser combinator. Wraps an action `_` that is run on an input string at a
 * character offset and returns either a success (value and next offset) or a
 * failure (furthest offset reached and the list of expected descriptions).
 */
export class MonadicParser<A> {
    constructor(public _: MonadicParser.Action<A>) { }

    /**
     * Run this parser on the whole input; the input must be consumed up to EOF.
     * @returns a success with the parsed value, or a failure carrying the
     *          line/column index and the expected descriptions.
     */
    parse(input: string): MonadicParser.ParseResult<A> {
        const result = this.skip(MonadicParser.eof)._(input, 0);
        if (result.status) {
            return { success: true, value: result.value };
        }
        return { success: false, index: makeLineColumnIndex(input, result.furthest), expected: result.expected };
    }

    /**
     * Like `parse`, but returns the value directly.
     * @throws Error with a formatted "expected ... got ..." message on failure.
     */
    tryParse(str: string): A {
        const result = this.parse(str);
        if (result.success) {
            return result.value;
        } else {
            throw new Error(formatError(str, result));
        }
    }

    /** Try this parser, then `alternative` if this one fails. */
    or<B>(alternative: MonadicParser<B>): MonadicParser<A | B> {
        return MonadicParser.alt(this, alternative);
    }

    /** Run `parser` before and after this parser, keeping only this parser's value. */
    trim<B>(parser: MonadicParser<B>): MonadicParser<A> {
        return this.wrap(parser, parser);
    }

    /** Run `leftParser`, this parser, then `rightParser`; keep only this parser's value. */
    wrap<L, R>(leftParser: MonadicParser<L>, rightParser: MonadicParser<R>): MonadicParser<A> {
        return seqPick(1, leftParser, this, rightParser);
    }

    /** Apply `wrapper` to this parser and return whatever it returns. */
    thru<B>(wrapper: (p: MonadicParser<A>) => MonadicParser<B>) {
        return wrapper(this);
    }

    /** Run this parser followed by `next`; keep the value of `next`. */
    then<B>(next: MonadicParser<B>): MonadicParser<B> {
        return seqPick(1, this, next);
    }

    /**
     * Apply this parser repeatedly until it fails and collect the values.
     * @throws Error if an iteration succeeds without consuming input.
     */
    many(): MonadicParser<A[]> {
        return new MonadicParser<A[]>((input, i) => {
            const accum: A[] = [];
            let result: MonadicParser.Result<A> | undefined = void 0;

            while (true) {
                result = mergeReplies(this._(input, i), result);
                if (result.status) {
                    if (i === result.index) {
                        throw new Error('infinite loop detected in .many() parser --- calling .many() on a parser which can accept zero characters is usually the cause');
                    }
                    i = result.index;
                    accum.push(result.value);
                } else {
                    return mergeReplies(makeSuccess(i, accum), result);
                }
            }
        });
    }

    /**
     * Apply this parser at least `min` and at most `_max` times (exactly `min`
     * times when `_max` is omitted) and collect the values.
     */
    times(min: number, _max?: number): MonadicParser<A[]> {
        const max = typeof _max === 'undefined' ? min : _max;
        return new MonadicParser((input, i) => {
            const accum: A[] = [];
            let result: MonadicParser.Result<A> | undefined = void 0;
            let prevResult: MonadicParser.Result<A> | undefined = void 0;
            let times: number;
            // Mandatory repetitions: any failure fails the whole parser.
            for (times = 0; times < min; times++) {
                result = this._(input, i);
                prevResult = mergeReplies(result, prevResult);
                if (result.status) {
                    i = result.index;
                    accum.push(result.value);
                } else {
                    return prevResult as any;
                }
            }
            // Optional repetitions: stop quietly at the first failure.
            for (; times < max; times += 1) {
                result = this._(input, i);
                prevResult = mergeReplies(result, prevResult);
                if (result.status) {
                    i = result.index;
                    accum.push(result.value);
                } else {
                    break;
                }
            }
            return mergeReplies(makeSuccess(i, accum), prevResult);
        });
    }

    /** Replace the parsed value with the constant `res`. */
    result<B>(res: B) {
        return this.map(() => res);
    }

    /** Apply this parser between 0 and `n` times. */
    atMost(n: number) {
        return this.times(0, n);
    }

    /** Apply this parser exactly `n` times, then as many more times as it succeeds. */
    atLeast(n: number) {
        return MonadicParser.seq(this.times(n), this.many()).map(r => [...r[0], ...r[1]]);
    }

    /** Transform the parsed value with `f`; failures pass through unchanged. */
    map<B>(f: (a: A) => B): MonadicParser<B> {
        return new MonadicParser((input, i) => {
            const result = this._(input, i);
            if (!result.status) {
                return result;
            }
            return mergeReplies(makeSuccess(result.index, f(result.value)), result);
        });
    }

    /** Run this parser followed by `next`; keep this parser's value. */
    skip<B>(next: MonadicParser<B>): MonadicParser<A> {
        return seqPick(0, this, next);
    }

    /** Annotate the parsed value with its start and end line/column indices. */
    mark(): MonadicParser<MonadicParser.Mark<A>> {
        return MonadicParser.seq(MonadicParser.index, this, MonadicParser.index).map(r => ({ start: r[0], value: r[1], end: r[2] }));
    }

    /** Like `mark`, additionally tagging the result with `name`. */
    node(name: string): MonadicParser<MonadicParser.Node<A>> {
        return MonadicParser.seq(MonadicParser.index, this, MonadicParser.index).map(r => ({ name, start: r[0], value: r[1], end: r[2] }));
    }

    /** Zero or more occurrences of this parser separated by `separator`. */
    sepBy<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
        return MonadicParser.sepBy(this, separator);
    }

    /** One or more occurrences of this parser separated by `separator`. */
    sepBy1<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
        return MonadicParser.sepBy1(this, separator);
    }

    /** Succeed only if `x` would match next; `x` does not consume input. */
    lookahead<B>(x: MonadicParser<B>) {
        return this.skip(MonadicParser.lookahead(x));
    }

    /** Succeed only if `x` would NOT match next. */
    notFollowedBy<B>(x: MonadicParser<B>) {
        return this.skip(MonadicParser.notFollowedBy(x));
    }

    /** Replace the expected list reported on failure with the single entry `expected`. */
    desc(expected: string): MonadicParser<A> {
        return new MonadicParser<A>((input, i) => {
            const reply = this._(input, i);
            if (reply.status) {
                return reply;
            }
            // Build a new failure rather than mutating the wrapped parser's reply.
            return { status: false, furthest: reply.furthest, expected: [expected] };
        });
    }

    /** Yield `result` without consuming input when this parser fails. */
    fallback<B>(result: B) {
        return this.or(MonadicParser.succeed(result));
    }

    /** Run `other` (yielding a function), then this parser, and apply the function to this value. */
    ap<B>(other: MonadicParser<(x: A) => B>): MonadicParser<B> {
        return MonadicParser.seq(other, this).map(([f, x]) => f(x));
    }

    /** Run this parser, then the parser that `f` builds from the parsed value. */
    chain<B>(f: (a: A) => MonadicParser<B>): MonadicParser<B> {
        return new MonadicParser<B>((input, i) => {
            const result = this._(input, i);
            if (!result.status) {
                return result as any;
            }
            const nextParser = f(result.value);
            return mergeReplies(nextParser._(input, result.index), result);
        });
    }
}

export namespace MonadicParser {
    /** Low-level parsing step: read `input` from offset `i` and report the outcome. */
    export type Action<T> = (input: string, i: number) => MonadicParser.Result<T>

    export type ParseResult<T> = ParseSuccess<T> | ParseFailure;

    export interface Index {
        /** zero-based character offset */
        offset: number;
        /** one-based line offset */
        line: number;
        /** one-based column offset */
        column: number;
    }

    export interface ParseSuccess<T> {
        success: true,
        value: T
    }

    export interface ParseFailure {
        success: false,
        index: Index,
        expected: string[],
    }

    export interface Mark<T> {
        start: Index;
        end: Index;
        value: T;
    }

    export interface Node<T> extends Mark<T> {
        name: string
    }

    export interface Success<T> {
        status: true,
        value: T,
        index: number
    }

    export interface Failure {
        status: false,
        furthest: number,
        expected: string[]
    }

    export type Result<T> = Success<T> | Failure

    // export function createLanguage(parsers: any) {
    //     const language: any = {};
    //     for (const key of Object.keys(parsers)) {
    //         (function (key) {
    //             language[key] = lazy(() => parsers[key](language));
    //         })(key);
    //     }
    //     return language;
    // }

    /** Run the parsers in order and collect their values; fails at the first failure. */
    export function seq<A>(a: MonadicParser<A>): MonadicParser<[A]>
    export function seq<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<[A, B]>
    export function seq<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<[A, B, C]>
    export function seq<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<[A, B, C, D]>
    export function seq<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<[A, B, C, D, E]>
    export function seq<T>(...parsers: MonadicParser<T>[]): MonadicParser<T[]>
    export function seq(...parsers: MonadicParser<any>[]): MonadicParser<any[]> {
        const numParsers = parsers.length;
        return new MonadicParser<any[]>((input, index) => {
            let result: MonadicParser.Result<any> | undefined;
            let accum = new Array(numParsers);
            let i = index;
            for (let j = 0; j < numParsers; j++) {
                result = mergeReplies(parsers[j]._(input, i), result);
                if (!result.status) {
                    return result;
                }
                accum[j] = result.value;
                i = result.index;
            }
            return mergeReplies(makeSuccess(i, accum), result);
        });
    }

    /**
     * Try the parsers in order at the same offset and return the first success.
     * With no parsers, returns a parser that always fails.
     */
    export function alt<A>(a: MonadicParser<A>): MonadicParser<A>
    export function alt<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<A | B>
    export function alt<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<A | B | C>
    export function alt<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<A | B | C | D>
    export function alt<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<A | B | C | D | E>
    // The result is the value of ONE alternative, so it is T rather than T[].
    export function alt<T>(...parsers: MonadicParser<T>[]): MonadicParser<T>
    export function alt(...parsers: MonadicParser<any>[]): MonadicParser<any> {
        const numParsers = parsers.length;
        if (numParsers === 0) {
            return fail('zero alternates');
        }
        return new MonadicParser((input, i) => {
            let result: MonadicParser.Result<any> | undefined;
            for (let j = 0; j < parsers.length; j++) {
                result = mergeReplies(parsers[j]._(input, i), result);
                if (result.status) {
                    return result;
                }
            }
            return result!;
        });
    }

    /** Zero or more `parser` values separated by `separator`. */
    export function sepBy<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>): MonadicParser<A[]> {
        return sepBy1(parser, separator).or(succeed([]));
    }

    /** One or more `parser` values separated by `separator`. */
    export function sepBy1<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>) {
        const pairs = separator.then(parser).many();
        return seq(parser, pairs).map(r => [r[0], ...r[1]]);
    }

    /** Match the exact string `str`. */
    export function string(str: string) {
        const expected = `'${str}'`;
        return new MonadicParser((input, i) => {
            const j = i + str.length;
            const head = input.slice(i, j);
            if (head === str) {
                return makeSuccess(j, head);
            } else {
                return makeFailure(i, expected);
            }
        });
    }

    /** Extract the flag characters from the textual form of `re` (the part after the last '/'). */
    function flags(re: RegExp) {
        const s = '' + re;
        return s.slice(s.lastIndexOf('/') + 1);
    }

    /** Build a copy of `re` that only matches at the start of the string it is run on. */
    function anchoredRegexp(re: RegExp) {
        // 'g' and 'y' make exec() stateful through lastIndex, so a reused parser
        // would start matching from a stale offset; drop them.
        const statelessFlags = flags(re).replace(/[gy]/g, '');
        return RegExp('^(?:' + re.source + ')', statelessFlags);
    }

    /**
     * Match `re` at the current offset.
     * @param groupNumber capture group whose text becomes the value (default 0,
     *        the whole match). A group index that does not exist in the match
     *        produces a failure.
     */
    export function regexp(re: RegExp, groupNumber?: number) {
        const anchored = anchoredRegexp(re);
        const expected = '' + re;
        const group = groupNumber || 0;
        return new MonadicParser(function (input, i) {
            const match = anchored.exec(input.slice(i));
            if (match) {
                // Valid indices are 0 .. match.length - 1.
                if (0 <= group && group < match.length) {
                    const fullMatch = match[0];
                    const groupMatch = match[group];
                    return makeSuccess(i + fullMatch.length, groupMatch);
                }
                const message = `invalid match group (0 to ${match.length - 1}) in ${expected}`;
                return makeFailure(i, message);
            }
            return makeFailure(i, expected);
        });
    }

    /** Always succeed with `value`, consuming nothing. */
    export function succeed<A>(value: A) {
        return new MonadicParser((input, i) => makeSuccess(i, value));
    }

    /** Always fail, reporting `expected`. */
    export function fail(expected: string): MonadicParser<any> {
        return new MonadicParser((input, i) => makeFailure(i, expected));
    }

    /**
     * Succeed with `null`, consuming nothing, if `x` matches at the current offset.
     * @throws Error if `x` is not a parser, string or RegExp.
     */
    export function lookahead<A>(x: MonadicParser<A> | string | RegExp): MonadicParser<null> {
        if (isParser(x)) {
            return new MonadicParser<null>((input, i) => {
                const result = x._(input, i);
                // Build a new success at the original offset rather than
                // mutating the inner parser's reply.
                return result.status ? makeSuccess(i, null) : result;
            });
        } else if (typeof x === 'string') {
            return lookahead(string(x));
        } else if (x instanceof RegExp) {
            return lookahead(regexp(x));
        }
        throw new Error('not a string, regexp, or parser: ' + x);
    }

    /** Succeed with `null`, consuming nothing, if `parser` does NOT match at the current offset. */
    export function notFollowedBy<A>(parser: MonadicParser<A>): MonadicParser<null> {
        return new MonadicParser((input, i) => {
            const result = parser._(input, i);
            return result.status
                ? makeFailure(i, 'not "' + input.slice(i, result.index) + '"')
                : makeSuccess(i, null);
        });
    }

    /** Match a single character for which `predicate` returns true. */
    export function test(predicate: (char: string) => boolean): MonadicParser<string> {
        return new MonadicParser((input, i) => {
            const char = input.charAt(i);
            if (i < input.length && predicate(char)) {
                return makeSuccess(i + 1, char);
            } else {
                return makeFailure(i, 'a character ' + predicate);
            }
        });
    }

    /** Match a single character contained in `str`. */
    export function oneOf(str: string) {
        return test(ch => str.indexOf(ch) >= 0);
    }

    /** Match a single character NOT contained in `str`. */
    export function noneOf(str: string) {
        return test(ch => str.indexOf(ch) < 0);
    }

    /** Match a single character between `begin` and `end` (inclusive, by string comparison). */
    export function range(begin: string, end: string) {
        return test(ch => begin <= ch && ch <= end).desc(begin + '-' + end);
    }

    /** Consume characters while `predicate` holds; always succeeds, possibly with ''. */
    export function takeWhile(predicate: (ch: string) => boolean) {
        return new MonadicParser((input, i) => {
            let j = i;
            while (j < input.length && predicate(input.charAt(j))) {
                j++;
            }
            return makeSuccess(j, input.slice(i, j));
        });
    }

    /**
     * Defer construction of a parser until first use.
     * On first use the placeholder's action is replaced by the real one.
     */
    export function lazy<T>(f: () => MonadicParser<T>) {
        const parser: MonadicParser<T> = new MonadicParser<T>((input, i) => {
            const a = f()._;
            parser._ = a;
            return a(input, i);
        });
        return parser;
    }

    /** A parser that always fails with the description 'empty'. */
    export function empty() {
        return fail('empty');
    }

    /** Yields the current line/column index without consuming input. */
    export const index = new MonadicParser(function (input, i) {
        return makeSuccess(i, makeLineColumnIndex(input, i));
    });

    /** Matches any single character; fails at the end of input. */
    export const anyChar = new MonadicParser<string>((input, i) => {
        if (i >= input.length) {
            return makeFailure(i, 'any character');
        }
        return makeSuccess(i + 1, input.charAt(i));
    });

    /** Consumes and yields the rest of the input. */
    export const all = new MonadicParser(function (input, i) {
        return makeSuccess(input.length, input.slice(i));
    });

    /** Succeeds with `null` only at the end of input. */
    export const eof = new MonadicParser(function (input, i) {
        if (i < input.length) {
            return makeFailure(i, 'EOF');
        }
        return makeSuccess(i, null);
    });

    export const digit = regexp(/[0-9]/).desc('a digit');
    export const digits = regexp(/[0-9]*/).desc('optional digits');
    export const letter = regexp(/[a-z]/i).desc('a letter');
    export const letters = regexp(/[a-z]*/i).desc('optional letters');
    export const optWhitespace = regexp(/\s*/).desc('optional whitespace');
    export const whitespace = regexp(/\s+/).desc('whitespace');
    export const cr = string('\r');
    export const lf = string('\n');
    export const crlf = string('\r\n');
    export const newline = alt(crlf, lf, cr).desc('newline');
    export const end = alt(newline, eof);
}

/** Run the parsers in sequence and yield only the value of the parser at position `idx`. */
function seqPick(idx: number, ...parsers: MonadicParser<any>[]): MonadicParser<any> {
    const numParsers = parsers.length;
    return new MonadicParser<any>((input, index) => {
        let result: MonadicParser.Result<any> | undefined;
        let picked: any;
        let i = index;
        for (let j = 0; j < numParsers; j++) {
            result = mergeReplies(parsers[j]._(input, i), result);
            if (!result.status) {
                return result;
            }
            if (idx === j) picked = result.value;
            i = result.index;
        }
        return mergeReplies(makeSuccess(i, picked), result);
    });
}

/** Build a success reply. */
function makeSuccess<T>(index: number, value: T): MonadicParser.Success<T> {
    return { status: true, index, value };
}

/** Build a failure reply with a single expected description. */
function makeFailure(index: number, expected: string): MonadicParser.Failure {
    return { status: false, furthest: index, expected: [expected] };
}

/**
 * Combine the newest reply with the previous one. Only when both are failures
 * and the previous one reached at least as far is a merged failure built: it
 * keeps the previous furthest offset, with the union of expectations on a tie
 * and the previous expectations otherwise. In every other case `result` is
 * returned as is.
 */
function mergeReplies<A, B>(result: MonadicParser.Result<A>, last?: MonadicParser.Result<B>): MonadicParser.Result<A> {
    if (!last || result.status || last.status || result.furthest > last.furthest) {
        return result;
    }
    const expected = result.furthest === last.furthest
        ? unsafeUnion(result.expected, last.expected)
        : last.expected;
    return { status: result.status, furthest: last.furthest, expected };
}

/** Convert a character offset into an offset / line / column index. */
function makeLineColumnIndex(input: string, i: number): MonadicParser.Index {
    const lines = input.slice(0, i).split('\n');
    // Note that unlike the character offset, the line and column offsets are
    // 1-based.
    const lineWeAreUpTo = lines.length;
    const columnWeAreUpTo = lines[lines.length - 1].length + 1;
    return { offset: i, line: lineWeAreUpTo, column: columnWeAreUpTo };
}

/** Render the expected list for an error message. */
function formatExpected(expected: string[]) {
    if (expected.length === 1) {
        return expected[0];
    }
    return 'one of ' + expected.join(', ');
}

/** Render the "got ..." part of an error message, showing up to 12 characters of input. */
function formatGot(input: string, error: MonadicParser.ParseFailure) {
    const index = error.index;
    const i = index.offset;
    if (i === input.length) {
        return ', got the end of the input';
    }
    const prefix = i > 0 ? '\'...' : '\'';
    const suffix = input.length - i > 12 ? '...\'' : '\'';
    return (
        ' at line ' +
        index.line +
        ' column ' +
        index.column +
        ', got ' +
        prefix +
        input.slice(i, i + 12) +
        suffix
    );
}

/** Render the full error message used by `tryParse`. */
function formatError(input: string, error: MonadicParser.ParseFailure) {
    return 'expected ' + formatExpected(error.expected) + formatGot(input, error);
}

/**
 * Sorted union of two string lists without duplicates. "Unsafe" because when
 * one list is empty the other is returned as is (same array, unsorted).
 */
function unsafeUnion(xs: string[], ys: string[]) {
    const xn = xs.length;
    const yn = ys.length;
    if (xn === 0) return ys;
    else if (yn === 0) return xs;

    const set = new Set<string>();
    const ret: string[] = [];
    for (let i = 0; i < xn; i++) {
        if (!set.has(xs[i])) {
            ret[ret.length] = xs[i];
            set.add(xs[i]);
        }
    }
    for (let i = 0; i < yn; i++) {
        if (!set.has(ys[i])) {
            ret[ret.length] = ys[i];
            set.add(ys[i]);
        }
    }
    ret.sort();
    return ret;
}

/** Type guard for MonadicParser instances. */
function isParser(obj: any): obj is MonadicParser<any> {
    return obj instanceof MonadicParser;
}