Skip to content
Snippets Groups Projects
Commit 03e7ce98 authored by David Sehnal's avatar David Sehnal
Browse files

Added monadic parser to mol-util

parent d0c75205
No related branches found
No related tags found
No related merge requests found
/* /**
* Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info. * Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
* *
* @author David Sehnal <david.sehnal@gmail.com> * @author David Sehnal <david.sehnal@gmail.com>
*/ */
import Type from './type' import Type from '../type'
import Symbol, { Arguments, Argument } from './symbol' import Symbol, { Arguments, Argument } from '../symbol'
import { symbol, normalizeTable, symbolList } from './helpers' import { symbol, normalizeTable, symbolList } from '../helpers'
export namespace Types { export namespace Types {
export type List<T = any> = ArrayLike<T> export type List<T = any> = ArrayLike<T>
......
/**
* Copyright (c) 2018 Mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import Type from '../type'
import * as Core from './core'
import { Arguments, Argument } from '../symbol'
import { symbol } from '../helpers'
/**
 * Types used by the structure query symbols in this file.
 *
 * Plain value types are created with `Type.Value`, closed string enumerations with
 * `Type.OneOf`, and derived types (flag sets, query functions) with helpers from `Core.Types`.
 */
export namespace Types {
    export const ElementSymbol = Type.Value('Structure', 'ElementSymbol');
    export const AtomName = Type.Value('Structure', 'AtomName');

    export const BondFlag = Type.OneOf('Structure', 'BondFlag', Type.Str, ['covalent', 'metallic', 'ion', 'hydrogen', 'sulfide', 'computed', 'aromatic']);
    export const BondFlags = Core.Types.Flags(BondFlag, 'BondFlags');

    export const SecondaryStructureFlag = Type.OneOf('Structure', 'SecondaryStructureFlag', Type.Str, ['alpha', 'beta', '3-10', 'pi', 'sheet', 'strand', 'helix', 'turn', 'none']);
    // Registered under the plural name, mirroring BondFlags: this is the flag-set type,
    // and reusing the singular name would make it indistinguishable from the single-flag type above.
    export const SecondaryStructureFlags = Core.Types.Flags(SecondaryStructureFlag, 'SecondaryStructureFlags');

    export const RingFingerprint = Type.Value('Structure', 'RingFingerprint');
    export const EntityType = Type.OneOf('Structure', 'EntityType', Type.Str, ['polymer', 'non-polymer', 'water', 'unknown']);
    export const ResidueId = Type.Value('Structure', 'ResidueId');

    export const ElementSet = Type.Value('Structure', 'ElementSet');
    export const ElementSelection = Type.Value('Structure', 'ElementSelection');
    export const ElementReference = Type.Value('Structure', 'ElementReference');

    /** Function type built from `ElementSelection` via `Core.Types.Fn`; used as the argument/result type of the query symbols. */
    export const ElementSelectionQuery = Core.Types.Fn(ElementSelection, 'ElementSelectionQuery');
}
/**
 * Symbols that construct values of the structure-specific types declared in `Types`
 * (element symbols, atom names, entity types, flag sets, ring fingerprints, residue ids).
 *
 * Each entry calls `symbol` with an argument specification, a result type and a description string.
 */
const type = {
// NOTE(review): '@header' looks like a section title read by tooling outside this file — confirm.
'@header': 'Types',
elementSymbol: symbol(
Arguments.Dictionary({ 0: Argument(Type.Str) }),
Types.ElementSymbol, 'Create element symbol representation from a string value.'),
atomName: symbol(
Arguments.Dictionary({ 0: Argument(Type.AnyValue) }), Types.AtomName, 'Convert a value to an atom name.'),
entityType: symbol(
Arguments.Dictionary({ 0: Argument(Types.EntityType) }),
Types.EntityType,
`Create normalized representation of entity type: ${Type.oneOfValues(Types.EntityType).join(', ')}.`),
bondFlags: symbol(
Arguments.List(Types.BondFlag),
Types.BondFlags,
`Create bond flags representation from a list of strings. Allowed flags: ${Type.oneOfValues(Types.BondFlag).join(', ')}.`),
ringFingerprint: symbol(
Arguments.List(Types.ElementSymbol, { nonEmpty: true }),
Types.RingFingerprint,
'Create ring fingerprint from the supplied atom element list.'),
secondaryStructureFlags: symbol(
Arguments.List(Types.SecondaryStructureFlag),
Types.SecondaryStructureFlags,
`Create secondary structure flags representation from a list of strings. Allowed flags: ${Type.oneOfValues(Types.SecondaryStructureFlag).join(', ')}.`),
// Both residue id constructors produce the same Types.ResidueId; they differ only in which
// mmCIF columns (auth_* vs. label_*) the positional arguments correspond to.
authResidueId: symbol(Arguments.Dictionary({
0: Argument(Type.Str, { description: 'auth_asym_id' }),
1: Argument(Type.Num, { description: 'auth_seq_id' }),
2: Argument(Type.Str, { description: 'pdbx_PDB_ins_code', isOptional: true })
}), Types.ResidueId, `Residue identifier based on "auth_" annotation.`),
labelResidueId: symbol(Arguments.Dictionary({
0: Argument(Type.Str, { description: 'label_entity_id' }),
1: Argument(Type.Str, { description: 'label_asym_id' }),
2: Argument(Type.Num, { description: 'label_seq_id' }),
3: Argument(Type.Str, { description: 'pdbx_PDB_ins_code', isOptional: true })
}), Types.ResidueId, `Residue identifier based on mmCIF's "label_" annotation.`)
};
/**
 * Argument-less symbols (`Arguments.None`) for the iteration slots: a reference to the
 * current element and the current value of an element-set reduction.
 */
const slot = {
'@header': 'Iteration Slots',
element: symbol(Arguments.None, Types.ElementReference, 'A reference to the current element.'),
// The result type is the type variable 'a', the same variable used by the `reduce` symbol of the atom-set table.
elementSetReduce: symbol(Arguments.None, Type.Variable('a', Type.AnyValue, true), 'Current value of the element set reducer.')
}
/**
 * Generator symbols. Every entry has result type `Types.ElementSelectionQuery`.
 */
const generator = {
'@header': 'Generators',
all: symbol(Arguments.None, Types.ElementSelectionQuery, 'The entire structure.'),
// All tests are optional and default to true; 'group-by' defaults to the string `atom-key`.
atomGroups: symbol(Arguments.Dictionary({
'entity-test': Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test for the 1st atom of every entity' }),
'chain-test': Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test for the 1st atom of every chain' }),
'residue-test': Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'Test for the 1st atom every residue' }),
'atom-test': Argument(Type.Bool, { isOptional: true, defaultValue: true }),
'group-by': Argument(Type.Any, { isOptional: true, defaultValue: `atom-key`, description: 'Group atoms to sets based on this property. Default: each atom has its own set' }),
}), Types.ElementSelectionQuery, 'Return all atoms for which the tests are satisfied, grouped into sets.'),
rings: symbol(Arguments.List(Types.RingFingerprint), Types.ElementSelectionQuery, 'Return rings with the specified fingerprint(s). If no fingerprints are given, return all rings.'),
// Positional argument 0 is the source selection; `query` is the query executed on it.
queryInSelection: symbol(Arguments.Dictionary({
0: Argument(Types.ElementSelectionQuery),
query: Argument(Types.ElementSelectionQuery),
'in-complement': Argument(Type.Bool, { isOptional: true, defaultValue: false })
}), Types.ElementSelectionQuery, 'Executes query only on atoms that are in the source selection.'),
empty: symbol(Arguments.None, Types.ElementSelectionQuery, 'Nada.'),
}
/**
 * Selection modifier symbols. Every entry takes the selection to modify as positional
 * argument 0 and has result type `Types.ElementSelectionQuery`.
 */
const modifier = {
    '@header': 'Selection Modifications',

    queryEach: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        query: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, 'Query every atom set in the input selection separately.'),

    intersectBy: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        by: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, 'Intersect each atom set from the first sequence from atoms in the second one.'),

    exceptBy: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        by: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, `Remove all atoms from 'selection' that occur in 'by'.`),

    unionBy: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        by: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, 'For each atom set A in the original sequence, combine all atoms sets in the target selection that intersect with A.'),

    union: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery)
    }), Types.ElementSelectionQuery, 'Collects all atom sets in the sequence into a single atom set.'),

    // The description names the interval after the actual arguments ('min-distance' / 'max-distance').
    cluster: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        'min-distance': Argument(Type.Num, { isOptional: true, defaultValue: 0 }),
        'max-distance': Argument(Type.Num),
        'min-size': Argument(Type.Num, { description: 'Minimal number of sets to merge, must be at least 2', isOptional: true, defaultValue: 2 }),
        'max-size': Argument(Type.Num, { description: 'Maximal number of sets to merge, if not set, no limit', isOptional: true }),
    }), Types.ElementSelectionQuery, 'Combines atom sets that have mutual distance in the interval [min-distance, max-distance]. Minimum/maximum size determines how many atom sets can be combined.'),

    includeSurroundings: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        radius: Argument(Type.Num),
        'atom-radius': Argument(Type.Num, { isOptional: true, defaultValue: 0, description: 'Value added to each atom before the distance check, for example VDW radius. Using this argument is computationally demanding.' }),
        'as-whole-residues': Argument(Type.Bool, { isOptional: true })
    }), Types.ElementSelectionQuery, 'For each atom set in the selection, include all surrounding atoms/residues that are within the specified radius.'),

    includeConnected: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        // NOTE(review): the default is a descriptive string forced through `as any`, not a boolean —
        // presumably documentation-only; confirm how defaultValue is consumed.
        'bond-test': Argument(Type.Bool, { isOptional: true, defaultValue: 'true for covalent bonds' as any }),
        'layer-count': Argument(Type.Num, { isOptional: true, defaultValue: 1, description: 'Number of bonded layers to include.' }),
        'as-whole-residues': Argument(Type.Bool, { isOptional: true })
    }), Types.ElementSelectionQuery, 'Pick all atom sets that are connected to the target.'),

    expandProperty: symbol(Arguments.Dictionary({
        0: Argument(Types.ElementSelectionQuery),
        property: Argument(Type.AnyValue)
    }), Types.ElementSelectionQuery, 'To each atom set in the selection, add all atoms that have the same property value that was already present in the set.')
}
/**
 * Selection filter symbols. Every entry takes the selection to filter as positional
 * argument 0 and has result type `Types.ElementSelectionQuery`.
 */
const filter = {
'@header': 'Selection Filters',
pick: symbol(Arguments.Dictionary({
0: Argument(Types.ElementSelectionQuery),
test: Argument(Type.Bool)
}), Types.ElementSelectionQuery, 'Pick all atom sets that satisfy the test.'),
withSameAtomProperties: symbol(Arguments.Dictionary({
0: Argument(Types.ElementSelectionQuery),
source: Argument(Types.ElementSelectionQuery),
property: Argument(Type.Any)
}), Types.ElementSelectionQuery, 'Pick all atom sets for which the set of given atom properties is a subset of the source properties.'),
intersectedBy: symbol(Arguments.Dictionary({
0: Argument(Types.ElementSelectionQuery),
by: Argument(Types.ElementSelectionQuery)
}), Types.ElementSelectionQuery, 'Pick all atom sets that have non-zero intersection with the target.'),
// Only 'max-radius' is required besides the two selections; the other numeric arguments default to 0.
within: symbol(Arguments.Dictionary({
0: Argument(Types.ElementSelectionQuery),
target: Argument(Types.ElementSelectionQuery),
'min-radius': Argument(Type.Num, { isOptional: true, defaultValue: 0 }),
'max-radius': Argument(Type.Num),
'atom-radius': Argument(Type.Num, { isOptional: true, defaultValue: 0, description: 'Value added to each atom before the distance check, for example VDW radius. Using this argument is computationally demanding.' }),
invert: Argument(Type.Bool, { isOptional: true, defaultValue: false, description: 'If true, pick only atom sets that are further than the specified radius.' }),
}), Types.ElementSelectionQuery, 'Pick all atom sets from selection that have any atom within the radius of any atom from target.'),
isConnectedTo: symbol(Arguments.Dictionary({
0: Argument(Types.ElementSelectionQuery),
target: Argument(Types.ElementSelectionQuery),
// NOTE(review): the default is a descriptive string forced through `as any`, not a boolean —
// presumably documentation-only; confirm how defaultValue is consumed.
'bond-test': Argument(Type.Bool, { isOptional: true, defaultValue: 'true for covalent bonds' as any }),
disjunct: Argument(Type.Bool, { isOptional: true, defaultValue: true, description: 'If true, there must exist a bond to an atom that lies outside the given atom set to pass test.' }),
invert: Argument(Type.Bool, { isOptional: true, defaultValue: false, description: 'If true, return atom sets that are not connected.' })
}), Types.ElementSelectionQuery, 'Pick all atom sets that are connected to the target.'),
}
/**
 * Selection combinator symbols: each takes several selection queries and has result
 * type `Types.ElementSelectionQuery`.
 */
const combinator = {
'@header': 'Selection Combinators',
intersect: symbol(Arguments.List(Types.ElementSelectionQuery), Types.ElementSelectionQuery, 'Return all unique atom sets that appear in all of the source selections.'),
merge: symbol(Arguments.List(Types.ElementSelectionQuery), Types.ElementSelectionQuery, 'Merges multiple selections into a single one. Only unique atom sets are kept.'),
// `matrix` is a list of lists of numbers; `selections` is a list of selection queries.
distanceCluster: symbol(Arguments.Dictionary({
matrix: Argument(Core.Types.List(Core.Types.List(Type.Num)), { description: 'Distance matrix, represented as list of rows (num[][])). Lower triangle is min distance, upper triangle is max distance.' }),
selections: Argument(Core.Types.List(Types.ElementSelectionQuery), { description: 'A list of held selections.' })
}), Types.ElementSelectionQuery, 'Pick combinations of atom sets from the source sequences that are mutually within distances specified by a matrix.')
}
/**
 * Symbols that compute a value over the current atom set (counts, reductions, property sets).
 */
const atomSet = {
'@header': 'Atom Sets',
// No description string is supplied for this symbol.
atomCount: symbol(Arguments.None, Type.Num),
countQuery: symbol(Arguments.Dictionary({
0: Argument(Types.ElementSelectionQuery)
}), Type.Num, 'Counts the number of occurences of a specific query inside the current atom set.'),
// `initial`, `value` and the result all use the same type variable 'a'.
reduce: symbol(Arguments.Dictionary({
initial: Argument(Type.Variable('a', Type.AnyValue, true), { description: 'Initial value assigned to slot.atom-set-reduce. Current atom is set to the 1st atom of the current set for this.' }),
value: Argument(Type.Variable('a', Type.AnyValue, true), { description: 'Expression executed for each atom in the set' })
}), Type.Variable('a', Type.AnyValue, true), 'Execute the value expression for each atom in the current atom set and return the result. Works the same way as Array.reduce in JavaScript (``result = value(value(...value(initial)))``)'),
propertySet: symbol(Arguments.Dictionary({
0: Argument(Core.Types.ConstrainedVar),
}), Core.Types.Set(Core.Types.ConstrainedVar), 'Returns a set with all values of the given property in the current atom set.'),
}
/**
 * Per-atom property symbols, grouped into `core`, `topology` and `macromolecular`.
 *
 * Most entries are built with `atomProp`, which gives them a single optional positional
 * argument of type `Types.ElementReference`; `bondCount` is declared explicitly because it
 * takes an additional `flags` argument.
 */
const atomProperty = {
'@header': 'Atom Properties',
core: {
'@header': 'Core Properties',
elementSymbol: atomProp(Types.ElementSymbol),
vdw: atomProp(Type.Num, 'Van der Waals radius'),
mass: atomProp(Type.Num, 'Atomic weight'),
atomicNumber: atomProp(Type.Num, 'Atomic number'),
x: atomProp(Type.Num, 'Cartesian X coordinate'),
y: atomProp(Type.Num, 'Cartesian Y coordinate'),
z: atomProp(Type.Num, 'Cartesian Z coordinate'),
atomKey: atomProp(Type.AnyValue, 'Unique value for each atom. Main use case is grouping of atoms.'),
bondCount: symbol(Arguments.Dictionary({
0: Argument(Types.ElementReference, { isOptional: true, defaultValue: 'slot.current-atom' }),
// The default is the string 'covalent' forced through `as any` rather than a BondFlags value.
flags: Argument(Types.BondFlags, { isOptional: true, defaultValue: 'covalent' as any }),
}), Type.Num, 'Number of bonds (by default only covalent bonds are counted).')
},
// Unlike the sibling groups, this group declares no '@header' entry.
topology: {
connectedComponentKey: atomProp(Type.AnyValue, 'Unique value for each connected component.')
},
macromolecular: {
'@header': 'Macromolecular Properties (derived from the mmCIF format)',
authResidueId: atomProp(Types.ResidueId, `type.auth-residue-id symbol executed on current atom's residue`),
labelResidueId: atomProp(Types.ResidueId, `type.label-residue-id symbol executed on current atom's residue`),
residueKey: atomProp(Type.AnyValue, 'Unique value for each tuple ``(label_entity_id,auth_asym_id,auth_seq_id,pdbx_PDB_ins_code)``, main use case is grouping of atoms'),
chainKey: atomProp(Type.AnyValue, 'Unique value for each tuple ``(label_entity_id,auth_asym_id)``, main use case is grouping of atoms'),
entityKey: atomProp(Type.AnyValue, 'Unique value for each tuple ``label_entity_id``, main use case is grouping of atoms'),
isHet: atomProp(Type.Bool, 'Equivalent to atom_site.group_PDB !== ATOM'),
id: atomProp(Type.Num, '_atom_site.id'),
// The following keys are spelled like mmCIF column names and carry no description string.
label_atom_id: atomProp(Types.AtomName),
label_alt_id: atomProp(Type.Str),
label_comp_id: atomProp(Type.Str),
label_asym_id: atomProp(Type.Str),
label_entity_id: atomProp(Type.Str),
label_seq_id: atomProp(Type.Num),
auth_atom_id: atomProp(Types.AtomName),
auth_comp_id: atomProp(Type.Str),
auth_asym_id: atomProp(Type.Str),
auth_seq_id: atomProp(Type.Num),
pdbx_PDB_ins_code: atomProp(Type.Str),
pdbx_formal_charge: atomProp(Type.Num),
occupancy: atomProp(Type.Num),
B_iso_or_equiv: atomProp(Type.Num),
entityType: atomProp(Types.EntityType, 'Type of the entity as defined in mmCIF (polymer, non-polymer, water, unknown)'),
secondaryStructureKey: atomProp(Type.AnyValue, 'Unique value for each secondary structure element.'),
secondaryStructureFlags: atomProp(Types.SecondaryStructureFlags),
isModified: atomProp(Type.Bool, 'True if the atom bolongs to modification of a standard residue.'),
modifiedParentName: atomProp(Type.Str, `'3-letter' code of the modifed parent residue.`),
}
}
/**
 * Bond property symbols, built with `bondProp` (argument-less symbols): the bond's
 * flag set and its numeric order.
 */
const bondProperty = {
'@header': 'Bond Properties',
flags: bondProp(Types.BondFlags),
order: bondProp(Type.Num)
}
/**
 * Declares an atom property symbol.
 *
 * The symbol accepts one optional positional argument of type `Types.ElementReference`
 * whose default value is the string 'slot.current-atom'.
 *
 * @param type result type of the property
 * @param description optional description forwarded to `symbol`
 */
function atomProp(type: Type, description?: string) {
    const elementArgument = Argument(Types.ElementReference, { isOptional: true, defaultValue: 'slot.current-atom' });
    const propertyArguments = Arguments.Dictionary({ 0: elementArgument });
    return symbol(propertyArguments, type, description);
}
/**
 * Declares a bond property symbol that takes no arguments.
 *
 * @param type result type of the property
 * @param description optional description forwarded to `symbol`
 */
function bondProp(type: Type, description?: string) {
    const noArguments = Arguments.None;
    return symbol(noArguments, type, description);
}
/**
 * The complete structure query symbol table: every group declared above in this file,
 * exposed under a single default export.
 */
export default {
'@header': 'Structure Queries',
type,
slot,
generator,
modifier,
filter,
combinator,
atomSet,
atomProperty,
bondProperty
}
\ No newline at end of file
/**
* Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { MonadicParser as P } from '../monadic-parser'
// Smoke tests for the basic MonadicParser combinators.
describe('parser', () => {
    it('string', () => {
        const abc = P.string('abc');
        const exact = abc.parse('abc');
        const withLeadingChar = abc.parse('cabc');
        expect(exact.success).toBe(true);
        expect(withLeadingChar.success).toBe(false);
    });

    it('alt', () => {
        const abcOrDigits = P.alt(P.string('abc'), P.string('123'));
        const first = abcOrDigits.parse('abc');
        const second = abcOrDigits.parse('123');
        // `parse` requires the whole input to be consumed, so the trailing 'a' must fail.
        const trailing = abcOrDigits.parse('123a');
        expect(first.success).toBe(true);
        expect(second.success).toBe(true);
        expect(trailing.success).toBe(false);
    });

    it('trim', () => {
        const padded = P.string('abc').trim(P.whitespace);
        const value = padded.tryParse(' abc ');
        expect(value).toBe('abc');
    });

    it('wrap', () => {
        const open = P.string('(');
        const close = P.string(')');
        const parenthesized = P.string('abc').wrap(open, close);
        const value = parenthesized.tryParse('(abc)');
        expect(value).toBe('abc');
    });

    it('then', () => {
        const sequence = P.string('abc').then(P.string('123'));
        // `then` keeps only the value of the second parser.
        const value = sequence.tryParse('abc123');
        expect(value).toBe('123');
    });

    it('many', () => {
        const ones = P.string('1').many();
        const value = ones.tryParse('111');
        expect(value).toEqual(['1', '1', '1']);
    });

    it('times', () => {
        const twoOnes = P.string('1').times(2);
        const value = twoOnes.tryParse('11');
        expect(value).toEqual(['1', '1']);
    });

    it('sepBy', () => {
        const comma = P.string(',');
        const numbers = P.sepBy(P.digits, comma).map(xs => xs.map(x => +x));
        const value = numbers.tryParse('1,2,3,4');
        expect(value).toEqual([1, 2, 3, 4]);
    });
});
\ No newline at end of file
/**
* Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
/**
* Adapted from Parsimmon (https://github.com/jneen/parsimmon)
* Copyright (c) 2011-present J. Adkisson (http://jneen.net).
*/
/**
 * A parser that produces a value of type `A`.
 *
 * A parser wraps an action `(input, i) => Result<A>` that either succeeds with a value and the
 * index to continue from, or fails with the furthest index reached and what was expected there.
 * The methods build new parsers out of existing ones; none of them modifies `this`.
 */
export class MonadicParser<A> {
    /** @param _ the raw parsing action run against the input at a given index */
    constructor(public _: MonadicParser.Action<A>) { }

    /** Runs this parser over the whole input (end of input is required) and reports success or failure. */
    parse(input: string): MonadicParser.ParseResult<A> {
        const reply = this.skip(MonadicParser.eof)._(input, 0);
        if (!reply.status) {
            return { success: false, index: makeLineColumnIndex(input, reply.furthest), expected: reply.expected };
        }
        return { success: true, value: reply.value };
    }

    /** Like `parse`, but returns the value directly and throws an `Error` with a formatted message on failure. */
    tryParse(str: string) {
        const outcome = this.parse(str);
        if (!outcome.success) {
            throw new Error(formatError(str, outcome));
        }
        return outcome.value;
    }

    /** Tries this parser and, if it fails, the alternative. */
    or<B>(alternative: MonadicParser<B>): MonadicParser<A | B> {
        const either = MonadicParser.alt(this, alternative);
        return either;
    }

    /** Requires `parser` both before and after this parser; yields this parser's value. */
    trim<B>(parser: MonadicParser<B>): MonadicParser<A> {
        const surrounded = this.wrap(parser, parser);
        return surrounded;
    }

    /** Requires `leftParser` before and `rightParser` after this parser; yields this parser's value. */
    wrap<L, R>(leftParser: MonadicParser<L>, rightParser: MonadicParser<R>): MonadicParser<A> {
        // Index 1 is the position of `this` in the sequence.
        const wrapped = seqPick(1, leftParser, this, rightParser);
        return wrapped;
    }

    /** Passes this parser to `wrapper` and returns whatever it returns. */
    thru<B>(wrapper: (p: MonadicParser<A>) => MonadicParser<B>) {
        const transformed = wrapper(this);
        return transformed;
    }

    /** Runs this parser, then `next`; yields the value of `next`. */
    then<B>(next: MonadicParser<B>): MonadicParser<B> {
        const sequenced = seqPick(1, this, next);
        return sequenced;
    }

    /**
     * Applies this parser repeatedly until it fails and yields the collected values.
     * Throws if an iteration succeeds without consuming any input.
     */
    many() {
        return new MonadicParser((input, start) => {
            const values: A[] = [];
            let position = start;
            let last: MonadicParser.Result<A> | undefined = void 0;
            for (; ;) {
                last = mergeReplies(this._(input, position), last);
                if (!last.status) {
                    // The failure only ends the repetition; it is merged in to keep its error info.
                    return mergeReplies(makeSuccess(position, values), last);
                }
                if (position === last.index) {
                    throw new Error('infinite loop detected in .many() parser --- calling .many() on a parser which can accept zero characters is usually the cause');
                }
                position = last.index;
                values.push(last.value);
            }
        });
    }

    /**
     * Applies this parser at least `min` and at most `_max` times (exactly `min` times when
     * `_max` is omitted) and yields the collected values.
     */
    times(min: number, _max?: number): MonadicParser<A[]> {
        const max = _max === undefined ? min : _max;
        return new MonadicParser((input, start) => {
            const values: A[] = [];
            let position = start;
            let merged: MonadicParser.Result<A> | undefined = void 0;
            let count = 0;

            // Mandatory repetitions: any failure fails the whole parser.
            while (count < min) {
                const reply = this._(input, position);
                merged = mergeReplies(reply, merged);
                if (!reply.status) {
                    return merged as any;
                }
                position = reply.index;
                values.push(reply.value);
                count++;
            }

            // Optional repetitions: a failure just stops collecting.
            while (count < max) {
                const reply = this._(input, position);
                merged = mergeReplies(reply, merged);
                if (!reply.status) {
                    break;
                }
                position = reply.index;
                values.push(reply.value);
                count += 1;
            }

            return mergeReplies(makeSuccess(position, values), merged);
        });
    }

    /** Replaces the value of this parser with the constant `res`. */
    result<B>(res: B) {
        const constant = () => res;
        return this.map(constant);
    }

    /** Applies this parser between 0 and `n` times. */
    atMost(n: number) {
        const bounded = this.times(0, n);
        return bounded;
    }

    /** Applies this parser `n` times and then as many more times as it keeps succeeding. */
    atLeast(n: number) {
        const required = this.times(n);
        const extra = this.many();
        return MonadicParser.seq(required, extra).map(([head, tail]) => [...head, ...tail]);
    }

    /** Transforms the value of this parser with `f`. */
    map<B>(f: (a: A) => B): MonadicParser<B> {
        return new MonadicParser((input, i) => {
            const reply = this._(input, i);
            if (reply.status) {
                return mergeReplies(makeSuccess(reply.index, f(reply.value)), reply);
            }
            return reply;
        });
    }

    /** Runs this parser, then `next`; yields the value of this parser. */
    skip<B>(next: MonadicParser<B>): MonadicParser<A> {
        const sequenced = seqPick(0, this, next);
        return sequenced;
    }

    /** Yields the value together with the positions where this parser started and ended. */
    mark(): MonadicParser<MonadicParser.Mark<A>> {
        const positioned = MonadicParser.seq(MonadicParser.index, this, MonadicParser.index);
        return positioned.map(([start, value, end]) => ({ start, value, end }));
    }

    /** Like `mark`, with an additional `name` field on the result. */
    node(name: string): MonadicParser<MonadicParser.Node<A>> {
        const positioned = MonadicParser.seq(MonadicParser.index, this, MonadicParser.index);
        return positioned.map(([start, value, end]) => ({ name, start, value, end }));
    }

    /** Zero or more occurrences of this parser separated by `separator`. */
    sepBy<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
        const list = MonadicParser.sepBy(this, separator);
        return list;
    }

    /** One or more occurrences of this parser separated by `separator`. */
    sepBy1<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
        const list = MonadicParser.sepBy1(this, separator);
        return list;
    }

    /** Requires `x` to match right after this parser, without consuming it. */
    lookahead<B>(x: MonadicParser<B>) {
        const guard = MonadicParser.lookahead(x);
        return this.skip(guard);
    }

    /** Requires `x` NOT to match right after this parser. */
    notFollowedBy<B>(x: MonadicParser<B>) {
        const guard = MonadicParser.notFollowedBy(x);
        return this.skip(guard);
    }

    /** On failure, replaces the list of expected items with the single label `expected`. */
    desc(expected: string) {
        return new MonadicParser((input, i) => {
            const outcome = this._(input, i);
            if (outcome.status) {
                return outcome;
            }
            // The failure object returned by the wrapped parser is updated in place.
            outcome.expected = [expected];
            return outcome;
        });
    }

    /** Yields `result` without consuming input when this parser fails. */
    fallback<B>(result: B) {
        const constant = MonadicParser.succeed(result);
        return this.or(constant);
    }

    /** Runs `other` (yielding a function) and then this parser, and applies the function to this parser's value. */
    ap<B>(other: MonadicParser<(x: A) => B>): MonadicParser<B> {
        return MonadicParser.seq(other, this).map(pair => pair[0](pair[1]));
    }

    /** Runs this parser and then the parser that `f` builds from its value. */
    chain<B>(f: (a: A) => MonadicParser<B>): MonadicParser<B> {
        return new MonadicParser<B>((input, i) => {
            const first = this._(input, i);
            if (first.status) {
                const continuation = f(first.value);
                return mergeReplies(continuation._(input, first.index), first);
            }
            return first as any;
        });
    }
}
/**
 * Types, primitive parsers and combinators that accompany the `MonadicParser` class.
 */
export namespace MonadicParser {
    /** Raw parsing action: parse `input` starting at offset `i`. */
    export type Action<T> = (input: string, i: number) => MonadicParser.Result<T>

    /** Outcome of `MonadicParser.parse`. */
    export type ParseResult<T> = ParseSuccess<T> | ParseFailure;

    export interface Index {
        /** zero-based character offset */
        offset: number;
        /** one-based line offset */
        line: number;
        /** one-based column offset */
        column: number;
    }

    export interface ParseSuccess<T> {
        success: true,
        value: T
    }

    export interface ParseFailure {
        success: false,
        index: Index,
        expected: string[],
    }

    export interface Mark<T> {
        start: Index;
        end: Index;
        value: T;
    }

    export interface Node<T> extends Mark<T> {
        name: string
    }

    /** Successful reply of an action: the value and the offset to continue from. */
    export interface Success<T> {
        status: true,
        value: T,
        index: number
    }

    /** Failed reply of an action: the furthest offset reached and what was expected there. */
    export interface Failure {
        status: false,
        furthest: number,
        expected: string[]
    }

    export type Result<T> = Success<T> | Failure

    // export function createLanguage(parsers: any) {
    //     const language: any = {};
    //     for (const key of Object.keys(parsers)) {
    //         (function (key) {
    //             language[key] = lazy(() => parsers[key](language));
    //         })(key);
    //     }
    //     return language;
    // }

    /** Runs the parsers one after another and yields all of their values as a tuple/array. */
    export function seq<A>(a: MonadicParser<A>): MonadicParser<[A]>
    export function seq<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<[A, B]>
    export function seq<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<[A, B, C]>
    export function seq<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<[A, B, C, D]>
    export function seq<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<[A, B, C, D, E]>
    export function seq<T>(...parsers: MonadicParser<T>[]): MonadicParser<T[]>
    export function seq(...parsers: MonadicParser<any>[]): MonadicParser<any[]> {
        const numParsers = parsers.length;
        return new MonadicParser<any[]>((input, index) => {
            let result: MonadicParser.Result<any> | undefined;
            let accum = new Array(numParsers);
            let i = index;
            for (let j = 0; j < numParsers; j++) {
                result = mergeReplies(parsers[j]._(input, i), result);
                if (!result.status) {
                    return result;
                }
                accum[j] = result.value;
                i = result.index;
            }
            return mergeReplies(makeSuccess(i, accum), result);
        });
    }

    /**
     * Tries the parsers in order, each at the same offset, and yields the value of the first
     * one that succeeds. With no parsers, the result always fails with 'zero alternates'.
     */
    export function alt<A>(a: MonadicParser<A>): MonadicParser<A>
    export function alt<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<A | B>
    export function alt<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<A | B | C>
    export function alt<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<A | B | C | D>
    export function alt<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<A | B | C | D | E>
    // The value is that of ONE alternative, so the variadic form yields T, not T[].
    export function alt<T>(...parsers: MonadicParser<T>[]): MonadicParser<T>
    export function alt(...parsers: MonadicParser<any>[]): MonadicParser<any> {
        const numParsers = parsers.length;
        if (numParsers === 0) {
            return fail('zero alternates');
        }
        return new MonadicParser((input, i) => {
            let result: MonadicParser.Result<any> | undefined;
            for (let j = 0; j < parsers.length; j++) {
                result = mergeReplies(parsers[j]._(input, i), result);
                if (result.status) {
                    return result;
                }
            }
            return result!;
        });
    }

    /** Zero or more occurrences of `parser` separated by `separator`; yields the parsed values. */
    export function sepBy<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>): MonadicParser<A[]> {
        return sepBy1(parser, separator).or(succeed([]));
    }

    /** One or more occurrences of `parser` separated by `separator`; yields the parsed values. */
    export function sepBy1<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>) {
        const pairs = separator.then(parser).many();
        return seq(parser, pairs).map(r => [r[0], ...r[1]]);
    }

    /** Matches exactly `str` at the current offset and yields it. */
    export function string(str: string) {
        const expected = `'${str}'`;
        return new MonadicParser((input, i) => {
            const j = i + str.length;
            const head = input.slice(i, j);
            if (head === str) {
                return makeSuccess(j, head);
            } else {
                return makeFailure(i, expected);
            }
        });
    }

    /** Extracts the flag characters from the string form of a regular expression. */
    function flags(re: RegExp) {
        const s = '' + re;
        return s.slice(s.lastIndexOf('/') + 1);
    }

    /**
     * Builds a copy of `re` that only matches at the start of its input.
     *
     * The `g` and `y` flags are dropped: with them `exec` resumes from `lastIndex`, so a
     * regex reused across calls would start matching at a stale position.
     */
    function anchoredRegexp(re: RegExp) {
        return RegExp('^(?:' + re.source + ')', flags(re).replace(/[gy]/g, ''));
    }

    /**
     * Matches `re` at the current offset.
     *
     * @param re the regular expression; it is anchored to the current offset internally
     * @param groupNumber capture group whose text is yielded (default 0, the whole match);
     *        the offset always advances past the whole match
     */
    export function regexp(re: RegExp, groupNumber?: number) {
        const anchored = anchoredRegexp(re);
        const expected = '' + re;
        const group = groupNumber || 0;
        return new MonadicParser(function (input, i) {
            const match = anchored.exec(input.slice(i));
            if (match) {
                // Valid group indices are 0 .. match.length - 1.
                if (0 <= group && group < match.length) {
                    const fullMatch = match[0];
                    const groupMatch = match[group];
                    return makeSuccess(i + fullMatch.length, groupMatch);
                }
                const message = `invalid match group (0 to ${match.length - 1}) in ${expected}`;
                return makeFailure(i, message);
            }
            return makeFailure(i, expected);
        });
    }

    /** Always succeeds with `value` without consuming input. */
    export function succeed<A>(value: A) {
        return new MonadicParser((input, i) => makeSuccess(i, value));
    }

    /** Always fails, reporting `expected`. */
    export function fail(expected: string): MonadicParser<any> {
        return new MonadicParser((input, i) => makeFailure(i, expected));
    }

    /**
     * Succeeds with `null`, consuming nothing, if `x` matches at the current offset.
     * Strings and regular expressions are converted with `string` / `regexp`.
     */
    export function lookahead<A>(x: MonadicParser<A> | string | RegExp): MonadicParser<null> {
        if (isParser(x)) {
            return new MonadicParser((input, i) => {
                const result = x._(input, i);
                if (result.status) {
                    // Rewind to the starting offset and discard the value.
                    result.index = i;
                    result.value = null as any;
                }
                return result as any;
            });
        } else if (typeof x === 'string') {
            return lookahead(string(x));
        } else if (x instanceof RegExp) {
            return lookahead(regexp(x));
        }
        throw new Error('not a string, regexp, or parser: ' + x);
    }

    /** Succeeds with `null`, consuming nothing, if `parser` does NOT match at the current offset. */
    export function notFollowedBy<A>(parser: MonadicParser<A>): MonadicParser<null> {
        return new MonadicParser((input, i) => {
            const result = parser._(input, i);
            return result.status
                ? makeFailure(i, 'not "' + input.slice(i, result.index) + '"')
                : makeSuccess(i, null);
        });
    }

    /**
     * Matches a single character for which `predicate` returns true.
     * The failure message contains the string form of the predicate function.
     */
    export function test(predicate: (char: string) => boolean): MonadicParser<string> {
        return new MonadicParser((input, i) => {
            const char = input.charAt(i);
            if (i < input.length && predicate(char)) {
                return makeSuccess(i + 1, char);
            } else {
                return makeFailure(i, 'a character ' + predicate);
            }
        });
    }

    /** Matches a single character contained in `str`. */
    export function oneOf(str: string) {
        return test(ch => str.indexOf(ch) >= 0);
    }

    /** Matches a single character not contained in `str`. */
    export function noneOf(str: string) {
        return test(ch => str.indexOf(ch) < 0);
    }

    /** Matches a single character between `begin` and `end` (inclusive, by string comparison). */
    export function range(begin: string, end: string) {
        return test(ch => begin <= ch && ch <= end).desc(begin + '-' + end);
    }

    /** Consumes characters while `predicate` holds and yields them; never fails (may yield ''). */
    export function takeWhile(predicate: (ch: string) => boolean) {
        return new MonadicParser((input, i) => {
            let j = i;
            while (j < input.length && predicate(input.charAt(j))) {
                j++;
            }
            return makeSuccess(j, input.slice(i, j));
        });
    }

    /**
     * Defers building a parser until it is first used, which allows recursive definitions.
     * On first use the placeholder's action is replaced by the real one, so `f` is not
     * called again through this parser.
     */
    export function lazy<T>(f: () => MonadicParser<T>) {
        const parser = new MonadicParser((input, i) => {
            const a = f()._;
            parser._ = a;
            return a(input, i);
        });
        return parser;
    }

    /** A parser that always fails, reporting 'empty'. */
    export function empty() {
        return fail('empty');
    }

    /** Yields the current position (offset, line, column) without consuming input. */
    export const index = new MonadicParser(function (input, i) {
        return makeSuccess(i, makeLineColumnIndex(input, i));
    });

    /** Matches any single character; fails at the end of the input. */
    export const anyChar = new MonadicParser<string>((input, i) => {
        if (i >= input.length) {
            return makeFailure(i, 'any character');
        }
        return makeSuccess(i + 1, input.charAt(i));
    });

    /** Consumes and yields the rest of the input. */
    export const all = new MonadicParser(function (input, i) {
        return makeSuccess(input.length, input.slice(i));
    });

    /** Succeeds with `null` only at the end of the input. */
    export const eof = new MonadicParser(function (input, i) {
        if (i < input.length) {
            return makeFailure(i, 'EOF');
        }
        return makeSuccess(i, null);
    });

    export const digit = regexp(/[0-9]/).desc('a digit');
    export const digits = regexp(/[0-9]*/).desc('optional digits');
    export const letter = regexp(/[a-z]/i).desc('a letter');
    export const letters = regexp(/[a-z]*/i).desc('optional letters');
    export const optWhitespace = regexp(/\s*/).desc('optional whitespace');
    export const whitespace = regexp(/\s+/).desc('whitespace');
    export const cr = string('\r');
    export const lf = string('\n');
    export const crlf = string('\r\n');
    // crlf is tried first so that '\r\n' is consumed as one newline.
    export const newline = alt(crlf, lf, cr).desc('newline');
    export const end = alt(newline, eof);
}
/**
 * Runs `parsers` one after another, like `MonadicParser.seq`, but yields only the value
 * of the parser at position `idx` instead of collecting all values.
 */
function seqPick(idx: number, ...parsers: MonadicParser<any>[]): MonadicParser<any> {
    const count = parsers.length;
    return new MonadicParser<any[]>((input, start) => {
        let merged: MonadicParser.Result<any> | undefined;
        let chosen: any;
        let position = start;
        let step = 0;
        while (step < count) {
            merged = mergeReplies(parsers[step]._(input, position), merged);
            if (!merged.status) {
                // The first failure aborts the whole sequence.
                return merged;
            }
            if (step === idx) {
                chosen = merged.value;
            }
            position = merged.index;
            step++;
        }
        return mergeReplies(makeSuccess(position, chosen), merged);
    });
}
/** Builds a successful reply carrying `value` and the offset `index` to continue from. */
function makeSuccess<T>(index: number, value: T): MonadicParser.Success<T> {
    const reply: MonadicParser.Success<T> = { status: true, index: index, value: value };
    return reply;
}
/** Builds a failed reply at offset `index` with a single expected item. */
function makeFailure(index: number, expected: string): MonadicParser.Failure {
    const reply: MonadicParser.Failure = { status: false, furthest: index, expected: [expected] };
    return reply;
}
/**
 * Combines a new reply with the previous one so that failure information is not lost.
 *
 * `result` is returned unchanged unless both replies are failures and `last` got at least
 * as far. In that case a new failure is built at `last.furthest`; its expected list is the
 * union of both lists when the two offsets are equal, and `last.expected` otherwise.
 */
function mergeReplies<A, B>(result: MonadicParser.Result<A>, last?: MonadicParser.Result<B>): MonadicParser.Result<A> {
    if (!last) {
        return result;
    }
    if (result.status || last.status) {
        return result;
    }
    if (result.furthest > last.furthest) {
        return result;
    }

    let expected: string[];
    if (result.furthest === last.furthest) {
        expected = unsafeUnion(result.expected, last.expected);
    } else {
        expected = last.expected;
    }
    return { status: result.status, furthest: last.furthest, expected };
}
/**
 * Converts the character offset `i` into an offset/line/column triple.
 * The offset is zero-based; line and column are one-based. Lines are separated by '\n'.
 */
function makeLineColumnIndex(input: string, i: number): MonadicParser.Index {
    const consumed = input.slice(0, i);
    let line = 1;
    let lineStart = 0;
    for (let k = 0; k < consumed.length; k++) {
        if (consumed.charAt(k) === '\n') {
            line++;
            lineStart = k + 1;
        }
    }
    // Column counts the characters consumed on the current line, plus one.
    const column = consumed.length - lineStart + 1;
    return { offset: i, line: line, column: column };
}
/** Renders the expected items of a failure: the item itself if there is exactly one, otherwise a 'one of …' list. */
function formatExpected(expected: string[]) {
    return expected.length === 1
        ? expected[0]
        : 'one of ' + expected.join(', ');
}
/**
 * Renders what was found at the failure position: either a note that the input ended, or
 * the line/column and up to 12 characters of the input from that position, with ellipses
 * marking text cut off on either side.
 */
function formatGot(input: string, error: MonadicParser.ParseFailure) {
    const position = error.index;
    const offset = position.offset;
    if (offset === input.length) {
        return ', got the end of the input';
    }

    const prefix = offset > 0 ? '\'...' : '\'';
    const suffix = input.length - offset > 12 ? '...\'' : '\'';
    const snippet = input.slice(offset, offset + 12);
    const parts = [' at line ', position.line, ' column ', position.column, ', got ', prefix, snippet, suffix];
    return parts.join('');
}
/** Builds the full error message for a failed parse: what was expected, followed by what was found. */
function formatError(input: string, error: MonadicParser.ParseFailure) {
    const expectedPart = formatExpected(error.expected);
    const gotPart = formatGot(input, error);
    return 'expected ' + expectedPart + gotPart;
}
/**
 * Returns the sorted union of two string lists without duplicates.
 *
 * When one list is empty the OTHER list is returned as-is (same array instance, not
 * copied, sorted or de-duplicated), so callers must not mutate the result.
 */
function unsafeUnion(xs: string[], ys: string[]) {
    if (xs.length === 0) {
        return ys;
    }
    if (ys.length === 0) {
        return xs;
    }

    const seen = new Set<string>();
    const merged: string[] = [];
    for (const list of [xs, ys]) {
        for (const item of list) {
            if (seen.has(item)) {
                continue;
            }
            seen.add(item);
            merged.push(item);
        }
    }
    merged.sort();
    return merged;
}
/** Type guard: true when `obj` is an instance of `MonadicParser`. */
function isParser(obj: any): obj is MonadicParser<any> {
    const isInstance = obj instanceof MonadicParser;
    return isInstance;
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment