Skip to content
Snippets Groups Projects
monadic-parser.ts 18.85 KiB
/**
 * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 */

/**
 * Adapted from Parsimmon (https://github.com/jneen/parsimmon)
 * Copyright (c) 2011-present J. Adkisson (http://jneen.net).
 */

export class MonadicParser<A> {
    constructor(public _: MonadicParser.Action<A>) { }

    parse(input: string): MonadicParser.ParseResult<A> {
        const result = this.skip(MonadicParser.eof)._(input, 0);
        if (result.status) {
            return { success: true, value: result.value };
        }
        return { success: false, index: makeLineColumnIndex(input, result.furthest), expected: result.expected };
    };

    tryParse(str: string) {
        const result = this.parse(str);
        if (result.success) {
            return result.value;
        } else {
            const msg = formatError(str, result);
            const err = new Error(msg);
            throw err;
        }
    }

    or<B>(alternative: MonadicParser<B>): MonadicParser<A | B> {
        return MonadicParser.alt(this, alternative);
    }

    trim<B>(parser: MonadicParser<B> | string): MonadicParser<A> {
        return this.wrap(parser, parser);
    }

    wrap<L, R>(leftParser: MonadicParser<L> | string, rightParser: MonadicParser<R> | string): MonadicParser<A> {
        return seqPick(1,
            typeof leftParser === 'string' ? MonadicParser.string(leftParser) : leftParser,
            this,
            typeof rightParser === 'string' ? MonadicParser.string(rightParser) : rightParser);
    }

    thru<B>(wrapper: (p: MonadicParser<A>) => MonadicParser<B>) {
        return wrapper(this);
    }

    then<B>(next: MonadicParser<B>): MonadicParser<B> {
        return seqPick(1, this, next);
    }

    many() {
        return new MonadicParser((input, i) => {
            const accum: A[] = [];
            let result: MonadicParser.Result<A> | undefined = void 0;

            while (true) {
                result = mergeReplies(this._(input, i), result);
                if (result.status) {
                    if (i === result.index) {
                        throw new Error('infinite loop detected in .many() parser --- calling .many() on a parser which can accept zero characters is usually the cause');
                    }
                    i = result.index;
                    accum.push(result.value);
                } else {
                    return mergeReplies(makeSuccess(i, accum), result);
                }
            }
        });
    };

    times(min: number, _max?: number): MonadicParser<A[]> {
        const max = typeof _max === 'undefined' ? min : _max;
        return new MonadicParser((input, i) => {
            const accum: A[] = [];
            let result: MonadicParser.Result<A> | undefined = void 0;
            let prevResult: MonadicParser.Result<A> | undefined = void 0;
            let times: number;
            for (times = 0; times < min; times++) {
                result = this._(input, i);
                prevResult = mergeReplies(result, prevResult);
                if (result.status) {
                    i = result.index;
                    accum.push(result.value);
                } else {
                    return prevResult as any;
                }
            }
            for (; times < max; times += 1) {
                result = this._(input, i);
                prevResult = mergeReplies(result, prevResult);
                if (result.status) {
                    i = result.index;
                    accum.push(result.value);
                } else {
                    break;
                }
            }
            return mergeReplies(makeSuccess(i, accum), prevResult);
        });
    };

    result<B>(res: B) {
        return this.map(() => res);
    };

    atMost(n: number) {
        return this.times(0, n);
    };

    atLeast(n: number) {
        return MonadicParser.seq(this.times(n), this.many()).map(r => [...r[0], ...r[1]]);
    };

    map<B>(f: (a: A) => B): MonadicParser<B> {
        return new MonadicParser((input, i) => {
            const result = this._(input, i);
            if (!result.status) {
                return result;
            }
            return mergeReplies(makeSuccess(result.index, f(result.value)), result);
        });
    }

    skip<B>(next: MonadicParser<B>): MonadicParser<A> {
        return seqPick(0, this, next);
    }

    mark(): MonadicParser<MonadicParser.Mark<A>> {
        return MonadicParser.seq(MonadicParser.index, this, MonadicParser.index).map(r => ({ start: r[0], value: r[1], end: r[2] }));
    }

    node(name: string): MonadicParser<MonadicParser.Node<A>> {
        return MonadicParser.seq(MonadicParser.index, this, MonadicParser.index).map(r => ({ name, start: r[0], value: r[1], end: r[2] }));
    };

    sepBy<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
        return MonadicParser.sepBy(this, separator);
    }

    sepBy1<B>(separator: MonadicParser<B>): MonadicParser<A[]> {
        return MonadicParser.sepBy1(this, separator);
    }

    lookahead<B>(x: MonadicParser<B>) {
        return this.skip(MonadicParser.lookahead(x));
    };

    notFollowedBy<B>(x: MonadicParser<B>) {
        return this.skip(MonadicParser.notFollowedBy(x));
    };

    desc(expected: string) {
        return new MonadicParser((input, i) => {
            const reply = this._(input, i);
            if (!reply.status) {
                reply.expected = [expected];
            }
            return reply;
        });
    };

    fallback<B>(result: B) {
        return this.or(MonadicParser.succeed(result));
    };

    ap<B>(other: MonadicParser<(x: A) => B>): MonadicParser<B> {
        return MonadicParser.seq(other, this).map(([f, x]) => f(x));
    };

    chain<B>(f: (a: A) => MonadicParser<B>): MonadicParser<B> {
        return new MonadicParser<B>((input, i) => {
            const result = this._(input, i);
            if (!result.status) {
                return result as any;
            }
            const nextParser = f(result.value);
            return mergeReplies(nextParser._(input, result.index), result);
        });
    };
}

export namespace MonadicParser {
    export type Action<T> = (input: string, i: number) => MonadicParser.Result<T>

    export type ParseResult<T> = ParseSuccess<T> | ParseFailure;

    export interface Index {
        /** zero-based character offset */
        offset: number;
        /** one-based line offset */
        line: number;
        /** one-based column offset */
        column: number;
    }

    export interface ParseSuccess<T> {
        success: true,
        value: T
    }

    export interface ParseFailure {
        success: false,
        index: Index,
        expected: string[],
    }

    export interface Mark<T> {
        start: Index;
        end: Index;
        value: T;
    }

    export interface Node<T> extends Mark<T> {
        name: string
    }

    export interface Success<T> {
        status: true,
        value: T,
        index: number
    }

    export interface Failure {
        status: false,
        furthest: number,
        expected: string[]
    }

    export type Result<T> = Success<T> | Failure

    // export function createLanguage(parsers: any) {
    //     const language: any = {};
    //     for (const key of Object.keys(parsers)) {
    //         (function (key) {
    //             language[key] = lazy(() => parsers[key](language));
    //         })(key);
    //     }
    //     return language;
    // }

    export function seq<A>(a: MonadicParser<A>): MonadicParser<[A]>
    export function seq<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<[A, B]>
    export function seq<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<[A, B, C]>
    export function seq<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<[A, B, C, D]>
    export function seq<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<[A, B, C, D, E]>
    export function seq<T>(...parsers: MonadicParser<T>[]): MonadicParser<T[]>
    export function seq(...parsers: MonadicParser<any>[]): MonadicParser<any[]> {
        const numParsers = parsers.length;
        return new MonadicParser<any[]>((input, index) => {
            let result: MonadicParser.Result<any> | undefined;
            let accum = new Array(numParsers);
            let i = index;
            for (let j = 0; j < numParsers; j++) {
                result = mergeReplies(parsers[j]._(input, i), result);
                if (!result.status) {
                    return result;
                }
                accum[j] = result.value;
                i = result.index;
            }
            return mergeReplies(makeSuccess(i, accum), result);
        });
    }

    export function alt<A>(a: MonadicParser<A>): MonadicParser<A>
    export function alt<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<A | B>
    export function alt<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<A | B | C>
    export function alt<A, B, C, D>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>): MonadicParser<A | B | C | D>
    export function alt<A, B, C, D, E>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>, d: MonadicParser<D>, e: MonadicParser<E>): MonadicParser<A | B | C | D | E>
    export function alt<T>(...parsers: MonadicParser<T>[]): MonadicParser<T[]>
    export function alt(...parsers: MonadicParser<any>[]): MonadicParser<any> {
        const numParsers = parsers.length;
        if (numParsers === 0) {
            return fail('zero alternates');
        }
        return new MonadicParser((input, i) => {
            let result: MonadicParser.Result<any> | undefined;
            for (let j = 0; j < parsers.length; j++) {
                result = mergeReplies(parsers[j]._(input, i), result);
                if (result.status) {
                    return result;
                }
            }
            return result!;
        });
    }

    export function sepBy<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>): MonadicParser<A[]> {
        return sepBy1(parser, separator).or(succeed([]));
    }

    export function sepBy1<A, B>(parser: MonadicParser<A>, separator: MonadicParser<B>) {
        const pairs = separator.then(parser).many();
        return seq(parser, pairs).map(r => [r[0], ...r[1]]);
    }

    export function string(str: string) {
        const expected = `'${str}'`;
        if (str.length === 1) {
            const code = str.charCodeAt(0);
            return new MonadicParser((input, i) => input.charCodeAt(i) === code ? makeSuccess(i + 1, str) : makeFailure(i, expected));
        }

        return new MonadicParser((input, i) => {
            const j = i + str.length;
            if (input.slice(i, j) === str) return makeSuccess(j, str);
            else return makeFailure(i, expected);
        });
    }

    function flags(re: RegExp) {
        const s = '' + re;
        return s.slice(s.lastIndexOf('/') + 1);
    }

    function anchoredRegexp(re: RegExp) {
        return RegExp('^(?:' + re.source + ')', flags(re));
    }

    export function regexp(re: RegExp, groupNumber?: number) {
        const anchored = anchoredRegexp(re);
        const expected = '' + re;
        const group = groupNumber || 0;
        return new MonadicParser(function (input, i) {
            const match = anchored.exec(input.slice(i));
            if (match) {
                if (0 <= group && group <= match.length) {
                    const fullMatch = match[0];
                    const groupMatch = match[group];
                    return makeSuccess(i + fullMatch.length, groupMatch);
                }
                const message = `invalid match group (0 to ${match.length}) in ${expected}`;
                return makeFailure(i, message);
            }
            return makeFailure(i, expected);
        });
    }

    export function succeed<A>(value: A) {
        return new MonadicParser((input, i) => makeSuccess(i, value));
    }

    export function fail(expected: string): MonadicParser<any> {
        return new MonadicParser((input, i) => makeFailure(i, expected));
    }

    export function lookahead<A>(x: MonadicParser<A> | string | RegExp): MonadicParser<null> {
        if (isParser(x)) {
            return new MonadicParser((input, i) => {
                const result = x._(input, i);
                if (result.status) {
                    result.index = i;
                    result.value = null as any;
                }
                return result as any;
            });
        } else if (typeof x === 'string') {
            return lookahead(string(x));
        } else if (x instanceof RegExp) {
            return lookahead(regexp(x));
        }
        throw new Error('not a string, regexp, or parser: ' + x);
    }

    export function notFollowedBy<A>(parser: MonadicParser<A>): MonadicParser<null> {
        return new MonadicParser((input, i) => {
            const result = parser._(input, i);
            return result.status
                ? makeFailure(i, 'not "' + input.slice(i, result.index) + '"')
                : makeSuccess(i, null);
        });
    }

    export function test(predicate: (char: string) => boolean): MonadicParser<string> {
        return new MonadicParser((input, i) => {
            const char = input.charAt(i);
            if (i < input.length && predicate(char)) {
                return makeSuccess(i + 1, char);
            } else {
                return makeFailure(i, 'a character ' + predicate);
            }
        });
    }

    export function oneOf(str: string) {
        return test(ch => str.indexOf(ch) >= 0);
    }

    export function noneOf(str: string) {
        return test(ch => str.indexOf(ch) < 0);
    }

    export function range(begin: string, end: string) {
        return test(ch => begin <= ch && ch <= end).desc(begin + '-' + end);
    }

    export function takeWhile(predicate: (ch: string) => boolean) {
        return new MonadicParser((input, i) => {
            let j = i;
            while (j < input.length && predicate(input.charAt(j))) {
                j++;
            }
            return makeSuccess(j, input.slice(i, j));
        });
    }

    export function lazy<T>(f: () => MonadicParser<T>) {
        const parser = new MonadicParser((input, i) => {
            const a = f()._;
            parser._ = a;
            return a(input, i);
        });
        return parser;
    }

    export function empty() {
        return fail('empty');
    }

    export const index = new MonadicParser(function (input, i) {
        return makeSuccess(i, makeLineColumnIndex(input, i));
    });

    export const anyChar = new MonadicParser<string>((input, i) => {
        if (i >= input.length) {
            return makeFailure(i, 'any character');
        }
        return makeSuccess(i + 1, input.charAt(i));
    });

    export const all = new MonadicParser(function (input, i) {
        return makeSuccess(input.length, input.slice(i));
    });

    export const eof = new MonadicParser(function (input, i) {
        if (i < input.length) {
            return makeFailure(i, 'EOF');
        }
        return makeSuccess(i, null);
    });

    export const digit = regexp(/[0-9]/).desc('a digit');
    export const digits = regexp(/[0-9]*/).desc('optional digits');
    export const letter = regexp(/[a-z]/i).desc('a letter');
    export const letters = regexp(/[a-z]*/i).desc('optional letters');
    export const optWhitespace = regexp(/\s*/).desc('optional whitespace');
    export const whitespace = regexp(/\s+/).desc('whitespace');
    export const cr = string('\r');
    export const lf = string('\n');
    export const crlf = string('\r\n');
    export const newline = alt(crlf, lf, cr).desc('newline');
    export const end = alt(newline, eof);
}

function seqPick(idx: number, ...parsers: MonadicParser<any>[]): MonadicParser<any> {
    const numParsers = parsers.length;
    return new MonadicParser<any[]>((input, index) => {
        let result: MonadicParser.Result<any> | undefined;
        let picked: any;
        let i = index;
        for (let j = 0; j < numParsers; j++) {
            result = mergeReplies(parsers[j]._(input, i), result);
            if (!result.status) {
                return result;
            }
            if (idx === j) picked = result.value;
            i = result.index;
        }
        return mergeReplies(makeSuccess(i, picked), result);
    });
}

function makeSuccess<T>(index: number, value: T): MonadicParser.Success<T> {
    return { status: true, index, value };
}

function makeFailure(index: number, expected: string): MonadicParser.Failure {
    return { status: false, furthest: index, expected: [expected] };
}

function mergeReplies<A, B>(result: MonadicParser.Result<A>, last?: MonadicParser.Result<B>): MonadicParser.Result<A> {
    if (!last || result.status || last.status || result.furthest > last.furthest) {
        return result;
    }
    const expected = result.furthest === last.furthest
        ? unsafeUnion(result.expected, last.expected)
        : last.expected;
    return { status: result.status, furthest: last.furthest, expected };
}

function makeLineColumnIndex(input: string, i: number): MonadicParser.Index {
    const lines = input.slice(0, i).split('\n');
    // Note that unlike the character offset, the line and column offsets are
    // 1-based.
    const lineWeAreUpTo = lines.length;
    const columnWeAreUpTo = lines[lines.length - 1].length + 1;
    return { offset: i, line: lineWeAreUpTo, column: columnWeAreUpTo };
}

function formatExpected(expected: string[]) {
    if (expected.length === 1) {
        return expected[0];
    }
    return 'one of ' + expected.join(', ');
}

function formatGot(input: string, error: MonadicParser.ParseFailure) {
    const index = error.index;
    const i = index.offset;
    if (i === input.length) {
        return ', got the end of the input';
    }
    const prefix = i > 0 ? '\'...' : '\'';
    const suffix = input.length - i > 12 ? '...\'' : '\'';
    return ` at line ${index.line} column ${index.column}, got ${prefix}${input.slice(i, i + 12)}${suffix}`;
}

function formatError(input: string, error: MonadicParser.ParseFailure) {
    return `expected ${formatExpected(error.expected)}${formatGot(input, error)}`;
}

function unsafeUnion(xs: string[], ys: string[]) {
    const xn = xs.length;
    const yn = ys.length;
    if (xn === 0) return ys;
    else if (yn === 0) return xs;

    const set = new Set<string>();
    const ret: string[] = [];
    for (let i = 0; i < xn; i++) {
        if (!set.has(xs[i])) {
            ret[ret.length] = xs[i];
            set.add(xs[i]);
        }
    }
    for (let i = 0; i < yn; i++) {
        if (!set.has(ys[i])) {
            ret[ret.length] = ys[i];
            set.add(ys[i]);
        }
    }
    ret.sort();
    return ret;
}

function isParser(obj: any): obj is MonadicParser<any> {
    return obj instanceof MonadicParser;
}