diff --git a/src/mol-script/transpilers/helper.ts b/src/mol-script/transpilers/helper.ts
new file mode 100644
index 0000000000000000000000000000000000000000..084568861689af8f110b9b753b6d1804a23ec8a7
--- /dev/null
+++ b/src/mol-script/transpilers/helper.ts
@@ -0,0 +1,394 @@
+/**
+ * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
+//import * as Q from 'parsimmon';
+import * as P from '../../mol-util/monadic-parser';
+import { MolScriptBuilder } from '../../mol-script/language/builder';
+const B = MolScriptBuilder;
+import { Expression } from '../language/expression';
+import { KeywordDict, PropertyDict, FunctionDict, OperatorList } from './types';
+
+/**
+ * Escapes every regular-expression metacharacter in `s` with a backslash so the
+ * result can be embedded in a `RegExp` source and match literally.
+ *
+ * @param s text to escape; still coerced with `String()` at runtime so that
+ *          untyped callers passing a non-string keep working
+ * @returns the escaped text
+ */
+export function escapeRegExp(s: string) {
+    return String(s).replace(/[\\^$*+?.()|[\]{}]/g, '\\$&');
+}
+
+// Takes a parser for the prefix operator, and a parser for the base thing being
+// parsed, and parses as many occurrences as possible of the prefix operator.
+// Note that the parser is created using `P.lazy` because it's recursive. It's
+// valid for there to be zero occurrences of the prefix operator.
+// `mapFn` is called with the two results of `seq(opParser, parser)` spread as
+// arguments, i.e. `mapFn(op, operand)`.
+export function prefix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
+    const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() => {
+        return P.MonadicParser.seq(opParser, parser)
+            .map(x => mapFn(...x))
+            .or(nextParser);
+    });
+    return parser;
+}
+
+// Ideally this function would be just like `PREFIX` but reordered like
+// `P.seq(parser, opParser).or(nextParser)`, but that doesn't work. The
+// reason for that is that Parsimmon will get stuck in infinite recursion, since
+// the very first rule inside `parser` is to match parser again.
Alternatively,
+// you might think to try `nextParser.or(P.seq(parser, opParser))`, but
+// that won't work either because in a call to `.or` (aka `P.alt`), Parsimmon
+// takes the first possible match, even if subsequent matches are longer, so the
+// parser will never actually look far enough ahead to see the postfix
+// operators.
+// `mapFn` is called as `mapFn(op, acc)` once per matched postfix operator.
+export function postfix(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
+    // Because we can't use recursion like stated above, we just match a flat list
+    // of as many occurrences of the postfix operator as possible, then use
+    // `.reduce` to manually nest the list.
+    //
+    // Example:
+    //
+    // INPUT  :: "4!!!"
+    // PARSE  :: [4, "factorial", "factorial", "factorial"]
+    // REDUCE :: ["factorial", ["factorial", ["factorial", 4]]]
+    //
+    // `seq(...).map(...)` is used instead of `seqMap(...)`: it relies only on
+    // the `seq` and `map` combinators that `prefix` already uses.
+    return P.MonadicParser.seq(nextParser, opParser.many())
+        .map((parts: any) => {
+            const operand = parts[0];
+            const suffixes: any[] = parts[1];
+            return suffixes.reduce((acc: any, op: any) => mapFn(op, acc), operand);
+        });
+}
+
+// Takes a parser for all the operators at this precedence level, and a parser
+// that parses everything at the next precedence level, and returns a parser
+// that parses as many binary operations as possible, associating them to the
+// right. (e.g. 1^2^3 is 1^(2^3) not (1^2)^3)
+// `mapFn` is called as `mapFn(op, left, right)`, matching `binaryLeft`.
+export function binaryRight(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
+    const parser: P.MonadicParser<any> = P.MonadicParser.lazy(() =>
+        nextParser.chain(next =>
+            P.MonadicParser.seq(
+                opParser,
+                P.MonadicParser.of(next),
+                parser
+            ).map((x: any) => {
+                // Apply the level's map function like the other combinators do.
+                // Without a function, fall back to the raw [op, left, right] triple.
+                return typeof mapFn === 'function' ? mapFn(...x) : x;
+            }).or(P.MonadicParser.of(next))
+        )
+    );
+    return parser;
+}
+
+// Takes a parser for all the operators at this precedence level, and a parser
+// that parses everything at the next precedence level, and returns a parser
+// that parses as many binary operations as possible, associating them to the
+// left. (e.g.
1-2-3 is (1-2)-3 not 1-(2-3))
+// `mapFn` is called as `mapFn(op, acc, another)` for each operator, left to right.
+export function binaryLeft(opParser: P.MonadicParser<any>, nextParser: P.MonadicParser<any>, mapFn: any) {
+    // We run into a similar problem as with the `POSTFIX` parser above where we
+    // can't recurse in the direction we want, so we have to resort to parsing an
+    // entire list of operator chunks and then using `.reduce` to manually nest
+    // them again.
+    //
+    // Example:
+    //
+    // INPUT  :: "1+2+3"
+    // PARSE  :: [1, ["+", 2], ["+", 3]]
+    // REDUCE :: ["+", ["+", 1, 2], 3]
+    //
+    // `seq(...).map(...)` replaces `seqMap(...)`; only `seq`, `many` and `map`
+    // are needed from the parser library.
+    return P.MonadicParser.seq(
+        nextParser,
+        P.MonadicParser.seq(opParser, nextParser).many()
+    ).map((parts: any) => {
+        const first = parts[0];
+        const rest: any[] = parts[1];
+        return rest.reduce((acc: any, ch: any) => {
+            const [op, another] = ch;
+            return mapFn(op, acc, another);
+        }, first);
+    });
+}
+
+/**
+ * combine operators of decreasing binding strength
+ *
+ * Folds `opList` over `rule`: each level wraps the parser built so far via
+ * `level.type(level.rule, acc, map)`. A level flagged `isUnsupported` gets a
+ * map function that throws instead of `level.map`.
+ */
+export function combineOperators(opList: any[], rule: P.MonadicParser<any>) {
+    const x = opList.reduce(
+        (acc, level) => {
+            const map = level.isUnsupported ? makeError(`operator '${level.name}' not supported`) : level.map;
+            return level.type(level.rule, acc, map);
+        },
+        rule
+    );
+    return x;
+}
+
+/** Parser for `re` (capture `group`) with `whitespace` before and after it. */
+export function infixOp(re: RegExp, group: number = 0) {
+    return P.MonadicParser.whitespace.then(P.MonadicParser.regexp(re, group).skip(P.MonadicParser.whitespace));
+    // return P.optWhitespace.then(P.MonadicParser.regexp(re, group).lookahead(P.whitespace))
+    // return P.MonadicParser.regexp(re, group).skip(P.whitespace)
+}
+
+/** Parser for `re` (capture `group`) followed by `whitespace`. */
+export function prefixOp(re: RegExp, group: number = 0) {
+    return P.MonadicParser.regexp(re, group).skip(P.MonadicParser.whitespace);
+}
+
+/** Parser for `re` (capture `group`) preceded by `whitespace`. */
+export function postfixOp(re: RegExp, group: number = 0) {
+    return P.MonadicParser.whitespace.then(P.MonadicParser.regexp(re, group));
+}
+
+// export function functionOp (re: RegExp, rule: P.MonadicParser<any>) {
+//   return P.MonadicParser.regexp(re, group).wrap(P.string('('), P.string(')'))
+// }
+
+/**
+ * Infix parser for `<name> <number> OF` (case-insensitive); `short` is an
+ * optional alternative spelling that is regex-escaped. Yields the number,
+ * converted with `parseFloat`.
+ */
+export function ofOp(name: string, short?: string) {
+    const op = short ? `${name}|${escapeRegExp(short)}` : name;
+    const re = RegExp(`(${op})\\s+([-+]?[0-9]*\\.?[0-9]+)\\s+OF`, 'i');
+    return infixOp(re, 2).map(parseFloat);
+}
+
+/** Returns a function that throws `new Error(msg)` when called. */
+export function makeError(msg: string) {
+    return function () {
+        throw new Error(msg);
+    };
+}
+
+/** Logical AND of `selections`: the single element, an `and` node, or `undefined` if empty. */
+export function andExpr(selections: any[]) {
+    if (selections.length === 1) {
+        return selections[0];
+    } else if (selections.length > 1) {
+        return B.core.logic.and(selections);
+    } else {
+        return undefined;
+    }
+}
+
+/** Logical OR of `selections`: the single element, an `or` node, or `undefined` if empty. */
+export function orExpr(selections: any[]) {
+    if (selections.length === 1) {
+        return selections[0];
+    } else if (selections.length > 1) {
+        return B.core.logic.or(selections);
+    } else {
+        return undefined;
+    }
+}
+
+/**
+ * Builds a test expression for `property` from `args`. The accepted shapes are
+ * checked in this order: `{ op, val }` (relational operator), `{ flags }`,
+ * `{ min, max }` (range), a non-array value (equality), an array with more
+ * than one entry (set membership), any other array (equality with `args[0]`).
+ *
+ * @throws Error when `args.op` is not one of `=`, `!=`, `>`, `<`, `>=`, `<=`
+ */
+export function testExpr(property: any, args: any) {
+    if (args && args.op !== undefined && args.val !== undefined) {
+        const opArgs = [property, args.val];
+        switch (args.op) {
+            case '=': return B.core.rel.eq(opArgs);
+            case '!=': return B.core.rel.neq(opArgs);
+            case '>': return B.core.rel.gr(opArgs);
+            case '<': return B.core.rel.lt(opArgs);
+            case '>=': return B.core.rel.gre(opArgs);
+            case '<=': return B.core.rel.lte(opArgs);
+            default: throw new Error(`operator '${args.op}' not supported`);
+        }
+    } else if (args && args.flags !== undefined) {
+        return B.core.flags.hasAny([property, args.flags]);
+    } else if (args && args.min !== undefined && args.max !== undefined) {
+        return B.core.rel.inRange([property, args.min, args.max]);
+    } else if (!Array.isArray(args)) {
+        return B.core.rel.eq([property, args]);
+    } else if (args.length > 1) {
+        return B.core.set.has([B.core.type.set(args), property]);
+    } else {
+        return B.core.rel.eq([property, args[0]]);
+    }
+}
+
+/** Wraps `selection` in a `queryInSelection` over all atom groups with `'in-complement': true`. */
+export function invertExpr(selection: Expression) {
+    return B.struct.generator.queryInSelection({
+        0: selection, query: B.struct.generator.atomGroups(), 'in-complement': true
+    });
+}
+
+/**
+ * Sort comparator that orders strings longest first. Equal lengths compare as
+ * 0, so the comparator is consistent.
+ */
+export function strLenSortFn(a: string, b: string) {
+    return b.length - a.length;
+}
+
+/** Case-insensitive alternation of `name` and `abbr`, longest first, each regex-escaped. */
+function getNamesRegex(name: string, abbr?: string[]) {
+    const names = (abbr ? [name].concat(abbr) : [name])
+        .sort(strLenSortFn).map(escapeRegExp).join('|');
+    return RegExp(`${names}`, 'i');
+}
+
+/**
+ * Builds one value parser per non-numeric property, keyed by property name.
+ * Parsing a property flagged `isUnsupported` throws.
+ */
+export function getPropertyRules(properties: PropertyDict) {
+    // in keyof typeof properties
+    const propertiesDict: { [name: string]: P.MonadicParser<any> } = {};
+
+    Object.keys(properties).sort(strLenSortFn).forEach(name => {
+        const ps = properties[name];
+        const errorFn = makeError(`property '${name}' not supported`);
+        const rule = P.MonadicParser.regexp(ps.regex).map(x => {
+            if (ps.isUnsupported) errorFn();
+            return testExpr(ps.property, ps.map(x));
+        });
+
+        if (!ps.isNumeric) {
+            propertiesDict[name] = rule;
+        }
+    });
+
+    return propertiesDict;
+}
+
+/**
+ * Builds `<name> <value>` parsers for every property (numeric ones as
+ * `<name> <comparison-op> <value>`). Each result is wrapped in `atomGroups`
+ * under the property's `level` key.
+ */
+export function getNamedPropertyRules(properties: PropertyDict) {
+    const namedPropertiesList: P.MonadicParser<any>[] = [];
+
+    Object.keys(properties).sort(strLenSortFn).forEach(name => {
+        const ps = properties[name];
+        const errorFn = makeError(`property '${name}' not supported`);
+        const rule = P.MonadicParser.regexp(ps.regex).map(x => {
+            if (ps.isUnsupported) errorFn();
+            return testExpr(ps.property, ps.map(x));
+        });
+        const nameRule = P.MonadicParser.regexp(getNamesRegex(name, ps.abbr)).trim(P.MonadicParser.optWhitespace);
+        const groupMap = (x: any) => B.struct.generator.atomGroups({ [ps.level]: x });
+
+        if (ps.isNumeric) {
+            namedPropertiesList.push(
+                nameRule.then(P.MonadicParser.seq(
+                    P.MonadicParser.regexp(/>=|<=|=|!=|>|</).trim(P.MonadicParser.optWhitespace),
+                    P.MonadicParser.regexp(ps.regex).map(ps.map)
+                )).map(x => {
+                    if (ps.isUnsupported) errorFn();
+                    return testExpr(ps.property, { op: x[0], val: x[1] });
+                }).map(groupMap)
+            );
+        } else {
+            namedPropertiesList.push(nameRule.then(rule).map(groupMap));
+        }
+    });
+
+    return namedPropertiesList;
+}
+
+/** One parser per keyword (name or abbreviation); keywords without `map` throw when matched. */
+export function getKeywordRules(keywords: KeywordDict) {
+    const keywordsList: P.MonadicParser<any>[] = [];
+
+    Object.keys(keywords).sort(strLenSortFn).forEach(name => {
+        const ks = keywords[name];
+        const mapFn = ks.map ? ks.map : makeError(`keyword '${name}' not supported`);
+        const rule = P.MonadicParser.regexp(getNamesRegex(name, ks.abbr)).map(mapFn);
+        keywordsList.push(rule);
+    });
+
+    return keywordsList;
+}
+
+/** One `name( <argRule> )` parser per function; functions without `map` throw when matched. */
+export function getFunctionRules(functions: FunctionDict, argRule: P.MonadicParser<any>) {
+    const functionsList: P.MonadicParser<any>[] = [];
+    const begRule = P.MonadicParser.regexp(/\(\s*/);
+    const endRule = P.MonadicParser.regexp(/\s*\)/);
+
+    Object.keys(functions).sort(strLenSortFn).forEach(name => {
+        const fs = functions[name];
+        const mapFn = fs.map ? fs.map : makeError(`function '${name}' not supported`);
+        const rule = P.MonadicParser.regexp(new RegExp(name, 'i')).skip(begRule).then(argRule).skip(endRule).map(mapFn);
+        functionsList.push(rule);
+    });
+
+    return functionsList;
+}
+
+//const rule = P.regex(getNamesRegex(name, ps.abbr)).lookahead(lookahead).map(() => {
+//    if (ps.isUnsupported) errorFn()
+//    return ps.property
+//})
+
+/** One parser per property name (or abbreviation) that must be followed by `lookahead`; yields `ps.property`. */
+export function getPropertyNameRules(properties: PropertyDict, lookahead: RegExp) {
+    const list: P.MonadicParser<any>[] = [];
+    Object.keys(properties).sort(strLenSortFn).forEach(name => {
+        const ps = properties[name];
+        const errorFn = makeError(`property '${name}' not supported`);
+        const rule = P.MonadicParser.regexp(getNamesRegex(name, ps.abbr)).lookahead(lookahead).map(() => {
+            if (ps.isUnsupported) errorFn();
+            return ps.property;
+        });
+        list.push(rule);
+    });
+
+    return list;
+}
+
+/** Collects the names and abbreviations of all properties, keywords and operators. */
+export function getReservedWords(properties: PropertyDict, keywords: KeywordDict, operators: OperatorList, functions?: FunctionDict) {
+    // NOTE(review): `functions` is accepted but never read, so function names
+    // are not part of the returned list - confirm whether that is intended.
+    const w: string[] = [];
+    for (const name in properties) {
+        w.push(name);
+        if (properties[name].abbr) w.push(...properties[name].abbr!);
+    }
+    for (const name in keywords) {
+        w.push(name);
+        if (keywords[name].abbr) w.push(...keywords[name].abbr!);
+    }
+    operators.forEach(o => {
+        w.push(o.name);
+        if (o.abbr) w.push(...o.abbr);
+    });
+    return w;
+}
+
+/** Set expression of `B.atomName(id)` for every id. */
+export function atomNameSet(ids: string[]) {
+    return B.core.type.set(ids.map(B.atomName));
+}
+
+/** Wraps `e` in a `queryInSelection` whose query is all atom groups. */
+export function asAtoms(e: Expression) {
+    return B.struct.generator.queryInSelection({
+        0: e,
+        query: B.struct.generator.atomGroups()
+    });
+}
+
+/**
+ * Converts a raw `value` into the expression form matching `property.head`
+ * (atom name, element symbol or secondary-structure flags). Values for any
+ * other property are returned unchanged. With `sstrucDict`, a
+ * secondary-structure value is first looked up by its upper-cased form,
+ * falling back to `'none'`.
+ */
+export function wrapValue(property: any, value: any, sstrucDict?: any) {
+    switch (property.head) {
+        case 'structure.atom-property.macromolecular.label_atom_id':
+            return B.atomName(value);
+        case 'structure.atom-property.core.element-symbol':
+            return B.es(value);
+        case 'structure.atom-property.macromolecular.secondary-structure-flags':
+            if (sstrucDict) {
+                value = [sstrucDict[value.toUpperCase()] || 'none'];
+            }
+            return B.struct.type.secondaryStructureFlags([value]);
+        default:
+            return value;
+    }
+}
+
+const propPrefix = 'structure.atom-property.macromolecular.';
+const entityProps = ['entityKey', 'label_entity_id', 'entityType'];
+const chainProps = ['chainKey', 'label_asym_id', 'label_entity_id', 'auth_asym_id', 'entityType'];
+const residueProps = ['residueKey', 'label_comp_id', 'label_seq_id', 'auth_comp_id', 'auth_seq_id', 'pdbx_formal_charge', 'secondaryStructureKey', 'secondaryStructureFlags', 'isModified', 'modifiedParentName'];
+/**
+ * Name of the `atomGroups` test slot for `property`: the first of entity,
+ * chain, residue whose list contains the part of `property.head` after
+ * `propPrefix`; `'atom-test'` otherwise.
+ */
+export function testLevel(property: any) {
+    if (property.head.startsWith(propPrefix)) {
+        const name = property.head.slice(propPrefix.length);
+        if (entityProps.indexOf(name) !== -1) return 'entity-test';
+        if (chainProps.indexOf(name) !== -1) return 'chain-test';
+        if (residueProps.indexOf(name) !== -1) return 'residue-test';
+    }
+    return 'atom-test';
+}
+
+const flagProps = [
+    'structure.atom-property.macromolecular.secondary-structure-flags'
+];
+/**
+ * Test of `property` against `values`. Flag properties merge the first
+ * argument of every value into one `hasAny` test; other properties use
+ * equality for one value and set membership for several. Returns `undefined`
+ * for an empty `values` on a non-flag property.
+ */
+export function valuesTest(property: any, values: any[]) {
+    if (flagProps.indexOf(property.head) !== -1) {
+        const name = values[0].head;
+        const flags: any[] = [];
+        values.forEach(v => flags.push(...v.args[0]));
+        return B.core.flags.hasAny([property, { head: name, args: flags }]);
+    } else {
+        if (values.length === 1) {
+            return B.core.rel.eq([property, values[0]]);
+        } else if (values.length > 1) {
+            return B.core.set.has([B.core.type.set(values), property]);
+        }
+    }
+}
+
+/** Atom groups whose `label_comp_id` is one of `resnameList`. */
+export function resnameExpr(resnameList: string[]) {
+    return B.struct.generator.atomGroups({
+        'residue-test': B.core.set.has([
+            B.core.type.set(resnameList),
+            B.ammp('label_comp_id')
+        ])
+    });
+}
diff --git a/src/mol-script/transpilers/pymol/examples.ts b/src/mol-script/transpilers/pymol/examples.ts
new file mode 100644
index 0000000000000000000000000000000000000000..10c90a3ac2ae655a7811336b790c909cab15ad17
--- /dev/null
+++ b/src/mol-script/transpilers/pymol/examples.ts
@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author David Sehnal <david.sehnal@gmail.com>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
+
+export const pymolSelectionsExamples = [{
+    name: 'ALA residues',
+    value: 'resn ALA'
+}, {
+    name: 'Atoms named "C", "O", "N", or "CA"',
+    value: 'name c+o+n+ca'
+}, {
+    name: 'Residues with helix or sheet secondary structure',
+    value: 'ss h+s'
+}, {
+    name: 'C-alpha atoms of residues 100 to 180 in chain A',
+    value: 'A/100-180/CA'
+}, {
+    name: 'Residues 100 to 180',
+    value: 'resi 100-180'
+}, {
+    name: 'Atoms that are 1 ang + vdw radius away from polymer',
+    value: 'polymer gap 1'
+}, {
+    name: 'Residues within 4 ang of HEM',
+    value: 'byres resn HEM around 4'
+}, {
+    name: 'HEM and residues within 4 ang',
+    value: 'byres resn HEM expand 4'
+}, {
+    name: 'Solvent close (2.5 ang) to polymer',
+    value: 'solvent NEAR_TO 2.5 OF polymer'
+}, {
+    name: 'Cysteine residues within 3 ang of HEM',
+    value: 'byres resn CYS WITHIN 3 OF resn HEM'
+}, {
+    name: 'Solvent atoms 4 ang away from oxygen',
+    value: 'solvent beyond 4 of (name O and not solvent)'
+}, {
+    name: 'All rings in PHE',
+    value: 'byring resn PHE'
+}, {
+    name: 'CYS and all bound residues',
+    value: 'byres BOUND_TO resn CYS'
+}, {
+    name: 'HEM and atoms up to 7 bonds away',
+    value: 'resn HEM extend 7'
+}, {
+    name: 'Atoms with alternate location A or none',
+    value: 'alt A+""'
+}];
diff --git a/src/mol-script/transpilers/pymol/keywords.ts b/src/mol-script/transpilers/pymol/keywords.ts
new file mode 100644
index 0000000000000000000000000000000000000000..84f2e7507b340902303077c5354aadfd3f5efe30
--- /dev/null
+++ b/src/mol-script/transpilers/pymol/keywords.ts
@@ -0,0 +1,202 @@
+/**
+ * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
+
+import { MolScriptBuilder } from '../../../mol-script/language/builder';
+const B = MolScriptBuilder;
+import * as h from '../helper';
+import { KeywordDict } from '../types';
+
+// Residue names (`label_comp_id` values) used by the keyword tests below.
+const ResDict = {
+    nucleic: ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU'],
+    protein: ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'CYX', 'GLN', 'GLU', 'GLY', 'HIS', 'HID', 'HIE', 'HIP', 'ILE', 'LEU', 'LYS', 'MET', 'MSE', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'],
+    solvent: ['HOH', 'WAT', 'H20', 'TIP', 'SOL']
+};
+
+// Keyword table. Entries without a `map` have no translation here; the other
+// modules in this patch treat them as unsupported (they throw when parsed and
+// are omitted from the generated docs).
+export const keywords: KeywordDict = {
+    all: {
+        '@desc': 'All atoms currently loaded into PyMOL',
+        abbr: ['*'],
+        map: () => B.struct.generator.atomGroups()
+    },
+    none: {
+        '@desc': 'No atoms (empty selection)',
+        map: () => B.struct.generator.empty()
+    },
+    hydrogens: {
+        '@desc': 'All hydrogen atoms currently loaded into PyMOL',
+        abbr: ['hydro', 'h.'],
+        map: () => B.struct.generator.atomGroups({
+            'atom-test': B.core.rel.eq([
+                B.acp('elementSymbol'),
+                B.es('H')
+            ])
+        })
+    },
+    hetatm: {
+        '@desc': 'All atoms loaded from Protein Data Bank HETATM records',
+        abbr: ['het'],
+        map: () => B.struct.generator.atomGroups({
+            'atom-test': B.core.rel.eq([B.ammp('isHet'), true])
+        })
+    },
+    visible: {
+        '@desc': 'All atoms in enabled objects with at least one visible representation',
+        abbr: ['v.']
+    },
+    polymer: {
+        '@desc': 'All atoms on the polymer (not het). Finds atoms with residue identifiers matching a known polymer, such as peptide and DNA.',
+        abbr: ['pol.'],
+        map: () => B.struct.generator.atomGroups({
+            'residue-test': B.core.set.has([
+                B.core.type.set(ResDict.nucleic.concat(ResDict.protein)),
+                B.ammp('label_comp_id')
+            ])
+        })
+    },
+    backbone: {
+        '@desc': 'Polymer backbone atoms (new in PyMOL 1.6.1)',
+        abbr: ['bb.']
+    },
+    sidechain: {
+        '@desc': 'Polymer non-backbone atoms (new in PyMOL 1.6.1)',
+        abbr: ['sc.']
+    },
+    present: {
+        '@desc': 'All atoms with defined coordinates in the current state (used in creating movies)',
+        abbr: ['pr.']
+    },
+    center: {
+        '@desc': 'Pseudo-atom at the center of the scene'
+    },
+    origin: {
+        '@desc': 'Pseudo-atom at the origin of rotation',
+    },
+    enabled: {
+        '@desc': 'All enabled objects or selections from the object list.',
+    },
+    masked: {
+        '@desc': 'All masked atoms.',
+        abbr: ['msk.']
+    },
+    protected: {
+        '@desc': 'All protected atoms.',
+        // NOTE(review): 'pr.' is also the abbreviation of `present` above - confirm against PyMOL.
+        abbr: ['pr.']
+    },
+    bonded: {
+        '@desc': 'All bonded atoms',
+        map: () => B.struct.generator.atomGroups({
+            'atom-test': B.core.rel.gr([B.struct.atomProperty.core.bondCount({
+                flags: B.struct.type.bondFlags(['covalent', 'metallic', 'sulfide'])
+            }), 0])
+        })
+    },
+    donors: {
+        '@desc': 'All hydrogen bond donor atoms.',
+        abbr: ['don.']
+    },
+    acceptors: {
+        '@desc': 'All hydrogen bond acceptor atoms.',
+        abbr: ['acc.']
+    },
+    fixed: {
+        '@desc': 'All fixed atoms.',
+        abbr: ['fxd.']
+    },
+    restrained: {
+        '@desc': 'All restrained atoms.',
+        abbr: ['rst.']
+    },
+    organic: {
+        '@desc': 'All atoms in non-polymer organic compounds (e.g. ligands, buffers). Finds carbon-containing molecules that do not match known polymers.',
+        abbr: ['org.'],
+        map: () => h.asAtoms(B.struct.modifier.expandProperty({
+            '0': B.struct.modifier.union([
+                B.struct.generator.queryInSelection({
+                    '0': B.struct.generator.atomGroups({
+                        'residue-test': B.core.logic.not([
+                            B.core.set.has([
+                                B.core.type.set(ResDict.nucleic.concat(ResDict.protein)),
+                                B.ammp('label_comp_id')
+                            ])
+                        ])
+                    }),
+                    query: B.struct.generator.atomGroups({
+                        'atom-test': B.core.rel.eq([
+                            B.es('C'),
+                            B.acp('elementSymbol')
+                        ])
+                    })
+                })
+            ]),
+            property: B.ammp('residueKey')
+        }))
+    },
+    inorganic: {
+        '@desc': 'All non-polymer inorganic atoms/ions. Finds atoms in molecules that do not contain carbon and do not match any known solvent residues.',
+        abbr: ['ino.'],
+        map: () => h.asAtoms(B.struct.modifier.expandProperty({
+            '0': B.struct.modifier.union([
+                B.struct.filter.pick({
+                    '0': B.struct.generator.atomGroups({
+                        'residue-test': B.core.logic.not([
+                            B.core.set.has([
+                                B.core.type.set(ResDict.nucleic.concat(ResDict.protein).concat(ResDict.solvent)),
+                                B.ammp('label_comp_id')
+                            ])
+                        ]),
+                        'group-by': B.ammp('residueKey')
+                    }),
+                    test: B.core.logic.not([
+                        B.core.set.has([
+                            B.struct.atomSet.propertySet([B.acp('elementSymbol')]),
+                            B.es('C')
+                        ])
+                    ])
+                })
+            ]),
+            property: B.ammp('residueKey')
+        }))
+    },
+    solvent: {
+        '@desc': 'All water molecules. The hardcoded solvent residue identifiers are currently: HOH, WAT, H20, TIP, SOL.',
+        abbr: ['sol.'],
+        map: () => B.struct.generator.atomGroups({
+            'residue-test': B.core.set.has([
+                B.core.type.set(ResDict.solvent),
+                B.ammp('label_comp_id')
+            ])
+        })
+    },
+    guide: {
+        '@desc': 'All protein CA and nucleic acid C4*/C4',
+        map: () => B.struct.combinator.merge([
+            B.struct.generator.atomGroups({
+                'atom-test': B.core.rel.eq([
+                    B.atomName('CA'),
+                    B.ammp('label_atom_id')
+                ]),
+                'residue-test': B.core.set.has([
+                    B.core.type.set(ResDict.protein),
+                    B.ammp('label_comp_id')
+                ])
+            }),
+            B.struct.generator.atomGroups({
+                'atom-test': B.core.set.has([
+                    h.atomNameSet(['C4*', 'C4']),
+                    B.ammp('label_atom_id')
+                ]),
+                'residue-test': B.core.set.has([
+                    B.core.type.set(ResDict.nucleic),
+                    B.ammp('label_comp_id')
+                ])
+            })
+        ]),
+    },
+    metals: {
+        '@desc': 'All metal atoms (new in PyMOL 1.6.1)'
+    }
+};
diff --git a/src/mol-script/transpilers/pymol/markdown-docs.ts b/src/mol-script/transpilers/pymol/markdown-docs.ts
new file mode 100644
index 0000000000000000000000000000000000000000..7dfe9f675ea2c450aeb324e7607dd443295e5017
--- /dev/null
+++ b/src/mol-script/transpilers/pymol/markdown-docs.ts
@@ -0,0 +1,61 @@
+/**
+ * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
+
+import { properties } from './properties';
+import { operators } from './operators';
+import { keywords } from './keywords';
+
+// Horizontal rule used under the title and under every section heading.
+const rule = '--------------------------------';
+
+// Markdown fragments in output order; joined with '\n' into `docs` at the end.
+const fragments: string[] = ['PyMol', '============', rule, ''];
+
+/** Appends a `## <title>` heading followed by the horizontal rule. */
+function pushSection(title: string) {
+    fragments.push(`## ${title}\n\n`);
+    fragments.push(`${rule}\n`);
+}
+
+/** Appends a fenced, comma-separated list of names and, when present, the italic description. */
+function pushEntry(name: string, abbr: string[] | undefined, desc: string | undefined) {
+    const names = abbr ? [name, ...abbr] : [name];
+    fragments.push(`\`\`\`\n${names.join(', ')}\n\`\`\`\n`);
+    if (desc) {
+        fragments.push(`*${desc}*\n`);
+    }
+}
+
+// Properties: everything not flagged as unsupported.
+pushSection('Properties');
+for (const name in properties) {
+    const ps = properties[name];
+    if (!ps.isUnsupported) {
+        pushEntry(name, ps.abbr, ps['@desc']);
+    }
+}
+
+// Operators: everything not flagged as unsupported.
+pushSection('Operators');
+for (const o of operators) {
+    if (!o.isUnsupported) {
+        pushEntry(o.name, o.abbr, o['@desc']);
+    }
+}
+
+// Keywords: only those that have a `map` implementation.
+pushSection('Keywords');
+for (const name in keywords) {
+    const ks = keywords[name];
+    if (ks.map) {
+        pushEntry(name, ks.abbr, ks['@desc']);
+    }
+}
+
+export const docs = fragments.join('\n');
\ No newline at end of file
diff --git a/src/mol-script/transpilers/pymol/operators.ts b/src/mol-script/transpilers/pymol/operators.ts
new file mode 100644
index 0000000000000000000000000000000000000000..abc597d82c0bb767ef32a2dcc68879d3cec0d40e
--- /dev/null
+++ b/src/mol-script/transpilers/pymol/operators.ts
@@ -0,0 +1,367 @@
+/**
+ * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
+ */
+
+import * as P from '../../../mol-util/monadic-parser';
+import * as h from '../helper';
+import { MolScriptBuilder } from '../../../mol-script/language/builder';
+const B = MolScriptBuilder;
+import { OperatorList } from '../types';
+import { Expression } from '../../language/expression';
+
+// Operator table. For every entry:
+// - `type` is the helper combinator (prefix / postfix / binaryLeft) that wires
+//   `rule` around the operand parser;
+// - `rule` parses the operator token itself and yields the first `map`
+//   argument (the matched text, or the parsed number for distance/count ops);
+// - `map` builds the resulting expression from that value and the operand(s).
+// Entries flagged `isUnsupported` are parsed but have no translation.
+export const operators: OperatorList = [
+  {
+    '@desc': 'Selects atoms that are not included in s1.',
+    '@examples': [
+      'NOT resn ALA',
+      'not (resi 42 or chain A)',
+      '!resi 42 or chain A',
+    ],
+    name: 'not',
+    type: h.prefix,
+    rule: P.MonadicParser.alt(
+      P.MonadicParser.regexp(/NOT/i).skip(P.MonadicParser.whitespace),
+      P.MonadicParser.string('!').skip(P.MonadicParser.optWhitespace)
+    ),
+    map: (op, selection) => h.invertExpr(selection),
+  },
+  {
+    '@desc': 'Selects atoms included in both s1 and s2.',
+    '@examples': ['chain A AND name CA'],
+    name: 'and',
+    type: h.binaryLeft,
+    rule: h.infixOp(/AND|&/i),
+    map: (op, selection, by) =>
+      B.struct.modifier.intersectBy({ 0: selection, by }),
+  },
+  {
+    '@desc': 'Selects atoms included in either s1 or s2.',
+    '@examples': ['chain A OR chain B'],
+    name: 'or',
+    type: h.binaryLeft,
+    rule: h.infixOp(/OR|\|/i),
+    map: (op: string, s1: Expression, s2: Expression) => B.struct.combinator.merge([s1, s2]),
+  },
+  {
+    '@desc':
+      'Selects atoms in s1 whose identifiers name, resi, resn, chain and segi all match atoms in s2.',
+    '@examples': ['chain A IN chain B'],
+    name: 'in',
+    type: h.binaryLeft,
+    rule: h.infixOp(/IN/i),
+    map: (op: string, selection: Expression, source: Expression) => {
+      return B.struct.filter.withSameAtomProperties({
+        0: selection,
+        source,
+        property: B.core.type.compositeKey([
+          B.ammp('label_atom_id'),
+          B.ammp('label_seq_id'),
+          B.ammp('label_comp_id'),
+          B.ammp('auth_asym_id'),
+          B.ammp('label_asym_id'),
+        ]),
+      });
+    },
+  },
+  {
+    '@desc':
+      'Selects atoms in s1 whose identifiers name and resi match atoms in s2.',
+    '@examples': ['chain A LIKE chain B'],
+    name: 'like',
+    type: h.binaryLeft,
+    rule: h.infixOp(/LIKE|l\./i),
+    map: (op: string, selection: Expression, source: Expression) => {
+      return B.struct.filter.withSameAtomProperties({
+        0: selection,
+        source,
+        property: B.core.type.compositeKey([
+          B.ammp('label_atom_id'),
+          B.ammp('label_seq_id'),
+        ]),
+      });
+    },
+  },
+  {
+    '@desc':
+      'Selects all atoms whose van der Waals radii are separated from the van der Waals radii of s1 by a minimum of X Angstroms.',
+    '@examples': ['solvent GAP 2'],
+    name: 'gap',
+    type: h.postfix,
+    rule: h
+      .postfixOp(/GAP\s+([-+]?[0-9]*\.?[0-9]+)/i, 1)
+      .map((x) => parseFloat(x)),
+    map: (distance: number, target: Expression) => {
+      return B.struct.filter.within({
+        '0': B.struct.generator.atomGroups(),
+        target,
+        'atom-radius': B.acp('vdw'),
+        'max-radius': distance,
+        invert: true,
+      });
+    },
+  },
+  {
+    '@desc':
+      'Selects atoms with centers within X Angstroms of the center of any atom in s1.',
+    '@examples': ['resname LIG AROUND 1'],
+    name: 'around',
+    abbr: ['a.'],
+    type: h.postfix,
+    rule: h
+      .postfixOp(/(AROUND|a\.)\s+([-+]?[0-9]*\.?[0-9]+)/i, 2)
+      .map((x) => parseFloat(x)),
+    map: (radius: number, target: Expression) => {
+      return B.struct.modifier.exceptBy({
+        '0': B.struct.filter.within({
+          '0': B.struct.generator.atomGroups(),
+          target,
+          'max-radius': radius,
+        }),
+        by: target,
+      });
+    },
+  },
+  {
+    '@desc':
+      'Expands s1 by all atoms within X Angstroms of the center of any atom in s1.',
+    '@examples': ['chain A EXPAND 3'],
+    name: 'expand',
+    abbr: ['x.'],
+    type: h.postfix,
+    rule: h
+      .postfixOp(/(EXPAND|x\.)\s+([-+]?[0-9]*\.?[0-9]+)/i, 2)
+      .map((x) => parseFloat(x)),
+    map: (radius: number, selection: Expression) => {
+      return B.struct.modifier.includeSurroundings({ 0: selection, radius });
+    },
+  },
+  {
+    '@desc':
+      'Selects atoms in s1 that are within X Angstroms of any atom in s2.',
+    '@examples': ['chain A WITHIN 3 OF chain B'],
+    name: 'within',
+    abbr: ['w.'],
+    type: h.binaryLeft,
+    rule: h.ofOp('WITHIN', 'w.'),
+    map: (radius: number, selection: Expression, target: Expression) => {
+      return B.struct.filter.within({
+        0: selection,
+        target,
+        'max-radius': radius,
+      });
+    },
+  },
+  {
+    '@desc':
+      'Same as within, but excludes s2 from the selection (and thus is identical to s1 and s2 around X).',
+    '@examples': ['chain A NEAR_TO 3 OF chain B'],
+    name: 'near_to',
+    abbr: ['nto.'],
+    type: h.binaryLeft,
+    rule: h.ofOp('NEAR_TO', 'nto.'),
+    map: (radius: number, selection: Expression, target: Expression) => {
+      return B.struct.modifier.exceptBy({
+        '0': B.struct.filter.within({
+          '0': selection,
+          target,
+          'max-radius': radius,
+        }),
+        by: target,
+      });
+    },
+  },
+  {
+    '@desc': 'Selects atoms in s1 that are at least X Angstroms away from s2.',
+    '@examples': ['solvent BEYOND 2 OF chain A'],
+    name: 'beyond',
+    abbr: ['be.'],
+    type: h.binaryLeft,
+    rule: h.ofOp('BEYOND', 'be.'),
+    map: (radius: number, selection: Expression, target: Expression) => {
+      return B.struct.modifier.exceptBy({
+        '0': B.struct.filter.within({
+          '0': selection,
+          target,
+          'max-radius': radius,
+          invert: true,
+        }),
+        by: target,
+      });
+    },
+  },
+  {
+    '@desc': 'Expands selection to complete residues.',
+    '@examples': ['BYRESIDUE name N'],
+    name: 'byresidue',
+    abbr: ['byresi', 'byres', 'br.'],
+    type: h.prefix,
+    rule: h.prefixOp(/BYRESIDUE|byresi|byres|br\./i),
+    map: (op: string, selection: Expression) => {
+      return h.asAtoms(
+        B.struct.modifier.expandProperty({
+          '0': B.struct.modifier.union({ 0: selection }),
+          property: B.ammp('residueKey'),
+        })
+      );
+    },
+  },
+  {
+    '@desc':
+      'Completely selects all alpha carbons in all residues covered by a selection.',
+    '@examples': ['BYCALPHA chain A'],
+    name: 'bycalpha',
+    abbr: ['bca.'],
+    type: h.prefix,
+    rule: h.prefixOp(/BYCALPHA|bca\./i),
+    map: (op: string, selection: Expression) => {
+      return B.struct.generator.queryInSelection({
+        '0': B.struct.modifier.expandProperty({
+          '0': B.struct.modifier.union({ 0: selection }),
+          property: B.ammp('residueKey'),
+        }),
+        query: B.struct.generator.atomGroups({
+          'atom-test': B.core.rel.eq([
+            B.atomName('CA'),
+            B.ammp('label_atom_id'),
+          ]),
+        }),
+      });
+    },
+  },
+  {
+    '@desc': 'Expands selection to complete molecules.',
+    '@examples': ['BYMOLECULE resi 20-30'],
+    name: 'bymolecule',
+    abbr: ['bymol', 'bm.'],
+    type: h.prefix,
+    rule: h.prefixOp(/BYMOLECULE|bymol|bm\./i),
+    map: (op: string, selection: Expression) => {
+      return h.asAtoms(
+        B.struct.modifier.expandProperty({
+          '0': B.struct.modifier.union({ 0: selection }),
+          property: B.atp('connectedComponentKey'),
+        })
+      );
+    },
+  },
+  {
+    '@desc': 'Expands selection to complete fragments.',
+    '@examples': ['BYFRAGMENT resi 10'],
+    name: 'byfragment',
+    abbr: ['byfrag', 'bf.'],
+    isUnsupported: true,
+    type: h.prefix,
+    rule: h.prefixOp(/BYFRAGMENT|byfrag|bf\./i),
+    map: (op: string, selection: Expression) => [op, selection],
+  },
+  {
+    '@desc': 'Expands selection to complete segments.',
+    '@examples': ['BYSEGMENT resn CYS'],
+    name: 'bysegment',
+    abbr: ['bysegi', 'byseg', 'bs.'],
+    type: h.prefix,
+    rule: h.prefixOp(/BYSEGMENT|bysegi|byseg|bs\./i),
+    map: (op: string, selection: Expression) => {
+      return h.asAtoms(
+        B.struct.modifier.expandProperty({
+          '0': B.struct.modifier.union({ 0: selection }),
+          property: B.ammp('chainKey'),
+        })
+      );
+    },
+  },
+  {
+    '@desc': 'Expands selection to complete objects.',
+    '@examples': ['BYOBJECT chain A'],
+    name: 'byobject',
+    abbr: ['byobj', 'bo.'],
+    isUnsupported: true,
+    type: h.prefix,
+    rule: h.prefixOp(/BYOBJECT|byobj|bo\./i),
+    map: (op: string, selection: Expression) => [op, selection],
+  },
+  {
+    '@desc': 'Expands selection to unit cell.',
+    '@examples': ['BYCELL chain A'],
+    name: 'bycell',
+    isUnsupported: true,
+    type: h.prefix,
+    rule: h.prefixOp(/BYCELL/i),
+    map: (op: string, selection: Expression) => [op, selection],
+  },
+  {
+    '@desc': 'All rings of size ≤ 7 which have at least one atom in s1.',
+    '@examples': ['BYRING resn HEM'],
+    name: 'byring',
+    type: h.prefix,
+    rule: h.prefixOp(/BYRING/i),
+    map: (op: string, selection: Expression) => {
+      return h.asAtoms(
+        B.struct.filter.intersectedBy({
+          '0': B.struct.filter.pick({
+            '0': B.struct.generator.rings(),
+            test: B.core.logic.and([
+              B.core.rel.lte([B.struct.atomSet.atomCount(), 7]),
+              // NOTE(review): the description says "at least one atom", but this
+              // requires the count to be greater than 1 - confirm the threshold.
+              B.core.rel.gr([B.struct.atomSet.countQuery([selection]), 1]),
+            ]),
+          }),
+          by: selection,
+        })
+      );
+    },
+  },
+  {
+    '@desc': 'Selects atoms directly bonded to s1, excludes s1.',
+    '@examples': ['NEIGHBOUR resn CYS'],
+    name: 'neighbour',
+    type: h.prefix,
+    // 'neighbor' is listed as an alias so PyMOL's own spelling is accepted
+    // alongside the existing 'neighbour'.
+    abbr: ['neighbor', 'nbr.'],
+    rule: h.prefixOp(/NEIGHBOU?R|nbr\./i),
+    map: (op: string, selection: Expression) => {
+      return B.struct.modifier.exceptBy({
+        '0': h.asAtoms(
+          B.struct.modifier.includeConnected({
+            '0': B.struct.modifier.union({ 0: selection }),
+            'bond-test': true,
+          })
+        ),
+        by: selection,
+      });
+    },
+  },
+  {
+    '@desc': 'Selects atoms directly bonded to s1, may include s1.',
+    '@examples': ['BOUND_TO resname CA'],
+    name: 'bound_to',
+    abbr: ['bto.'],
+    type: h.prefix,
+    rule: h.prefixOp(/BOUND_TO|bto\./i),
+    map: (op: string, selection: Expression) => {
+      return h.asAtoms(
+        B.struct.modifier.includeConnected({
+          '0': B.struct.modifier.union({ 0: selection }),
+        })
+      );
+    },
+  },
+  {
+    '@desc': 'Extends s1 by X bonds connected to atoms in s1.',
+    '@examples': ['resname LIG EXTEND 3'],
+    name: 'extend',
+    abbr: ['xt.'],
+    type: h.postfix,
+    rule: h.postfixOp(/(EXTEND|xt\.)\s+([0-9]+)/i, 2).map((x) => parseInt(x, 10)),
+    map: (count: number, selection: Expression) => {
+      return h.asAtoms(
+        B.struct.modifier.includeConnected({
+          '0': B.struct.modifier.union({ 0: selection }),
+          'bond-test': true,
+          'layer-count': count,
+        })
+      );
+    },
+  },
+];
diff --git a/src/mol-script/transpilers/pymol/parser.ts b/src/mol-script/transpilers/pymol/parser.ts
new file mode 100644
index 
0000000000000000000000000000000000000000..bf91bb00ca1134d115537ff2505ddc61afd453f0 --- /dev/null +++ b/src/mol-script/transpilers/pymol/parser.ts @@ -0,0 +1,171 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + +// https://pymol.org/dokuwiki/doku.php?id=selection:alpha +// https://github.com/evonove/pymol/blob/master/pymol/layer3/Selector.cpp + +import * as P from '../../../mol-util/monadic-parser'; +import * as h from '../helper'; +import { MolScriptBuilder } from '../../../mol-script/language/builder'; +const B = MolScriptBuilder; +import { properties } from './properties'; +import { operators } from './operators'; +import { keywords } from './keywords'; +import { AtomGroupArgs } from '../types'; +import { Transpiler } from '../transpiler'; + +const propertiesDict = h.getPropertyRules(properties); + +const slash = P.MonadicParser.string('/'); + +/* is Parser -> MonadicParser substitution correct? 
*/ +function orNull(rule: P.MonadicParser<any>) { + return rule.or(P.MonadicParser.of(null)); +} + +function atomSelectionQuery(x: any) { /* turns the fields parsed from a selection macro into one atomGroups query */ + const tests: AtomGroupArgs = {}; + const props: { [k: string]: any[] } = {}; + + for (const k in x) { + if (x[k] === null) continue; /* field left empty in the macro (orNull): skip it before the lookup, so an omitted 'object' is not rejected */ + const ps = properties[k]; + if (!ps) { /* e.g. 'object', which has no entry in properties */ + throw new Error(`property '${k}' not supported, value '${x[k]}'`); + } + if (!props[ps.level]) props[ps.level] = []; + props[ps.level].push(x[k]); /* collect the tests per level ('atom-test', 'residue-test', ...) */ + } + + for (const p in props) { + tests[p] = h.andExpr(props[p]); + } + + return B.struct.generator.atomGroups(tests); +} + +const lang = P.MonadicParser.createLanguage({ + Parens: function (r : any) { + return P.MonadicParser.alt( + r.Parens, + r.Operator, + r.Expression + ).wrap(P.MonadicParser.string('('), P.MonadicParser.string(')')); + }, + + Expression: function (r : any) { + return P.MonadicParser.alt( + r.AtomSelectionMacro.map(atomSelectionQuery), + r.NamedAtomProperties, + r.Pepseq, + r.Rep, + r.Keywords, + r.Object + ); + }, + + AtomSelectionMacro: function (r : any) { + return P.MonadicParser.alt( + slash.then(P.MonadicParser.alt( + P.MonadicParser.seq( + orNull(r.ObjectProperty).skip(slash), + orNull(propertiesDict.segi).skip(slash), + orNull(propertiesDict.chain).skip(slash), + orNull(propertiesDict.resi).skip(slash), + orNull(propertiesDict.name) + ).map(x => { return { object: x[0], segi: x[1], chain: x[2], resi: x[3], name: x[4] }; }), + P.MonadicParser.seq( + orNull(r.ObjectProperty).skip(slash), + orNull(propertiesDict.segi).skip(slash), + orNull(propertiesDict.chain).skip(slash), + orNull(propertiesDict.resi) + ).map(x => { return { object: x[0], segi: x[1], chain: x[2], resi: x[3] }; }), + P.MonadicParser.seq( + orNull(r.ObjectProperty).skip(slash), + orNull(propertiesDict.segi).skip(slash), + orNull(propertiesDict.chain) + ).map(x => { return { object: x[0], segi: x[1], chain: x[2] }; }), + P.MonadicParser.seq( + orNull(r.ObjectProperty).skip(slash), + orNull(propertiesDict.segi) + ).map(x => 
{ return { object: x[0], segi: x[1] }; }), + P.MonadicParser.seq( + orNull(r.ObjectProperty) + ).map(x => { return { object: x[0] }; }), + )), + P.MonadicParser.alt( + P.MonadicParser.seq( + orNull(r.ObjectProperty).skip(slash), + orNull(propertiesDict.segi).skip(slash), + orNull(propertiesDict.chain).skip(slash), + orNull(propertiesDict.resi).skip(slash), + orNull(propertiesDict.name) + ).map(x => { return { object: x[0], segi: x[1], chain: x[2], resi: x[3], name: x[4] }; }), + P.MonadicParser.seq( + orNull(propertiesDict.segi).skip(slash), + orNull(propertiesDict.chain).skip(slash), + orNull(propertiesDict.resi).skip(slash), + orNull(propertiesDict.name) + ).map(x => { return { segi: x[0], chain: x[1], resi: x[2], name: x[3] }; }), + P.MonadicParser.seq( + orNull(propertiesDict.chain).skip(slash), + orNull(propertiesDict.resi).skip(slash), + orNull(propertiesDict.name) + ).map(x => { return { chain: x[0], resi: x[1], name: x[2] }; }), + P.MonadicParser.seq( + orNull(propertiesDict.resi).skip(slash), + orNull(propertiesDict.name) + ).map(x => { return { resi: x[0], name: x[1] }; }), + ) + ); + }, + + NamedAtomProperties: function () { + return P.MonadicParser.alt(...h.getNamedPropertyRules(properties)); + }, + + Keywords: () => P.MonadicParser.alt(...h.getKeywordRules(keywords)), + + ObjectProperty: () => { + const w = h.getReservedWords(properties, keywords, operators) + .sort(h.strLenSortFn).map(h.escapeRegExp).join('|'); + return P.MonadicParser.regexp(new RegExp(`(?!(${w}))[A-Z0-9_]+`, 'i')); + }, + Object: (r : any) => { + return r.ObjectProperty.notFollowedBy(slash) + .map( (x:any) => { throw new Error(`property 'object' not supported, value '${x}'`); }); + }, + + // Selects peptide sequence matching upper-case one-letter + // sequence SEQ (see also FindSeq). 
+ // PEPSEQ seq + Pepseq: () => { + return P.MonadicParser.regexp(/(PEPSEQ|ps\.)\s+([a-z]+)/i, 2) + .map(h.makeError(`operator 'pepseq' not supported`)); + }, + + // Selects atoms which show representation rep. + // REP rep + Rep: () => { + return P.MonadicParser.regexp(/REP\s+(lines|spheres|mesh|ribbon|cartoon|sticks|dots|surface|labels|extent|nonbonded|nb_spheres|slice|extent|slice|dashes|angles|dihedrals|cgo|cell|callback|everything)/i, 1) + .map(h.makeError(`operator 'rep' not supported`)); + }, + + Operator: function (r : any) { + return h.combineOperators(operators, P.MonadicParser.alt(r.Parens, r.Expression, r.Operator)); + }, + + Query: function (r : any) { + return P.MonadicParser.alt( + r.Operator, + r.Parens, + r.Expression + ).trim(P.MonadicParser.optWhitespace); + } +}); + +export const transpiler: Transpiler = str => lang.Query.tryParse(str); diff --git a/src/mol-script/transpilers/pymol/properties.ts b/src/mol-script/transpilers/pymol/properties.ts new file mode 100644 index 0000000000000000000000000000000000000000..ddc2fdf8456e25d6788b71019c6c04b03b8094e1 --- /dev/null +++ b/src/mol-script/transpilers/pymol/properties.ts @@ -0,0 +1,154 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + +import { MolScriptBuilder } from '../../../mol-script/language/builder'; +const B = MolScriptBuilder; +import { PropertyDict } from '../types'; + +const reFloat = /[-+]?[0-9]*\.?[0-9]+/; +const rePosInt = /[0-9]+/; + +function atomNameListMap(x: string) { return x.split('+').map(B.atomName); } +function listMap(x: string) { return x.split('+').map(x => x.replace(/^["']|["']$/g, '')); } +function rangeMap(x: string) { + const [min, max] = x.split('-').map(x => parseInt(x)); + return { min, max }; +} +function listOrRangeMap(x: string) { + return x.includes('-') ? 
rangeMap(x) : listMap(x).map(x => parseInt(x)); +} +function elementListMap(x: string) { + return x.split('+').map(B.struct.type.elementSymbol); +} + +const sstrucDict: { [k: string]: string } = { + H: 'helix', + S: 'beta', + L: 'none' +}; +function sstrucListMap(x: string) { + return { + flags: B.struct.type.secondaryStructureFlags( + x.toUpperCase().split('+').map(ss => sstrucDict[ss] || 'none') + ) + }; +} + +export const properties: PropertyDict = { + symbol: { + '@desc': 'chemical-symbol-list: list of 1- or 2-letter chemical symbols from the periodic table', + '@examples': ['symbol O+N'], + abbr: ['e.'], regex: /[a-zA-Z'"+]+/, map: elementListMap, + level: 'atom-test', property: B.acp('elementSymbol') + }, + name: { + '@desc': 'atom-name-list: list of up to 4-letter codes for atoms in proteins or nucleic acids', + '@examples': ['name CA+CB+CG+CD'], + abbr: ['n.'], regex: /[a-zA-Z0-9'"+]+/, map: atomNameListMap, + level: 'atom-test', property: B.ammp('label_atom_id') + }, + resn: { + '@desc': 'residue-name-list: list of 3-letter codes for amino acids or list of up to 2-letter codes for nucleic acids', + '@examples': ['resn ASP+GLU+ASN+GLN', 'resn A+G'], + abbr: ['resname', 'r.'], regex: /[a-zA-Z0-9'"+]+/, map: listMap, + level: 'residue-test', property: B.ammp('label_comp_id') + }, + resi: { + '@desc': 'residue-identifier-list list of up to 4-digit residue numbers or residue-identifier-range', + '@examples': ['resi 1+10+100+1000', 'resi 1-10'], + abbr: ['resident', 'residue', 'resid', 'i.'], regex: /[0-9+-]+/, map: listOrRangeMap, + level: 'residue-test', property: B.ammp('auth_seq_id') + }, + alt: { + '@desc': 'alternate-conformation-identifier-list list of single letters', + '@examples': ['alt A+B', 'alt ""', 'alt ""+A'], + abbr: [], regex: /[a-zA-Z0-9'"+]+/, map: listMap, + level: 'atom-test', property: B.ammp('label_alt_id') + }, + chain: { + '@desc': 'chain-identifier-list list of single letters or sometimes numbers', + '@examples': ['chain A'], + abbr: 
['c.'], regex: /[a-zA-Z0-9'"+]+/, map: listMap, + level: 'chain-test', property: B.ammp('auth_asym_id') + }, + segi: { + '@desc': 'segment-identifier-list list of up to 4 letter identifiers', + '@examples': ['segi lig'], + abbr: ['segid', 's.'], regex: /[a-zA-Z0-9'"+]+/, map: listMap, + level: 'chain-test', property: B.ammp('label_asym_id') + }, + flag: { + '@desc': 'flag-number a single integer from 0 to 31', + '@examples': ['flag 0'], + isUnsupported: true, + abbr: ['f.'], regex: /[0-9]+/, map: x => parseInt(x), + level: 'atom-test' + }, + numeric_type: { + '@desc': 'type-number a single integer', + '@examples': ['nt. 5'], + isUnsupported: true, + abbr: ['nt.'], regex: /[0-9]+/, map: x => parseInt(x), + level: 'atom-test' + }, + text_type: { + '@desc': 'type-string a list of up to 4 letter codes', + '@examples': ['text_type HA+HC'], + isUnsupported: true, + abbr: ['tt.'], regex: /[a-zA-Z0-9'"+]+/, map: listMap, + level: 'atom-test' + }, + id: { + '@desc': 'external-index-number a single integer', + '@examples': ['id 23'], + abbr: [], regex: rePosInt, map: x => parseInt(x), + level: 'atom-test', property: B.ammp('id') + }, + index: { + '@desc': 'internal-index-number a single integer', + '@examples': ['index 11'], + isUnsupported: true, + abbr: ['idx.'], regex: rePosInt, map: x => parseInt(x), + level: 'atom-test' + }, + ss: { + '@desc': 'secondary-structure-type list of single letters. Helical regions should be assigned H and sheet regions S. 
Loop regions can either be assigned L or be blank.', + '@examples': ['ss H+S+L', 'ss S+""'], + abbr: [], regex: /[a-zA-Z'"+]+/, map: sstrucListMap, + level: 'residue-test', property: B.ammp('secondaryStructureFlags') + }, + + b: { + '@desc': 'comparison-operator b-factor-value a real number', + '@examples': ['b > 10'], + isNumeric: true, + abbr: [], regex: reFloat, map: x => parseFloat(x), + level: 'atom-test', property: B.ammp('B_iso_or_equiv') + }, + q: { + '@desc': 'comparison-operator occupancy-value a real number', + '@examples': ['q <0.50'], + isNumeric: true, + abbr: [], regex: reFloat, map: x => parseFloat(x), + level: 'atom-test', property: B.ammp('occupancy') + }, + formal_charge: { + '@desc': 'comparison-operator formal charge-value an integer', + '@examples': ['fc. = -1'], + isNumeric: true, + abbr: ['fc.'], regex: reFloat, map: x => parseFloat(x), + level: 'atom-test', property: B.ammp('pdbx_formal_charge') + }, + partial_charge: { + '@desc': 'comparison-operator partial charge-value a real number', + '@examples': ['pc. > 1'], + isUnsupported: true, + isNumeric: true, + abbr: ['pc.'], regex: reFloat, map: x => parseFloat(x), + level: 'atom-test' + } +}; diff --git a/src/mol-script/transpilers/pymol/symbols.ts b/src/mol-script/transpilers/pymol/symbols.ts new file mode 100644 index 0000000000000000000000000000000000000000..1e02f21bee62f005cc427f177c831b61264936d7 --- /dev/null +++ b/src/mol-script/transpilers/pymol/symbols.ts @@ -0,0 +1,33 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + +import { properties } from './properties'; +import { operators } from './operators'; +import { keywords } from './keywords'; + +export const Properties: string[] = []; +for (const name in properties) { + if (properties[name].isUnsupported) continue; + Properties.push(name); + if (properties[name].abbr) Properties.push(...properties[name].abbr!); +} + +export const Operators: string[] = []; +operators.forEach(o => { + if (o.isUnsupported) return; + Operators.push(o.name); + if (o.abbr) Operators.push(...o.abbr); +}); + +export const Keywords: string[] = []; +for (const name in keywords) { + if (!keywords[name].map) continue; + Keywords.push(name); + if (keywords[name].abbr) Keywords.push(...keywords[name].abbr!); +} + +export const all = { Properties, Operators: [...Operators, 'of'], Keywords }; diff --git a/src/mol-script/transpilers/transpiler.ts b/src/mol-script/transpilers/transpiler.ts new file mode 100644 index 0000000000000000000000000000000000000000..c38cd620ae265414e7c060b57a1d8a7a471d4b6a --- /dev/null +++ b/src/mol-script/transpilers/transpiler.ts @@ -0,0 +1,10 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + +import { Expression } from '../language/expression'; + +export type Transpiler = (source: string) => Expression diff --git a/src/mol-script/transpilers/types.ts b/src/mol-script/transpilers/types.ts new file mode 100644 index 0000000000000000000000000000000000000000..fc85f86ecd1b9e609fc3c64b0ed7eeb1f44d2946 --- /dev/null +++ b/src/mol-script/transpilers/types.ts @@ -0,0 +1,62 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + +import * as P from '../../mol-util/monadic-parser'; +import { Expression } from '../language/expression'; +// import Symbol from '../mini-lisp/symbol' + +export interface AtomGroupArgs { + [index: string]: any + 'entity-test'?: Expression + 'chain-test'?: Expression + 'residue-test'?: Expression + 'atom-test'?: Expression + 'groupBy'?: Expression +} + +export interface Keyword { + '@desc': string + abbr?: string[] + map?: () => Expression /* not given means the keyword is unsupported */ +} + +export type KeywordDict = { [name: string]: Keyword } + +export interface Property { + '@desc': string + '@examples': string[] + isUnsupported?: boolean + isNumeric?: boolean + abbr?: string[] + regex: RegExp + map: (s: string) => any + level: 'atom-test' | 'residue-test' | 'chain-test' | 'entity-test' + property?: any /* Symbol */ +} + +export type PropertyDict = { [name: string]: Property } + +export interface Operator { + '@desc': string + '@examples': string[] + name: string + abbr?: string[] + isUnsupported?: boolean + type: (p1: P.MonadicParser<any>, p2: P.MonadicParser<any>, fn: any) => P.MonadicParser<any> + rule: P.MonadicParser<any> + map: (x: any, y: any, z?: any) => Expression | Expression[] +} + +export type OperatorList = Operator[] + +export interface Function { + '@desc': string + '@examples': string[] + map?: (x: any) => Expression /* not given means the keyword is unsupported */ +} + +export type FunctionDict = { [name: string]: Function } diff --git a/src/mol-script/transpilers/vmd/examples.ts b/src/mol-script/transpilers/vmd/examples.ts new file mode 100644 index 0000000000000000000000000000000000000000..77625d325d4832dd6b251984c1e99d99882fb94b --- /dev/null +++ b/src/mol-script/transpilers/vmd/examples.ts @@ -0,0 +1,77 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + +export const vmdSelectionsExamples = [{ /* each entry pairs a descriptive name with a VMD selection string (value) */ + name: 'All water residues', + value: 'water' +}, { + name: 'All C-alpha atoms', + value: 'name CA' +}, { + name: 'Residue 35', + value: 'resid 35' +}, { + name: 'C-alpha atoms of ALA', + value: 'name CA and resname ALA' +}, { + name: 'Backbone atoms', + value: 'backbone' +}, { + name: 'Non-protein atoms', + value: 'not protein' +}, { + name: 'Protein backbone or hydrogen atoms', + value: 'protein (backbone or name H)' +}, { + name: 'Atoms heavier than 20', + value: 'mass > 20' +}, { + name: 'Atoms with two bonds', + value: 'numbonds = 2' +}, { + name: 'Atoms with an absolute charge greater than 1', + value: 'abs(charge) > 1' +}, { + name: 'Atoms with an x coordinate between -25 and -20', + value: 'x < -20 and x > -25' +}, { + name: 'Helices', + value: 'structure H' +}, { + name: 'Atoms with name "A 1"', + value: "name 'A 1'" +}, { + name: 'Atoms with name "A *"', + value: "name 'A *'" +}, { + name: 'Atoms with names starting with C', + value: 'name "C.*"' +}, { + name: 'Atoms within 10 ang of [-25, -15, -10]', /* sqr(x+25) measures the distance from x = -25, so the sphere is centred at negative coordinates */ + value: 'sqr(x+25)+sqr(y+15)+sqr(z+10) <= sqr(10)' +}, { + name: 'Atoms within 5 ang of iron atoms', + value: 'within 5 of name FE' +}, { + name: 'Atoms around 10 ang of HEM residue', + value: 'exwithin 10 of resname HEM' +}, { + name: 'ALA residues within 15 ang of HEM', + value: 'resname ALA within 15 of resname HEM' +}, { + name: 'All groups that include an iron atom', + value: 'same resid as name FE' +}, { + name: 'Atoms with mass between 12 and 17.5', + value: 'mass 12 to 17.5' +}, { + name: 'Residues 60, 80, 90 and 142', + value: 'resid 60 80 90 142' +}/* , { + name: 'Residues ala, arg, asn, asp, cys, and tyr', + value: 'resname ALA to CYS TYR' +}*/]; \ No newline at end of file diff --git a/src/mol-script/transpilers/vmd/functions.ts b/src/mol-script/transpilers/vmd/functions.ts new file 
mode 100644 index 0000000000000000000000000000000000000000..a53642639260d69ab294f3540b47c405d119ddb8 --- /dev/null +++ b/src/mol-script/transpilers/vmd/functions.ts @@ -0,0 +1,102 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + +import { MolScriptBuilder } from '../../../mol-script/language/builder'; +const B = MolScriptBuilder; +import { FunctionDict } from '../types'; + +// import * as P from 'parsimmon' +// import * as h from '../helper' +// import { Expression } from '../../language/expression'; + +export const functions: FunctionDict = { + 'sqr': { + '@desc': 'square of x', + '@examples': ['sqr(2)'], + map: x => B.core.math.pow([x, 2]), + }, + 'sqrt': { + '@desc': 'square root of x', + '@examples': ['sqrt(2)'], + map: x => B.core.math.sqrt([x]), + }, + 'abs': { + '@desc': 'absolute value of x', + '@examples': ['abs(2)'], + map: x => B.core.math.abs([x]), + }, + 'floor': { + '@desc': 'largest integer not greater than x', + '@examples': ['floor(2)'], + map: x => B.core.math.floor([x]), + }, + 'ceil': { + '@desc': 'smallest integer not less than x', + '@examples': ['ceil(2)'], + map: x => B.core.math.ceil([x]), + }, + 'sin': { + '@desc': 'sine of x', + '@examples': ['sin(2)'], + map: x => B.core.math.sin([x]), + }, + 'cos': { + '@desc': 'cosine of x', + '@examples': ['cos(2)'], + map: x => B.core.math.cos([x]), + }, + 'tan': { + '@desc': 'tangent of x', + '@examples': ['tan(2)'], + map: x => B.core.math.tan([x]), + }, + 'atan': { + '@desc': 'arctangent of x', + '@examples': ['atan(2)'], + map: x => B.core.math.atan([x]), + }, + 'asin': { + '@desc': 'arcsin of x', + '@examples': ['asin(2)'], + map: x => B.core.math.asin([x]), + }, + 'acos': { + '@desc': 'arccos of x', + '@examples': ['acos(2)'], + map: x => B.core.math.acos([x]), + }, + 'sinh': { + '@desc': 'hyperbolic sine of 
x', + '@examples': ['sinh(2)'], + map: x => B.core.math.sinh([x]), + }, + 'cosh': { + '@desc': 'hyperbolic cosine of x', + '@examples': ['cosh(2)'], + map: x => B.core.math.cosh([x]), + }, + 'tanh': { + '@desc': 'hyperbolic tangent of x', + '@examples': ['tanh(2)'], + map: x => B.core.math.tanh([x]), + }, + 'exp': { + '@desc': 'e to the power x', + '@examples': ['exp(2)'], + map: x => B.core.math.exp([x]), + }, + 'log': { + '@desc': 'natural log of x', + '@examples': ['log(2)'], + map: x => B.core.math.log([x]), + }, + 'log10': { + '@desc': 'log base 10 of x', + '@examples': ['log10(2)'], + map: x => B.core.math.log10([x]), + } +}; diff --git a/src/mol-script/transpilers/vmd/keywords.ts b/src/mol-script/transpilers/vmd/keywords.ts new file mode 100644 index 0000000000000000000000000000000000000000..16c27e4118f77ab97488158671c81ee857b36085 --- /dev/null +++ b/src/mol-script/transpilers/vmd/keywords.ts @@ -0,0 +1,302 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + +import * as h from '../helper'; +import { MolScriptBuilder } from '../../../mol-script/language/builder'; +const B = MolScriptBuilder; +import { KeywordDict } from '../types'; + +function proteinExpr() { + return B.struct.filter.pick({ + 0: B.struct.generator.atomGroups({ + 'group-by': B.ammp('residueKey') + }), + test: B.core.set.isSubset([ + h.atomNameSet(['C', 'N', 'CA', 'O']), + B.ammpSet('label_atom_id') + ]) + }); +} + +function nucleicExpr() { + return B.struct.filter.pick({ + 0: B.struct.generator.atomGroups({ + 'group-by': B.ammp('residueKey') + }), + test: B.core.logic.and([ + B.core.set.isSubset([ + // B.core.type.set([ 'P', 'O1P', 'O2P' ]), + h.atomNameSet(['P']), + B.ammpSet('label_atom_id') + ]), + B.core.logic.or([ + B.core.set.isSubset([ + h.atomNameSet(["O3'", "C3'", "C4'", "C5'", "O5'"]), + B.ammpSet('label_atom_id') + ]), + B.core.set.isSubset([ + h.atomNameSet(['O3*', 'C3*', 'C4*', 'C5*', 'O5*']), + B.ammpSet('label_atom_id') + ]) + ]) + ]) + }); +} + +function backboneExpr() { + return B.struct.combinator.merge([ + B.struct.generator.queryInSelection({ + 0: proteinExpr(), + query: B.struct.generator.atomGroups({ + 'atom-test': B.core.set.has([ + h.atomNameSet(Backbone.protein), + B.ammp('label_atom_id') + ]) + }) + }), + B.struct.generator.queryInSelection({ + 0: nucleicExpr(), + query: B.struct.generator.atomGroups({ + 'atom-test': B.core.set.has([ + h.atomNameSet(Backbone.nucleic), + B.ammp('label_atom_id') + ]) + }) + }) + ]); +} + +function secStrucExpr(flags: string[]) { + return B.struct.generator.atomGroups({ + 'residue-test': B.core.flags.hasAll([ + B.ammp('secondaryStructureFlags'), + B.struct.type.secondaryStructureFlags(flags) + ]) + }); +} + +const Backbone = { + nucleic: ['P', "O3'", "O5'", "C5'", "C4'", "C3'", 'OP1', 'OP2', 'O3*', 'O5*', 'C5*', 'C4*', 'C3*'], + protein: ['C', 'N', 'CA', 'O'] +}; + 
+const ResDict = { + acidic: ['ASP', 'GLU'], + aliphatic: ['ALA', 'GLY', 'ILE', 'LEU', 'VAL'], + aromatic: ['HIS', 'PHE', 'TRP', 'TYR'], + at: ['ADA', 'A', 'THY', 'T'], + basic: ['ARG', 'HIS', 'LYS'], + buried: ['ALA', 'LEU', 'VAL', 'ILE', 'PHE', 'CYS', 'MET', 'TRP'], + cg: ['CYT', 'C', 'GUA', 'G'], + cyclic: ['HIS', 'PHE', 'PRO', 'TRP', 'TYR'], + hydrophobic: ['ALA', 'LEU', 'VAL', 'ILE', 'PRO', 'PHE', 'MET', 'TRP'], + medium: ['VAL', 'THR', 'ASP', 'ASN', 'PRO', 'CYS', 'ASX', 'PCA', 'HYP'], + neutral: ['VAL', 'PHE', 'GLN', 'TYR', 'HIS', 'CYS', 'MET', 'TRP', 'ASX', 'GLX', 'PCA', 'HYP'], + purine: ['ADE', 'A', 'GUA', 'G'], + pyrimidine: ['CYT', 'C', 'THY', 'T', 'URI', 'U'], + small: ['ALA', 'GLY', 'SER'], + water: ['H2O', 'HH0', 'OHH', 'HOH', 'OH2', 'SOL', 'WAT', 'TIP', 'TIP2', 'TIP3', 'TIP4'] +}; + +export const keywords: KeywordDict = { + all: { + '@desc': 'everything', + map: () => B.struct.generator.atomGroups() + }, + none: { + '@desc': 'nothing', + map: () => B.struct.generator.empty() + }, + protein: { + '@desc': 'a residue with atoms named C, N, CA, and O', + map: () => proteinExpr() + }, + nucleic: { + '@desc': "a residue with atoms named P, O1P, O2P and either O3', C3', C4', C5', O5' or O3*, C3*, C4*, C5*, O5*. This definition assumes that the base is phosphorylated, an assumption which will be corrected in the future.", + map: () => nucleicExpr() + }, + backbone: { + '@desc': 'the C, N, CA, and O atoms of a protein and the equivalent atoms in a nucleic acid.', + map: () => backboneExpr() + }, + sidechain: { + '@desc': 'non-backbone atoms and bonds', // TODO: what does 'bonds' mean here? 
+ map: () => h.invertExpr(backboneExpr()) + }, + water: { + '@desc': 'all atoms with the resname H2O, HH0, OHH, HOH, OH2, SOL, WAT, TIP, TIP2, TIP3 or TIP4', + abbr: ['waters'], + map: () => h.resnameExpr(ResDict.water) + }, + at: { + '@desc': 'residues named ADA A THY T', + map: () => h.resnameExpr(ResDict.at) + }, + acidic: { + '@desc': 'residues named ASP GLU', + map: () => h.resnameExpr(ResDict.acidic) + }, + acyclic: { + '@desc': '"protein and not cyclic"', + map: () => B.struct.modifier.intersectBy({ + 0: proteinExpr(), + by: h.invertExpr(h.resnameExpr(ResDict.cyclic)) + }) + }, + aliphatic: { + '@desc': 'residues named ALA GLY ILE LEU VAL', + map: () => h.resnameExpr(ResDict.aliphatic) + }, + alpha: { + '@desc': "atom's residue is an alpha helix", + map: () => secStrucExpr(['alpha']) + }, + amino: { + '@desc': 'a residue with atoms named C, N, CA, and O', + map: () => proteinExpr() + }, + aromatic: { + '@desc': 'residues named HIS PHE TRP TYR', + map: () => h.resnameExpr(ResDict.aromatic) + }, + basic: { + '@desc': 'residues named ARG HIS LYS', + map: () => h.resnameExpr(ResDict.basic) + }, + bonded: { + '@desc': 'atoms for which numbonds > 0', + map: () => h.asAtoms(B.struct.filter.pick({ + '0': B.struct.modifier.includeConnected({ + '0': B.struct.generator.atomGroups(), + 'bond-test': B.core.flags.hasAny([ + B.struct.bondProperty.flags(), + B.struct.type.bondFlags(['covalent', 'metallic', 'sulfide']) + ]) + }), + test: B.core.rel.gr([ + B.struct.atomSet.atomCount(), 1 + ]) + })) + }, + buried: { + '@desc': 'residues named ALA LEU VAL ILE PHE CYS MET TRP', + map: () => h.resnameExpr(ResDict.buried) + }, + cg: { + '@desc': 'residues named CYT C GUA G', + map: () => h.resnameExpr(ResDict.cg) + }, + charged: { + '@desc': '"basic or acidic"', + map: () => h.resnameExpr(ResDict.basic.concat(ResDict.acidic)) + }, + cyclic: { + '@desc': 'residues named HIS PHE PRO TRP TYR', + map: () => h.resnameExpr(ResDict.cyclic) + }, + hetero: { + '@desc': '"not (protein or 
nucleic)"', + map: () => h.invertExpr( + B.struct.combinator.merge([proteinExpr(), nucleicExpr()]) + ) + }, + hydrogen: { + '@desc': 'name "[0-9]?H.*"', + map: () => B.struct.generator.atomGroups({ + 'atom-test': B.core.str.match([ + B.core.type.regex(['^[0-9]?[H].*$', 'i']), + B.core.type.str([B.ammp('label_atom_id')]) + ]) + }) + }, + large: { + '@desc': '"protein and not (small or medium)"', + map: () => B.struct.modifier.intersectBy({ + 0: proteinExpr(), + by: h.invertExpr( + h.resnameExpr(ResDict.small.concat(ResDict.medium)) + ) + }) + }, + medium: { + '@desc': 'residues named VAL THR ASP ASN PRO CYS ASX PCA HYP', + map: () => h.resnameExpr(ResDict.medium) + }, + neutral: { + '@desc': 'residues named VAL PHE GLN TYR HIS CYS MET TRP ASX GLX PCA HYP', + map: () => h.resnameExpr(ResDict.neutral) + }, + hydrophobic: { + '@desc': 'hydrophobic resname ALA LEU VAL ILE PRO PHE MET TRP', + map: () => h.resnameExpr(ResDict.hydrophobic) + }, + polar: { + '@desc': '"protein and not hydrophobic"', + map: () => B.struct.modifier.intersectBy({ + 0: proteinExpr(), + by: h.invertExpr(h.resnameExpr(ResDict.hydrophobic)) + }) + }, + purine: { + '@desc': 'residues named ADE A GUA G', + map: () => h.resnameExpr(ResDict.purine) + }, + pyrimidine: { + '@desc': 'residues named CYT C THY T URI U', + map: () => h.resnameExpr(ResDict.pyrimidine) + }, + small: { + '@desc': 'residues named ALA GLY SER', + map: () => h.resnameExpr(ResDict.small) + }, + surface: { + '@desc': '"protein and not buried"', + map: () => B.struct.modifier.intersectBy({ + 0: proteinExpr(), + by: h.invertExpr(h.resnameExpr(ResDict.buried)) + }) + }, + alpha_helix: { + '@desc': "atom's residue is in an alpha helix", + map: () => secStrucExpr(['alpha']) + }, + pi_helix: { + '@desc': "atom's residue is in a pi helix", + map: () => secStrucExpr(['pi']) + }, + helix_3_10: { + '@desc': "atom's residue is in a 3-10 helix", + map: () => secStrucExpr(['3-10']) + }, + helix: { + '@desc': "atom's residue is in an alpha or pi 
or 3-10 helix", + map: () => secStrucExpr(['helix']) + }, + extended_beta: { + '@desc': "atom's residue is a beta sheet", + map: () => secStrucExpr(['sheet']) + }, + bridge_beta: { + '@desc': "atom's residue is a beta sheet", + map: () => secStrucExpr(['strand']) + }, + sheet: { + '@desc': "atom's residue is a beta sheet", + map: () => secStrucExpr(['beta']) + }, + turn: { + '@desc': "atom's residue is in a turn conformation", + map: () => secStrucExpr(['turn']) + }, + coil: { + '@desc': "atom's residue is in a coil conformation", + map: () => B.struct.modifier.intersectBy({ + 0: proteinExpr(), + by: secStrucExpr(['none']) + }) + } +}; diff --git a/src/mol-script/transpilers/vmd/markdown-docs.ts b/src/mol-script/transpilers/vmd/markdown-docs.ts new file mode 100644 index 0000000000000000000000000000000000000000..93a1851963c2e197b201f42dade461d4b66c108c --- /dev/null +++ b/src/mol-script/transpilers/vmd/markdown-docs.ts @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */ + +import { properties } from './properties'; +import { operators } from './operators'; +import { keywords } from './keywords'; +import { functions } from './functions'; + +const _docs: string[] = [ + 'VMD', + '============', + '--------------------------------', + '' +]; + +_docs.push(`## Properties\n\n`); +_docs.push('--------------------------------\n'); +for (const name in properties) { + if (properties[name].isUnsupported) continue; + + const names = [name]; + if (properties[name].abbr) names.push(...properties[name].abbr!); + _docs.push(`\`\`\`\n${names.join(', ')}\n\`\`\`\n`); + + if (properties[name]['@desc']) { + _docs.push(`*${properties[name]['@desc']}*\n`); + } +} + +_docs.push(`## Operators\n\n`); +_docs.push('--------------------------------\n'); +operators.forEach(o => { + if (o.isUnsupported) return; + + const names = [o.name]; + if (o.abbr) names.push(...o.abbr!); + _docs.push(`\`\`\`\n${names.join(', ')}\n\`\`\`\n`); + + if (o['@desc']) { + _docs.push(`*${o['@desc']}*\n`); + } +}); + +_docs.push(`## Keywords\n\n`); +_docs.push('--------------------------------\n'); +for (const name in keywords) { + if (!keywords[name].map) continue; + + const names = [name]; + if (keywords[name].abbr) names.push(...keywords[name].abbr!); + _docs.push(`\`\`\`\n${names.join(', ')}\n\`\`\`\n`); + + if (keywords[name]['@desc']) { + _docs.push(`*${keywords[name]['@desc']}*\n`); + } +} + +_docs.push(`## Functions\n\n`); +_docs.push('--------------------------------\n'); +for (const name in functions) { + if (!functions[name].map) continue; + + const names = [name]; + _docs.push(`\`\`\`\n${names.join(', ')}\n\`\`\`\n`); + + if (functions[name]['@desc']) { + _docs.push(`*${functions[name]['@desc']}*\n`); + } +} + +export const docs = _docs.join('\n'); \ No newline at end of file diff --git a/src/mol-script/transpilers/vmd/operators.ts 
b/src/mol-script/transpilers/vmd/operators.ts new file mode 100644 index 0000000000000000000000000000000000000000..b347b98c84cc1d481a1940886320f71ea9394913 --- /dev/null +++ b/src/mol-script/transpilers/vmd/operators.ts @@ -0,0 +1,83 @@
/**
 * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
 */

import * as P from '../../../mol-util/monadic-parser';
import * as h from '../helper';
import { MolScriptBuilder } from '../../../mol-script/language/builder';
const B = MolScriptBuilder;
import { properties } from './properties';
import { Expression } from '../../language/expression';
import { OperatorList } from '../types';

// Regex alternation of every supported property name, used by the `same` operator.
// Names are ordered with `h.strLenSortFn` before joining (presumably longest
// first so that longer names win over their prefixes - confirm in helper.ts).
const propNames = Object.keys(properties).sort(h.strLenSortFn)
    .filter(name => !properties[name].isUnsupported).join('|');

/**
 * Selection-level operators of the VMD language. Each entry names the
 * combinator (`type`) used to wire it into the grammar, the parser (`rule`)
 * that recognises the operator itself, and the `map` that builds the
 * MolScript expression from the operator result and its operand(s).
 */
export const operators: OperatorList = [
    {
        '@desc': 'Selects atoms that are not included in s1.',
        '@examples': ['not protein'],
        name: 'not',
        type: h.prefix,
        rule: P.MonadicParser.regexp(/NOT/i).skip(P.MonadicParser.whitespace),
        map: (op, selection) => h.invertExpr(selection),
    },
    {
        '@desc': 'Selects atoms within a specified distance of a selection',
        '@examples': ['within 5 of name FE'],
        name: 'within',
        type: h.prefix,
        // The captured group is the radius; it reaches `map` as a number.
        rule: h.prefixOp(/WITHIN\s+([-+]?[0-9]*\.?[0-9]+)\s+OF/i, 1).map(x => parseFloat(x)),
        map: (radius: number, selection: Expression) => {
            return B.struct.modifier.includeSurroundings({ 0: selection, radius });
        }
    },
    {
        '@desc': 'Exclusive within, equivalent to (within 3 of X) and not X',
        '@examples': ['exwithin 10 of resname HEM'],
        name: 'exwithin',
        type: h.prefix,
        rule: h.prefixOp(/EXWITHIN\s+([-+]?[0-9]*\.?[0-9]+)\s+OF/i, 1).map(x => parseFloat(x)),
        map: (radius: number, target: Expression) => {
            // Everything within `radius` of the target, minus the target itself.
            return B.struct.modifier.exceptBy({
                '0': B.struct.filter.within({
                    '0': B.struct.generator.atomGroups(), target, 'max-radius': radius
                }),
                by: target
            });
        }
    },
    {
        '@desc': 'Selects atoms which have the same keyword as the atoms in a given selection',
        '@examples': ['same resid as name FE'],
        name: 'same',
        type: h.prefix,
        // The rule matches case-insensitively, but `properties` is keyed by
        // lower-case names, so the captured name must be normalised before the
        // lookup; otherwise e.g. `SAME RESID AS ...` dereferences `undefined`.
        rule: h.prefixOp(new RegExp(`SAME\\s+(${propNames})\\s+AS`, 'i'), 1)
            .map(x => properties[x.toLowerCase()].property),
        map: (property: Expression, source: Expression) => {
            return B.struct.filter.withSameAtomProperties({
                '0': B.struct.generator.atomGroups(),
                source,
                property
            });
        }
    },
    {
        '@desc': 'Selects atoms included in both s1 and s2.',
        '@examples': ['backbone and protein'],
        name: 'and',
        type: h.binaryLeft,
        // Plain whitespace between two selections is accepted as an implicit `and`.
        rule: P.MonadicParser.alt(h.infixOp(/AND/i), P.MonadicParser.whitespace),
        map: (op, selection, by) => B.struct.modifier.intersectBy({ 0: selection, by })
    },
    {
        '@desc': 'Selects atoms included in either s1 or s2.',
        '@examples': ['water or protein'],
        name: 'or',
        type: h.binaryLeft,
        rule: h.infixOp(/OR/i),
        map: (op, s1, s2) => B.struct.combinator.merge([s1, s2])
    }
];
diff --git a/src/mol-script/transpilers/vmd/parser.ts b/src/mol-script/transpilers/vmd/parser.ts new file mode 100644 index 0000000000000000000000000000000000000000..cadcd91d5e89c01c7544b5daba938c9c1dcd7654 --- /dev/null +++ b/src/mol-script/transpilers/vmd/parser.ts @@ -0,0 +1,251 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */

import * as P from '../../../mol-util/monadic-parser';
import * as h from '../helper';
import { MolScriptBuilder } from '../../../mol-script/language/builder';
const B = MolScriptBuilder;
import { sstrucMap, sstrucDict, properties } from './properties';
import { operators } from './operators';
import { keywords } from './keywords';
import { functions } from './functions';
import { OperatorList } from '../types';
import { Transpiler } from '../transpiler';

// const propertiesDict = h.getPropertyRules(properties)

// <, <=, = or ==, >=, >, and !=
// lt, le, eq, ge, gt, and ne, =~
/**
 * Operators that act on values (numbers, strings, property expressions).
 * The arithmetic entries build `core.math` expressions; the comparison entry
 * turns its two operands into an atom-level test wrapped in an `atomGroups`
 * generator.
 */
const valueOperators: OperatorList = [
    {
        '@desc': 'multiplication, division',
        '@examples': [],
        name: 'mul-div',
        type: h.binaryLeft,
        rule: P.MonadicParser.regexp(/\s*(\*|\/)\s*/, 1),
        map: (op, e1, e2) => {
            switch (op) {
                case '*': return B.core.math.mult([e1, e2]);
                case '/': return B.core.math.div([e1, e2]);
                default: throw new Error(`value operator '${op}' not supported`);
            }
        }
    },
    {
        '@desc': 'addition, substraction',
        '@examples': [],
        name: 'add-sub',
        type: h.binaryLeft,
        rule: P.MonadicParser.regexp(/\s*(-|\+)\s*/, 1),
        map: (op, e1, e2) => {
            switch (op) {
                case '-': return B.core.math.sub([e1, e2]);
                case '+': return B.core.math.add([e1, e2]);
                default: throw new Error(`value operator '${op}' not supported`);
            }
        }
    },
    {
        '@desc': 'value comparisons',
        '@examples': [],
        name: 'comparison',
        type: h.binaryLeft,
        // Bare whitespace between two operands is read as an implicit '='.
        rule: P.MonadicParser.alt(P.MonadicParser.regexp(/\s*(=~|==|>=|<=|=|!=|>|<)\s*/, 1), P.MonadicParser.whitespace.result('=')),
        map: (op, e1, e2) => {
            let expr;
            // Special cases first: secondary-structure flags, regex operands
            // and the explicit regex-match operator.
            if (e1.head === 'structure.atom-property.macromolecular.secondary-structure-flags') {
                expr = B.core.flags.hasAny([e1, sstrucMap(e2)]);
            } else if (e2.head === 'structure.atom-property.macromolecular.secondary-structure-flags') {
                expr = B.core.flags.hasAny([e2, sstrucMap(e1)]);
            } else if (e1.head === 'core.type.regex') {
                expr = B.core.str.match([e1, B.core.type.str([e2])]);
            } else if (e2.head === 'core.type.regex') {
                expr = B.core.str.match([e2, B.core.type.str([e1])]);
            } else if (op === '=~') {
                // The operand without a `head` is the literal pattern; it is
                // anchored and matched case-insensitively against the other side.
                if (e1.head) {
                    expr = B.core.str.match([
                        B.core.type.regex([`^${e2}$`, 'i']),
                        B.core.type.str([e1])
                    ]);
                } else {
                    expr = B.core.str.match([
                        B.core.type.regex([`^${e1}$`, 'i']),
                        B.core.type.str([e2])
                    ]);
                }
            }
            if (!expr) {
                // Generic relational comparison: wrap the literal side so that
                // it matches the expression side, then pick the relation.
                if (e1.head) e2 = h.wrapValue(e1, e2);
                if (e2.head) e1 = h.wrapValue(e2, e1);
                switch (op) {
                    case '=':
                    case '==':
                        expr = B.core.rel.eq([e1, e2]);
                        break;
                    case '!=':
                        expr = B.core.rel.neq([e1, e2]);
                        break;
                    case '>':
                        expr = B.core.rel.gr([e1, e2]);
                        break;
                    case '<':
                        expr = B.core.rel.lt([e1, e2]);
                        break;
                    case '>=':
                        expr = B.core.rel.gre([e1, e2]);
                        break;
                    case '<=':
                        expr = B.core.rel.lte([e1, e2]);
                        break;
                    default: throw new Error(`value operator '${op}' not supported`);
                }
            }
            return B.struct.generator.atomGroups({ 'atom-test': expr });
        }
    }
];

/**
 * Grammar of the VMD selection language. Every rule receives the language
 * object `r` so that rules can refer to each other.
 */
const lang = P.MonadicParser.createLanguage({
    // A parenthesised selection.
    Parens: function (r:any) {
        return P.MonadicParser.alt(
            r.Parens,
            r.Operator,
            r.Expression
        ).wrap(P.MonadicParser.string('('), P.MonadicParser.string(')'));
    },

    // A selection without selection-level operators.
    Expression: function (r:any) {
        return P.MonadicParser.alt(
            r.RangeListProperty,
            r.ValueQuery,
            r.Keywords,
        );
    },

    Keywords: () => P.MonadicParser.alt(...h.getKeywordRules(keywords)),

    // `<value> to <value>`, yielded as `{ range: [from, to] }`.
    ValueRange: function (r:any) {
        return P.MonadicParser.seq(
            r.Value
                .skip(P.MonadicParser.regexp(/\s+TO\s+/i)),
            r.Value
        ).map(x => ({ range: x }));
    },

    // A property name followed by a whitespace-separated mix of single values
    // and ranges, e.g. `resid 1 5 to 10`.
    RangeListProperty: function (r:any) {
        return P.MonadicParser.seq(
            P.MonadicParser.alt(...h.getPropertyNameRules(properties, /\s/))
                .skip(P.MonadicParser.whitespace),
            P.MonadicParser.alt(
                r.ValueRange,
                r.Value
            ).sepBy1(P.MonadicParser.whitespace)
        ).map(x => {
            const [property, values] = x;
            const listValues: (string | number)[] = [];
            const rangeValues: any[] = [];

            // Ranges become `inRange` tests, single values are collected for
            // one combined membership test.
            values.forEach((v:any) => {
                if (v.range) {
                    rangeValues.push(
                        B.core.rel.inRange([property, v.range[0], v.range[1]])
                    );
                } else {
                    listValues.push(h.wrapValue(property, v, sstrucDict));
                }
            });

            const rangeTest = h.orExpr(rangeValues);
            const listTest = h.valuesTest(property, listValues);

            let test;
            if (rangeTest && listTest) {
                test = B.core.logic.or([rangeTest, listTest]);
            } else {
                test = rangeTest ? rangeTest : listTest;
            }

            return B.struct.generator.atomGroups({ [h.testLevel(property)]: test });
        });
    },

    Operator: function (r:any) {
        return h.combineOperators(operators, P.MonadicParser.alt(r.Parens, r.Expression, r.ValueQuery));
    },

    // Entry point: a whole selection, surrounding whitespace allowed.
    Query: function (r:any) {
        return P.MonadicParser.alt(
            r.Operator,
            r.Parens,
            r.Expression
        ).trim(P.MonadicParser.optWhitespace);
    },

    Number: function () {
        return P.MonadicParser.regexp(/-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?/)
            .map(Number)
            .desc('number');
    },

    // A bare word that is not a reserved word, a single-quoted literal, or a
    // double-quoted pattern (turned into an anchored, case-insensitive regex).
    String: function () {
        const w = h.getReservedWords(properties, keywords, operators)
            .sort(h.strLenSortFn).map(h.escapeRegExp).join('|');
        return P.MonadicParser.alt(
            P.MonadicParser.regexp(new RegExp(`(?!(${w}))[A-Z0-9_]+`, 'i')),
            // The body must exclude the single quote itself (not the double
            // quote); otherwise `'CA' or name 'CB'` is swallowed as one string.
            P.MonadicParser.regexp(/'((?:[^'\\]|\\.)*)'/, 1),
            P.MonadicParser.regexp(/"((?:[^"\\]|\\.)*)"/, 1).map(x => B.core.type.regex([`^${x}$`, 'i']))
        ).desc('string');
    },

    Value: function (r:any) {
        return P.MonadicParser.alt(r.Number, r.String);
    },

    ValueParens: function (r:any) {
        return P.MonadicParser.alt(
            r.ValueParens,
            r.ValueOperator,
            r.ValueExpressions
        ).wrap(P.MonadicParser.string('('), P.MonadicParser.string(')'));
    },

    ValuePropertyNames: function () {
        return P.MonadicParser.alt(...h.getPropertyNameRules(properties, /=~|==|>=|<=|=|!=|>|<|\)|\s|\+|-|\*|\//i));
    },

    ValueOperator: function (r:any) {
        return h.combineOperators(valueOperators, P.MonadicParser.alt(r.ValueParens, r.ValueExpressions));
    },

    ValueExpressions: function (r:any) {
        return P.MonadicParser.alt(
            r.ValueFunctions,
            r.Value,
            r.ValuePropertyNames
        );
    },

    ValueFunctions: function (r:any) {
        return P.MonadicParser.alt(...h.getFunctionRules(functions, r.ValueOperator));
    },

    // A value expression is only a valid selection when it ended in a
    // comparison, i.e. when it produced a `structure.generator` expression.
    ValueQuery: function (r:any) {
        return P.MonadicParser.alt(
            r.ValueOperator.map((x:any) => {
                // if (!x.head || x.head.startsWith('core.math') || x.head.startsWith('structure.atom-property')) {
                if (!x.head || !x.head.startsWith('structure.generator')) {
                    throw new Error(`values must be part of an comparison, value '${x}'`);
                } else {
                    return x as any;
                }
            })
        );
    }
});

/** Transpiles a VMD selection string into a MolScript expression; throws when the string does not parse. */
export const transpiler: Transpiler = str => lang.Query.tryParse(str);
diff --git a/src/mol-script/transpilers/vmd/properties.ts b/src/mol-script/transpilers/vmd/properties.ts new file mode 100644 index 0000000000000000000000000000000000000000..306b9e152265bfe8d914cf5d8087ae31ce314ce8 --- /dev/null +++ b/src/mol-script/transpilers/vmd/properties.ts @@ -0,0 +1,267 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */

import { MolScriptBuilder } from '../../../mol-script/language/builder';
const B = MolScriptBuilder;
import { PropertyDict } from '../types';

// Patterns for the textual form of property values.
const reFloat = /[-+]?[0-9]*\.?[0-9]+/;
const rePosInt = /[+]?[0-9]+/;
const reInt = /[-+]?[0-9]+/;

/** Identity mapping for string-valued properties. */
function str(x: string) { return x; }

/** VMD one-letter secondary structure codes and the flag name each one stands for. */
export const sstrucDict: { [key: string]: string } = {
    T: 'turn', // Turn
    E: 'sheet', // Extended conformation ($\beta$ sheets)
    B: 'strand', // Isolated bridge
    H: 'alpha', // Alpha helix
    G: '3-10', // 3-10 helix
    I: 'pi', // Pi helix
    C: 'none', // Coil
};

/**
 * Converts a one-letter secondary structure code (any case) into a
 * secondary-structure-flags expression; unknown codes fall back to 'none'.
 */
export function sstrucMap(x: string) {
    return B.struct.type.secondaryStructureFlags(
        [sstrucDict[x.toUpperCase()] || 'none']
    );
}

/**
 * Properties of the VMD selection language. Each entry gives the pattern of
 * its values (`regex`), the conversion of the matched text (`map`), the test
 * level it applies to (`level`) and, when supported, the MolScript property
 * expression it is compared against (`property`).
 */
export const properties: PropertyDict = {
    name: {
        '@desc': 'str    atom name',
        '@examples': ['name CA'],
        regex: /[a-zA-Z0-9]+/, map: B.atomName,
        level: 'atom-test', property: B.ammp('label_atom_id')
    },
    type: {
        '@desc': 'str    atom type',
        '@examples': ['type C3'],
        isUnsupported: true,
        regex: /[a-zA-Z0-9]+/, map: str,
        level: 'atom-test'
    },
    index: {
        '@desc': 'num    the atom number, starting at 0',
        '@examples': ['index 10'],
        isNumeric: true,
        // `index` is 0-based but is tested against `id`, the same property
        // that the 1-based `serial` uses unchanged, so index N is id N + 1.
        regex: rePosInt, map: x => (parseInt(x) + 1),
        level: 'atom-test', property: B.ammp('id')
    },
    serial: {
        '@desc': 'num    the atom number, starting at 1',
        '@examples': ['serial 11'],
        isNumeric: true,
        regex: rePosInt, map: x => parseInt(x),
        level: 'atom-test', property: B.ammp('id')
    },
    atomicnumber: {
        '@desc': 'num    atomic number (0 if undefined)',
        '@examples': ['atomicnumber 13'],
        isNumeric: true,
        regex: rePosInt, map: x => parseInt(x),
        level: 'atom-test', property: B.acp('atomicNumber')
    },
    element: {
        '@desc': 'str  atomic element symbol string ("X" if undefined)',
        '@examples': ['element N'],
        regex: /[a-zA-Z0-9]{1,3}/, map: x => B.es(x),
        level: 'atom-test', property: B.acp('elementSymbol')
    },
    altloc: {
        '@desc': 'str  alternate location/conformation identifier',
        '@examples': ['altloc C'],
        regex: /[a-zA-Z0-9]+/, map: str,
        level: 'atom-test', property: B.ammp('label_alt_id')
    },
    chain: {
        '@desc': 'str  the one-character chain identifier',
        '@examples': ['chain A'],
        regex: /[a-zA-Z0-9]+/, map: str,
        level: 'residue-test', property: B.ammp('auth_asym_id')
    },
    residue: {
        '@desc': 'num  a set of connected atoms with the same residue number',
        '@examples': ['residue < 11', 'residue 11'],
        isNumeric: true,
        regex: reInt, map: x => parseInt(x),
        level: 'residue-test', property: B.ammp('auth_seq_id')
    },
    fragment: {
        '@desc': 'num  a set of connected residues',
        '@examples': ['fragment 42'],
        isUnsupported: true,
        isNumeric: true,
        regex: reInt, map: x => parseInt(x),
        level: 'residue-test'
    },
    pfrag: {
        '@desc': 'num  a set of connected protein residues',
        '@examples': ['pfrag 42'],
        isUnsupported: true,
        isNumeric: true,
        regex: reInt, map: x => parseInt(x),
        level: 'residue-test'
    },
    nfrag: {
        '@desc': 'num  a set of connected nucleic residues',
        '@examples': ['nfrag 42'],
        isUnsupported: true,
        isNumeric: true,
        regex: reInt, map: x => parseInt(x),
        level: 'residue-test'
    },
    sequence: {
        '@desc': 'str  a sequence given by one letter names',
        '@examples': ['sequence PGATTACA'],
        isUnsupported: true,
        regex: /[a-zA-Z0-9]+/, map: str,
        level: 'residue-test'
    },
    numbonds: {
        '@desc': 'num  number of bonds',
        '@examples': ['numbonds = 2', 'numbonds >= 3'],
        isNumeric: true,
        regex: rePosInt, map: x => parseInt(x),
        level: 'atom-test', property: B.acp('bondCount')
    },
    resname: {
        '@desc': 'str  residue name',
        '@examples': ['resname ALA'],
        regex: /[a-zA-Z0-9]+/, map: str,
        level: 'residue-test', property: B.ammp('auth_comp_id')
    },
    resid: {
        '@desc': 'num  residue id',
        '@examples': ['resid 42'],
        isNumeric: true,
        regex: reInt, map: x => parseInt(x),
        level: 'residue-test', property: B.ammp('auth_seq_id')
    },
    segname: {
        '@desc': 'str  segment name',
        '@examples': ['segname B'],
        regex: /[a-zA-Z0-9]+/, map: str,
        level: 'residue-test', property: B.ammp('label_asym_id')
    },
    x: {
        '@desc': 'float  x coordinate',
        '@examples': ['x 42'],
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test', property: B.acp('x')
    },
    y: {
        '@desc': 'float  y coordinate',
        '@examples': ['y > 1.7'],
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test', property: B.acp('y')
    },
    z: {
        '@desc': 'float  z coordinate',
        '@examples': ['z < 11', 'z > -21'],
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test', property: B.acp('z')
    },
    radius: {
        '@desc': 'float  atomic radius',
        '@examples': ['radius > 1.3'],
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test', property: B.acp('vdw')
    },
    mass: {
        '@desc': 'float  atomic mass',
        '@examples': ['mass > 2'],
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test', property: B.acp('mass')
    },
    charge: {
        '@desc': 'float  atomic charge',
        '@examples': ['charge > 0', 'charge 1'],
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test', property: B.ammp('pdbx_formal_charge')
    },
    beta: {
        '@desc': 'float  temperature factor',
        '@examples': ['beta < 20', 'beta > 35'],
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test', property: B.ammp('B_iso_or_equiv')
    },
    occupancy: {
        '@desc': 'float  occupancy',
        '@examples': ['occupancy 1', 'occupancy < 1'],
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test', property: B.ammp('occupancy')
    },
    user: {
        '@desc': 'float  time-varying user-specified value',
        '@examples': ['user < 0.1'],
        isUnsupported: true,
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test'
    },
    rasmol: {
        '@desc': 'str  translates Rasmol selection string to VMD',
        '@examples': ["rasmol 'all'"],
        isUnsupported: true,
        regex: /[^']*/, map: str,
        level: 'atom-test'
    },
    structure: {
        '@desc': 'str  single letter name for the secondary structure',
        '@examples': ['structure H', 'structure H E'],
        regex: /T|E|B|H|G|I|C/i, map: sstrucMap,
        level: 'atom-test', property: B.ammp('secondaryStructureFlags')
    },
    phi: {
        '@desc': 'float  phi backbone conformational angles',
        '@examples': ['phi < 160'],
        isUnsupported: true,
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'residue-test'
    },
    psi: {
        '@desc': 'float  psi backbone conformational angles',
        '@examples': ['psi < 160'],
        isUnsupported: true,
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'residue-test'
    },
    // The force components accept `reFloat`, so they are converted with
    // `parseFloat`; `parseInt` would silently drop the fractional part.
    ufx: {
        '@desc': 'num  force to apply in the x coordinate',
        '@examples': ['ufx 1'],
        isUnsupported: true,
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test'
    },
    ufy: {
        '@desc': 'num  force to apply in the y coordinate',
        '@examples': ['ufy 1'],
        isUnsupported: true,
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test'
    },
    ufz: {
        '@desc': 'num  force to apply in the z coordinate',
        '@examples': ['ufz 1'],
        isUnsupported: true,
        isNumeric: true,
        regex: reFloat, map: x => parseFloat(x),
        level: 'atom-test'
    },
};
diff --git a/src/mol-script/transpilers/vmd/symbols.ts b/src/mol-script/transpilers/vmd/symbols.ts new file mode 100644 index 0000000000000000000000000000000000000000..bc6da7e1ac0969bec658c54556c63017cd5e1ccc --- /dev/null +++ b/src/mol-script/transpilers/vmd/symbols.ts @@ -0,0 +1,40 @@ +/** + * Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com> + */

import { properties } from './properties';
import { operators } from './operators';
import { keywords } from './keywords';
import { functions } from './functions';

/** Names and abbreviations of all properties that are not flagged as unsupported. */
export const Properties: string[] = [];
for (const key in properties) {
    const { isUnsupported, abbr } = properties[key];
    if (isUnsupported) continue;
    Properties.push(key);
    if (abbr) Properties.push(...abbr);
}

/** Names and abbreviations of all operators that are not flagged as unsupported. */
export const Operators: string[] = [];
for (const op of operators) {
    if (op.isUnsupported) continue;
    Operators.push(op.name);
    if (op.abbr) Operators.push(...op.abbr);
}

/** Names and abbreviations of all keywords that define a `map`. */
export const Keywords: string[] = [];
for (const key in keywords) {
    const { map, abbr } = keywords[key];
    if (!map) continue;
    Keywords.push(key);
    if (abbr) Keywords.push(...abbr);
}

/** Names of all functions that define a `map`. */
export const Functions: string[] = [];
for (const key in functions) {
    if (functions[key].map) Functions.push(key);
}

/** All symbols grouped by kind; functions are listed together with the operators. */
export const all = { Properties, Operators: Operators.concat(Functions), Keywords };
diff --git a/src/mol-util/monadic-parser.ts b/src/mol-util/monadic-parser.ts index 0bf8004d303558b95bc2c707cf75e7c3b66763d0..41f3f844e0e91556044a966b8d9b55c532073ab6 100644 --- a/src/mol-util/monadic-parser.ts +++ b/src/mol-util/monadic-parser.ts @@ -20,6 +20,7 @@ export class MonadicParser<A> { return { success: false, index: makeLineColumnIndex(input, result.furthest), expected: result.expected }; }; + tryParse(str: string) { const result = this.parse(str); if (result.success) { @@ -117,6 +118,7 @@ export class MonadicParser<A> { return MonadicParser.seq(this.times(n), this.many()).map(r => [...r[0], ...r[1]]); }; + map<B>(f: (a: A) => B): MonadicParser<B> { return new MonadicParser((input, i) => { const result = this._(input, i); @@ -234,15 +236,36 @@ export namespace MonadicParser { export type Result<T> = Success<T> | Failure - // export function
createLanguage(parsers: any) { - // const language: any = {}; - // for (const key of Object.keys(parsers)) { - // (function (key) { - // language[key] = lazy(() => parsers[key](language)); - // })(key); - // } - // return language; - // } + //export function lookahead<A>(x: MonadicParser<A> | string | RegExp): MonadicParser<null> { + //export function seq(...parsers: MonadicParser<any>[]): MonadicParser<any[]> { + //export function seq<A, B, C>(a: MonadicParser<A>, b: MonadicParser<B>, c: MonadicParser<C>): MonadicParser<[A, B, C]> +// export function alt(...parsers: MonadicParser<any>[]): MonadicParser<any> { + // const numParsers = parsers.length; + // if (numParsers === 0) { + // return fail('zero alternates'); +// } + + /** Runs the given parsers in sequence (via `seq`) and calls the last argument, which must be a function, with the sequence results spread as its arguments. Throws when called without arguments or when the last argument is not a function. */ export function seqMap( a: MonadicParser<any>,b:MonadicParser<any>,c:any) { + var args = [].slice.call(arguments); + if (args.length === 0) { + throw new Error("seqMap needs at least one argument"); + } + var mapper = args.pop(); + assertFunction(mapper); + return seq.apply(null, args).map(function(results: any) { + return mapper.apply(null, results); + }); + } + + /** Builds a language object: every key of `parsers` becomes a `lazy` parser that calls `parsers[key](language)`, so rules can refer to each other through the shared object. */ export function createLanguage(parsers: any) { + const language: any = {}; + for (const key of Object.keys(parsers)) { + (function (key) { + language[key] = lazy(() => parsers[key](language)); + })(key); + } + return language; + } export function seq<A>(a: MonadicParser<A>): MonadicParser<[A]> export function seq<A, B>(a: MonadicParser<A>, b: MonadicParser<B>): MonadicParser<[A, B]> @@ -348,7 +371,7 @@ export namespace MonadicParser { export function fail(expected: string): MonadicParser<any> { return new MonadicParser((input, i) => makeFailure(i, expected)); } - + export function lookahead<A>(x: MonadicParser<A> | string | RegExp): MonadicParser<null> { if (isParser(x)) { return new MonadicParser((input, i) => { @@ -455,6 +478,17 @@ export namespace MonadicParser { export const crlf = string('\r\n'); export const newline = alt(crlf, lf, cr).desc('newline'); export const end = 
alt(newline, eof); + + /** Returns `succeed(A)`. */ export function of(A:any){ + return succeed(A); + } + /* NOTE(review): the assignments below write namespace members back onto `MonadicParser`; presumably they exist to expose a Parsimmon-style static API - confirm that they are needed. */ MonadicParser.createLanguage = createLanguage; + MonadicParser.seq = seq; + MonadicParser.seqMap = seqMap; + MonadicParser.of = succeed; + MonadicParser.regexp = regexp; +// MonadicParser.regexp.lookahead = lookahead; + //MonadicParser.RegExp = regexp; } function seqPick(idx: number, ...parsers: MonadicParser<any>[]): MonadicParser<any> { @@ -550,4 +584,12 @@ function unsafeUnion(xs: string[], ys: string[]) { function isParser(obj: any): obj is MonadicParser<any> { return obj instanceof MonadicParser; -} \ No newline at end of file +} + +/** Throws an Error when `x` is not a function. */ function assertFunction(x:any) { + if (typeof x !== "function") { + throw new Error("not a function: " + x); + } +} + +