From aa3a42f94e0b3148ef391e05129d11a7fa2525b6 Mon Sep 17 00:00:00 2001 From: Alexander Rose <alex.rose@rcsb.org> Date: Fri, 20 Sep 2019 17:13:13 -0700 Subject: [PATCH] added simple xml parser --- src/mol-util/data-source.ts | 17 +++-- src/mol-util/xml-parser.ts | 134 ++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 src/mol-util/xml-parser.ts diff --git a/src/mol-util/data-source.ts b/src/mol-util/data-source.ts index 99c54d8d6..42de77608 100644 --- a/src/mol-util/data-source.ts +++ b/src/mol-util/data-source.ts @@ -9,6 +9,7 @@ import { Task, RuntimeContext } from '../mol-task'; import { utf8Read } from '../mol-io/common/utf8'; +import { parseXml } from './xml-parser'; // polyfill XMLHttpRequest in node.js const XHR = typeof document === 'undefined' ? require('xhr2') as { prototype: XMLHttpRequest; @@ -25,7 +26,7 @@ const XHR = typeof document === 'undefined' ? require('xhr2') as { // Gzip // } -export interface AjaxGetParams<T extends 'string' | 'binary' | 'json' = 'string'> { +export interface AjaxGetParams<T extends 'string' | 'binary' | 'json' | 'xml' = 'string'> { url: string, type?: T, title?: string, @@ -49,10 +50,10 @@ export function readFromFile(file: File, type: 'string' | 'binary') { export function ajaxGet(url: string): Task<string> export function ajaxGet(params: AjaxGetParams<'string'>): Task<string> export function ajaxGet(params: AjaxGetParams<'binary'>): Task<Uint8Array> -export function ajaxGet<T = any>(params: AjaxGetParams<'json'>): Task<T> +export function ajaxGet<T = any>(params: AjaxGetParams<'json' | 'xml'>): Task<T> export function ajaxGet(params: AjaxGetParams<'string' | 'binary'>): Task<string | Uint8Array> -export function ajaxGet(params: AjaxGetParams<'string' | 'binary' | 'json'>): Task<string | Uint8Array | object> -export function ajaxGet(params: AjaxGetParams<'string' | 'binary' | 'json'> | string) { +export function ajaxGet(params: AjaxGetParams<'string' | 'binary' | 'json' 
| 'xml'>): Task<string | Uint8Array | object> +export function ajaxGet(params: AjaxGetParams<'string' | 'binary' | 'json' | 'xml'> | string) { if (typeof params === 'string') return ajaxGetInternal(params, params, 'string', false); return ajaxGetInternal(params.title, params.url, params.type || 'string', false /* params.compression === DataCompressionMethod.Gzip */, params.body); } @@ -174,7 +175,7 @@ async function processAjax(ctx: RuntimeContext, asUint8Array: boolean, decompres } } -function ajaxGetInternal(title: string | undefined, url: string, type: 'json' | 'string' | 'binary', decompressGzip: boolean, body?: string): Task<string | Uint8Array> { +function ajaxGetInternal(title: string | undefined, url: string, type: 'json' | 'xml' | 'string' | 'binary', decompressGzip: boolean, body?: string): Task<string | Uint8Array> { let xhttp: XMLHttpRequest | undefined = void 0; return Task.create(title ? title : 'Download', async ctx => { try { @@ -195,8 +196,10 @@ function ajaxGetInternal(title: string | undefined, url: string, type: 'json' | if (type === 'json') { ctx.update({ message: 'Parsing JSON...', canAbort: false }); - const data = JSON.parse(result); - return data; + return JSON.parse(result); + } else if (type === 'xml') { + ctx.update({ message: 'Parsing XML...', canAbort: false }); + return parseXml(result); } return result; diff --git a/src/mol-util/xml-parser.ts b/src/mol-util/xml-parser.ts new file mode 100644 index 000000000..841296298 --- /dev/null +++ b/src/mol-util/xml-parser.ts @@ -0,0 +1,134 @@ +/** + * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info. 
/**
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

/** Map from attribute name to attribute value; the parser stores quote-stripped strings. */
export type XMLNodeAttributes = { [k: string]: any }

/** An element (or the `<?xml ...?>` declaration) produced by `parseXml`. */
export interface XMLNode {
    /** Tag name; not set on the declaration node. */
    name?: string
    /**
     * Text found directly inside the element, concatenated in document order.
     * Whitespace directly after a tag is consumed by the parser; entities are not decoded.
     * Not set on self-closing elements.
     */
    content?: string
    attributes: XMLNodeAttributes
    /** Child elements in document order (set to an array on every element node). */
    children?: XMLNode[]
}

/** Result of `parseXml`; both parts are `undefined` when the input does not contain them. */
export interface XMLDocument {
    declaration?: XMLNode,
    root?: XMLNode
}

/**
 * Find the first node in `children` whose tag name equals `name`.
 *
 * @returns the matching node, or `undefined` when there is none
 */
export function getXMLNodeByName(name: string, children: XMLNode[]): XMLNode | undefined {
    for (const child of children) {
        if (child.name === name) return child
    }
    return undefined
}

// NOTE: every pattern handed to `match()` below must be anchored with `^`, because
// `match()` always drops `m[0].length` characters from the FRONT of the remaining input.
const reStrip = /^['"]|['"]$/g
const reComment = /<!--[\s\S]*?-->/g
const reDeclaration = /^<\?xml\s*/
const reDeclarationEnd = /^\?>\s*/
const reTag = /^<([\w:.-]+)\s*/
const reSelfClose = /^\s*\/>\s*/
const reTagEnd = /^\??>\s*/
// XML allows whitespace between the name and `>` in a closing tag
const reClose = /^<\/[\w:.-]+\s*>\s*/
const reContent = /^([^<]*)/
// attribute names may contain `.` as well as `:` and `-`
const reAttr = /^([\w:.-]+)\s*=\s*("[^"]*"|'[^']*'|\w+)\s*/

/** Remove one leading and one trailing quote character from an attribute value. */
function strip (val: string): string {
    return val.replace(reStrip, '')
}

/**
 * Simple, lenient XML parser
 * adapted from https://github.com/segmentio/xml-parser (MIT license)
 *
 * Comments are removed before parsing. DOCTYPE, CDATA sections and processing
 * instructions are not supported, entities are not decoded, and closing tag
 * names are not checked against the opening tag. Malformed input never throws;
 * parsing simply stops and whatever was recognised so far is returned.
 *
 * @param xml the XML source text
 * @returns the optional `<?xml ...?>` declaration and the root element
 */
export function parseXml (xml: string): XMLDocument {
    // strip comments BEFORE trimming, otherwise a leading comment leaves
    // whitespace in front of the declaration/root and nothing is recognised
    xml = xml.replace(reComment, '').trim()

    return {
        declaration: declaration(),
        root: tag()
    }

    /** Parse `<?xml attr="..." ?>` if the input starts with it. */
    function declaration (): XMLNode | undefined {
        if (!match(reDeclaration)) return undefined

        const node: XMLNode = {
            attributes: {}
        }

        // attributes
        while (!(eos() || is('?>'))) {
            const attr = attribute()
            if (!attr) return node
            node.attributes[attr.name] = attr.value
        }
        match(reDeclarationEnd)
        return node
    }

    /** Parse one element, including its attributes, text and child elements. */
    function tag (): XMLNode | undefined {
        const m = match(reTag)
        if (!m) return undefined

        const children: XMLNode[] = []
        const node: XMLNode = {
            name: m[1],
            attributes: {},
            children
        }

        // attributes
        while (!(eos() || is('>') || is('?>') || is('/>'))) {
            const attr = attribute()
            // malformed attribute: give up on this element, keep what we have
            if (!attr) return node
            node.attributes[attr.name] = attr.value
        }

        // self closing tag
        if (match(reSelfClose)) {
            return node
        }
        match(reTagEnd)

        // content and children; text that follows a child element is appended
        // to the content so it cannot hide the remaining siblings
        let text = content()
        let child: XMLNode | undefined
        while ((child = tag())) {
            children.push(child)
            text += content()
        }
        node.content = text

        // closing
        match(reClose)
        return node
    }

    /** Consume and return the text up to the next `<` (possibly empty). */
    function content (): string {
        const m = match(reContent)
        return m ? m[1] : ''
    }

    /** Parse one `name = value` pair at the current position. */
    function attribute (): { name: string, value: string } | undefined {
        const m = match(reAttr)
        if (!m) return undefined
        return { name: m[1], value: strip(m[2]) }
    }

    /** Match an anchored pattern at the current position and consume the matched text. */
    function match (re: RegExp): RegExpMatchArray | undefined {
        const m = xml.match(re)
        if (!m) return undefined
        xml = xml.slice(m[0].length)
        return m
    }

    /** True when all input has been consumed. */
    function eos (): boolean {
        return xml.length === 0
    }

    /** True when the remaining input starts with `prefix` (without scanning the whole input). */
    function is (prefix: string): boolean {
        return xml.slice(0, prefix.length) === prefix
    }
}