From 88aa9303d792c514e50496bdcc1557c4886da300 Mon Sep 17 00:00:00 2001 From: David Sehnal <david.sehnal@gmail.com> Date: Wed, 18 Mar 2020 13:20:08 +0100 Subject: [PATCH] model-server: fixed data_source bug, allow fetching source data over http(s) --- src/mol-util/retry-if.ts | 3 + src/servers/model/config.ts | 32 ++++++- src/servers/model/preprocess/preprocess.ts | 2 +- src/servers/model/server/api-web.ts | 58 +++++++----- src/servers/model/server/api.ts | 2 +- src/servers/model/server/landing.ts | 93 ------------------- src/servers/model/server/structure-wrapper.ts | 75 ++++++++++++--- src/servers/model/utils/fetch-retry.ts | 5 +- 8 files changed, 129 insertions(+), 141 deletions(-) delete mode 100644 src/servers/model/server/landing.ts diff --git a/src/mol-util/retry-if.ts b/src/mol-util/retry-if.ts index c78712dcd..8993e1209 100644 --- a/src/mol-util/retry-if.ts +++ b/src/mol-util/retry-if.ts @@ -7,11 +7,14 @@ export async function retryIf<T>(promiseProvider: () => Promise<T>, params: { retryThenIf?: (result: T) => boolean, retryCatchIf?: (error: any) => boolean, + onRetry?: () => void, retryCount: number }) { let count = 0; while (count <= params.retryCount) { try { + if (count > 0) params.onRetry?.(); + const result = await promiseProvider(); if (params.retryThenIf && params.retryThenIf(result)) { count++; diff --git a/src/servers/model/config.ts b/src/servers/model/config.ts index 68fbb6168..b08c25db3 100644 --- a/src/servers/model/config.ts +++ b/src/servers/model/config.ts @@ -86,17 +86,24 @@ const DefaultModelServerConfig = { defaultSource: 'pdb-cif' as string, /** - * Maps a request identifier to a filename given a 'source' and 'id' variables. + * Maps a request identifier to either: + * - filename [source, mapping] + * - URI [source, mapping, format] * + * Mapping is provided 'source' and 'id' variables to interpolate. + * * /static query uses 'pdb-cif' and 'pdb-bcif' source names. */ sourceMap: [ ['pdb-cif', 'e:/test/quick/${id}_updated.cif'], // ['pdb-bcif', 'e:/test/quick/${id}.bcif'], - ] as [string, string][] + ] as ([string, string] | [string, string, ModelServerFetchFormats])[] }; -export let mapSourceAndIdToFilename: (source: string, id: string) => string = () => { +export const ModelServerFetchFormats = ['cif', 'bcif', 'cif.gz', 'bcif.gz'] as const +export type ModelServerFetchFormats = (typeof ModelServerFetchFormats)[number] + +export let mapSourceAndIdToFilename: (source: string, id: string) => [string, ModelServerFetchFormats] = () => { throw new Error('call setupConfig & validateConfigAndSetupSourceMap to initialize this function'); } @@ -159,6 +166,16 @@ function addServerArgs(parser: argparse.ArgumentParser) { 'The `SOURCE` variable (e.g. `pdb-bcif`) is arbitrary and depends on how you plan to use the server.' ].join('\n'), }); + parser.addArgument([ '--sourceMapUrl' ], { + nargs: 3, + action: 'append', + metavar: ['SOURCE', 'PATH', 'SOURCE_MAP_FORMAT'] as any, + help: [ + 'Same as --sourceMap but for URL. --sourceMap src url format', + 'Example: pdb-cif "https://www.ebi.ac.uk/pdbe/entry-files/download/${id}_updated.cif" cif', + 'Format is either cif or bcif' + ].join('\n'), + }); } export type ModelServerConfig = typeof DefaultModelServerConfig @@ -170,7 +187,7 @@ export const ModelServerConfigTemplate: ModelServerConfig = { sourceMap: [ ['pdb-bcif', './path-to-binary-cif/${id.substr(1, 2)}/${id}.bcif'], ['pdb-cif', './path-to-text-cif/${id.substr(1, 2)}/${id}.cif'], - ['pdb-updated', './path-to-updated-cif/${id}.bcif'] + ['pdb-updated', 'https://www.ebi.ac.uk/pdbe/entry-files/download/${id}_updated.cif', 'cif'] ] as [string, string][] } @@ -199,6 +216,11 @@ function setConfig(config: ModelServerConfig) { for (const k of ObjectKeys(ModelServerConfig)) { if (config[k] !== void 0) (ModelServerConfig as any)[k] = config[k]; } + + if ((config as any).sourceMapUrl) { + if (!ModelServerConfig.sourceMap) ModelServerConfig.sourceMap = []; + ModelServerConfig.sourceMap.push(...(config as any).sourceMapUrl); + } } function validateConfigAndSetupSourceMap() { @@ -208,7 +230,7 @@ function validateConfigAndSetupSourceMap() { mapSourceAndIdToFilename = new Function('source', 'id', [ 'switch (source.toLowerCase()) {', - ...ModelServerConfig.sourceMap.map(([source, path]) => `case '${source.toLowerCase()}': return \`${path}\`;`), + ...ModelServerConfig.sourceMap.map(([source, path, format]) => `case '${source.toLowerCase()}': return [\`${path}\`, '${format}'];`), '}', ].join('\n')) as any; } diff --git a/src/servers/model/preprocess/preprocess.ts b/src/servers/model/preprocess/preprocess.ts index 397674826..2b45f8c60 100644 --- a/src/servers/model/preprocess/preprocess.ts +++ b/src/servers/model/preprocess/preprocess.ts @@ -22,7 +22,7 @@ export function preprocessFile(filename: string, propertyProvider?: ModelPropert } async function preprocess(filename: string, propertyProvider?: ModelPropertiesProvider, outputCif?: string, outputBcif?: string) { - const input = await readStructureWrapper('entry', '_local_', filename, propertyProvider); + const input = await readStructureWrapper('entry', '_local_', filename, void 0, propertyProvider); const categories = await classifyCif(input.cifFrame); const inputStructures = (await resolveStructures(input))!; const exportCtx = CifExportContext.create(inputStructures); diff --git a/src/servers/model/server/api-web.ts b/src/servers/model/server/api-web.ts index 8ad9a70c2..64f279291 100644 --- a/src/servers/model/server/api-web.ts +++ b/src/servers/model/server/api-web.ts @@ -119,36 +119,46 @@ function mapQuery(app: express.Express, queryName: string, queryDefinition: Quer }); } -export function initWebApi(app: express.Express) { - app.use(bodyParser.json({ limit: '1mb' })); - - app.get(makePath('static/:format/:id'), async (req, res) => { - const binary = req.params.format === 'bcif'; - const id = req.params.id; - const fn = mapSourceAndIdToFilename(binary ? 'pdb-bcif' : 'pdb-cif', id); - if (!fn || !fs.existsSync(fn)) { +function serveStatic(req: express.Request, res: express.Response) { + const source = req.params.source === 'bcif' + ? 'pdb-bcif' + : req.params.source === 'cif' + ? 'pdb-cif' + : req.params.source; + + const id = req.params.id; + const [fn, format] = mapSourceAndIdToFilename(source, id); + const binary = format === 'bcif' || fn.indexOf('.bcif') > 0; + + if (!fn || !fs.existsSync(fn)) { + res.status(404); + res.end(); + return; + } + fs.readFile(fn, (err, data) => { + if (err) { res.status(404); res.end(); return; } - fs.readFile(fn, (err, data) => { - if (err) { - res.status(404); - res.end(); - return; - } - const f = path.parse(fn); - res.writeHead(200, { - 'Content-Type': binary ? 'application/octet-stream' : 'text/plain; charset=utf-8', - 'Access-Control-Allow-Origin': '*', - 'Access-Control-Allow-Headers': 'X-Requested-With', - 'Content-Disposition': `inline; filename="${f.name}${f.ext}"` - }); - res.write(data); - res.end(); + const f = path.parse(fn); + res.writeHead(200, { + 'Content-Type': binary ? 'application/octet-stream' : 'text/plain; charset=utf-8', + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Headers': 'X-Requested-With', + 'Content-Disposition': `inline; filename="${f.name}${f.ext}"` }); - }) + res.write(data); + res.end(); + }); +} + +export function initWebApi(app: express.Express) { + app.use(bodyParser.json({ limit: '1mb' })); + + app.get(makePath('static/:source/:id'), (req, res) => serveStatic(req, res)); + app.get(makePath('v1/static/:source/:id'), (req, res) => serveStatic(req, res)); // app.get(makePath('v1/json'), (req, res) => { // const query = /\?(.*)$/.exec(req.url)![1]; diff --git a/src/servers/model/server/api.ts b/src/servers/model/server/api.ts index cc6a53dda..e0c6652e2 100644 --- a/src/servers/model/server/api.ts +++ b/src/servers/model/server/api.ts @@ -44,7 +44,7 @@ export interface QueryDefinition<Params = any> { export const CommonQueryParamsInfo: QueryParamInfo[] = [ { name: 'model_nums', type: QueryParamType.String, description: `A comma-separated list of model ids (i.e. 1,2). If set, only include atoms with the corresponding '_atom_site.pdbx_PDB_model_num' field.` }, { name: 'encoding', type: QueryParamType.String, defaultValue: 'cif', description: `Determines the output encoding (text based 'CIF' or binary 'BCIF').`, supportedValues: ['cif', 'bcif'] }, - { name: 'data_Source', type: QueryParamType.String, defaultValue: '', description: 'Allows to control how the provided data source ID maps to input file (as specified by the server instance config).' } + { name: 'data_source', type: QueryParamType.String, defaultValue: '', description: 'Allows to control how the provided data source ID maps to input file (as specified by the server instance config).' } ]; export interface CommonQueryParamsInfo { diff --git a/src/servers/model/server/landing.ts b/src/servers/model/server/landing.ts deleted file mode 100644 index a0bc1866d..000000000 --- a/src/servers/model/server/landing.ts +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. - * - * @author David Sehnal <david.sehnal@gmail.com> - */ - -import Version from '../version' - -const examples = [{ - name: 'Atoms', - params: { - id: '1cbs', - name: 'atoms', - params: { atom_site: { label_comp_id: 'ALA' } } - } -}, { - name: 'Residue Interaction', - params: { - id: '1cbs', - name: 'residueInteraction', - params: { - radius: 5, - atom_site: { 'label_comp_id': 'REA' } - } - } -}, { - name: 'Full', - params: { - id: '1tqn', - name: 'full' - } -}, { - name: 'Full (binary)', - params: { - id: '1tqn', - name: 'full', - binary: true - } -}, { - name: 'Full (specific models)', - params: { - id: '1grm', - name: 'full', - modelNums: [ 2, 3 ] - } -}]; - -function create() { - return `<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8" /> - <meta name="viewport" content="width=device-width, user-scalable=no, minimum-scale=1.0, maximum-scale=1.0"> - <title>Mol* ModelServer ${Version}</title> - <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/skeleton/2.0.4/skeleton.min.css" /> - </head> - <body> - <h1>Mol* Model Server ${Version}</h1> - <select id='example'> - <option value='-1'>Select example...</option> - ${examples.map((e, i) => `<option value=${i}>${e.name}</option>`)} - </select> - <br/> - <textarea style="height: 280px; width: 600px; font-family: monospace" id="query-text"></textarea><br> - <button class="button button-primary" style="width: 600px" id="query">Query</button> - <div id='error' style='color: red; font-weight: blue'></div> - <div>Static input files available as CIF and BinaryCIF at <a href='/ModelServer/static/cif/1cbs' target='_blank'>static/cif/id</a> and <a href='/ModelServer/static/bcif/1cbs' target='_blank'>static/bcif/id</a> respectively.</div> - <script> - var Examples = ${JSON.stringify(examples)}; - var err = document.getElementById('error'); - var exampleEl = document.getElementById('example'), queryTextEl = document.getElementById('query-text'); - exampleEl.onchange = function () { - var i = +exampleEl.value; - if (i < 0) return; - queryTextEl.value = JSON.stringify(Examples[i].params, null, 2); - }; - document.getElementById('query').onclick = function () { - err.innerText = ''; - try { - var q = JSON.parse(queryTextEl.value); - var path = '/ModelServer/api/v1?' + encodeURIComponent(JSON.stringify(q)); - console.log(path); - window.open(path, '_blank'); - } catch (e) { - err.innerText = '' + e; - } - }; - </script> - </body> -</html>`; -} - -export const LandingPage = create(); \ No newline at end of file diff --git a/src/servers/model/server/structure-wrapper.ts b/src/servers/model/server/structure-wrapper.ts index b9c6a4251..e3bdfb9fb 100644 --- a/src/servers/model/server/structure-wrapper.ts +++ b/src/servers/model/server/structure-wrapper.ts @@ -7,7 +7,7 @@ import { Structure, Model } from '../../../mol-model/structure'; import { PerformanceMonitor } from '../../../mol-util/performance-monitor'; import { Cache } from './cache'; -import { ModelServerConfig as Config, mapSourceAndIdToFilename } from '../config'; +import { ModelServerConfig as Config, mapSourceAndIdToFilename, ModelServerFetchFormats } from '../config'; import { CIF, CifFrame, CifBlock } from '../../../mol-io/reader/cif' import * as util from 'util' import * as fs from 'fs' @@ -16,6 +16,7 @@ import { Job } from './jobs'; import { ConsoleLogger } from '../../../mol-util/console-logger'; import { ModelPropertiesProvider } from '../property-provider'; import { trajectoryFromMmCIF } from '../../../mol-model-formats/structure/mmcif'; +import { fetchRetry } from '../utils/fetch-retry'; require('util.promisify').shim(); @@ -53,7 +54,7 @@ export async function createStructureWrapperFromJob(job: Job, propertyProvider: const ret = StructureCache.get(job.key); if (ret) return ret; } - const ret = await readStructureWrapper(job.key, job.sourceId, job.entryId, propertyProvider); + const ret = await readStructureWrapper(job.key, job.sourceId, job.entryId, job.id, propertyProvider); if (allowCache && Config.cacheMaxSizeInBytes > 0) { StructureCache.add(ret); } @@ -73,13 +74,13 @@ async function readFile(filename: string) { if (isGz) input = await unzipAsync(input); const data = new Uint8Array(input.byteLength); for (let i = 0; i < input.byteLength; i++) data[i] = input[i]; - return data; + return { data, isBinary: true }; } else { if (isGz) { const data = await unzipAsync(await readFileAsync(filename)); - return data.toString('utf8'); + return { data: data.toString('utf8'), isBinary: false }; } - return readFileAsync(filename, 'utf8'); + return { data: await readFileAsync(filename, 'utf8'), isBinary: false }; } } @@ -90,11 +91,13 @@ async function parseCif(data: string|Uint8Array) { return parsed.result; } -export async function readDataAndFrame(filename: string, key?: string): Promise<{ data: string | Uint8Array, frame: CifBlock }> { +export async function readDataAndFrame(filename: string, key?: string): Promise<{ data: string | Uint8Array, frame: CifBlock, isBinary: boolean }> { perf.start('read'); - let data; + let data, isBinary; try { - data = await readFile(filename); + const read = await readFile(filename); + data = read.data; + isBinary = read.isBinary; } catch (e) { ConsoleLogger.error(key || filename, '' + e); throw new Error(`Could not read the file for '${key || filename}' from disk.`); @@ -105,15 +108,57 @@ export async function readDataAndFrame(filename: string, key?: string): Promise< const frame = (await parseCif(data)).blocks[0]; perf.end('parse'); - return { data, frame }; + return { data, frame, isBinary }; +} + +async function fetchDataAndFrame(jobId: string, uri: string, format: ModelServerFetchFormats, key?: string): Promise<{ data: string | Uint8Array, frame: CifBlock, isBinary: boolean }> { + perf.start('read'); + const isBinary = format.startsWith('bcif'); + let data; + try { + ConsoleLogger.logId(jobId, 'Fetch', `${uri}`); + const response = await fetchRetry(uri, 500, 3, () => ConsoleLogger.logId(jobId, 'Fetch', `Retrying to fetch '${uri}'`)); + + if (format.endsWith('.gz')) { + const input = await unzipAsync(await response.arrayBuffer()); + + if (isBinary) { + data = new Uint8Array(input.byteLength); + for (let i = 0; i < input.byteLength; i++) data[i] = input[i]; + } else { + data = input.toString('utf8'); + } + } else { + data = isBinary ? new Uint8Array(await response.arrayBuffer()) : await response.text(); + } + } catch (e) { + ConsoleLogger.error(key || uri, '' + e); + throw new Error(`Could not fetch the file for '${key || uri}'.`); + } + + perf.end('read'); + perf.start('parse'); + const frame = (await parseCif(data)).blocks[0]; + perf.end('parse'); + + return { data, frame, isBinary }; } -export async function readStructureWrapper(key: string, sourceId: string | '_local_', entryId: string, propertyProvider: ModelPropertiesProvider | undefined) { - const filename = sourceId === '_local_' ? entryId : mapSourceAndIdToFilename(sourceId, entryId); - if (!filename) throw new Error(`Cound not map '${key}' to a valid filename.`); - if (!fs.existsSync(filename)) throw new Error(`Could not find source file for '${key}'.`); +function readOrFetch(jobId: string, key: string, sourceId: string | '_local_', entryId: string) { + const mapped = sourceId === '_local_' ? [entryId] as const : mapSourceAndIdToFilename(sourceId, entryId); + if (!mapped) throw new Error(`Cound not map '${key}' for a resource.`); + + const uri = mapped[0].toLowerCase(); + if (uri.startsWith('http://') || uri.startsWith('https://') || uri.startsWith('ftp://')) { + return fetchDataAndFrame(jobId, mapped[0], (mapped[1] || 'cif').toLowerCase() as any, key); + } + + if (!fs.existsSync(mapped[0])) throw new Error(`Could not find source file for '${key}'.`); + return readDataAndFrame(mapped[0], key); +} - const { data, frame } = await readDataAndFrame(filename, key); +export async function readStructureWrapper(key: string, sourceId: string | '_local_', entryId: string, jobId: string | undefined, propertyProvider: ModelPropertiesProvider | undefined) { + const { data, frame, isBinary } = await readOrFetch(jobId || '', key, sourceId, entryId); perf.start('createModel'); const models = await trajectoryFromMmCIF(frame).run(); perf.end('createModel'); @@ -133,7 +178,7 @@ export async function readStructureWrapper(key: string, sourceId: string | '_loc sourceId, entryId }, - isBinary: /\.bcif/.test(filename), + isBinary, key, approximateSize: typeof data === 'string' ? 2 * data.length : data.length, models, diff --git a/src/servers/model/utils/fetch-retry.ts b/src/servers/model/utils/fetch-retry.ts index 38cad5f9b..4ce146bb6 100644 --- a/src/servers/model/utils/fetch-retry.ts +++ b/src/servers/model/utils/fetch-retry.ts @@ -16,11 +16,12 @@ function isRetriableNetworkError(error: any) { return error && RETRIABLE_NETWORK_ERRORS.includes(error.code); } -export async function fetchRetry(url: string, timeout: number, retryCount: number): Promise<Response> { +export async function fetchRetry(url: string, timeout: number, retryCount: number, onRetry?: () => void): Promise<Response> { const result = await retryIf(() => fetch(url, { timeout }), { - retryThenIf: r => r.status >= 500 && r.status < 600, + retryThenIf: r => r.status === 408 /** timeout */ || r.status === 429 /** too mant requests */ || (r.status >= 500 && r.status < 600), // TODO test retryCatchIf retryCatchIf: e => isRetriableNetworkError(e), + onRetry, retryCount }); -- GitLab