diff --git a/src/mol-io/reader/cif/data-model.ts b/src/mol-io/reader/cif/data-model.ts index 4d267a2a0050daa38c685e578fdac500281d4ecd..fa90cab9466f3d9412a25ecfe129a3be84b86f5b 100644 --- a/src/mol-io/reader/cif/data-model.ts +++ b/src/mol-io/reader/cif/data-model.ts @@ -117,7 +117,7 @@ export function getCifFieldType(field: CifField): Column.Schema.Int | Column.Sch let floatCount = 0, hasString = false; for (let i = 0, _i = field.rowCount; i < _i; i++) { const k = field.valueKind(i); - if (k !== Column.ValueKind.Present) continue + if (k !== Column.ValueKind.Present) continue; const type = getNumberType(field.str(i)); if (type === NumberType.Int) continue; else if (type === NumberType.Float) floatCount++; diff --git a/src/mol-io/reader/common/text/number-parser.ts b/src/mol-io/reader/common/text/number-parser.ts index 8fbde1cb424035502579f6261089ba83c3855bcf..97320495e143b18f4ee56b192eec3dc5c6453919 100644 --- a/src/mol-io/reader/common/text/number-parser.ts +++ b/src/mol-io/reader/common/text/number-parser.ts @@ -128,5 +128,5 @@ export function getNumberType(str: string): NumberType { } else break; } - return NumberType.Int; + return start === end ? NumberType.Int : NumberType.NaN; } diff --git a/src/mol-model/structure/export/mmcif.ts b/src/mol-model/structure/export/mmcif.ts index 428bcbee3c8103ccb0c57b8736675ec2e8bcc3cc..dd9c71b1f3b64573eacbb03d828bc96f812b62b9 100644 --- a/src/mol-model/structure/export/mmcif.ts +++ b/src/mol-model/structure/export/mmcif.ts @@ -20,6 +20,12 @@ export interface CifExportContext { cache: any } +export namespace CifExportContext { + export function create(structure: Structure, model: Model): CifExportContext { + return { structure, model, cache: Object.create(null) }; + } +} + function copy_mmCif_category(name: keyof mmCIF_Schema): CifCategory<CifExportContext> { return { name, @@ -87,14 +93,17 @@ export const mmCIF_Export_Filters = { } /** Doesn't start a data block */ -export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: Structure) { +export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: Structure, params?: { skipCategoryNames?: Set<string>, exportCtx?: CifExportContext }) { const models = structure.models; if (models.length !== 1) throw 'Can\'t export stucture composed from multiple models.'; const model = models[0]; - const ctx: CifExportContext[] = [{ structure, model, cache: Object.create(null) }]; + const _params = params || { }; + + const ctx: CifExportContext[] = [_params.exportCtx ? _params.exportCtx : CifExportContext.create(structure, model)]; for (const cat of Categories) { + if (_params.skipCategoryNames && _params.skipCategoryNames.has(cat.name)) continue; encoder.writeCategory(cat, ctx); } for (const customProp of model.customProperties.all) { @@ -103,6 +112,7 @@ export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: S const prefix = customProp.cifExport.prefix; const cats = customProp.cifExport.categories; for (const cat of cats) { + if (_params.skipCategoryNames && _params.skipCategoryNames.has(cat.name)) continue; if (cat.name.indexOf(prefix) !== 0) throw new Error(`Custom category '${cat.name}' name must start with prefix '${prefix}.'`); encoder.writeCategory(cat, ctx); } diff --git a/src/servers/model/preprocess.ts b/src/servers/model/preprocess.ts new file mode 100644 index 0000000000000000000000000000000000000000..c61288185032aebc8351e243f5c69f922d459590 --- /dev/null +++ b/src/servers/model/preprocess.ts @@ -0,0 +1,31 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import * as argparse from 'argparse' +import { preprocessFile } from './preprocess/preprocess'; + +const cmdParser = new argparse.ArgumentParser({ + addHelp: true, + description: 'Preprocess CIF files to include custom properties and convert them to BinaryCIF format.' +}); +cmdParser.addArgument(['--input', '-i'], { help: 'Input filename', required: true }); +cmdParser.addArgument(['--outCIF', '-oc'], { help: 'Output CIF filename', required: false }); +cmdParser.addArgument(['--outBCIF', '-ob'], { help: 'Output BinaryCIF filename', required: false }); + +// TODO: "bulk" mode + +interface CmdArgs { + input: string, + outCIF?: string, + outBCIF?: string +} + +const cmdArgs = cmdParser.parseArgs() as CmdArgs; + +if (cmdArgs.input) preprocessFile(cmdArgs.input, cmdArgs.outCIF, cmdArgs.outBCIF); + +// example: +// node build\node_modules\servers\model\preprocess -i e:\test\Quick\1cbs_updated.cif -oc e:\test\mol-star\model\1cbs.cif -ob e:\test\mol-star\model\1cbs.bcif \ No newline at end of file diff --git a/src/servers/model/preprocess/converter.ts b/src/servers/model/preprocess/converter.ts new file mode 100644 index 0000000000000000000000000000000000000000..bad7f7d865f61d04ff2069c0525de30f79716a32 --- /dev/null +++ b/src/servers/model/preprocess/converter.ts @@ -0,0 +1,51 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { CifCategory, CifField, CifFrame, getCifFieldType } from 'mol-io/reader/cif'; +import { CifWriter } from 'mol-io/writer/cif'; +import { Task } from 'mol-task'; +import { showProgress } from './util'; + +function getCategoryInstanceProvider(cat: CifCategory, fields: CifWriter.Field[]): CifWriter.Category { + return { + name: cat.name, + instance: () => ({ data: cat, fields, rowCount: cat.rowCount }) + }; +} + +function classify(name: string, field: CifField): CifWriter.Field { + const type = getCifFieldType(field); + if (type['@type'] === 'str') { + return { name, type: CifWriter.Field.Type.Str, value: field.str, valueKind: field.valueKind }; + } else if (type['@type'] === 'float') { + return CifWriter.Field.float(name, field.float, { valueKind: field.valueKind, typedArray: Float64Array }); + } else { + return CifWriter.Field.int(name, field.int, { valueKind: field.valueKind, typedArray: Int32Array }); + } +} + +export function classifyCif(frame: CifFrame) { + return Task.create('Classify CIF Data', async ctx => { + let maxProgress = 0; + for (const c of frame.categoryNames) maxProgress += frame.categories[c].fieldNames.length; + + const ret: CifWriter.Category[] = []; + + let current = 0; + for (const c of frame.categoryNames) { + const cat = frame.categories[c]; + const fields: CifWriter.Field[] = []; + for (const f of cat.fieldNames) { + const cifField = classify(f, cat.getField(f)!); + fields.push(cifField); + current++; + if (ctx.shouldUpdate) await ctx.update({ message: 'Classifying...', current, max: maxProgress }); + } + ret.push(getCategoryInstanceProvider(cat, fields)); + } + return ret; + }).run(showProgress, 250); +} \ No newline at end of file diff --git a/src/servers/model/preprocess/preprocess.ts b/src/servers/model/preprocess/preprocess.ts new file mode 100644 index 0000000000000000000000000000000000000000..394fe205ac81e4a8e2bbb908531dcf1e08c58a43 --- /dev/null +++ b/src/servers/model/preprocess/preprocess.ts @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { readStructure } from '../server/structure-wrapper'; +import { classifyCif } from './converter'; +import { ConsoleLogger } from 'mol-util/console-logger'; +import { Structure } from 'mol-model/structure'; +import { CifWriter } from 'mol-io/writer/cif'; +import Writer from 'mol-io/writer/writer'; +import { wrapFileToWriter } from '../server/api-local'; +import { Task } from 'mol-task'; +import { showProgress, clearLine } from './util'; +import { encode_mmCIF_categories, CifExportContext } from 'mol-model/structure/export/mmcif'; + +// TODO: error handling, bulk mode + +export async function preprocessFile(filename: string, outputCif?: string, outputBcif?: string) { + ConsoleLogger.log('ModelServer', `Reading ${filename}...`); + const input = await readStructure('entry', '_local_', filename); + ConsoleLogger.log('ModelServer', `Classifying CIF categories...`); + const categories = await classifyCif(input.cifFrame); + clearLine(); + + const exportCtx = CifExportContext.create(input.structure, input.structure.models[0]); + + if (outputCif) { + ConsoleLogger.log('ModelServer', `Encoding CIF...`); + const writer = wrapFileToWriter(outputCif); + const encoder = CifWriter.createEncoder({ binary: false }); + await encode(input.structure, input.cifFrame.header, categories, encoder, exportCtx, writer); + clearLine(); + writer.end(); + } + + if (outputBcif) { + ConsoleLogger.log('ModelServer', `Encoding BinaryCIF...`); + const writer = wrapFileToWriter(outputBcif); + const encoder = CifWriter.createEncoder({ binary: true, binaryAutoClassifyEncoding: true }); + await encode(input.structure, input.cifFrame.header, categories, encoder, exportCtx, writer); + clearLine(); + writer.end(); + } + ConsoleLogger.log('ModelServer', `Done.`); +} + +function encode(structure: Structure, header: string, categories: CifWriter.Category[], encoder: CifWriter.Encoder, exportCtx: CifExportContext, writer: Writer) { + return Task.create('Encode', async ctx => { + const skipCategoryNames = new Set<string>(categories.map(c => c.name)); + encoder.startDataBlock(header); + let current = 0; + for (const cat of categories){ + encoder.writeCategory(cat); + current++; + if (ctx.shouldUpdate) await ctx.update({ message: 'Encoding...', current, max: categories.length }); + } + encode_mmCIF_categories(encoder, structure, { skipCategoryNames, exportCtx }); + encoder.encode(); + encoder.writeTo(writer); + }).run(showProgress, 250); +} \ No newline at end of file diff --git a/src/servers/model/preprocess/util.ts b/src/servers/model/preprocess/util.ts new file mode 100644 index 0000000000000000000000000000000000000000..0cb595c682920e7b3c150ba9382608092806754f --- /dev/null +++ b/src/servers/model/preprocess/util.ts @@ -0,0 +1,17 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { Progress } from 'mol-task'; + +export function showProgress(p: Progress) { + process.stdout.write(`\r${new Array(80).join(' ')}`); + process.stdout.write(`\r${Progress.format(p)}`); +} + +export function clearLine() { + process.stdout.write(`\r${new Array(80).join(' ')}`); + process.stdout.write(`\r`); +} \ No newline at end of file diff --git a/src/servers/model/server/api-local.ts b/src/servers/model/server/api-local.ts index ec7ceec0389d4415bf0905bf7134718d11b744e8..be895be820fb3e555e2ce36e9c84364c663403d8 100644 --- a/src/servers/model/server/api-local.ts +++ b/src/servers/model/server/api-local.ts @@ -39,7 +39,7 @@ export async function runLocal(input: LocalInput) { while (job) { try { const encoder = await resolveJob(job); - const writer = wrapFile(job.outputFilename!); + const writer = wrapFileToWriter(job.outputFilename!); encoder.writeTo(writer); writer.end(); ConsoleLogger.logId(job.id, 'Query', 'Written.'); @@ -61,7 +61,7 @@ export async function runLocal(input: LocalInput) { StructureCache.expireAll(); } -function wrapFile(fn: string) { +export function wrapFileToWriter(fn: string) { const w = { open(this: any) { if (this.opened) return; @@ -71,7 +71,7 @@ function wrapFile(fn: string) { }, writeBinary(this: any, data: Uint8Array) { this.open(); - fs.writeSync(this.file, new Buffer(data)); + fs.writeSync(this.file, new Buffer(data.buffer)); return true; }, writeString(this: any, data: string) { diff --git a/src/servers/model/server/structure-wrapper.ts b/src/servers/model/server/structure-wrapper.ts index 8774e578cecb8556f560c7134b5510f0e04ff064..073c737e3bb669f9e44e47c74b89ce01995b3022 100644 --- a/src/servers/model/server/structure-wrapper.ts +++ b/src/servers/model/server/structure-wrapper.ts @@ -8,7 +8,7 @@ import { Structure, Model, Format } from 'mol-model/structure'; import { PerformanceMonitor } from 'mol-util/performance-monitor'; import { Cache } from './cache'; import Config from '../config'; -import CIF from 'mol-io/reader/cif' +import CIF, { CifFrame } from 'mol-io/reader/cif' import * as util from 'util' import * as fs from 'fs' import * as zlib from 'zlib' @@ -34,21 +34,22 @@ export interface StructureInfo { entryId: string } -export class StructureWrapper { - info: StructureInfo; +export interface StructureWrapper { + info: StructureInfo, - key: string; - approximateSize: number; - structure: Structure; + key: string, + approximateSize: number, + structure: Structure, + cifFrame: CifFrame } -export async function getStructure(job: Job): Promise<StructureWrapper> { - if (Config.cacheParams.useCache) { +export async function getStructure(job: Job, allowCache = true): Promise<StructureWrapper> { + if (allowCache && Config.cacheParams.useCache) { const ret = StructureCache.get(job.key); if (ret) return ret; } const ret = await readStructure(job.key, job.sourceId, job.entryId); - if (Config.cacheParams.useCache) { + if (allowCache && Config.cacheParams.useCache) { StructureCache.add(ret); } return ret; @@ -84,7 +85,7 @@ async function parseCif(data: string|Uint8Array) { return parsed.result; } -async function readStructure(key: string, sourceId: string, entryId: string) { +export async function readStructure(key: string, sourceId: string | '_local_', entryId: string) { const filename = sourceId === '_local_' ? entryId : Config.mapFile(sourceId, entryId); if (!filename) throw new Error(`Cound not map '${key}' to a valid filename.`); if (!fs.existsSync(filename)) throw new Error(`Could not find source file for '${key}'.`); @@ -127,7 +128,8 @@ async function readStructure(key: string, sourceId: string, entryId: string) { }, key, approximateSize: typeof data === 'string' ? 2 * data.length : data.length, - structure + structure, + cifFrame: frame }; return ret;