Skip to content
Snippets Groups Projects
Commit 20ee4149 authored by David Sehnal's avatar David Sehnal
Browse files

Basic model-server preprocessor to create "updated" (Binary)CIF

parent e878e285
No related branches found
No related tags found
No related merge requests found
......@@ -117,7 +117,7 @@ export function getCifFieldType(field: CifField): Column.Schema.Int | Column.Sch
let floatCount = 0, hasString = false;
for (let i = 0, _i = field.rowCount; i < _i; i++) {
const k = field.valueKind(i);
if (k !== Column.ValueKind.Present) continue
if (k !== Column.ValueKind.Present) continue;
const type = getNumberType(field.str(i));
if (type === NumberType.Int) continue;
else if (type === NumberType.Float) floatCount++;
......
......@@ -128,5 +128,5 @@ export function getNumberType(str: string): NumberType {
}
else break;
}
return NumberType.Int;
return start === end ? NumberType.Int : NumberType.NaN;
}
......@@ -20,6 +20,12 @@ export interface CifExportContext {
cache: any
}
export namespace CifExportContext {
    /**
     * Builds an export context for the given structure/model pair.
     * The cache starts out as an empty object without a prototype.
     */
    export function create(structure: Structure, model: Model): CifExportContext {
        const cache = Object.create(null);
        return { structure, model, cache };
    }
}
function copy_mmCif_category(name: keyof mmCIF_Schema): CifCategory<CifExportContext> {
return {
name,
......@@ -87,14 +93,17 @@ export const mmCIF_Export_Filters = {
}
/** Doesn't start a data block */
export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: Structure) {
export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: Structure, params?: { skipCategoryNames?: Set<string>, exportCtx?: CifExportContext }) {
const models = structure.models;
if (models.length !== 1) throw 'Can\'t export stucture composed from multiple models.';
const model = models[0];
const ctx: CifExportContext[] = [{ structure, model, cache: Object.create(null) }];
const _params = params || { };
const ctx: CifExportContext[] = [_params.exportCtx ? _params.exportCtx : CifExportContext.create(structure, model)];
for (const cat of Categories) {
if (_params.skipCategoryNames && _params.skipCategoryNames.has(cat.name)) continue;
encoder.writeCategory(cat, ctx);
}
for (const customProp of model.customProperties.all) {
......@@ -103,6 +112,7 @@ export function encode_mmCIF_categories(encoder: CifWriter.Encoder, structure: S
const prefix = customProp.cifExport.prefix;
const cats = customProp.cifExport.categories;
for (const cat of cats) {
if (_params.skipCategoryNames && _params.skipCategoryNames.has(cat.name)) continue;
if (cat.name.indexOf(prefix) !== 0) throw new Error(`Custom category '${cat.name}' name must start with prefix '${prefix}.'`);
encoder.writeCategory(cat, ctx);
}
......
/**
* Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import * as argparse from 'argparse'
import { preprocessFile } from './preprocess/preprocess';
/** Shape of the parsed command line arguments. */
interface CmdArgs {
    input: string,
    outCIF?: string,
    outBCIF?: string
}

const cmdParser = new argparse.ArgumentParser({
    addHelp: true,
    description: 'Preprocess CIF files to include custom properties and convert them to BinaryCIF format.'
});
cmdParser.addArgument(['--input', '-i'], { help: 'Input filename', required: true });
cmdParser.addArgument(['--outCIF', '-oc'], { help: 'Output CIF filename', required: false });
cmdParser.addArgument(['--outBCIF', '-ob'], { help: 'Output BinaryCIF filename', required: false });

// TODO: "bulk" mode

const cmdArgs = cmdParser.parseArgs() as CmdArgs;

if (cmdArgs.input) {
    // preprocessFile is async; without a rejection handler a failure would only
    // show up as an unhandled promise rejection and the process would not
    // report a failing exit code.
    preprocessFile(cmdArgs.input, cmdArgs.outCIF, cmdArgs.outBCIF).catch(e => {
        console.error(e);
        process.exitCode = 1;
    });
}
// example:
// node build\node_modules\servers\model\preprocess -i e:\test\Quick\1cbs_updated.cif -oc e:\test\mol-star\model\1cbs.cif -ob e:\test\mol-star\model\1cbs.bcif
\ No newline at end of file
/**
* Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { CifCategory, CifField, CifFrame, getCifFieldType } from 'mol-io/reader/cif';
import { CifWriter } from 'mol-io/writer/cif';
import { Task } from 'mol-task';
import { showProgress } from './util';
/**
 * Wraps a parsed CIF category as a writer category. The produced instance
 * uses the category itself as its data, together with the supplied fields
 * and the category's row count.
 */
function getCategoryInstanceProvider(cat: CifCategory, fields: CifWriter.Field[]): CifWriter.Category {
    const { name } = cat;
    return {
        name,
        instance() {
            return { data: cat, fields, rowCount: cat.rowCount };
        }
    };
}
/**
 * Creates a writer field for a parsed CIF field, choosing a string, float or
 * int field definition based on the type reported by getCifFieldType.
 */
function classify(name: string, field: CifField): CifWriter.Field {
    switch (getCifFieldType(field)['@type']) {
        case 'str':
            return { name, type: CifWriter.Field.Type.Str, value: field.str, valueKind: field.valueKind };
        case 'float':
            return CifWriter.Field.float(name, field.float, { valueKind: field.valueKind, typedArray: Float64Array });
        default:
            // Anything that is neither 'str' nor 'float' is written as an int field.
            return CifWriter.Field.int(name, field.int, { valueKind: field.valueKind, typedArray: Int32Array });
    }
}
/**
 * Classifies every field of every category in the frame and returns one
 * writer category per input category. Runs as a task that reports progress
 * through showProgress.
 */
export function classifyCif(frame: CifFrame) {
    return Task.create('Classify CIF Data', async ctx => {
        // Progress is measured in fields, so count them all up front.
        const totalFields = frame.categoryNames.reduce(
            (sum, categoryName) => sum + frame.categories[categoryName].fieldNames.length, 0);

        const result: CifWriter.Category[] = [];
        let processed = 0;
        for (const categoryName of frame.categoryNames) {
            const category = frame.categories[categoryName];
            const writerFields: CifWriter.Field[] = [];
            for (const fieldName of category.fieldNames) {
                writerFields.push(classify(fieldName, category.getField(fieldName)!));
                processed++;
                if (ctx.shouldUpdate) {
                    await ctx.update({ message: 'Classifying...', current: processed, max: totalFields });
                }
            }
            result.push(getCategoryInstanceProvider(category, writerFields));
        }
        return result;
    }).run(showProgress, 250);
}
\ No newline at end of file
/**
* Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { readStructure } from '../server/structure-wrapper';
import { classifyCif } from './converter';
import { ConsoleLogger } from 'mol-util/console-logger';
import { Structure } from 'mol-model/structure';
import { CifWriter } from 'mol-io/writer/cif';
import Writer from 'mol-io/writer/writer';
import { wrapFileToWriter } from '../server/api-local';
import { Task } from 'mol-task';
import { showProgress, clearLine } from './util';
import { encode_mmCIF_categories, CifExportContext } from 'mol-model/structure/export/mmcif';
// TODO: error handling, bulk mode
/**
 * Reads a local CIF file, classifies its categories and writes the result as
 * text CIF and/or BinaryCIF, depending on which output filenames are given.
 *
 * @param filename path of the input file
 * @param outputCif optional path of the text CIF output
 * @param outputBcif optional path of the BinaryCIF output
 */
export async function preprocessFile(filename: string, outputCif?: string, outputBcif?: string) {
    ConsoleLogger.log('ModelServer', `Reading ${filename}...`);
    const input = await readStructure('entry', '_local_', filename);
    ConsoleLogger.log('ModelServer', `Classifying CIF categories...`);
    const categories = await classifyCif(input.cifFrame);
    clearLine();

    // The same export context is shared by both outputs.
    const exportCtx = CifExportContext.create(input.structure, input.structure.models[0]);

    if (outputCif) {
        ConsoleLogger.log('ModelServer', `Encoding CIF...`);
        const writer = wrapFileToWriter(outputCif);
        try {
            const encoder = CifWriter.createEncoder({ binary: false });
            await encode(input.structure, input.cifFrame.header, categories, encoder, exportCtx, writer);
            clearLine();
        } finally {
            // End the writer even when encoding fails so an opened file is not leaked.
            // NOTE(review): assumes writer.end() is safe when nothing was written - confirm against wrapFileToWriter.
            writer.end();
        }
    }

    if (outputBcif) {
        ConsoleLogger.log('ModelServer', `Encoding BinaryCIF...`);
        const writer = wrapFileToWriter(outputBcif);
        try {
            const encoder = CifWriter.createEncoder({ binary: true, binaryAutoClassifyEncoding: true });
            await encode(input.structure, input.cifFrame.header, categories, encoder, exportCtx, writer);
            clearLine();
        } finally {
            // Same cleanup guarantee as for the text CIF output.
            writer.end();
        }
    }

    ConsoleLogger.log('ModelServer', `Done.`);
}
/**
 * Writes one data block: first the given pre-classified categories, then the
 * mmCIF export categories of the structure, skipping any whose name was
 * already written. Finally encodes the data and sends it to the writer.
 * Runs as a task that reports progress through showProgress.
 */
function encode(structure: Structure, header: string, categories: CifWriter.Category[], encoder: CifWriter.Encoder, exportCtx: CifExportContext, writer: Writer) {
    return Task.create('Encode', async ctx => {
        // Names already covered by the input categories must not be exported twice.
        const skipCategoryNames = new Set<string>(categories.map(category => category.name));

        encoder.startDataBlock(header);
        for (let i = 0; i < categories.length; i++) {
            encoder.writeCategory(categories[i]);
            if (ctx.shouldUpdate) {
                await ctx.update({ message: 'Encoding...', current: i + 1, max: categories.length });
            }
        }

        encode_mmCIF_categories(encoder, structure, { skipCategoryNames, exportCtx });
        encoder.encode();
        encoder.writeTo(writer);
    }).run(showProgress, 250);
}
\ No newline at end of file
/**
* Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { Progress } from 'mol-task';
/**
 * Overwrites the current console line with the formatted progress:
 * first blanks the line with 79 spaces, then prints the progress text.
 */
export function showProgress(p: Progress) {
    const blank = ' '.repeat(79);
    process.stdout.write('\r' + blank);
    process.stdout.write(`\r${Progress.format(p)}`);
}
/**
 * Blanks the current console line with 79 spaces and returns the cursor
 * to the start of the line.
 */
export function clearLine() {
    const blank = ' '.repeat(79);
    process.stdout.write('\r' + blank);
    process.stdout.write('\r');
}
\ No newline at end of file
......@@ -39,7 +39,7 @@ export async function runLocal(input: LocalInput) {
while (job) {
try {
const encoder = await resolveJob(job);
const writer = wrapFile(job.outputFilename!);
const writer = wrapFileToWriter(job.outputFilename!);
encoder.writeTo(writer);
writer.end();
ConsoleLogger.logId(job.id, 'Query', 'Written.');
......@@ -61,7 +61,7 @@ export async function runLocal(input: LocalInput) {
StructureCache.expireAll();
}
function wrapFile(fn: string) {
export function wrapFileToWriter(fn: string) {
const w = {
open(this: any) {
if (this.opened) return;
......@@ -71,7 +71,7 @@ function wrapFile(fn: string) {
},
writeBinary(this: any, data: Uint8Array) {
this.open();
fs.writeSync(this.file, new Buffer(data));
fs.writeSync(this.file, new Buffer(data.buffer));
return true;
},
writeString(this: any, data: string) {
......
......@@ -8,7 +8,7 @@ import { Structure, Model, Format } from 'mol-model/structure';
import { PerformanceMonitor } from 'mol-util/performance-monitor';
import { Cache } from './cache';
import Config from '../config';
import CIF from 'mol-io/reader/cif'
import CIF, { CifFrame } from 'mol-io/reader/cif'
import * as util from 'util'
import * as fs from 'fs'
import * as zlib from 'zlib'
......@@ -34,21 +34,22 @@ export interface StructureInfo {
entryId: string
}
export class StructureWrapper {
info: StructureInfo;
export interface StructureWrapper {
info: StructureInfo,
key: string;
approximateSize: number;
structure: Structure;
key: string,
approximateSize: number,
structure: Structure,
cifFrame: CifFrame
}
export async function getStructure(job: Job): Promise<StructureWrapper> {
if (Config.cacheParams.useCache) {
export async function getStructure(job: Job, allowCache = true): Promise<StructureWrapper> {
if (allowCache && Config.cacheParams.useCache) {
const ret = StructureCache.get(job.key);
if (ret) return ret;
}
const ret = await readStructure(job.key, job.sourceId, job.entryId);
if (Config.cacheParams.useCache) {
if (allowCache && Config.cacheParams.useCache) {
StructureCache.add(ret);
}
return ret;
......@@ -84,7 +85,7 @@ async function parseCif(data: string|Uint8Array) {
return parsed.result;
}
async function readStructure(key: string, sourceId: string, entryId: string) {
export async function readStructure(key: string, sourceId: string | '_local_', entryId: string) {
const filename = sourceId === '_local_' ? entryId : Config.mapFile(sourceId, entryId);
if (!filename) throw new Error(`Cound not map '${key}' to a valid filename.`);
if (!fs.existsSync(filename)) throw new Error(`Could not find source file for '${key}'.`);
......@@ -127,7 +128,8 @@ async function readStructure(key: string, sourceId: string, entryId: string) {
},
key,
approximateSize: typeof data === 'string' ? 2 * data.length : data.length,
structure
structure,
cifFrame: frame
};
return ret;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment