Skip to content
Snippets Groups Projects
Commit a8070e41 authored by David Sehnal
Browse files

CIF parser now uses mol-task

parent 088350c0
Branches
Tags
No related merge requests found
......@@ -9,10 +9,11 @@ import CIF, { Category } from 'mol-io/reader/cif'
import * as Encoder from 'mol-io/writer/cif'
import * as fs from 'fs'
import classify from './field-classifier'
import { Run } from 'mol-task'
async function getCIF(path: string) {
const str = fs.readFileSync(path, 'utf8');
const parsed = await CIF.parseText(str)();
const parsed = await Run(CIF.parseText(str));
if (parsed.isError) {
throw new Error(parsed.toString());
}
......
......@@ -13,11 +13,11 @@ require('util.promisify').shim();
const readFile = util.promisify(fs.readFile);
const writeFile = util.promisify(fs.writeFile);
import { Run, Progress } from 'mol-task'
import { Database, Table, DatabaseCollection } from 'mol-data/db'
import CIF from 'mol-io/reader/cif'
// import { CCD_Schema } from 'mol-io/reader/cif/schema/ccd'
import * as Encoder from 'mol-io/writer/cif'
import Computation from 'mol-util/computation'
import { mmCIF_Schema, mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
import { CCD_Schema } from 'mol-io/reader/cif/schema/ccd';
import { BIRD_Schema } from 'mol-io/reader/cif/schema/bird';
......@@ -48,10 +48,6 @@ export async function ensureDataAvailable() {
await ensureAvailable(BIRD_PATH, BIRD_URL)
}
/**
 * Writes a single progress line to the console in the form
 * `[tag] message percent% (elapsedms)`.
 *
 * The percentage (current / max, two decimals) is left out when the progress
 * is indeterminate; the elapsed time is truncated to an integer with `| 0`.
 *
 * @param tag label printed in square brackets at the start of the line
 * @param p progress snapshot to print
 */
function showProgress(tag: string, p: Computation.Progress) {
    let percent = '';
    if (!p.isIndeterminate) {
        percent = `${(p.current / p.max * 100).toFixed(2)}% `;
    }
    const elapsed = p.elapsedMs | 0;
    console.log(`[${tag}] ${p.message} ${percent}(${elapsed}ms)`);
}
export async function readFileAsCollection<S extends Database.Schema>(path: string, schema: S) {
const parsed = await parseCif(await readFile(path, 'utf8'))
return CIF.toDatabaseCollection(schema, parsed.result)
......@@ -80,14 +76,10 @@ export async function getBIRD() {
}
/**
 * Parses CIF input given either as text or as binary data via `CIF.parse`,
 * timing the run under the console label 'parse cif'.
 *
 * Returns the parse result object; if that object reports `isError`, the
 * object itself is thrown instead.
 *
 * NOTE(review): this span comes from a diff view without +/- markers, so the
 * removed `Computation.observable` lines and the added `Run(...)` lines
 * appear together below.
 */
async function parseCif(data: string|Uint8Array) {
const comp = CIF.parse(data)
const ctx = Computation.observable({
updateRateMs: 250,
observer: p => showProgress(`cif parser ${typeof data === 'string' ? 'string' : 'binary'}`, p)
});
console.time('parse cif')
const parsed = await comp(ctx);
console.timeEnd('parse cif')
const comp = CIF.parse(data);
console.time('parse cif');
const parsed = await Run(comp, p => console.log(Progress.format(p)), 250);
console.timeEnd('parse cif');
if (parsed.isError) throw parsed;
return parsed
}
......
......@@ -14,11 +14,12 @@ import CIF from 'mol-io/reader/cif'
import { generateSchema } from './util/cif-dic'
import { generate } from './util/generate'
import { Filter, mergeFilters } from './util/json-schema'
import { Run } from 'mol-task';
async function runGenerateSchema(name: string, fieldNamesPath?: string, minCount = 0, typescript = false, out?: string) {
await ensureMmcifDicAvailable()
const comp = CIF.parseText(fs.readFileSync(MMCIF_DIC_PATH, 'utf8'))
const parsed = await comp();
const parsed = await Run(comp);
if (parsed.isError) throw parsed
// console.log(fieldNamesPath, minCount)
......
......@@ -10,20 +10,12 @@ require('util.promisify').shim();
// import { Table } from 'mol-data/db'
import CIF from 'mol-io/reader/cif'
import Computation from 'mol-util/computation'
import { Model } from 'mol-model/structure'
/**
 * Prints one progress line to the console: `[tag] message percent% (elapsedms)`.
 *
 * When the progress is indeterminate no percentage is printed; otherwise it
 * is current / max as a percentage with two decimals. The elapsed time is
 * truncated to an integer with `| 0`.
 *
 * @param tag label shown in square brackets at the start of the line
 * @param p progress snapshot to print
 */
function showProgress(tag: string, p: Computation.Progress) {
    const percent = p.isIndeterminate
        ? ''
        : `${(p.current / p.max * 100).toFixed(2)}% `;
    const elapsedMs = p.elapsedMs | 0;
    const line = `[${tag}] ${p.message} ${percent}(${elapsedMs}ms)`;
    console.log(line);
}
import { Run, Progress } from 'mol-task'
/**
 * Parses CIF input given either as text or as binary data via `CIF.parse`.
 *
 * Returns the parse result object; if that object reports `isError`, the
 * object itself is thrown instead.
 *
 * NOTE(review): this span comes from a diff view without +/- markers, so the
 * removed `Computation.observable` lines and the added `Run(...)` lines
 * appear together below.
 */
async function parseCif(data: string|Uint8Array) {
const comp = CIF.parse(data)
const ctx = Computation.observable({
updateRateMs: 250,
observer: p => showProgress(`cif parser ${typeof data === 'string' ? 'string' : 'binary'}`, p)
});
const parsed = await comp(ctx);
const comp = CIF.parse(data);
const parsed = await Run(comp, p => console.log(Progress.format(p)), 250);
if (parsed.isError) throw parsed;
return parsed
}
......
......@@ -23,7 +23,6 @@ function messageTree(root: Progress.Node, prefix = ''): string {
const subTree = root.children.map(c => messageTree(c, newPrefix));
if (p.isIndeterminate) return `${prefix}${p.taskName}: ${p.message}\n${subTree.join('\n')}`;
return `${prefix}${p.taskName}: [${p.current}/${p.max}] ${p.message}\n${subTree.join('\n')}`;
}
function createTask<T>(delayMs: number, r: T): Task<T> {
......
......@@ -9,7 +9,7 @@ import { EncodedCategory, EncodedFile } from '../../../common/binary-cif'
import Field from './field'
import Result from '../../result'
import decodeMsgPack from '../../../common/msgpack/decode'
import Computation from 'mol-util/computation'
import { Task } from 'mol-task'
function checkVersions(min: number[], current: number[]) {
for (let i = 0; i < 2; i++) {
......@@ -37,7 +37,7 @@ function Category(data: EncodedCategory): Data.Category {
}
export default function parse(data: Uint8Array) {
return Computation.create<Result<Data.File>>(async ctx => {
return Task.create<Result<Data.File>>('Parse BinaryCIF', async ctx => {
const minVersion = [0, 3];
try {
......
......@@ -26,7 +26,7 @@ import * as Data from '../data-model'
import Field from './field'
import { Tokens, TokenBuilder } from '../../common/text/tokenizer'
import Result from '../../result'
import Computation from 'mol-util/computation'
import { Task, RuntimeContext, ChunkedSubtask } from 'mol-task'
/**
* Types of supported mmCIF tokens.
......@@ -42,18 +42,18 @@ const enum CifTokenType {
}
/**
 * State object passed to the tokenizer functions (built by `createTokenizer`,
 * consumed by `moveNext`).
 *
 * NOTE(review): this span comes from a diff view without +/- markers, so each
 * member appears twice (the removed `;`-terminated form and the added
 * `,`-terminated form), and both the removed `chunker` member and the added
 * `runtimeCtx` member are shown.
 */
interface TokenizerState {
data: string;
data: string,
position: number;
length: number;
isEscaped: boolean;
position: number,
length: number,
isEscaped: boolean,
lineNumber: number;
tokenType: CifTokenType;
tokenStart: number;
tokenEnd: number;
lineNumber: number,
tokenType: CifTokenType,
tokenStart: number,
tokenEnd: number,
chunker: Computation.Chunker
runtimeCtx: RuntimeContext
}
/**
......@@ -387,7 +387,7 @@ function moveNext(state: TokenizerState) {
while (state.tokenType === CifTokenType.Comment) moveNextInternal(state);
}
function createTokenizer(data: string, ctx: Computation.Context): TokenizerState {
function createTokenizer(data: string, runtimeCtx: RuntimeContext): TokenizerState {
return {
data,
length: data.length,
......@@ -398,7 +398,7 @@ function createTokenizer(data: string, ctx: Computation.Context): TokenizerState
lineNumber: 1,
isEscaped: false,
chunker: Computation.chunker(ctx, 1000000)
runtimeCtx
};
}
......@@ -481,11 +481,9 @@ function readLoopChunk(state: LoopReadState, chunkSize: number) {
return counter;
}
/**
 * Drives `readLoopChunk` through the tokenizer's chunker.
 *
 * The first callback reads one chunk of the requested size; the second
 * reports a 'Parsing...' update with the tokenizer position as `current`
 * and the input length as `max`.
 *
 * @param state loop read state whose tokenizer supplies the chunker
 * @returns whatever `chunker.process` returns
 */
function readLoopChunks(state: LoopReadState) {
    const chunker = state.tokenizer.chunker;
    return chunker.process(
        size => readLoopChunk(state, size),
        report => report({
            message: 'Parsing...',
            current: state.tokenizer.position,
            max: state.tokenizer.data.length
        }));
}
/**
 * Loop reader built with `ChunkedSubtask`.
 *
 * The first callback reads one chunk via `readLoopChunk`; the second reports
 * a 'Parsing...' update with the tokenizer position as `current` and the
 * input length as `max`.
 * NOTE(review): 1000000 is presumably the chunk size - confirm against the
 * `ChunkedSubtask` signature in mol-task.
 */
const readLoopChunks = ChunkedSubtask(1000000,
    (chunkSize, loopState: LoopReadState) => readLoopChunk(loopState, chunkSize),
    (runtime, loopState) => {
        const { tokenizer } = loopState;
        return runtime.update({ message: 'Parsing...', current: tokenizer.position, max: tokenizer.data.length });
    });
/**
* Reads a loop.
......@@ -514,7 +512,7 @@ async function handleLoop(tokenizer: TokenizerState, ctx: FrameContext): Promise
tokens
};
await readLoopChunks(state);
await readLoopChunks(tokenizer.runtimeCtx, state);
if (state.tokenCount % fieldCount !== 0) {
return {
......@@ -560,9 +558,9 @@ function result(data: Data.File) {
*
* @returns CifParserResult wrapper of the result.
*/
async function parseInternal(data: string, ctx: Computation.Context) {
async function parseInternal(data: string, runtimeCtx: RuntimeContext) {
const dataBlocks: Data.Block[] = [];
const tokenizer = createTokenizer(data, ctx);
const tokenizer = createTokenizer(data, runtimeCtx);
let blockHeader = '';
let blockCtx = FrameContext();
......@@ -574,7 +572,7 @@ async function parseInternal(data: string, ctx: Computation.Context) {
let saveCtx = FrameContext();
let saveFrame: Data.Frame = Data.SafeFrame(saveCtx.categoryNames, saveCtx.categories, '');
ctx.update({ message: 'Parsing...', current: 0, max: data.length });
runtimeCtx.update({ message: 'Parsing...', current: 0, max: data.length });
moveNext(tokenizer);
while (tokenizer.tokenType !== CifTokenType.End) {
......@@ -641,7 +639,7 @@ async function parseInternal(data: string, ctx: Computation.Context) {
}
/**
 * Wraps CIF text parsing in a task whose body awaits
 * `parseInternal(data, ctx)` and yields a `Result<Data.File>`.
 *
 * NOTE(review): this span comes from a diff view without +/- markers, so the
 * removed `Computation.create` line and the added `Task.create` line both
 * appear below.
 */
export default function parse(data: string) {
return Computation.create<Result<Data.File>>(async ctx => {
return Task.create<Result<Data.File>>('Parse CIF', async ctx => {
return await parseInternal(data, ctx);
});
}
\ No newline at end of file
......@@ -19,6 +19,21 @@ namespace Progress {
}
export interface Observer { (progress: Progress): void }
/**
 * Renders a progress tree as text, one line per node.
 *
 * Each line reads `<prefix><taskName>: <message>`; determinate nodes get
 * `[current/max] ` in front of the message. Children are rendered
 * recursively with ' |_ ' appended to the prefix and follow their parent's
 * line, separated by newlines.
 *
 * @param root node to render, together with its children
 * @param prefix text placed in front of this node's line
 * @returns the formatted text without a trailing newline
 */
function _format(root: Progress.Node, prefix = ''): string {
    const { taskName, message, isIndeterminate, current, max } = root.progress;
    const counter = isIndeterminate ? '' : `[${current}/${max}] `;
    const headline = `${prefix}${taskName}: ${counter}${message}`;
    if (root.children.length === 0) return headline;

    const childPrefix = prefix + ' |_ ';
    const rendered = root.children.map(child => _format(child, childPrefix));
    return [headline, ...rendered].join('\n');
}
/** Formats a progress snapshot as text by rendering the tree under `p.root`. */
export function format(p: Progress) {
    const rendered = _format(p.root);
    return rendered;
}
}
export { Progress }
\ No newline at end of file
......@@ -15,6 +15,7 @@ import { Structure, Model, Queries as Q, Atom, AtomGroup, AtomSet, Selection, Sy
import { Segmentation } from 'mol-data/int'
import to_mmCIF from 'mol-model/structure/export/mmcif'
import { Run } from 'mol-task';
require('util.promisify').shim();
const readFileAsync = util.promisify(fs.readFile);
......@@ -59,7 +60,7 @@ export async function readCIF(path: string) {
console.time('parse');
const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input);
const parsed = await comp();
const parsed = await Run(comp);
console.timeEnd('parse');
if (parsed.isError) {
throw parsed;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment