Skip to content
Snippets Groups Projects
Commit f749c361 authored by David Sehnal's avatar David Sehnal
Browse files

CIF parser using computation

parent 2693d26d
Branches
Tags
No related merge requests found
...@@ -9,6 +9,7 @@ import * as Encoding from './encoding' ...@@ -9,6 +9,7 @@ import * as Encoding from './encoding'
import Field from './field' import Field from './field'
import Result from '../../result' import Result from '../../result'
import decodeMsgPack from '../../../utils/msgpack/decode' import decodeMsgPack from '../../../utils/msgpack/decode'
import Computation from '../../../utils/computation'
function checkVersions(min: number[], current: number[]) { function checkVersions(min: number[], current: number[]) {
for (let i = 0; i < 2; i++) { for (let i = 0; i < 2; i++) {
...@@ -29,7 +30,8 @@ function Category(data: Encoding.EncodedCategory): Data.Category { ...@@ -29,7 +30,8 @@ function Category(data: Encoding.EncodedCategory): Data.Category {
} }
} }
export default function parse(data: Uint8Array): Result<Data.File> { export default function parse(data: Uint8Array) {
return new Computation<Result<Data.File>>(async ctx => {
const minVersion = [0, 3]; const minVersion = [0, 3];
try { try {
...@@ -46,4 +48,5 @@ export default function parse(data: Uint8Array): Result<Data.File> { ...@@ -46,4 +48,5 @@ export default function parse(data: Uint8Array): Result<Data.File> {
} catch (e) { } catch (e) {
return Result.error<Data.File>('' + e); return Result.error<Data.File>('' + e);
} }
})
} }
\ No newline at end of file
...@@ -26,6 +26,7 @@ import * as Data from '../data-model' ...@@ -26,6 +26,7 @@ import * as Data from '../data-model'
import Field from './field' import Field from './field'
import { Tokens, TokenBuilder } from '../../common/text/tokenizer' import { Tokens, TokenBuilder } from '../../common/text/tokenizer'
import Result from '../../result' import Result from '../../result'
import Computation from '../../../utils/computation'
/** /**
* Types of supported mmCIF tokens. * Types of supported mmCIF tokens.
...@@ -51,6 +52,8 @@ interface TokenizerState { ...@@ -51,6 +52,8 @@ interface TokenizerState {
currentTokenType: CifTokenType; currentTokenType: CifTokenType;
currentTokenStart: number; currentTokenStart: number;
currentTokenEnd: number; currentTokenEnd: number;
computation: Computation.Chunked
} }
/** /**
...@@ -384,7 +387,7 @@ function moveNext(state: TokenizerState) { ...@@ -384,7 +387,7 @@ function moveNext(state: TokenizerState) {
while (state.currentTokenType === CifTokenType.Comment) moveNextInternal(state); while (state.currentTokenType === CifTokenType.Comment) moveNextInternal(state);
} }
function createTokenizer(data: string): TokenizerState { function createTokenizer(data: string, ctx: Computation.Context): TokenizerState {
return { return {
data, data,
length: data.length, length: data.length,
...@@ -393,7 +396,8 @@ function createTokenizer(data: string): TokenizerState { ...@@ -393,7 +396,8 @@ function createTokenizer(data: string): TokenizerState {
currentTokenEnd: 0, currentTokenEnd: 0,
currentTokenType: CifTokenType.End, currentTokenType: CifTokenType.End,
currentLineNumber: 1, currentLineNumber: 1,
isEscaped: false isEscaped: false,
computation: new Computation.Chunked(ctx, 1000000)
}; };
} }
...@@ -443,10 +447,39 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D ...@@ -443,10 +447,39 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D
}; };
} }
/**
 * Mutable bookkeeping shared between successive chunked reads of one loop.
 */
interface LoopReadState {
    /** Tokenizer positioned inside the loop's value section. */
    tokenizer: TokenizerState;
    /** One token container per loop field, indexed by field position. */
    tokens: Tokens[];
    /** Number of fields declared by the loop. */
    fieldCount: number;
    /** Total number of value tokens stored so far across all fields. */
    tokenCount: number;
}
/**
 * Consumes at most `chunkSize` value tokens of the current loop, appending
 * each one to the token container of the field it belongs to (fields are
 * filled round-robin in declaration order).
 *
 * @param state Shared loop state; `tokenCount` is advanced by the number of tokens read.
 * @param chunkSize Upper bound on the number of tokens consumed by this call.
 * @returns true when the tokenizer still points at a value token, i.e. there is more to read.
 */
function readLoopChunk(state: LoopReadState, chunkSize: number) {
    const reader = state.tokenizer;
    const columns = state.tokens;
    const width = state.fieldCount;
    let written = state.tokenCount;

    for (let taken = 0; taken < chunkSize; taken++) {
        // Stop as soon as the value section of the loop ends.
        if (reader.currentTokenType !== CifTokenType.Value) break;
        TokenBuilder.add(columns[written % width], reader.currentTokenStart, reader.currentTokenEnd);
        written++;
        moveNext(reader);
    }

    // Publish the running total only once the chunk has been fully processed.
    state.tokenCount = written;
    return reader.currentTokenType === CifTokenType.Value;
}
/**
 * Reads all remaining value tokens of a loop chunk by chunk, reporting
 * progress between chunks whenever the chunked computation asks for it.
 *
 * @param state Shared loop state that is advanced by every chunk read.
 */
async function readLoopChunks(state: LoopReadState) {
    const tokenizer = state.tokenizer;
    const chunked = tokenizer.computation;

    for (;;) {
        const hasMore = readLoopChunk(state, chunked.chunkSize);
        if (!hasMore) return;
        if (!chunked.requiresUpdate) continue;
        await chunked.updateProgress('Parsing...', void 0, tokenizer.position, tokenizer.data.length);
    }
}
/** /**
* Reads a loop. * Reads a loop.
*/ */
function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Data.Category }): CifCategoryResult { async function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Data.Category }): Promise<CifCategoryResult> {
const loopLine = tokenizer.currentLineNumber; const loopLine = tokenizer.currentLineNumber;
moveNext(tokenizer); moveNext(tokenizer);
...@@ -463,13 +496,22 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat ...@@ -463,13 +496,22 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat
const fieldCount = fieldNames.length; const fieldCount = fieldNames.length;
for (let i = 0; i < fieldCount; i++) tokens[i] = TokenBuilder.create(tokenizer, rowCountEstimate); for (let i = 0; i < fieldCount; i++) tokens[i] = TokenBuilder.create(tokenizer, rowCountEstimate);
let tokenCount = 0; const state: LoopReadState = {
while (tokenizer.currentTokenType === CifTokenType.Value) { fieldCount,
TokenBuilder.add(tokens[(tokenCount++) % fieldCount], tokenizer.currentTokenStart, tokenizer.currentTokenEnd); tokenCount: 0,
moveNext(tokenizer); tokenizer,
} tokens
};
// let tokenCount = 0;
// while (tokenizer.currentTokenType === CifTokenType.Value) {
// TokenBuilder.add(tokens[(tokenCount++) % fieldCount], tokenizer.currentTokenStart, tokenizer.currentTokenEnd);
// moveNext(tokenizer);
// }
if (tokenCount % fieldCount !== 0) { await readLoopChunks(state);
if (state.tokenCount % fieldCount !== 0) {
return { return {
hasError: true, hasError: true,
errorLine: tokenizer.currentLineNumber, errorLine: tokenizer.currentLineNumber,
...@@ -477,7 +519,7 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat ...@@ -477,7 +519,7 @@ function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Dat
}; };
} }
const rowCount = (tokenCount / fieldCount) | 0; const rowCount = (state.tokenCount / fieldCount) | 0;
const fields = Object.create(null); const fields = Object.create(null);
for (let i = 0; i < fieldCount; i++) { for (let i = 0; i < fieldCount; i++) {
fields[fieldNames[i]] = Field(tokens[i], rowCount); fields[fieldNames[i]] = Field(tokens[i], rowCount);
...@@ -511,9 +553,9 @@ function result(data: Data.File) { ...@@ -511,9 +553,9 @@ function result(data: Data.File) {
* *
* @returns CifParserResult wrapper of the result. * @returns CifParserResult wrapper of the result.
*/ */
function parseInternal(data: string): Result<Data.File> { async function parseInternal(data: string, ctx: Computation.Context) {
const dataBlocks: Data.Block[] = []; const dataBlocks: Data.Block[] = [];
const tokenizer = createTokenizer(data); const tokenizer = createTokenizer(data, ctx);
let blockHeader: string = ''; let blockHeader: string = '';
let blockCategories = Object.create(null); let blockCategories = Object.create(null);
...@@ -521,6 +563,8 @@ function parseInternal(data: string): Result<Data.File> { ...@@ -521,6 +563,8 @@ function parseInternal(data: string): Result<Data.File> {
//inSaveFrame = false, //inSaveFrame = false,
//blockSaveFrames: any; //blockSaveFrames: any;
ctx.updateProgress('Parsing...');
moveNext(tokenizer); moveNext(tokenizer);
while (tokenizer.currentTokenType !== CifTokenType.End) { while (tokenizer.currentTokenType !== CifTokenType.End) {
let token = tokenizer.currentTokenType; let token = tokenizer.currentTokenType;
...@@ -561,7 +605,7 @@ function parseInternal(data: string): Result<Data.File> { ...@@ -561,7 +605,7 @@ function parseInternal(data: string): Result<Data.File> {
moveNext(tokenizer); moveNext(tokenizer);
// Loop // Loop
} */ else if (token === CifTokenType.Loop) { } */ else if (token === CifTokenType.Loop) {
const cat = handleLoop(tokenizer, /*inSaveFrame ? saveFrame : */ blockCategories); const cat = await handleLoop(tokenizer, /*inSaveFrame ? saveFrame : */ blockCategories);
if (cat.hasError) { if (cat.hasError) {
return error(cat.errorLine, cat.errorMessage); return error(cat.errorLine, cat.errorMessage);
} }
...@@ -590,5 +634,7 @@ function parseInternal(data: string): Result<Data.File> { ...@@ -590,5 +634,7 @@ function parseInternal(data: string): Result<Data.File> {
} }
export default function parse(data: string) { export default function parse(data: string) {
return parseInternal(data); return new Computation<Result<Data.File>>(async ctx => {
return await parseInternal(data, ctx);
});
} }
\ No newline at end of file
...@@ -72,9 +72,13 @@ export function _gro() { ...@@ -72,9 +72,13 @@ export function _gro() {
}); });
} }
function runCIF(input: string | Uint8Array) { async function runCIF(input: string | Uint8Array) {
console.time('parseCIF'); console.time('parseCIF');
const parsed = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input); const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input);
const running = comp.runWithContext(new Computation.ObservableContext({ updateRateMs: 250 }));
running.subscribe(p => console.log(`${(p.current / p.max * 100).toFixed(2)} (${p.elapsedMs | 0}ms)`));
const parsed = await running.result;
console.timeEnd('parseCIF'); console.timeEnd('parseCIF');
if (parsed.isError) { if (parsed.isError) {
console.log(parsed); console.log(parsed);
...@@ -94,7 +98,7 @@ function runCIF(input: string | Uint8Array) { ...@@ -94,7 +98,7 @@ function runCIF(input: string | Uint8Array) {
export function _cif() { export function _cif() {
let path = `./examples/1cbs_updated.cif`; let path = `./examples/1cbs_updated.cif`;
//path = 'c:/test/quick/3j3q.cif'; path = 'c:/test/quick/3j3q.cif';
fs.readFile(path, 'utf8', function (err, input) { fs.readFile(path, 'utf8', function (err, input) {
if (err) { if (err) {
return console.log(err); return console.log(err);
...@@ -122,7 +126,7 @@ _cif(); ...@@ -122,7 +126,7 @@ _cif();
import Computation from './utils/computation' import Computation from './utils/computation'
const comp = new Computation(async ctx => { const comp = new Computation(async ctx => {
for (let i = 0; i < 3; i++) { for (let i = 0; i < 0; i++) {
await new Promise(res => setTimeout(res, 500)); await new Promise(res => setTimeout(res, 500));
if (ctx.requiresUpdate) await ctx.updateProgress('working', void 0, i, 2); if (ctx.requiresUpdate) await ctx.updateProgress('working', void 0, i, 2);
} }
......
...@@ -56,11 +56,12 @@ namespace Computation { ...@@ -56,11 +56,12 @@ namespace Computation {
export const Aborted = 'Aborted'; export const Aborted = 'Aborted';
export interface Progress { export interface Progress {
message: string; message: string,
isIndeterminate: boolean; isIndeterminate: boolean,
current: number; current: number,
max: number; max: number,
requestAbort?: () => void; elapsedMs: number,
requestAbort?: () => void
} }
export interface Context { export interface Context {
...@@ -69,6 +70,8 @@ namespace Computation { ...@@ -69,6 +70,8 @@ namespace Computation {
/** /**
* Checks if the computation was aborted. If so, throws. * Checks if the computation was aborted. If so, throws.
* Otherwise, updates the progress. * Otherwise, updates the progress.
*
* Returns the number of ms since the last update.
*/ */
updateProgress(msg: string, abort?: boolean | (() => void), current?: number, max?: number): Promise<void> | void updateProgress(msg: string, abort?: boolean | (() => void), current?: number, max?: number): Promise<void> | void
} }
...@@ -87,13 +90,16 @@ namespace Computation { ...@@ -87,13 +90,16 @@ namespace Computation {
} }
export class ObservableContext implements Context { export class ObservableContext implements Context {
private updateRate: number; readonly updateRate: number;
private isSynchronous: boolean; private isSynchronous: boolean;
private level = 0; private level = 0;
private startedTime = 0;
private abortRequested = false; private abortRequested = false;
private lastUpdated = 0; private lastUpdated = 0;
private observers: ProgressObserver[] | undefined = void 0; private observers: ProgressObserver[] | undefined = void 0;
private progress: Progress = { message: 'Working...', current: 0, max: 0, isIndeterminate: true, requestAbort: void 0 }; private progress: Progress = { message: 'Working...', current: 0, max: 0, elapsedMs: 0, isIndeterminate: true, requestAbort: void 0 };
lastDelta = 0;
private checkAborted() { private checkAborted() {
if (this.abortRequested) throw Aborted; if (this.abortRequested) throw Aborted;
...@@ -117,6 +123,8 @@ namespace Computation { ...@@ -117,6 +123,8 @@ namespace Computation {
updateProgress(msg: string, abort?: boolean | (() => void), current?: number, max?: number): Promise<void> | void { updateProgress(msg: string, abort?: boolean | (() => void), current?: number, max?: number): Promise<void> | void {
this.checkAborted(); this.checkAborted();
const time = Helpers.getTime();
if (typeof abort === 'boolean') { if (typeof abort === 'boolean') {
this.progress.requestAbort = abort ? this.abortRequester : void 0; this.progress.requestAbort = abort ? this.abortRequester : void 0;
} else { } else {
...@@ -125,6 +133,7 @@ namespace Computation { ...@@ -125,6 +133,7 @@ namespace Computation {
} }
this.progress.message = msg; this.progress.message = msg;
this.progress.elapsedMs = time - this.startedTime;
if (isNaN(current!)) { if (isNaN(current!)) {
this.progress.isIndeterminate = true; this.progress.isIndeterminate = true;
} else { } else {
...@@ -138,7 +147,8 @@ namespace Computation { ...@@ -138,7 +147,8 @@ namespace Computation {
for (const o of this.observers) setTimeout(o, 0, p); for (const o of this.observers) setTimeout(o, 0, p);
} }
this.lastUpdated = Helpers.getTime(); this.lastDelta = time - this.lastUpdated;
this.lastUpdated = time;
return new Promise<void>(res => setTimeout(res, 0)); return new Promise<void>(res => setTimeout(res, 0));
} }
...@@ -146,10 +156,11 @@ namespace Computation { ...@@ -146,10 +156,11 @@ namespace Computation {
get requiresUpdate() { get requiresUpdate() {
this.checkAborted(); this.checkAborted();
if (this.isSynchronous) return false; if (this.isSynchronous) return false;
return Helpers.getTime() - this.lastUpdated > this.updateRate; return Helpers.getTime() - this.lastUpdated > this.updateRate / 2;
} }
started() { started() {
if (!this.level) this.startedTime = Helpers.getTime();
this.level++; this.level++;
} }
...@@ -161,11 +172,46 @@ namespace Computation { ...@@ -161,11 +172,46 @@ namespace Computation {
if (!this.level) this.observers = void 0; if (!this.level) this.observers = void 0;
} }
constructor(params?: Params) { constructor(params?: Partial<Params>) {
this.updateRate = (params && params.updateRateMs) || DefaulUpdateRateMs; this.updateRate = (params && params.updateRateMs) || DefaulUpdateRateMs;
this.isSynchronous = !!(params && params.isSynchronous); this.isSynchronous = !!(params && params.isSynchronous);
} }
} }
/**
 * Helper for running a long computation in chunks whose size adapts to the
 * observed throughput, so that progress updates happen roughly once per
 * update interval of the underlying context.
 *
 * Intended usage: process `chunkSize` items, then check `requiresUpdate`;
 * when it is true, await `updateProgress(...)` and continue with the
 * (possibly changed) `chunkSize`.
 */
export class Chunked {
    /**
     * Amount of work accounted for since the last progress update
     * (including the chunk that is about to be processed).
     */
    private currentChunkSize: number;

    /**
     * Estimates how many items fit into one update interval, based on the
     * work accounted for since the last update and the time that update took
     * to arrive. Falls back to the current chunk size when the context does
     * not expose `lastDelta`/`updateRate` or they are zero.
     */
    private computeChunkSize() {
        const lastDelta = (this.context as ObservableContext).lastDelta || 0;
        const rate = (this.context as ObservableContext).updateRate || 0;
        // Without both timing values there is nothing to scale by.
        if (!lastDelta || !rate) return this.defaultChunkSize;
        // Never go below one item per chunk, otherwise callers make no progress.
        return Math.max(1, Math.round(this.currentChunkSize * rate / lastDelta + 1));
    }

    /** Number of items the caller should process before checking `requiresUpdate` again. */
    get chunkSize() {
        return this.defaultChunkSize;
    }

    /** Overrides the chunk size and restarts the work accounting from that value. */
    set chunkSize(value: number) {
        this.defaultChunkSize = value;
        this.currentChunkSize = value;
    }

    /**
     * Forwards to the context's `requiresUpdate`. When no update is needed,
     * another chunk is added to the work accounted for since the last update.
     */
    get requiresUpdate() {
        const ret = this.context.requiresUpdate;
        if (!ret) this.currentChunkSize += this.chunkSize;
        return ret;
    }

    /**
     * Forwards the progress update to the context, then recomputes the chunk
     * size from the throughput observed since the previous update.
     *
     * @param msg Progress message.
     * @param abort Abort flag or abort callback, passed through to the context.
     * @param current Current progress value, passed through to the context.
     * @param max Maximum progress value, passed through to the context.
     */
    async updateProgress(msg: string, abort?: boolean | (() => void), current?: number, max?: number) {
        await this.context.updateProgress(msg, abort, current, max);
        this.defaultChunkSize = this.computeChunkSize();
        // Restart the accounting: the time delta reported by the context only
        // covers the interval since the previous update, so the work counter
        // must cover the same interval. Without this reset the counter keeps
        // growing and the estimated chunk size drifts upwards.
        this.currentChunkSize = this.defaultChunkSize;
    }

    /**
     * @param context Computation context that receives the progress updates.
     * @param defaultChunkSize Initial number of items per chunk.
     */
    constructor(public context: Context, private defaultChunkSize: number) {
        this.currentChunkSize = defaultChunkSize;
    }
}
} }
namespace Helpers { namespace Helpers {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment