From 6d842b96a90df1326f5feae94c061f4f2da9de2a Mon Sep 17 00:00:00 2001 From: Alexander Rose <alex.rose@rcsb.org> Date: Wed, 4 Oct 2017 19:28:16 -0700 Subject: [PATCH] first attempt at cif dictionary parsing --- src/reader/cif/data-model.ts | 16 ++++++++-- src/reader/cif/text/parser.ts | 60 +++++++++++++++++------------------ src/script.ts | 40 ++++++++++++++++++++--- 3 files changed, 78 insertions(+), 38 deletions(-) diff --git a/src/reader/cif/data-model.ts b/src/reader/cif/data-model.ts index fdc54b5f6..666bdf88c 100644 --- a/src/reader/cif/data-model.ts +++ b/src/reader/cif/data-model.ts @@ -17,16 +17,28 @@ export function File(blocks: ArrayLike<Block>, name?: string): File { export interface Block { readonly header: string, - readonly categories: { readonly [name: string]: Category } + readonly categories: Categories + readonly saveFrames: SafeFrame[] } -export function Block(categories: { readonly [name: string]: Category }, header: string): Block { +export function Block(categories: Categories, header: string, saveFrames: SafeFrame[] = []): Block { if (Object.keys(categories).some(k => k[0] !== '_')) { throw new Error(`Category names must start with '_'.`); } + return { header, categories, saveFrames }; +} + +export interface SafeFrame { + readonly header: string, + readonly categories: Categories +} + +export function SafeFrame(categories: Categories, header: string): SafeFrame { return { header, categories }; } +export type Categories = { readonly [name: string]: Category } + export interface Category { readonly rowCount: number, getField(name: string): Field | undefined diff --git a/src/reader/cif/text/parser.ts b/src/reader/cif/text/parser.ts index d27dcd240..3b02bd78d 100644 --- a/src/reader/cif/text/parser.ts +++ b/src/reader/cif/text/parser.ts @@ -551,9 +551,12 @@ async function parseInternal(data: string, ctx: Computation.Context) { let blockHeader: string = ''; let blockCategories = Object.create(null); - //saveFrame = new DataBlock(data, "empty"), - //inSaveFrame = false, - //blockSaveFrames: any; + let inSaveFrame = false + + // the next three initial values are never used in valid files + let saveFrames: Data.SafeFrame[] = []; + let saveCategories = Object.create(null); + let saveFrame: Data.SafeFrame = Data.SafeFrame(saveCategories, ''); ctx.update({ message: 'Parsing...', current: 0, max: data.length }); @@ -563,63 +566,58 @@ async function parseInternal(data: string, ctx: Computation.Context) { // Data block if (token === CifTokenType.Data) { - // if (inSaveFrame) { - // return error(tokenizer.currentLineNumber, "Unexpected data block inside a save frame."); - // } + if (inSaveFrame) { + return error(tokenizer.lineNumber, "Unexpected data block inside a save frame."); + } if (Object.keys(blockCategories).length > 0) { - dataBlocks.push(Data.Block(blockCategories, blockHeader)); + dataBlocks.push(Data.Block(blockCategories, blockHeader, saveFrames)); } blockHeader = data.substring(tokenizer.tokenStart + 5, tokenizer.tokenEnd); blockCategories = Object.create(null); + saveFrames = [] moveNext(tokenizer); - } - /* // Save frame + // Save frame } else if (token === CifTokenType.Save) { - id = data.substring(tokenizer.currentTokenStart + 5, tokenizer.currentTokenEnd); - - if (id.length === 0) { - if (saveFrame.categories.length > 0) { - blockSaveFrames = blockCategories.additionalData["saveFrames"]; - if (!blockSaveFrames) { - blockSaveFrames = []; - blockCategories.additionalData["saveFrames"] = blockSaveFrames; - } - blockSaveFrames[blockSaveFrames.length] = saveFrame; + const saveHeader = data.substring(tokenizer.tokenStart + 5, tokenizer.tokenEnd); + if (saveHeader.length === 0) { + if (Object.keys(saveCategories).length > 0) { + saveFrames[saveFrames.length] = saveFrame } inSaveFrame = false; } else { if (inSaveFrame) { - return error(tokenizer.currentLineNumber, "Save frames cannot be nested."); + return error(tokenizer.lineNumber, "Save frames cannot be nested."); } inSaveFrame = true; - saveFrame = new DataBlock(data, id); + saveCategories = Object.create(null); + saveFrame = Data.SafeFrame(saveCategories, saveHeader); } moveNext(tokenizer); - // Loop - } */ else if (token === CifTokenType.Loop) { - const cat = await handleLoop(tokenizer, /*inSaveFrame ? saveFrame : */ blockCategories); + // Loop + } else if (token === CifTokenType.Loop) { + const cat = await handleLoop(tokenizer, inSaveFrame ? saveCategories : blockCategories); if (cat.hasError) { return error(cat.errorLine, cat.errorMessage); } - // Single row + // Single row } else if (token === CifTokenType.ColumnName) { - const cat = handleSingle(tokenizer, /*inSaveFrame ? saveFrame :*/ blockCategories); + const cat = handleSingle(tokenizer, inSaveFrame ? saveCategories : blockCategories); if (cat.hasError) { return error(cat.errorLine, cat.errorMessage); } - // Out of options + // Out of options } else { return error(tokenizer.lineNumber, 'Unexpected token. Expected data_, loop_, or data name.'); } } // Check if the latest save frame was closed. - // if (inSaveFrame) { - // return error(tokenizer.currentLineNumber, "Unfinished save frame (`" + saveFrame.header + "`)."); - // } + if (inSaveFrame) { + return error(tokenizer.lineNumber, "Unfinished save frame (`" + saveFrame.header + "`)."); + } if (Object.keys(blockCategories).length > 0) { - dataBlocks.push(Data.Block(blockCategories, blockHeader)); + dataBlocks.push(Data.Block(blockCategories, blockHeader, saveFrames)); } return result(Data.File(dataBlocks)); diff --git a/src/script.ts b/src/script.ts index f4cf5a07a..724767d15 100644 --- a/src/script.ts +++ b/src/script.ts @@ -5,7 +5,7 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -// import * as util from 'util' +import * as util from 'util' import * as fs from 'fs' import Gro from './reader/gro/parser' @@ -74,7 +74,7 @@ async function runGro(input: string) { console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1]) } -function _gro() { +export function _gro() { fs.readFile(`./examples/${file}`, 'utf8', function (err, input) { if (err) { return console.log(err); @@ -83,7 +83,7 @@ function _gro() { }); } -_gro() +// _gro() async function runCIF(input: string | Uint8Array) { console.time('parseCIF'); @@ -110,7 +110,7 @@ async function runCIF(input: string | Uint8Array) { export function _cif() { let path = `./examples/1cbs_updated.cif`; - path = 'c:/test/quick/3j3q.cif'; + path = '../test/3j3q.cif' // lets have a relative path for big test files fs.readFile(path, 'utf8', function (err, input) { if (err) { return console.log(err); @@ -134,7 +134,37 @@ export function _cif() { }); } -_cif(); +// _cif(); + +async function runDic(input: string | Uint8Array) { + console.time('parseDic'); + const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input); + + const ctx = Computation.observable({ updateRateMs: 250, observer: p => showProgress('DIC', p) }); + const parsed = await comp(ctx); + console.timeEnd('parseDic'); + if (parsed.isError) { + console.log(parsed); + return; + } + + const data = parsed.result.blocks[0]; + console.log(util.inspect(data.saveFrames, {showHidden: false, depth: 3})) +} + +export function _dic() { + let path = '../test/mmcif_pdbx_v50.dic' + fs.readFile(path, 'utf8', function (err, input) { + if (err) { + return console.log(err); + } + console.log('------------------'); + console.log('Text DIC:'); + runDic(input); + }); +} + +_dic(); import Computation from './utils/computation' const comp = Computation.create(async ctx => { -- GitLab