# CIF data model: track category order explicitly and drop the leading '_' from category keys.
#
# What this patch does:
#   * Data.Frame gains `categoryNames`, a list of category names kept next to the `categories` map.
#   * Data.Block() / Data.SafeFrame() take `categoryNames` as their first argument; Block() no
#     longer throws when a category key does not start with '_'.
#   * The text and binary parsers key categories by the name without its first character
#     (`name.substr(1)`) and fill `categoryNames` as categories are read.
#   * Schema code looks categories up by the un-prefixed key ('item_type', 'item_linked', ...).
#
# Review fixes folded into this revision (both in src/mol-io/reader/cif/text/parser.ts):
#   * handleLoop: the row-count estimate compares against '_atom_site' again. At that point
#     `name` is still the raw namespace; the un-prefixed name is only derived later via
#     `name.substr(1)`, so comparing with 'atom_site' could never match.
#   * parseInternal: a newly opened save frame is created with `saveHeader` instead of ''.
#     The header is read by the "Unfinished save frame" error message and by generateSchema(),
#     which skips frames whose header does not start with '_'.
#
# Blank context lines and indentation were restored from the hunk line counts.

diff --git a/src/apps/cif2bcif/converter.ts b/src/apps/cif2bcif/converter.ts
index c8f049846ad293d862fffc95cf764573880344af..deda28faf2d04566369c256b4af8d7bbf24c8632 100644
--- a/src/apps/cif2bcif/converter.ts
+++ b/src/apps/cif2bcif/converter.ts
@@ -45,8 +45,8 @@ export default async function convert(path: string, asText = false) {
     const encoder = asText ? new TextCIFEncoder() : new BinaryCIFEncoder('mol* cif2bcif');
     for (const b of cif.blocks) {
         encoder.startDataBlock(b.header);
-        for (const _c of Object.keys(b.categories)) {
-            encoder.writeCategory(getCategoryInstanceProvider(b.categories[_c]));
+        for (const c of b.categoryNames) {
+            encoder.writeCategory(getCategoryInstanceProvider(b.categories[c]));
         }
     }
     return encoder.getData();
diff --git a/src/mol-io/reader/_spec/cif.spec.ts b/src/mol-io/reader/_spec/cif.spec.ts
index 24ee319dfdf50a715267c82379f31f11b20050b4..b2309f006d26115b2e39be551821010d1c5d2e0c 100644
--- a/src/mol-io/reader/_spec/cif.spec.ts
+++ b/src/mol-io/reader/_spec/cif.spec.ts
@@ -14,8 +14,8 @@ const columnData = `123abc`;
 const intField = TextField({ data: columnData, indices: [0, 1, 1, 2, 2, 3], count: 3 }, 3);
 const strField = TextField({ data: columnData, indices: [3, 4, 4, 5, 5, 6], count: 3 }, 3);
 
-const testBlock = Data.Block({
-    _atoms: Data.Category(3, {
+const testBlock = Data.Block(['atoms'], {
+    atoms: Data.Category('atoms', 3, ['x', 'name'], {
         x: intField,
         name: strField
     })
diff --git a/src/mol-io/reader/cif/binary/parser.ts b/src/mol-io/reader/cif/binary/parser.ts
index 03b9e71c7548e6c578f233ebb0ac94ed1486d591..c72a9f4384550976767597498d4633635f1f8a93 100644
--- a/src/mol-io/reader/cif/binary/parser.ts
+++ b/src/mol-io/reader/cif/binary/parser.ts
@@ -24,7 +24,7 @@ function Category(data: EncodedCategory): Data.Category {
     for (const col of data.columns) map[col.name] = col;
     return {
         rowCount: data.rowCount,
-        name: data.name,
+        name: data.name.substr(1),
         fieldNames: data.columns.map(c => c.name),
         getField(name) {
             const col = map[name];
@@ -47,8 +47,8 @@ export default function parse(data: Uint8Array) {
         }
         const file = Data.File(unpacked.dataBlocks.map(block => {
             const cats = Object.create(null);
-            for (const cat of block.categories) cats[cat.name] = Category(cat);
-            return Data.Block(cats, block.header);
+            for (const cat of block.categories) cats[cat.name.substr(1)] = Category(cat);
+            return Data.Block(block.categories.map(c => c.name.substr(1)), cats, block.header);
         }));
         return Result.success(file);
     } catch (e) {
diff --git a/src/mol-io/reader/cif/data-model.ts b/src/mol-io/reader/cif/data-model.ts
index 4d2ffdf7fb628584722603b8bbeeca4b50c08193..55cc01095e0f323df9480381a7d20177ba5fc2e9 100644
--- a/src/mol-io/reader/cif/data-model.ts
+++ b/src/mol-io/reader/cif/data-model.ts
@@ -17,6 +17,7 @@ export function File(blocks: ArrayLike<Block>, name?: string): File {
 
 export interface Frame {
     readonly header: string,
+    readonly categoryNames: ReadonlyArray<string>,
     readonly categories: Categories
 }
 
@@ -24,15 +25,12 @@ export interface Block extends Frame {
     readonly saveFrames: Frame[]
 }
 
-export function Block(categories: Categories, header: string, saveFrames: Frame[] = []): Block {
-    if (Object.keys(categories).some(k => k[0] !== '_')) {
-        throw new Error(`Category names must start with '_'.`);
-    }
-    return { header, categories, saveFrames };
+export function Block(categoryNames: string[], categories: Categories, header: string, saveFrames: Frame[] = []): Block {
+    return { categoryNames, header, categories, saveFrames };
 }
 
-export function SafeFrame(categories: Categories, header: string): Frame {
-    return { header, categories };
+export function SafeFrame(categoryNames: string[], categories: Categories, header: string): Frame {
+    return { categoryNames, header, categories };
 }
 
 export type Categories = { readonly [name: string]: Category }
diff --git a/src/mol-io/reader/cif/schema.ts b/src/mol-io/reader/cif/schema.ts
index ef1e95ca8a500991530db6e80b88f0c49e089f26..ee788fdfd49d2bf85edb7e62e4801a9b27222226 100644
--- a/src/mol-io/reader/cif/schema.ts
+++ b/src/mol-io/reader/cif/schema.ts
@@ -87,6 +87,6 @@ function createDatabase(schema: Database.Schema, frame: Data.Frame): Database<an
 }
 
 function createTable(key: string, schema: Table.Schema, frame: Data.Frame) {
-    const cat = frame.categories[key[0] === '_' ? key : '_' + key];
+    const cat = frame.categories[key];
     return new CategoryTable(cat || Data.Category.empty(key), schema, !!cat);
 }
\ No newline at end of file
diff --git a/src/mol-io/reader/cif/schema/utils.ts b/src/mol-io/reader/cif/schema/utils.ts
index a02876af79e9be8d06dbaa7fcf22bf800d7c60b4..41078ab078a80bc8bcb551390fe45947fa2c64ba 100644
--- a/src/mol-io/reader/cif/schema/utils.ts
+++ b/src/mol-io/reader/cif/schema/utils.ts
@@ -87,7 +87,7 @@ function getField ( category: string, field: string, d: Data.Frame, ctx: FrameDa
 }
 
 function getEnums (d: Data.Frame, ctx: FrameData): string[]|undefined {
-    const value = getField('_item_enumeration', 'value', d, ctx)
+    const value = getField('item_enumeration', 'value', d, ctx)
     if (value) {
         const enums: string[] = []
         for (let i = 0; i < value.rowCount; ++i) {
@@ -101,7 +101,7 @@ function getEnums (d: Data.Frame, ctx: FrameData): string[]|undefined {
 }
 
 function getCode (d: Data.Frame, ctx: FrameData): [string, string[]]|undefined {
-    const code = getField('_item_type', 'code', d, ctx)
+    const code = getField('item_type', 'code', d, ctx)
     if (code) {
         let c = code.str(0)
         let e = []
@@ -147,7 +147,7 @@ export function generateSchema (dic: Data.Block) { // todo Block needs to be sp
     dic.saveFrames.forEach(d => {
         if (d.header[0] !== '_') return
         categories[d.header] = d
-        const item_linked = d.categories['_item_linked']
+        const item_linked = d.categories['item_linked']
         if (item_linked) {
             const child_name = item_linked.getField('child_name')
             const parent_name = item_linked.getField('parent_name')
diff --git a/src/mol-io/reader/cif/text/parser.ts b/src/mol-io/reader/cif/text/parser.ts
index 71643b34435014f8d3e2974ba7c482360c5625c1..0a3f13c240c1e1d7a2b83b42f2f5aa033231fa0d 100644
--- a/src/mol-io/reader/cif/text/parser.ts
+++ b/src/mol-io/reader/cif/text/parser.ts
@@ -411,10 +411,19 @@ interface CifCategoryResult {
     errorMessage: string;
 }
 
+type FrameContext = {
+    categoryNames: string[],
+    categories: { [name: string]: Data.Category }
+}
+
+function FrameContext(): FrameContext {
+    return { categoryNames: [], categories: Object.create(null) };
+}
+
 /**
  * Reads a category containing a single row.
  */
-function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: Data.Category }): CifCategoryResult {
+function handleSingle(tokenizer: TokenizerState, ctx: FrameContext): CifCategoryResult {
     const nsStart = tokenizer.tokenStart, nsEnd = getNamespaceEnd(tokenizer);
     const name = getNamespace(tokenizer, nsEnd);
     const fields = Object.create(null);
@@ -441,7 +450,9 @@ function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: D
         moveNext(tokenizer);
     }
 
-    categories[name] = Data.Category(name.substr(1), 1, fieldNames, fields);
+    const catName = name.substr(1);
+    ctx.categories[catName] = Data.Category(catName, 1, fieldNames, fields);
+    ctx.categoryNames.push(catName);
 
     return {
         hasError: false,
@@ -479,7 +490,7 @@ function readLoopChunks(state: LoopReadState) {
 /**
  * Reads a loop.
  */
-async function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Data.Category }): Promise<CifCategoryResult> {
+async function handleLoop(tokenizer: TokenizerState, ctx: FrameContext): Promise<CifCategoryResult> {
     const loopLine = tokenizer.lineNumber;
 
     moveNext(tokenizer);
@@ -491,7 +502,7 @@ async function handleLoop(tokenizer: TokenizerState, categories: { [name: string
         moveNext(tokenizer);
     }
 
-    const rowCountEstimate = name === '_atom_site' ? (tokenizer.data.length / 100) | 0 : 32;
+    const rowCountEstimate = name === '_atom_site' ? (tokenizer.data.length / 100) | 0 : 32; // `name` still has its leading '_' here; it is stripped below
     const tokens: Tokens[] = [];
     const fieldCount = fieldNames.length;
     for (let i = 0; i < fieldCount; i++) tokens[i] = TokenBuilder.create(tokenizer, rowCountEstimate);
@@ -519,7 +530,9 @@ async function handleLoop(tokenizer: TokenizerState, categories: { [name: string
         fields[fieldNames[i]] = Field(tokens[i], rowCount);
     }
 
-    categories[name] = Data.Category(name.substr(1), rowCount, fieldNames, fields);
+    const catName = name.substr(1);
+    ctx.categories[catName] = Data.Category(catName, rowCount, fieldNames, fields);
+    ctx.categoryNames.push(catName);
 
     return {
         hasError: false,
@@ -551,14 +564,15 @@ async function parseInternal(data: string, ctx: Computation.Context) {
     const dataBlocks: Data.Block[] = [];
     const tokenizer = createTokenizer(data, ctx);
     let blockHeader: string = '';
-    let blockCategories = Object.create(null);
-    let inSaveFrame = false
+    let blockCtx = FrameContext();
+
+    let inSaveFrame = false;
 
 
     // the next three initial values are never used in valid files
     let saveFrames: Data.Frame[] = [];
-    let saveCategories = Object.create(null);
-    let saveFrame: Data.Frame = Data.SafeFrame(saveCategories, '');
+    let saveCtx = FrameContext();
+    let saveFrame: Data.Frame = Data.SafeFrame(saveCtx.categoryNames, saveCtx.categories, '');
 
     ctx.update({ message: 'Parsing...', current: 0, max: data.length });
 
@@ -571,19 +585,19 @@ async function parseInternal(data: string, ctx: Computation.Context) {
             if (inSaveFrame) {
                 return error(tokenizer.lineNumber, 'Unexpected data block inside a save frame.');
             }
-            if (Object.keys(blockCategories).length > 0) {
-                dataBlocks.push(Data.Block(blockCategories, blockHeader, saveFrames));
+            if (blockCtx.categoryNames.length > 0) {
+                dataBlocks.push(Data.Block(blockCtx.categoryNames, blockCtx.categories, blockHeader, saveFrames));
             }
             blockHeader = data.substring(tokenizer.tokenStart + 5, tokenizer.tokenEnd);
-            blockCategories = Object.create(null);
+            blockCtx = FrameContext();
             saveFrames = []
             moveNext(tokenizer);
         // Save frame
         } else if (token === CifTokenType.Save) {
             const saveHeader = data.substring(tokenizer.tokenStart + 5, tokenizer.tokenEnd);
             if (saveHeader.length === 0) {
-                if (Object.keys(saveCategories).length > 0) {
-                    saveFrames[saveFrames.length] = saveFrame
+                if (saveCtx.categoryNames.length > 0) {
+                    saveFrames[saveFrames.length] = saveFrame;
                 }
                 inSaveFrame = false;
             } else {
@@ -591,19 +605,19 @@ async function parseInternal(data: string, ctx: Computation.Context) {
                     return error(tokenizer.lineNumber, 'Save frames cannot be nested.');
                 }
                 inSaveFrame = true;
-                saveCategories = Object.create(null);
-                saveFrame = Data.SafeFrame(saveCategories, saveHeader);
+                saveCtx = FrameContext();
+                saveFrame = Data.SafeFrame(saveCtx.categoryNames, saveCtx.categories, saveHeader);
             }
             moveNext(tokenizer);
         // Loop
         } else if (token === CifTokenType.Loop) {
-            const cat = await handleLoop(tokenizer, inSaveFrame ? saveCategories : blockCategories);
+            const cat = await handleLoop(tokenizer, inSaveFrame ? saveCtx : blockCtx);
             if (cat.hasError) {
                 return error(cat.errorLine, cat.errorMessage);
             }
         // Single row
         } else if (token === CifTokenType.ColumnName) {
-            const cat = handleSingle(tokenizer, inSaveFrame ? saveCategories : blockCategories);
+            const cat = handleSingle(tokenizer, inSaveFrame ? saveCtx : blockCtx);
             if (cat.hasError) {
                 return error(cat.errorLine, cat.errorMessage);
             }
@@ -618,8 +632,8 @@ async function parseInternal(data: string, ctx: Computation.Context) {
         return error(tokenizer.lineNumber, 'Unfinished save frame (`' + saveFrame.header + '`).');
     }
 
-    if (Object.keys(blockCategories).length > 0) {
-        dataBlocks.push(Data.Block(blockCategories, blockHeader, saveFrames));
+    if (blockCtx.categoryNames.length > 0) {
+        dataBlocks.push(Data.Block(blockCtx.categoryNames, blockCtx.categories, blockHeader, saveFrames));
     }
 
     return result(Data.File(dataBlocks));
diff --git a/src/perf-tests/structure.ts b/src/perf-tests/structure.ts
index 60d27832cb793fcc86fafeb166f5729735964e23..e9c34250a516d0e7b58f6ccf1d79691365c57394 100644
--- a/src/perf-tests/structure.ts
+++ b/src/perf-tests/structure.ts
@@ -45,6 +45,7 @@ export async function readCIF(path: string) {
     const data = parsed.result.blocks[0];
     console.time('schema')
     const mmcif = CIF.schema.mmCIF(data);
+    console.timeEnd('schema')
 
     console.time('buildModels')
     const models = Model.create({ kind: 'mmCIF', data: mmcif });
@@ -237,9 +238,9 @@ export namespace PropertyAccess {
     // }
 
     export async function run() {
-        const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
+        //const { structures, models } = await readCIF('./examples/1cbs_full.bcif');
         //const { structures, models } = await readCIF('e:/test/quick/3j3q_full.bcif');
-        //const { structures, models } = await readCIF('e:/test/quick/3j3q_updated.cif');
+        const { structures, models } = await readCIF('e:/test/quick/1cbs_updated.cif');
         //const { structures, models } = await readCIF('e:/test/molstar/3j3q.bcif');