diff --git a/src/mol-io/reader/cif/data-model.ts b/src/mol-io/reader/cif/data-model.ts index fe044426017e052ae96a2e9d420f8c13d52c53c1..40f3a11133ac7ad90637e17328778558e0a67400 100644 --- a/src/mol-io/reader/cif/data-model.ts +++ b/src/mol-io/reader/cif/data-model.ts @@ -5,10 +5,12 @@ * @author Alexander Rose <alexander.rose@weirdbyte.de> */ -import { Column } from 'mol-data/db' +import { Column, ColumnHelpers } from 'mol-data/db' import { Tensor } from 'mol-math/linear-algebra' -import { getNumberType, NumberType } from '../common/text/number-parser'; +import { getNumberType, NumberType, parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser'; import { Encoding } from '../../common/binary-cif'; +import { Tokens } from '../common/text/tokenizer'; +import { areValuesEqualProvider } from '../common/text/column/token'; export interface CifFile { readonly name?: string, @@ -81,6 +83,104 @@ export interface CifField { toFloatArray(params?: Column.ToArrayParams<number>): ReadonlyArray<number> } +export namespace CifField { + export function ofStrings(values: string[]): CifField { + const rowCount = values.length; + const str: CifField['str'] = row => { const ret = values[row]; if (!ret || ret === '.' || ret === '?') return ''; return ret; }; + const int: CifField['int'] = row => { const v = values[row]; return fastParseInt(v, 0, v.length) || 0; }; + const float: CifField['float'] = row => { const v = values[row]; return fastParseFloat(v, 0, v.length) || 0; }; + const valueKind: CifField['valueKind'] = row => { + const v = values[row], l = v.length; + if (l > 1) return Column.ValueKind.Present; + if (l === 0) return Column.ValueKind.NotPresent; + const c = v.charCodeAt(0); + if (c === 46 /* . */) return Column.ValueKind.NotPresent; + if (c === 63 /* ? */) return Column.ValueKind.Unknown; + return Column.ValueKind.Present; + }; + + return { + __array: void 0, + binaryEncoding: void 0, + isDefined: true, + rowCount, + str, + int, + float, + valueKind, + areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB], + toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params), + toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params), + toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params) + } + } + + export function ofNumbers(values: number[]): CifField { + const rowCount = values.length; + const str: CifField['str'] = row => { return '' + values[row]; }; + const float: CifField['float'] = row => values[row]; + const valueKind: CifField['valueKind'] = row => Column.ValueKind.Present; + + return { + __array: void 0, + binaryEncoding: void 0, + isDefined: true, + rowCount, + str, + int: float, + float, + valueKind, + areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB], + toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params), + toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params), + toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params) + } + } + + export function ofTokens(tokens: Tokens): CifField { + const { data, indices, count: rowCount } = tokens; + + const str: CifField['str'] = row => { + const ret = data.substring(indices[2 * row], indices[2 * row + 1]); + if (ret === '.' || ret === '?') return ''; + return ret; + }; + + const int: CifField['int'] = row => { + return fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0; + }; + + const float: CifField['float'] = row => { + return fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0; + }; + + const valueKind: CifField['valueKind'] = row => { + const s = indices[2 * row], l = indices[2 * row + 1] - s; + if (l > 1) return Column.ValueKind.Present; + if (l === 0) return Column.ValueKind.NotPresent; + const v = data.charCodeAt(s); + if (v === 46 /* . */) return Column.ValueKind.NotPresent; + if (v === 63 /* ? */) return Column.ValueKind.Unknown; + return Column.ValueKind.Present; + }; + + return { + __array: void 0, + binaryEncoding: void 0, + isDefined: true, + rowCount, + str, + int, + float, + valueKind, + areValuesEqual: areValuesEqualProvider(tokens), + toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params), + toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params), + toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params) + } + } +} + export function getTensor(category: CifCategory, field: string, space: Tensor.Space, row: number, zeroIndexed: boolean): Tensor.Data { const ret = space.create(); const offset = zeroIndexed ? 0 : 1; diff --git a/src/mol-io/reader/cif/text/field.ts b/src/mol-io/reader/cif/text/field.ts deleted file mode 100644 index 3248cd1eff26dd48478d9dba29184b23f71cf04d..0000000000000000000000000000000000000000 --- a/src/mol-io/reader/cif/text/field.ts +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. - * - * @author David Sehnal <david.sehnal@gmail.com> - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -import { Column, ColumnHelpers } from 'mol-data/db' -import * as TokenColumn from '../../common/text/column/token' -import { Tokens } from '../../common/text/tokenizer' -import * as Data from '../data-model' -import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../../common/text/number-parser' - -export default function CifTextField(tokens: Tokens, rowCount: number): Data.CifField { - const { data, indices } = tokens; - - const str: Data.CifField['str'] = row => { - const ret = data.substring(indices[2 * row], indices[2 * row + 1]); - if (ret === '.' || ret === '?') return ''; - return ret; - }; - - const int: Data.CifField['int'] = row => { - return fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0; - }; - - const float: Data.CifField['float'] = row => { - return fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0; - }; - - const valueKind: Data.CifField['valueKind'] = row => { - const s = indices[2 * row], l = indices[2 * row + 1] - s; - if (l > 1) return Column.ValueKind.Present; - if (l === 0) return Column.ValueKind.NotPresent; - const v = data.charCodeAt(s); - if (v === 46 /* . */) return Column.ValueKind.NotPresent; - if (v === 63 /* ? */) return Column.ValueKind.Unknown; - return Column.ValueKind.Present; - }; - - return { - __array: void 0, - binaryEncoding: void 0, - isDefined: true, - rowCount, - str, - int, - float, - valueKind, - areValuesEqual: TokenColumn.areValuesEqualProvider(tokens), - toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params), - toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params), - toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params) - } -} - -export function CifTextValueField(values: string[]): Data.CifField { - const rowCount = values.length; - - const str: Data.CifField['str'] = row => { - const ret = values[row]; - if (!ret || ret === '.' || ret === '?') return ''; - return ret; - }; - - const int: Data.CifField['int'] = row => { - const v = values[row]; - return fastParseInt(v, 0, v.length) || 0; - }; - - const float: Data.CifField['float'] = row => { - const v = values[row]; - return fastParseFloat(v, 0, v.length) || 0; - }; - - const valueKind: Data.CifField['valueKind'] = row => { - const v = values[row], l = v.length; - if (l > 1) return Column.ValueKind.Present; - if (l === 0) return Column.ValueKind.NotPresent; - const c = v.charCodeAt(0); - if (c === 46 /* . */) return Column.ValueKind.NotPresent; - if (c === 63 /* ? */) return Column.ValueKind.Unknown; - return Column.ValueKind.Present; - }; - - return { - __array: void 0, - binaryEncoding: void 0, - isDefined: true, - rowCount, - str, - int, - float, - valueKind, - areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB], - toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params), - toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params), - toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params) - } -} \ No newline at end of file diff --git a/src/mol-io/reader/cif/text/parser.ts b/src/mol-io/reader/cif/text/parser.ts index ae617076ed75406d12b3925be29a381b5ea60c82..920546393c25b5aa585b34d8740d872222e63670 100644 --- a/src/mol-io/reader/cif/text/parser.ts +++ b/src/mol-io/reader/cif/text/parser.ts @@ -23,7 +23,6 @@ */ import * as Data from '../data-model' -import Field from './field' import { Tokens, TokenBuilder } from '../../common/text/tokenizer' import { ReaderResult as Result } from '../../result' import { Task, RuntimeContext, chunkedSubtask } from 'mol-task' @@ -445,7 +444,7 @@ function handleSingle(tokenizer: TokenizerState, ctx: FrameContext): CifCategory errorMessage: 'Expected value.' } } - fields[fieldName] = Field({ data: tokenizer.data, indices: [tokenizer.tokenStart, tokenizer.tokenEnd], count: 1 }, 1); + fields[fieldName] = Data.CifField.ofTokens({ data: tokenizer.data, indices: [tokenizer.tokenStart, tokenizer.tokenEnd], count: 1 }); fieldNames[fieldNames.length] = fieldName; moveNext(tokenizer); } @@ -529,7 +528,7 @@ async function handleLoop(tokenizer: TokenizerState, ctx: FrameContext): Promise const rowCount = (state.tokenCount / fieldCount) | 0; const fields = Object.create(null); for (let i = 0; i < fieldCount; i++) { - fields[fieldNames[i]] = Field(tokens[i], rowCount); + fields[fieldNames[i]] = Data.CifField.ofTokens(tokens[i]); } const catName = name.substr(1); diff --git a/src/mol-io/reader/pdb/to-cif.ts b/src/mol-io/reader/pdb/to-cif.ts index f206a7bb26744508ee2f5e304ff721917d2b2e49..c6862994be0c603ce22227635e62d292ec10c03f 100644 --- a/src/mol-io/reader/pdb/to-cif.ts +++ b/src/mol-io/reader/pdb/to-cif.ts @@ -6,7 +6,6 @@ import { CifField, CifCategory } from '../cif'; import { mmCIF_Schema } from '../cif/schema/mmcif'; -import CifTextField, { CifTextValueField } from '../cif/text/field'; import { TokenBuilder, Tokenizer } from '../common/text/tokenizer'; import { PdbFile } from './schema'; import { CifFile } from '../cif/data-model'; @@ -26,13 +25,14 @@ function toCategory(name: string, fields: { [name: string]: CifField | undefined function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } { return { - id: CifTextValueField(['1', '2', '3']), - type: CifTextValueField(['polymer', 'non-polymer', 'water']) + id: CifField.ofStrings(['1', '2', '3']), + type: CifField.ofStrings(['polymer', 'non-polymer', 'water']) } } +type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? T : never function atom_site_template(data: string, count: number) { - const str = () => new Array(count) as string[]; + const str = () => [] as string[]; const ts = () => TokenBuilder.create(data, 2 * count); return { index: 0, @@ -57,36 +57,36 @@ function atom_site_template(data: string, count: number) { } function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } { - const auth_asym_id = CifTextField(sites.auth_asym_id, sites.count); - const auth_atom_id = CifTextField(sites.auth_atom_id, sites.count); - const auth_comp_id = CifTextField(sites.auth_comp_id, sites.count); - const auth_seq_id = CifTextField(sites.auth_seq_id, sites.count); + const auth_asym_id = CifField.ofTokens(sites.auth_asym_id); + const auth_atom_id = CifField.ofTokens(sites.auth_atom_id); + const auth_comp_id = CifField.ofTokens(sites.auth_comp_id); + const auth_seq_id = CifField.ofTokens(sites.auth_seq_id); return { auth_asym_id, auth_atom_id, auth_comp_id, auth_seq_id, - B_iso_or_equiv: CifTextField(sites.B_iso_or_equiv, sites.count), - Cartn_x: CifTextField(sites.Cartn_x, sites.count), - Cartn_y: CifTextField(sites.Cartn_y, sites.count), - Cartn_z: CifTextField(sites.Cartn_z, sites.count), - group_PDB: CifTextField(sites.group_PDB, sites.count), - id: CifTextValueField(sites.id), + B_iso_or_equiv: CifField.ofTokens(sites.B_iso_or_equiv), + Cartn_x: CifField.ofTokens(sites.Cartn_x), + Cartn_y: CifField.ofTokens(sites.Cartn_y), + Cartn_z: CifField.ofTokens(sites.Cartn_z), + group_PDB: CifField.ofTokens(sites.group_PDB), + id: CifField.ofStrings(sites.id), - label_alt_id: CifTextField(sites.label_alt_id, sites.count), + label_alt_id: CifField.ofTokens(sites.label_alt_id), label_asym_id: auth_asym_id, label_atom_id: auth_atom_id, label_comp_id: auth_comp_id, label_seq_id: auth_seq_id, - label_entity_id: CifTextValueField(sites.label_entity_id), + label_entity_id: CifField.ofStrings(sites.label_entity_id), - occupancy: CifTextField(sites.occupancy, sites.count), - type_symbol: CifTextField(sites.type_symbol, sites.count), + occupancy: CifField.ofTokens(sites.occupancy), + type_symbol: CifField.ofTokens(sites.type_symbol), - pdbx_PDB_ins_code: CifTextField(sites.pdbx_PDB_ins_code, sites.count), - pdbx_PDB_model_num: CifTextValueField(sites.pdbx_PDB_model_num) + pdbx_PDB_ins_code: CifField.ofTokens(sites.pdbx_PDB_ins_code), + pdbx_PDB_model_num: CifField.ofStrings(sites.pdbx_PDB_model_num) }; } @@ -208,8 +208,6 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num sites.index++; } -type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? T : never - async function pdbToMmCIF(pdb: PdbFile): Promise<CifFile> { const { lines } = pdb; const { data, indices } = lines;