Skip to content
Snippets Groups Projects
Commit 65f1ba36 authored by David Sehnal
Browse files

mol-io: refactored CifField

parent 3b3de4fb
No related branches found
No related tags found
No related merge requests found
......@@ -5,10 +5,12 @@
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { Column } from 'mol-data/db'
import { Column, ColumnHelpers } from 'mol-data/db'
import { Tensor } from 'mol-math/linear-algebra'
import { getNumberType, NumberType } from '../common/text/number-parser';
import { getNumberType, NumberType, parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser';
import { Encoding } from '../../common/binary-cif';
import { Tokens } from '../common/text/tokenizer';
import { areValuesEqualProvider } from '../common/text/column/token';
export interface CifFile {
readonly name?: string,
......@@ -81,6 +83,104 @@ export interface CifField {
toFloatArray(params?: Column.ToArrayParams<number>): ReadonlyArray<number>
}
export namespace CifField {
    /**
     * Creates a field backed by an array of raw string values.
     *
     * `str` reads a missing, empty, `.` or `?` entry as `''`. The numeric
     * accessors return 0 for missing/empty entries and whenever the parser
     * result is falsy. `valueKind` reports missing/empty entries and `.` as
     * NotPresent, `?` as Unknown, and everything else as Present.
     *
     * @param values raw values, one per row; `rowCount` is `values.length`
     * @returns a field whose accessors read from `values` on demand
     */
    export function ofStrings(values: string[]): CifField {
        const rowCount = values.length;
        const str: CifField['str'] = row => {
            const ret = values[row];
            if (!ret || ret === '.' || ret === '?') return '';
            return ret;
        };
        const int: CifField['int'] = row => {
            const v = values[row];
            // Same tolerance as `str`: a missing or empty entry must not throw on `v.length`.
            if (!v) return 0;
            return fastParseInt(v, 0, v.length) || 0;
        };
        const float: CifField['float'] = row => {
            const v = values[row];
            // Same tolerance as `str`: a missing or empty entry must not throw on `v.length`.
            if (!v) return 0;
            return fastParseFloat(v, 0, v.length) || 0;
        };
        const valueKind: CifField['valueKind'] = row => {
            const v = values[row];
            // Covers both the empty string and a missing (undefined) entry.
            if (!v) return Column.ValueKind.NotPresent;
            if (v.length > 1) return Column.ValueKind.Present;
            const c = v.charCodeAt(0);
            if (c === 46 /* . */) return Column.ValueKind.NotPresent;
            if (c === 63 /* ? */) return Column.ValueKind.Unknown;
            return Column.ValueKind.Present;
        };

        return {
            __array: void 0,
            binaryEncoding: void 0,
            isDefined: true,
            rowCount,
            str,
            int,
            float,
            valueKind,
            areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
            toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
            toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
            toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
        }
    }

    /**
     * Creates a field backed by an array of numbers.
     *
     * `int` and `float` share one accessor, so `int` returns the stored number
     * unchanged (no truncation). `str` is the number concatenated onto `''`.
     * Every row is reported as Present.
     *
     * @param values numeric values, one per row; `rowCount` is `values.length`
     * @returns a field whose accessors read from `values` on demand
     */
    export function ofNumbers(values: number[]): CifField {
        const rowCount = values.length;
        const str: CifField['str'] = row => { return '' + values[row]; };
        const float: CifField['float'] = row => values[row];
        const valueKind: CifField['valueKind'] = () => Column.ValueKind.Present;

        return {
            __array: void 0,
            binaryEncoding: void 0,
            isDefined: true,
            rowCount,
            str,
            int: float,
            float,
            valueKind,
            areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
            toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
            toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params),
            toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
        }
    }

    /**
     * Creates a field that reads its values out of a tokenized text buffer.
     *
     * Row `i` is the substring of `tokens.data` from `indices[2 * i]` up to
     * (not including) `indices[2 * i + 1]`. `str` maps the single-character
     * tokens `.` and `?` to `''`; the numeric accessors fall back to 0 when the
     * parser result is falsy. `valueKind` reports zero-length tokens and `.` as
     * NotPresent, `?` as Unknown, and everything else as Present.
     *
     * @param tokens buffer, start/end offsets, and row count (`tokens.count`)
     * @returns a field whose accessors read from the token buffer on demand
     */
    export function ofTokens(tokens: Tokens): CifField {
        const { data, indices, count: rowCount } = tokens;

        const str: CifField['str'] = row => {
            const ret = data.substring(indices[2 * row], indices[2 * row + 1]);
            if (ret === '.' || ret === '?') return '';
            return ret;
        };
        const int: CifField['int'] = row => {
            return fastParseInt(data, indices[2 * row], indices[2 * row + 1]) || 0;
        };
        const float: CifField['float'] = row => {
            return fastParseFloat(data, indices[2 * row], indices[2 * row + 1]) || 0;
        };
        const valueKind: CifField['valueKind'] = row => {
            const s = indices[2 * row], l = indices[2 * row + 1] - s;
            if (l > 1) return Column.ValueKind.Present;
            if (l === 0) return Column.ValueKind.NotPresent;
            const v = data.charCodeAt(s);
            if (v === 46 /* . */) return Column.ValueKind.NotPresent;
            if (v === 63 /* ? */) return Column.ValueKind.Unknown;
            return Column.ValueKind.Present;
        };

        return {
            __array: void 0,
            binaryEncoding: void 0,
            isDefined: true,
            rowCount,
            str,
            int,
            float,
            valueKind,
            areValuesEqual: areValuesEqualProvider(tokens),
            toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
            toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
            toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
        }
    }
}
export function getTensor(category: CifCategory, field: string, space: Tensor.Space, row: number, zeroIndexed: boolean): Tensor.Data {
const ret = space.create();
const offset = zeroIndexed ? 0 : 1;
......
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { Column, ColumnHelpers } from 'mol-data/db'
import * as TokenColumn from '../../common/text/column/token'
import { Tokens } from '../../common/text/tokenizer'
import * as Data from '../data-model'
import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../../common/text/number-parser'
/**
 * Builds a CIF field that reads its values out of a tokenized text buffer.
 *
 * Row `i` is the substring of `tokens.data` from `indices[2 * i]` up to (not
 * including) `indices[2 * i + 1]`. `str` maps the single-character tokens `.`
 * and `?` to `''`; the numeric accessors fall back to 0 when the parser result
 * is falsy.
 *
 * @param tokens buffer plus start/end offsets of each token
 * @param rowCount number of rows exposed by the field
 * @returns a field whose accessors read from the token buffer on demand
 */
export default function CifTextField(tokens: Tokens, rowCount: number): Data.CifField {
    const { data: text, indices: offsets } = tokens;

    const str: Data.CifField['str'] = row => {
        const token = text.substring(offsets[2 * row], offsets[2 * row + 1]);
        return token === '.' || token === '?' ? '' : token;
    };

    const int: Data.CifField['int'] = row =>
        fastParseInt(text, offsets[2 * row], offsets[2 * row + 1]) || 0;

    const float: Data.CifField['float'] = row =>
        fastParseFloat(text, offsets[2 * row], offsets[2 * row + 1]) || 0;

    const valueKind: Data.CifField['valueKind'] = row => {
        const start = offsets[2 * row];
        const length = offsets[2 * row + 1] - start;
        if (length === 0) return Column.ValueKind.NotPresent;
        if (length > 1) return Column.ValueKind.Present;
        // Single-character token: only '.' and '?' carry a special meaning.
        switch (text.charCodeAt(start)) {
            case 46 /* . */: return Column.ValueKind.NotPresent;
            case 63 /* ? */: return Column.ValueKind.Unknown;
            default: return Column.ValueKind.Present;
        }
    };

    const areValuesEqual = TokenColumn.areValuesEqualProvider(tokens);

    return {
        __array: void 0,
        binaryEncoding: void 0,
        isDefined: true,
        rowCount,
        str,
        int,
        float,
        valueKind,
        areValuesEqual,
        toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
        toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
        toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
    }
}
/**
 * Builds a CIF field backed by an array of raw string values.
 *
 * `str` reads a missing, empty, `.` or `?` entry as `''`. The numeric accessors
 * return 0 for missing/empty entries and whenever the parser result is falsy.
 * `valueKind` reports missing/empty entries and `.` as NotPresent, `?` as
 * Unknown, and everything else as Present.
 *
 * @param values raw values, one per row; `rowCount` is `values.length`
 * @returns a field whose accessors read from `values` on demand
 */
export function CifTextValueField(values: string[]): Data.CifField {
    const rowCount = values.length;

    const str: Data.CifField['str'] = row => {
        const ret = values[row];
        if (!ret || ret === '.' || ret === '?') return '';
        return ret;
    };

    const int: Data.CifField['int'] = row => {
        const v = values[row];
        // Same tolerance as `str`: a missing or empty entry must not throw on `v.length`.
        if (!v) return 0;
        return fastParseInt(v, 0, v.length) || 0;
    };

    const float: Data.CifField['float'] = row => {
        const v = values[row];
        // Same tolerance as `str`: a missing or empty entry must not throw on `v.length`.
        if (!v) return 0;
        return fastParseFloat(v, 0, v.length) || 0;
    };

    const valueKind: Data.CifField['valueKind'] = row => {
        const v = values[row];
        // Covers both the empty string and a missing (undefined) entry.
        if (!v) return Column.ValueKind.NotPresent;
        if (v.length > 1) return Column.ValueKind.Present;
        const c = v.charCodeAt(0);
        if (c === 46 /* . */) return Column.ValueKind.NotPresent;
        if (c === 63 /* ? */) return Column.ValueKind.Unknown;
        return Column.ValueKind.Present;
    };

    return {
        __array: void 0,
        binaryEncoding: void 0,
        isDefined: true,
        rowCount,
        str,
        int,
        float,
        valueKind,
        areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
        toStringArray: params => ColumnHelpers.createAndFillArray(rowCount, str, params),
        toIntArray: params => ColumnHelpers.createAndFillArray(rowCount, int, params),
        toFloatArray: params => ColumnHelpers.createAndFillArray(rowCount, float, params)
    }
}
\ No newline at end of file
......@@ -23,7 +23,6 @@
*/
import * as Data from '../data-model'
import Field from './field'
import { Tokens, TokenBuilder } from '../../common/text/tokenizer'
import { ReaderResult as Result } from '../../result'
import { Task, RuntimeContext, chunkedSubtask } from 'mol-task'
......@@ -445,7 +444,7 @@ function handleSingle(tokenizer: TokenizerState, ctx: FrameContext): CifCategory
errorMessage: 'Expected value.'
}
}
fields[fieldName] = Field({ data: tokenizer.data, indices: [tokenizer.tokenStart, tokenizer.tokenEnd], count: 1 }, 1);
fields[fieldName] = Data.CifField.ofTokens({ data: tokenizer.data, indices: [tokenizer.tokenStart, tokenizer.tokenEnd], count: 1 });
fieldNames[fieldNames.length] = fieldName;
moveNext(tokenizer);
}
......@@ -529,7 +528,7 @@ async function handleLoop(tokenizer: TokenizerState, ctx: FrameContext): Promise
const rowCount = (state.tokenCount / fieldCount) | 0;
const fields = Object.create(null);
for (let i = 0; i < fieldCount; i++) {
fields[fieldNames[i]] = Field(tokens[i], rowCount);
fields[fieldNames[i]] = Data.CifField.ofTokens(tokens[i]);
}
const catName = name.substr(1);
......
......@@ -6,7 +6,6 @@
import { CifField, CifCategory } from '../cif';
import { mmCIF_Schema } from '../cif/schema/mmcif';
import CifTextField, { CifTextValueField } from '../cif/text/field';
import { TokenBuilder, Tokenizer } from '../common/text/tokenizer';
import { PdbFile } from './schema';
import { CifFile } from '../cif/data-model';
......@@ -26,13 +25,14 @@ function toCategory(name: string, fields: { [name: string]: CifField | undefined
function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
return {
id: CifTextValueField(['1', '2', '3']),
type: CifTextValueField(['polymer', 'non-polymer', 'water'])
id: CifField.ofStrings(['1', '2', '3']),
type: CifField.ofStrings(['polymer', 'non-polymer', 'water'])
}
}
type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? T : never
function atom_site_template(data: string, count: number) {
const str = () => new Array(count) as string[];
const str = () => [] as string[];
const ts = () => TokenBuilder.create(data, 2 * count);
return {
index: 0,
......@@ -57,36 +57,36 @@ function atom_site_template(data: string, count: number) {
}
function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
const auth_asym_id = CifTextField(sites.auth_asym_id, sites.count);
const auth_atom_id = CifTextField(sites.auth_atom_id, sites.count);
const auth_comp_id = CifTextField(sites.auth_comp_id, sites.count);
const auth_seq_id = CifTextField(sites.auth_seq_id, sites.count);
const auth_asym_id = CifField.ofTokens(sites.auth_asym_id);
const auth_atom_id = CifField.ofTokens(sites.auth_atom_id);
const auth_comp_id = CifField.ofTokens(sites.auth_comp_id);
const auth_seq_id = CifField.ofTokens(sites.auth_seq_id);
return {
auth_asym_id,
auth_atom_id,
auth_comp_id,
auth_seq_id,
B_iso_or_equiv: CifTextField(sites.B_iso_or_equiv, sites.count),
Cartn_x: CifTextField(sites.Cartn_x, sites.count),
Cartn_y: CifTextField(sites.Cartn_y, sites.count),
Cartn_z: CifTextField(sites.Cartn_z, sites.count),
group_PDB: CifTextField(sites.group_PDB, sites.count),
id: CifTextValueField(sites.id),
B_iso_or_equiv: CifField.ofTokens(sites.B_iso_or_equiv),
Cartn_x: CifField.ofTokens(sites.Cartn_x),
Cartn_y: CifField.ofTokens(sites.Cartn_y),
Cartn_z: CifField.ofTokens(sites.Cartn_z),
group_PDB: CifField.ofTokens(sites.group_PDB),
id: CifField.ofStrings(sites.id),
label_alt_id: CifTextField(sites.label_alt_id, sites.count),
label_alt_id: CifField.ofTokens(sites.label_alt_id),
label_asym_id: auth_asym_id,
label_atom_id: auth_atom_id,
label_comp_id: auth_comp_id,
label_seq_id: auth_seq_id,
label_entity_id: CifTextValueField(sites.label_entity_id),
label_entity_id: CifField.ofStrings(sites.label_entity_id),
occupancy: CifTextField(sites.occupancy, sites.count),
type_symbol: CifTextField(sites.type_symbol, sites.count),
occupancy: CifField.ofTokens(sites.occupancy),
type_symbol: CifField.ofTokens(sites.type_symbol),
pdbx_PDB_ins_code: CifTextField(sites.pdbx_PDB_ins_code, sites.count),
pdbx_PDB_model_num: CifTextValueField(sites.pdbx_PDB_model_num)
pdbx_PDB_ins_code: CifField.ofTokens(sites.pdbx_PDB_ins_code),
pdbx_PDB_model_num: CifField.ofStrings(sites.pdbx_PDB_model_num)
};
}
......@@ -208,8 +208,6 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num
sites.index++;
}
type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? T : never
async function pdbToMmCIF(pdb: PdbFile): Promise<CifFile> {
const { lines } = pdb;
const { data, indices } = lines;
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment