diff --git a/src/mol-io/reader/cif/data-model.ts b/src/mol-io/reader/cif/data-model.ts index 40f3a11133ac7ad90637e17328778558e0a67400..727daf63ccc5d83b0ce9e6b9dbd59d7e661d3f3a 100644 --- a/src/mol-io/reader/cif/data-model.ts +++ b/src/mol-io/reader/cif/data-model.ts @@ -57,6 +57,19 @@ export namespace CifCategory { export function empty(name: string): CifCategory { return { rowCount: 0, name, fieldNames: [], getField(name: string) { return void 0; } }; }; + + export type SomeFields<S> = { [P in keyof S]?: CifField } + export type Fields<S> = { [P in keyof S]: CifField } + + export function ofFields(name: string, fields: { [name: string]: CifField }): CifCategory { + const fieldNames = Object.keys(fields); + return { + rowCount: fieldNames.length > 0 ? fields[fieldNames[0]].rowCount : 0, + name, + fieldNames, + getField(name) { return fields[name]; } + }; + } } /** @@ -84,6 +97,10 @@ export interface CifField { } export namespace CifField { + export function ofString(value: string) { + return ofStrings([value]); + } + export function ofStrings(values: string[]): CifField { const rowCount = values.length; const str: CifField['str'] = row => { const ret = values[row]; if (!ret || ret === '.' || ret === '?') return ''; return ret; }; diff --git a/src/mol-io/reader/common/text/tokenizer.ts b/src/mol-io/reader/common/text/tokenizer.ts index 8664601d45a214e2a15e934af85a8584d827444c..fce7c9037cc7c9eb546edae1a12fb4d838b06efa 100644 --- a/src/mol-io/reader/common/text/tokenizer.ts +++ b/src/mol-io/reader/common/text/tokenizer.ts @@ -228,6 +228,7 @@ namespace Tokenizer { state.tokenStart = s; state.tokenEnd = e + 1; state.position = end; + return state; } } @@ -265,6 +266,10 @@ export namespace TokenBuilder { tokens.count++; } + export function addToken(tokens: Tokens, tokenizer: Tokenizer) { + add(tokens, tokenizer.tokenStart, tokenizer.tokenEnd); + } + export function addUnchecked(tokens: Tokens, start: number, end: number) { (tokens as Builder).indices[(tokens as Builder).offset++] = start; (tokens as Builder).indices[(tokens as Builder).offset++] = end; diff --git a/src/mol-model-formats/structure/pdb/assembly.ts b/src/mol-model-formats/structure/pdb/assembly.ts new file mode 100644 index 0000000000000000000000000000000000000000..00827dddc59c96c3661c249a2ac18050a1da1a70 --- /dev/null +++ b/src/mol-model-formats/structure/pdb/assembly.ts @@ -0,0 +1,44 @@ +/** + * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { CifCategory, CifField } from 'mol-io/reader/cif'; +import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; + +export function parseCryst1(id: string, record: string): CifCategory[] { + // COLUMNS DATA TYPE CONTENTS + // -------------------------------------------------------------------------------- + // 1 - 6 Record name "CRYST1" + // 7 - 15 Real(9.3) a (Angstroms) + // 16 - 24 Real(9.3) b (Angstroms) + // 25 - 33 Real(9.3) c (Angstroms) + // 34 - 40 Real(7.2) alpha (degrees) + // 41 - 47 Real(7.2) beta (degrees) + // 48 - 54 Real(7.2) gamma (degrees) + // 56 - 66 LString Space group + // 67 - 70 Integer Z value + + const get = (s: number, l: number) => (record.substr(s, l) || '').trim() + + const cell: CifCategory.Fields<mmCIF_Schema['cell']> = { + entry_id: CifField.ofString(id), + length_a: CifField.ofString(get(6, 9)), + length_b: CifField.ofString(get(15, 9)), + length_c: CifField.ofString(get(24, 9)), + angle_alpha: CifField.ofString(get(33, 7)), + angle_beta: CifField.ofString(get(40, 7)), + angle_gamma: CifField.ofString(get(47, 7)), + Z_PDB: CifField.ofString(get(66, 4)), + pdbx_unique_axis: CifField.ofString('?') + }; + const symmetry: CifCategory.Fields<mmCIF_Schema['symmetry']> = { + entry_id: CifField.ofString(id), + 'space_group_name_H-M': CifField.ofString(get(55, 11)), + Int_Tables_number: CifField.ofString('?'), + cell_setting: CifField.ofString('?'), + space_group_name_Hall: CifField.ofString('?') + } + return [CifCategory.ofFields('cell', cell), CifCategory.ofFields('symmetry', symmetry)]; +} \ No newline at end of file diff --git a/src/mol-model-formats/structure/pdb/to-cif.ts b/src/mol-model-formats/structure/pdb/to-cif.ts index 799ab8c9a4ed7b7240a38c1134f9e2d6b30abe48..2f5da2c27103eb8074c3e56f3f4d3b641c2f9c37 100644 --- a/src/mol-model-formats/structure/pdb/to-cif.ts +++ b/src/mol-model-formats/structure/pdb/to-cif.ts @@ -100,84 +100,63 @@ function getEntityId(residueName: string, isHet: boolean) { function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number, isHet: boolean) { const { data: str } = data; - let startPos = s; - let start = s; - const end = e; - const length = end - start; + const length = e - s; // TODO: filter invalid atoms // COLUMNS DATA TYPE CONTENTS // -------------------------------------------------------------------------------- // 1 - 6 Record name "ATOM " - Tokenizer.trim(data, start, start + 6); - TokenBuilder.add(sites.group_PDB, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.group_PDB, Tokenizer.trim(data, s, s + 6)); // 7 - 11 Integer Atom serial number. // TODO: support HEX - start = startPos + 6; - Tokenizer.trim(data, start, start + 5); + Tokenizer.trim(data, s + 6, s + 11); sites.id[sites.index] = data.data.substring(data.tokenStart, data.tokenEnd); // 13 - 16 Atom Atom name. - start = startPos + 12; - Tokenizer.trim(data, start, start + 4); - TokenBuilder.add(sites.auth_atom_id, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.auth_atom_id, Tokenizer.trim(data, s + 12, s + 16)); // 17 Character Alternate location indicator. - if (str.charCodeAt(startPos + 16) === 32) { // ' ' + if (str.charCodeAt(s + 16) === 32) { // ' ' TokenBuilder.add(sites.label_alt_id, 0, 0); } else { - TokenBuilder.add(sites.label_alt_id, startPos + 16, startPos + 17); + TokenBuilder.add(sites.label_alt_id, s + 16, s + 17); } // 18 - 20 Residue name Residue name. - start = startPos + 17; - Tokenizer.trim(data, start, start + 3); - TokenBuilder.add(sites.auth_comp_id, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.auth_comp_id, Tokenizer.trim(data, s + 17, s + 20)); const residueName = str.substring(data.tokenStart, data.tokenEnd); // 22 Character Chain identifier. - TokenBuilder.add(sites.auth_asym_id, startPos + 21, startPos + 22); + TokenBuilder.add(sites.auth_asym_id, s + 21, s + 22); // 23 - 26 Integer Residue sequence number. // TODO: support HEX - start = startPos + 22; - Tokenizer.trim(data, start, start + 4); - TokenBuilder.add(sites.auth_seq_id, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.auth_seq_id, Tokenizer.trim(data, s + 22, s + 26)); // 27 AChar Code for insertion of residues. - if (str.charCodeAt(startPos + 26) === 32) { // ' ' + if (str.charCodeAt(s + 26) === 32) { // ' ' TokenBuilder.add(sites.label_alt_id, 0, 0); } else { - TokenBuilder.add(sites.label_alt_id, startPos + 26, startPos + 27); + TokenBuilder.add(sites.label_alt_id, s + 26, s + 27); } // 31 - 38 Real(8.3) Orthogonal coordinates for X in Angstroms. - start = startPos + 30; - Tokenizer.trim(data, start, start + 8); - TokenBuilder.add(sites.Cartn_x, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.Cartn_x, Tokenizer.trim(data, s + 30, s + 38)); // 39 - 46 Real(8.3) Orthogonal coordinates for Y in Angstroms. - start = startPos + 38; - Tokenizer.trim(data, start, start + 8); - TokenBuilder.add(sites.Cartn_y, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.Cartn_y, Tokenizer.trim(data, s + 38, s + 46)); // 47 - 54 Real(8.3) Orthogonal coordinates for Z in Angstroms. - start = startPos + 46; - Tokenizer.trim(data, start, start + 8); - TokenBuilder.add(sites.Cartn_z, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.Cartn_z, Tokenizer.trim(data, s + 46, s + 54)); // 55 - 60 Real(6.2) Occupancy. - start = startPos + 54; - Tokenizer.trim(data, start, start + 6); - TokenBuilder.add(sites.occupancy, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.occupancy, Tokenizer.trim(data, s + 54, s + 60)); // 61 - 66 Real(6.2) Temperature factor (Default = 0.0). if (length >= 66) { - start = startPos + 60; - Tokenizer.trim(data, start, start + 6); - TokenBuilder.add(sites.B_iso_or_equiv, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.B_iso_or_equiv, Tokenizer.trim(data, s + 60, s + 66)); } else { TokenBuilder.add(sites.label_alt_id, 0, 0); } @@ -187,17 +166,16 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num // 77 - 78 LString(2) Element symbol, right-justified. if (length >= 78) { - start = startPos + 76; - Tokenizer.trim(data, start, start + 2); + Tokenizer.trim(data, s + 76, s + 78); if (data.tokenStart < data.tokenEnd) { - TokenBuilder.add(sites.type_symbol, data.tokenStart, data.tokenEnd); + TokenBuilder.addToken(sites.type_symbol, data); } else { // "guess" the symbol - TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13); + TokenBuilder.add(sites.type_symbol, s + 12, s + 13); } } else { - TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13); + TokenBuilder.add(sites.type_symbol, s + 12, s + 13); } sites.label_entity_id[sites.index] = getEntityId(residueName, isHet);