From f4beba52156f0cc9d5bf7bd2200419ac335b8eaf Mon Sep 17 00:00:00 2001
From: Alexander Rose <alex.rose@rcsb.org>
Date: Tue, 10 Sep 2019 16:49:10 -0700
Subject: [PATCH] pdb-parser, aniso records

---
 .../structure/pdb/anisotropic.ts              | 175 ++++++++++++++++++
 src/mol-model-formats/structure/pdb/to-cif.ts |  16 +-
 2 files changed, 187 insertions(+), 4 deletions(-)
 create mode 100644 src/mol-model-formats/structure/pdb/anisotropic.ts

diff --git a/src/mol-model-formats/structure/pdb/anisotropic.ts b/src/mol-model-formats/structure/pdb/anisotropic.ts
new file mode 100644
index 000000000..869d59209
--- /dev/null
+++ b/src/mol-model-formats/structure/pdb/anisotropic.ts
@@ -0,0 +1,175 @@
+/**
+ * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author Alexander Rose <alexander.rose@weirdbyte.de>
+ */
+
+import { CifField } from '../../../mol-io/reader/cif';
+import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
+import { TokenBuilder, Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
+import { guessElementSymbolTokens } from '../util';
+import { parseIntSkipLeadingWhitespace as fastParseInt } from '../../../mol-io/reader/common/text/number-parser';
+
+/** Column builders for ANISOU records, i.e. the object returned by `getAnisotropicTemplate`. */
+type AnisotropicTemplate = ReturnType<typeof getAnisotropicTemplate>;
+
+/**
+ * Allocate empty column builders sized for `count` ANISOU records.
+ *
+ * @param data text that is handed to `TokenBuilder.create` for every token column
+ * @param count number of records the numeric (`Float32Array`) columns are sized for
+ * @returns the column builders together with `index`, the next row to be written (starts at 0)
+ */
+export function getAnisotropicTemplate(data: string, count: number) {
+    const str = () => [] as string[];
+    const float = () => new Float32Array(count);
+    const ts = () => TokenBuilder.create(data, 2 * count);
+    return {
+        index: 0,
+        count,
+        id: str(),
+        type_symbol: ts(),
+        pdbx_label_atom_id: ts(),
+        pdbx_label_alt_id: ts(),
+        pdbx_label_comp_id: ts(),
+        pdbx_label_asym_id: ts(),
+        pdbx_label_seq_id: ts(),
+        pdbx_PDB_ins_code: ts(),
+        'U[1][1]': float(),
+        'U[2][2]': float(),
+        'U[3][3]': float(),
+        'U[1][2]': float(),
+        'U[1][3]': float(),
+        'U[2][3]': float(),
+        pdbx_auth_seq_id: ts(),
+        pdbx_auth_comp_id: ts(),
+        pdbx_auth_asym_id: ts(),
+        pdbx_auth_atom_id: ts(),
+    };
+}
+
+/**
+ * Wrap the column builders as CIF fields keyed by `atom_site_anisotrop` column name.
+ *
+ * The `pdbx_label_*` atom, comp, asym and seq columns share the field objects of
+ * their `pdbx_auth_*` counterparts; the matching `pdbx_label_*` builders are not read.
+ *
+ * @param sites column builders, e.g. as filled by `addAnisotropic`
+ * @returns one `CifField` per column
+ */
+export function getAnisotropic(sites: AnisotropicTemplate): { [K in keyof mmCIF_Schema['atom_site_anisotrop']]?: CifField } {
+    const pdbx_auth_seq_id = CifField.ofTokens(sites.pdbx_auth_seq_id);
+    const pdbx_auth_comp_id = CifField.ofTokens(sites.pdbx_auth_comp_id);
+    const pdbx_auth_asym_id = CifField.ofTokens(sites.pdbx_auth_asym_id);
+    const pdbx_auth_atom_id = CifField.ofTokens(sites.pdbx_auth_atom_id);
+
+    const fields: { [K in keyof mmCIF_Schema['atom_site_anisotrop']]?: CifField } = {
+        id: CifField.ofStrings(sites.id),
+        type_symbol: CifField.ofTokens(sites.type_symbol),
+        pdbx_label_atom_id: pdbx_auth_atom_id,
+        pdbx_label_alt_id: CifField.ofTokens(sites.pdbx_label_alt_id),
+        pdbx_label_comp_id: pdbx_auth_comp_id,
+        pdbx_label_asym_id: pdbx_auth_asym_id,
+        pdbx_label_seq_id: pdbx_auth_seq_id,
+        pdbx_PDB_ins_code: CifField.ofTokens(sites.pdbx_PDB_ins_code),
+
+        pdbx_auth_seq_id,
+        pdbx_auth_comp_id,
+        pdbx_auth_asym_id,
+        pdbx_auth_atom_id,
+    };
+
+    // NOTE(review): assigned through `any` - presumably the `U[i][j]` names are not keys of the schema type; confirm
+    (fields as any)['U[1][1]'] = CifField.ofNumbers(sites['U[1][1]']);
+    (fields as any)['U[2][2]'] = CifField.ofNumbers(sites['U[2][2]']);
+    (fields as any)['U[3][3]'] = CifField.ofNumbers(sites['U[3][3]']);
+    (fields as any)['U[1][2]'] = CifField.ofNumbers(sites['U[1][2]']);
+    (fields as any)['U[1][3]'] = CifField.ofNumbers(sites['U[1][3]']);
+    (fields as any)['U[2][3]'] = CifField.ofNumbers(sites['U[2][3]']);
+
+    return fields;
+}
+
+/**
+ * Parse one ANISOU record and write its values as row `sites.index` of `sites`.
+ *
+ * @param sites column builders to append to; `sites.index` is advanced by one
+ * @param model not used by this function
+ * @param data tokenizer over the PDB text (`data.data` is the text itself)
+ * @param s offset of the first character of the record line
+ * @param e end offset of the record line; `e - s` is taken as the line length
+ */
+export function addAnisotropic(sites: AnisotropicTemplate, model: string, data: Tokenizer, s: number, e: number) {
+    const { data: str } = data;
+    const length = e - s;
+
+    // COLUMNS        DATA  TYPE    FIELD        DEFINITION
+    // -----------------------------------------------------------------
+    //  1 -  6        Record name   "ANISOU"
+    //  7 - 11        Integer       serial       Atom serial number.
+    Tokenizer.trim(data, s + 6, s + 11);
+    sites.id[sites.index] = str.substring(data.tokenStart, data.tokenEnd);
+
+    // 13 - 16        Atom          name         Atom name.
+    TokenBuilder.addToken(sites.pdbx_auth_atom_id, Tokenizer.trim(data, s + 12, s + 16));
+
+    // 17             Character     altLoc       Alternate location indicator
+    if (str.charCodeAt(s + 16) === 32) { // ' '
+        TokenBuilder.add(sites.pdbx_label_alt_id, 0, 0);
+    } else {
+        TokenBuilder.add(sites.pdbx_label_alt_id, s + 16, s + 17);
+    }
+
+    // 18 - 20        Residue name  resName      Residue name.
+    TokenBuilder.addToken(sites.pdbx_auth_comp_id, Tokenizer.trim(data, s + 17, s + 20));
+
+    // 22             Character     chainID      Chain identifier.
+    TokenBuilder.add(sites.pdbx_auth_asym_id, s + 21, s + 22);
+
+    // 23 - 26        Integer       resSeq       Residue sequence number.
+    TokenBuilder.addToken(sites.pdbx_auth_seq_id, Tokenizer.trim(data, s + 22, s + 26));
+
+    // 27             AChar         iCode        Insertion code.
+    if (str.charCodeAt(s + 26) === 32) { // ' '
+        TokenBuilder.add(sites.pdbx_PDB_ins_code, 0, 0);
+    } else {
+        TokenBuilder.add(sites.pdbx_PDB_ins_code, s + 26, s + 27);
+    }
+
+    // The six U values below are read as integers and divided by 10000.
+    // 29 - 35        Integer       u[0][0]      U(1,1)
+    sites['U[1][1]'][sites.index] = fastParseInt(str, s + 28, s + 35) / 10000;
+
+    // 36 - 42        Integer       u[1][1]      U(2,2)
+    sites['U[2][2]'][sites.index] = fastParseInt(str, s + 35, s + 42) / 10000;
+
+    // 43 - 49        Integer       u[2][2]      U(3,3)
+    sites['U[3][3]'][sites.index] = fastParseInt(str, s + 42, s + 49) / 10000;
+
+    // 50 - 56        Integer       u[0][1]      U(1,2)
+    sites['U[1][2]'][sites.index] = fastParseInt(str, s + 49, s + 56) / 10000;
+
+    // 57 - 63        Integer       u[0][2]      U(1,3)
+    sites['U[1][3]'][sites.index] = fastParseInt(str, s + 56, s + 63) / 10000;
+
+    // 64 - 70        Integer       u[1][2]      U(2,3)
+    sites['U[2][3]'][sites.index] = fastParseInt(str, s + 63, s + 70) / 10000;
+
+    // 77 - 78        LString(2)    element      Element symbol, right-justified.
+    if (length >= 78) {
+        Tokenizer.trim(data, s + 76, s + 78);
+
+        if (data.tokenStart < data.tokenEnd) {
+            TokenBuilder.addToken(sites.type_symbol, data);
+        } else {
+            guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16);
+        }
+    } else {
+        guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16);
+    }
+
+    // 79 - 80        LString(2)    charge       Charge on the atom.
+    // TODO
+
+    sites.index++;
+}
\ No newline at end of file
diff --git a/src/mol-model-formats/structure/pdb/to-cif.ts b/src/mol-model-formats/structure/pdb/to-cif.ts
index 9ef1223cd..cf967d435 100644
--- a/src/mol-model-formats/structure/pdb/to-cif.ts
+++ b/src/mol-model-formats/structure/pdb/to-cif.ts
@@ -17,6 +17,7 @@ import { EntityBuilder } from '../common/entity';
 import { Column } from '../../../mol-data/db';
 import { getMoleculeType } from '../../../mol-model/structure/model/types';
 import { getAtomSiteTemplate, addAtom, getAtomSite } from './atom-site';
+import { addAnisotropic, getAnisotropicTemplate, getAnisotropic } from './anisotropic';
 
 export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
     const { lines } = pdb;
@@ -25,11 +26,13 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
 
     // Count the atoms
     let atomCount = 0;
+    let anisotropicCount = 0;
     for (let i = 0, _i = lines.count; i < _i; i++) {
         const s = indices[2 * i], e = indices[2 * i + 1];
         switch (data[s]) {
             case 'A':
                 if (substringStartsWith(data, s, e, 'ATOM ')) atomCount++;
+                else if (substringStartsWith(data, s, e, 'ANISOU')) anisotropicCount++;
                 break;
             case 'H':
                 if (substringStartsWith(data, s, e, 'HETATM')) atomCount++;
@@ -38,6 +41,7 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
     }
 
     const atomSite = getAtomSiteTemplate(data, atomCount);
+    const anisotropic = getAnisotropicTemplate(data, anisotropicCount);
     const entityBuilder = new EntityBuilder();
     const helperCategories: CifCategory[] = [];
     const heteroNames: [string, string][] = [];
@@ -48,9 +52,12 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
         let s = indices[2 * i], e = indices[2 * i + 1];
         switch (data[s]) {
             case 'A':
-                if (!substringStartsWith(data, s, e, 'ATOM ')) continue;
-                if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
-                addAtom(atomSite, modelStr, tokenizer, s, e);
+                if (substringStartsWith(data, s, e, 'ATOM ')) {
+                    if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
+                    addAtom(atomSite, modelStr, tokenizer, s, e);
+                } else if (substringStartsWith(data, s, e, 'ANISOU')) {
+                    addAnisotropic(anisotropic, modelStr, tokenizer, s, e)
+                }
                 break;
             case 'C':
                 if (substringStartsWith(data, s, e, 'CRYST1')) {
@@ -157,7 +164,8 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> {
     const categories = {
         entity: entityBuilder.getEntityCategory(),
         chem_comp: componentBuilder.getChemCompCategory(),
-        atom_site: CifCategory.ofFields('atom_site', getAtomSite(atomSite))
+        atom_site: CifCategory.ofFields('atom_site', getAtomSite(atomSite)),
+        atom_site_anisotrop: CifCategory.ofFields('atom_site_anisotrop', getAnisotropic(anisotropic))
     } as any;
 
     for (const c of helperCategories) {
-- 
GitLab