/**
 * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

import { guessElementSymbol } from '../pdb/to-cif';
import { TokenBuilder } from 'mol-io/reader/common/text/tokenizer';

/**
 * Pairs of `[record, expected element symbol]`.
 *
 * The test below reads the atom name from string indices 12 (inclusive) to 16
 * (exclusive), so every record must keep the fixed-column layout: the atom name
 * has to sit exactly in those four characters. The records differ in how the
 * name is placed inside that field (full width, padded on both sides,
 * left-aligned, right-aligned).
 */
const records: ReadonlyArray<[string, string]> = [
    ['ATOM     19 HD23 LEU A   1     151.940 143.340 155.670  0.00  0.00', 'H'],
    ['ATOM     38  CA  SER A   3     146.430 138.150 162.270  0.00  0.00', 'C'],
    ['ATOM     38 NA   SER A   3     146.430 138.150 162.270  0.00  0.00', 'NA'],
    ['ATOM     38  NAA SER A   3     146.430 138.150 162.270  0.00  0.00', 'N'],
]

describe('PDB to-cif', () => {
    it('guess-element-symbol', () => {
        for (const [data, element] of records) {
            // A new token builder per record: the guessed symbol is then always
            // the first token, i.e. the range indices[0]..indices[1].
            const tokens = TokenBuilder.create(data, 2)
            guessElementSymbol(tokens, data, 12, 16)
            expect(data.substring(tokens.indices[0], tokens.indices[1])).toBe(element)
        }
    });
});
/** True for the char code of a space (32) or of an ASCII digit '0'-'9' (48-57). */
function isSpaceOrDigitCode(code: number): boolean {
    return code === 32 || (code >= 48 && code <= 57)
}

/** Maps the char codes of ASCII 'a'-'z' to 'A'-'Z'; any other code (including NaN) is returned as is. */
function toUpperCaseCode(code: number): number {
    return code >= 97 && code <= 122 ? code - 32 : code
}

/**
 * Guess an element symbol from the atom name found in `str` between `start`
 * (inclusive) and `end` (exclusive) and append the guessed range to `tokens`.
 *
 * Exactly one token is always added, so the token count stays in step with the
 * number of calls:
 * - spaces and digits are trimmed from both ends of the name first;
 * - an empty name yields an empty token, a one-character name is taken as is;
 * - a two-character name that is NA, CL or FE (any letter case) is taken whole;
 * - otherwise, if the first character is one of C, H, N, O, P, S (any letter
 *   case), only that first character is taken;
 * - if none of the above applies, an empty token is added.
 *
 * @param tokens token builder that receives the guessed range
 * @param str string containing the atom name
 * @param start index of the first character of the atom name field
 * @param end index one past the last character of the atom name field
 */
export function guessElementSymbol(tokens: Tokens, str: string, start: number, end: number): void {
    // Never look past the end of the string: charCodeAt returns NaN there, which
    // would stop the trailing trim early and could produce a token range that
    // lies outside of `str`.
    const limit = Math.min(end, str.length)

    let s = start, e = limit - 1

    // trim spaces and numbers on both sides
    while (s <= e && isSpaceOrDigitCode(str.charCodeAt(s))) ++s
    while (e >= s && isSpaceOrDigitCode(str.charCodeAt(e))) --e

    const length = e + 1 - s

    if (length <= 0) { // empty (or nothing left of the field inside the string)
        TokenBuilder.add(tokens, s, s)
        return
    }
    if (length === 1) { // one char
        TokenBuilder.add(tokens, s, s + 1)
        return
    }

    const c1 = toUpperCaseCode(str.charCodeAt(s))

    if (length === 2) { // two chars
        const c2 = toUpperCaseCode(str.charCodeAt(s + 1))
        if (
            (c1 === 78 && c2 === 65) || // NA
            (c1 === 67 && c2 === 76) || // CL
            (c1 === 70 && c2 === 69) // FE
        ) {
            TokenBuilder.add(tokens, s, s + 2)
            return
        }
    }

    switch (c1) {
        case 67: // C
        case 72: // H
        case 78: // N
        case 79: // O
        case 80: // P
        case 83: // S
            TokenBuilder.add(tokens, s, s + 1)
            return
    }

    TokenBuilder.add(tokens, s, s) // no reasonable guess, add empty token
}
guessElementSymbol(sites.type_symbol, str, s + 12, s + 16) } sites.label_entity_id[sites.index] = getEntityId(residueName, isHet); diff --git a/src/mol-model/structure/model/properties/utils/guess-element.ts b/src/mol-model/structure/model/properties/utils/guess-element.ts index 05658249f5aed93f771b66cae471b9a74be22955..54a66a8f99537da61f09ebcdab5c4a30e33237fc 100644 --- a/src/mol-model/structure/model/properties/utils/guess-element.ts +++ b/src/mol-model/structure/model/properties/utils/guess-element.ts @@ -12,7 +12,7 @@ function charAtIsNumber(str: string, index: number) { return code >= 48 && code <= 57 } -export function guessElement (str: string) { +export function guessElement(str: string) { let at = str.trim().toUpperCase() if (charAtIsNumber(at, 0)) at = at.substr(1)