diff --git a/src/mol-io/reader/cif/data-model.ts b/src/mol-io/reader/cif/data-model.ts index c5778c7b55a847616c5c36ff57dcf300f9af1926..db35b9419e9e38c74f50ad7ed7c9be6b9cabcb51 100644 --- a/src/mol-io/reader/cif/data-model.ts +++ b/src/mol-io/reader/cif/data-model.ts @@ -101,7 +101,7 @@ export namespace CifField { return ofStrings([value]); } - export function ofStrings(values: string[]): CifField { + export function ofStrings(values: ArrayLike<string>): CifField { const rowCount = values.length; const str: CifField['str'] = row => { const ret = values[row]; if (!ret || ret === '.' || ret === '?') return ''; return ret; }; const int: CifField['int'] = row => { const v = values[row]; return fastParseInt(v, 0, v.length) || 0; }; diff --git a/src/mol-model-formats/structure/_spec/pdb.spec.ts b/src/mol-model-formats/structure/_spec/pdb.spec.ts index 8e365ed4c9c8f2fc1b01e447e32e5314eeb6bd45..90a453277857d128415367de3ab0b63113f1a9a4 100644 --- a/src/mol-model-formats/structure/_spec/pdb.spec.ts +++ b/src/mol-model-formats/structure/_spec/pdb.spec.ts @@ -4,8 +4,8 @@ * @author Alexander Rose <alexander.rose@weirdbyte.de> */ -import { guessElementSymbol } from '../pdb/to-cif'; import { TokenBuilder } from 'mol-io/reader/common/text/tokenizer'; +import { guessElementSymbolTokens } from '../util'; const records = [ ['ATOM 19 HD23 LEU A 1 151.940 143.340 155.670 0.00 0.00', 'H'], @@ -19,7 +19,7 @@ describe('PDB to-cif', () => { for (let i = 0, il = records.length; i < il; ++i) { const [ data, element ] = records[i] const tokens = TokenBuilder.create(data, 2) - guessElementSymbol(tokens, data, 12, 16) + guessElementSymbolTokens(tokens, data, 12, 16) expect(data.substring(tokens.indices[0], tokens.indices[1])).toBe(element) } }); diff --git a/src/mol-model-formats/structure/gro.ts b/src/mol-model-formats/structure/gro.ts index bd75839a3008eba2d73005d55155c0f40d210e08..2b7d95817cd4c73a9b0d9ce797cdd2e7910f7af5 100644 --- a/src/mol-model-formats/structure/gro.ts +++ 
b/src/mol-model-formats/structure/gro.ts @@ -12,6 +12,7 @@ import { GroFile, GroAtoms } from 'mol-io/reader/gro/schema'; import { CifCategory, CifField } from 'mol-io/reader/cif'; import { Column } from 'mol-data/db'; import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; +import { guessElementSymbolString } from './util'; // TODO multi model files // TODO seperate chains @@ -37,7 +38,7 @@ function _atom_site(atoms: GroAtoms): { [K in keyof mmCIF_Schema['atom_site']]?: auth_comp_id, auth_seq_id, B_iso_or_equiv: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.float)), - Cartn_x: CifField.ofNumbers(Column.mapToArray(atoms.x, x => x * 10, Float32Array)), + Cartn_x: CifField.ofNumbers(Column.mapToArray(atoms.x, x => x * 10, Float32Array)), Cartn_y: CifField.ofNumbers(Column.mapToArray(atoms.y, y => y * 10, Float32Array)), Cartn_z: CifField.ofNumbers(Column.mapToArray(atoms.z, z => z * 10, Float32Array)), group_PDB: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), @@ -52,7 +53,8 @@ function _atom_site(atoms: GroAtoms): { [K in keyof mmCIF_Schema['atom_site']]?: label_entity_id: CifField.ofColumn(Column.ofConst('1', atoms.count, Column.Schema.str)), occupancy: CifField.ofColumn(Column.ofConst(1, atoms.count, Column.Schema.float)), - type_symbol: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), + type_symbol: CifField.ofStrings(Column.mapToArray(atoms.atomName, s => guessElementSymbolString(s))), + // type_symbol: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), pdbx_PDB_ins_code: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), pdbx_PDB_model_num: CifField.ofColumn(Column.ofConst('1', atoms.count, Column.Schema.str)), diff --git a/src/mol-model-formats/structure/pdb/to-cif.ts b/src/mol-model-formats/structure/pdb/to-cif.ts index 0f699f297a4c3f03095cd1514bfb8d7db7c129b8..d280d1d371d8aa7c87410dfc9da5c884a517c4c9 100644 --- a/src/mol-model-formats/structure/pdb/to-cif.ts 
+++ b/src/mol-model-formats/structure/pdb/to-cif.ts @@ -8,11 +8,12 @@ import { substringStartsWith } from 'mol-util/string'; import { CifField, CifCategory, CifFrame } from 'mol-io/reader/cif'; import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; -import { TokenBuilder, Tokenizer, Tokens } from 'mol-io/reader/common/text/tokenizer'; +import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer'; import { PdbFile } from 'mol-io/reader/pdb/schema'; import { parseCryst1, parseRemark350, parseMtrix } from './assembly'; import { WaterNames } from 'mol-model/structure/model/types'; import { parseHelix, parseSheet } from './secondary-structure'; +import { guessElementSymbolTokens } from '../util'; function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } { return { @@ -89,43 +90,6 @@ function getEntityId(residueName: string, isHet: boolean) { return '1'; } -export function guessElementSymbol(tokens: Tokens, str: string, start: number, end: number) { - let s = start, e = end - 1 - - // trim spaces and numbers - let c = str.charCodeAt(s) - while ((c === 32 || (c >= 48 && c <= 57)) && s <= e) c = str.charCodeAt(++s) - c = str.charCodeAt(e) - while ((c === 32 || (c >= 48 && c <= 57)) && e >= s) c = str.charCodeAt(--e) - - ++e - - if (s === e) return TokenBuilder.add(tokens, s, e) // empty - if (s + 1 === e) return TokenBuilder.add(tokens, s, e) // one char - - c = str.charCodeAt(s) - - if (s + 2 === e) { // two chars - const c2 = str.charCodeAt(s + 1) - if ( - ((c === 78 || c === 110) && (c2 === 65 || c2 === 97)) || // NA na Na nA - ((c === 67 || c === 99) && (c2 === 76 || c2 === 108)) || // CL - ((c === 70 || c === 102) && (c2 === 69 || c2 === 101)) // FE - ) return TokenBuilder.add(tokens, s, s + 2) - } - - if ( - c === 67 || c === 99 || // C c - c === 72 || c === 104 || // H h - c === 78 || c === 110 || // N n - c === 79 || c === 111 || // O o - c === 80 || c === 112 || // P p - c === 83 || c === 115 // S s - ) return 
TokenBuilder.add(tokens, s, s + 1) - - TokenBuilder.add(tokens, s, s) // no reasonable guess, add empty token -} - function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number, isHet: boolean) { const { data: str } = data; const length = e - s; @@ -199,10 +163,10 @@ function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: num if (data.tokenStart < data.tokenEnd) { TokenBuilder.addToken(sites.type_symbol, data); } else { - guessElementSymbol(sites.type_symbol, str, s + 12, s + 16) + guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16) } } else { - guessElementSymbol(sites.type_symbol, str, s + 12, s + 16) + guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16) } sites.label_entity_id[sites.index] = getEntityId(residueName, isHet); diff --git a/src/mol-model-formats/structure/util.ts b/src/mol-model-formats/structure/util.ts new file mode 100644 index 0000000000000000000000000000000000000000..3fc4b147cd52913d580339b6e5d41ca3e3dcec04 --- /dev/null +++ b/src/mol-model-formats/structure/util.ts @@ -0,0 +1,62 @@ +/** + * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info. 
/**
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

import { TokenBuilder, Tokens } from 'mol-io/reader/common/text/tokenizer';

/** Matches a run of whitespace and/or ASCII digits at the start or at the end of a string. */
const reSpacesAndDigitsAtEnds = /^[\s\d]+|[\s\d]+$/g

/**
 * Guess an element symbol from the atom-name field `str[start, end)` and add
 * the guess to `tokens` as a start/end (exclusive) offset pair into `str`.
 * Exactly one token is added per call; it is empty when no guess can be made.
 *
 * Heuristic:
 * 1. Skip spaces and digits at both ends of the field.
 * 2. Nothing left -> empty token; a single character -> that character.
 * 3. Exactly two characters spelling NA, CL or FE (any case) -> both characters.
 * 4. Otherwise, if the first character is one of C, H, N, O, P, S (any case)
 *    -> that first character.
 * 5. Otherwise -> empty token.
 *
 * @param tokens token list the guess is appended to
 * @param str string containing the atom-name field
 * @param start offset of the first character of the field
 * @param end offset one past the last character of the field
 */
export function guessElementSymbolTokens(tokens: Tokens, str: string, start: number, end: number) {
    let s = start, e = end - 1

    // trim spaces (char code 32) and numbers (char codes 48-57)
    let c = str.charCodeAt(s)
    while ((c === 32 || (c >= 48 && c <= 57)) && s <= e) c = str.charCodeAt(++s)
    c = str.charCodeAt(e)
    while ((c === 32 || (c >= 48 && c <= 57)) && e >= s) c = str.charCodeAt(--e)

    // make `e` exclusive again so that `e - s` is the trimmed length
    ++e

    if (s === e) return TokenBuilder.add(tokens, s, e) // empty
    if (s + 1 === e) return TokenBuilder.add(tokens, s, e) // one char

    c = str.charCodeAt(s)

    if (s + 2 === e) { // two chars
        const c2 = str.charCodeAt(s + 1)
        if (
            ((c === 78 || c === 110) && (c2 === 65 || c2 === 97)) || // NA na Na nA
            ((c === 67 || c === 99) && (c2 === 76 || c2 === 108)) || // CL
            ((c === 70 || c === 102) && (c2 === 69 || c2 === 101)) // FE
        ) return TokenBuilder.add(tokens, s, s + 2)
    }

    if (
        c === 67 || c === 99 || // C c
        c === 72 || c === 104 || // H h
        c === 78 || c === 110 || // N n
        c === 79 || c === 111 || // O o
        c === 80 || c === 112 || // P p
        c === 83 || c === 115 // S s
    ) return TokenBuilder.add(tokens, s, s + 1)

    TokenBuilder.add(tokens, s, s) // no reasonable guess, add empty token
}

/**
 * Guess an element symbol from an atom name given as a plain string.
 *
 * Applies the same heuristic as `guessElementSymbolTokens`, but returns the
 * guess as an upper-cased string instead of adding a token:
 * 1. Strip whitespace and digits at both ends, then upper-case.
 * 2. Nothing left -> `''`; a single character -> that character.
 * 3. Exactly `NA`, `CL` or `FE` -> that two-letter symbol.
 * 4. Otherwise, if the first character is one of C, H, N, O, P, S -> that character.
 * 5. Otherwise -> `''`.
 *
 * @param str atom name, e.g. `CA`, `HD23` or `1HD2`
 * @returns the guessed (upper-case) element symbol or `''` if there is no reasonable guess
 */
export function guessElementSymbolString(str: string): string {
    // trim spaces and numbers, convert to upper case; digits have to be
    // stripped as well (not only whitespace) so that names such as `1HD2`
    // or `FE2` resolve to `H` and `FE`, matching the token-based variant
    const name = str.replace(reSpacesAndDigitsAtEnds, '').toUpperCase()
    const l = name.length

    if (l === 0) return name // empty
    if (l === 1) return name // one char

    if (l === 2) { // two chars
        if (name === 'NA' || name === 'CL' || name === 'FE') return name
    }

    const c = name.charAt(0)
    if (c === 'C' || c === 'H' || c === 'N' || c === 'O' || c === 'P' || c === 'S') return c

    return '' // no reasonable guess, return empty string
}