Skip to content
Snippets Groups Projects
Commit d0b5f448 authored by David Sehnal's avatar David Sehnal
Browse files

wip added mol-model-parsers module

parent 65f1ba36
No related branches found
No related tags found
No related merge requests found
/**
* Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import Format from '../format';
import { Model } from '../model';
import { Task } from 'mol-task';
import { PdbFile } from 'mol-io/reader/pdb/schema';
import from_mmCIF from './mmcif';
import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
import { substringStartsWith } from 'mol-util/string';
import { TokenBuilder, Tokenizer } from 'mol-io/reader/common/text/tokenizer';
import { CifField, CifCategory } from 'mol-io/reader/cif';
import CifTextField, { CifTextValueField } from 'mol-io/reader/cif/text/field';
function toCategory(name: string, fields: { [name: string]: CifField | undefined }, rowCount: number): CifCategory {
return {
name,
fieldNames: Object.keys(fields),
rowCount,
getField(f: string) {
return fields[f];
}
}
}
function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } {
return {
id: CifTextValueField(['1', '2', '3']),
type: CifTextValueField(['polymer', 'non-polymer', 'water'])
}
}
function atom_site_template(data: string, count: number) {
const str = () => new Array(count) as string[];
const ts = () => TokenBuilder.create(data, 2 * count);
return {
count,
group_PDB: ts(),
id: str(),
auth_atom_id: ts(),
label_alt_id: ts(),
auth_comp_id: ts(),
auth_asym_id: ts(),
auth_seq_id: ts(),
pdbx_PDB_ins_code: ts(),
Cartn_x: ts(),
Cartn_y: ts(),
Cartn_z: ts(),
occupancy: ts(),
B_iso_or_equiv: ts(),
type_symbol: ts(),
pdbx_PDB_model_num: str(),
label_entity_id: str()
};
}
function _atom_site(sites: AtomSiteTemplate): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } {
const auth_asym_id = CifTextField(sites.auth_asym_id, sites.count);
const auth_atom_id = CifTextField(sites.auth_atom_id, sites.count);
const auth_comp_id = CifTextField(sites.auth_comp_id, sites.count);
const auth_seq_id = CifTextField(sites.auth_seq_id, sites.count);
return {
auth_asym_id,
auth_atom_id,
auth_comp_id,
auth_seq_id,
B_iso_or_equiv: CifTextField(sites.B_iso_or_equiv, sites.count),
Cartn_x: CifTextField(sites.Cartn_x, sites.count),
Cartn_y: CifTextField(sites.Cartn_y, sites.count),
Cartn_z: CifTextField(sites.Cartn_z, sites.count),
group_PDB: CifTextField(sites.group_PDB, sites.count),
id: CifTextValueField(sites.id),
label_alt_id: CifTextField(sites.label_alt_id, sites.count),
label_asym_id: auth_asym_id,
label_atom_id: auth_atom_id,
label_comp_id: auth_comp_id,
label_seq_id: auth_seq_id,
label_entity_id: CifTextValueField(sites.label_entity_id),
occupancy: CifTextField(sites.occupancy, sites.count),
type_symbol: CifTextField(sites.type_symbol, sites.count),
pdbx_PDB_ins_code: CifTextField(sites.pdbx_PDB_ins_code, sites.count),
pdbx_PDB_model_num: CifTextValueField(sites.pdbx_PDB_model_num)
};
}
function addAtom(sites: AtomSiteTemplate, model: string, data: Tokenizer, s: number, e: number) {
const { data: str } = data;
let startPos = s;
let start = s;
const end = e;
const length = end - start;
// TODO: filter invalid atoms
// COLUMNS DATA TYPE CONTENTS
// --------------------------------------------------------------------------------
// 1 - 6 Record name "ATOM "
Tokenizer.trim(data, start, start + 6);
TokenBuilder.add(sites.group_PDB, data.tokenStart, data.tokenEnd);
// 7 - 11 Integer Atom serial number.
// TODO: support HEX
start = startPos + 6;
Tokenizer.trim(data, start, start + 5);
sites.id[sites.id.length] = data.data.substring(data.tokenStart, data.tokenEnd);
// 13 - 16 Atom Atom name.
start = startPos + 12;
Tokenizer.trim(data, start, start + 4);
TokenBuilder.add(sites.auth_atom_id, data.tokenStart, data.tokenEnd);
// 17 Character Alternate location indicator.
if (str.charCodeAt(startPos + 16) === 32) { // ' '
TokenBuilder.add(sites.label_alt_id, 0, 0);
} else {
TokenBuilder.add(sites.label_alt_id, startPos + 16, startPos + 17);
}
// 18 - 20 Residue name Residue name.
start = startPos + 17;
Tokenizer.trim(data, start, start + 3);
TokenBuilder.add(sites.auth_comp_id, data.tokenStart, data.tokenEnd);
// 22 Character Chain identifier.
TokenBuilder.add(sites.auth_asym_id, startPos + 21, startPos + 22);
// 23 - 26 Integer Residue sequence number.
// TODO: support HEX
start = startPos + 22;
Tokenizer.trim(data, start, start + 4);
TokenBuilder.add(sites.auth_seq_id, data.tokenStart, data.tokenEnd);
// 27 AChar Code for insertion of residues.
if (str.charCodeAt(startPos + 26) === 32) { // ' '
TokenBuilder.add(sites.label_alt_id, 0, 0);
} else {
TokenBuilder.add(sites.label_alt_id, startPos + 26, startPos + 27);
}
// 31 - 38 Real(8.3) Orthogonal coordinates for X in Angstroms.
start = startPos + 30;
Tokenizer.trim(data, start, start + 8);
TokenBuilder.add(sites.Cartn_x, data.tokenStart, data.tokenEnd);
// 39 - 46 Real(8.3) Orthogonal coordinates for Y in Angstroms.
start = startPos + 38;
Tokenizer.trim(data, start, start + 8);
TokenBuilder.add(sites.Cartn_y, data.tokenStart, data.tokenEnd);
// 47 - 54 Real(8.3) Orthogonal coordinates for Z in Angstroms.
start = startPos + 46;
Tokenizer.trim(data, start, start + 8);
TokenBuilder.add(sites.Cartn_z, data.tokenStart, data.tokenEnd);
// 55 - 60 Real(6.2) Occupancy.
start = startPos + 54;
Tokenizer.trim(data, start, start + 6);
TokenBuilder.add(sites.occupancy, data.tokenStart, data.tokenEnd);
// 61 - 66 Real(6.2) Temperature factor (Default = 0.0).
if (length >= 66) {
start = startPos + 60;
Tokenizer.trim(data, start, start + 6);
TokenBuilder.add(sites.B_iso_or_equiv, data.tokenStart, data.tokenEnd);
} else {
TokenBuilder.add(sites.label_alt_id, 0, 0);
}
// 73 - 76 LString(4) Segment identifier, left-justified.
// ignored
// 77 - 78 LString(2) Element symbol, right-justified.
if (length >= 78) {
start = startPos + 76;
Tokenizer.trim(data, start, start + 2);
if (data.tokenStart < data.tokenEnd) {
TokenBuilder.add(sites.type_symbol, data.tokenStart, data.tokenEnd);
} else {
// "guess" the symbol
TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
}
} else {
TokenBuilder.add(sites.type_symbol, startPos + 12, startPos + 13);
}
// TODO
sites.label_entity_id.push('1');
sites.pdbx_PDB_model_num.push(model);
}
type AtomSiteTemplate = typeof atom_site_template extends (...args: any) => infer T ? T : never
async function pdbToMmCIF(pdb: PdbFile): Promise<Format.mmCIF> {
const { lines } = pdb;
const { data, indices } = lines;
const tokenizer = Tokenizer(data);
// Count the atoms
let atomCount = 0;
for (let i = 0, _i = lines.count; i < _i; i++) {
const s = indices[2 * i], e = indices[2 * i + 1];
switch (data[s]) {
case 'A':
if (substringStartsWith(data, s, e, 'ATOM ')) atomCount++;
break;
case 'H':
if (!substringStartsWith(data, s, e, 'HETATM')) atomCount++;
break;
}
}
const atom_site = atom_site_template(data, atomCount);
let modelNum = 0, modelStr = '';
for (let i = 0, _i = lines.count; i < _i; i++) {
const s = indices[2 * i], e = indices[2 * i + 1];
switch (data[s]) {
case 'A':
if (!substringStartsWith(data, s, e, 'ATOM ')) continue;
if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
addAtom(atom_site, modelStr, tokenizer, s, e);
break;
case 'H':
if (!substringStartsWith(data, s, e, 'HETATM')) continue;
if (!modelNum) { modelNum++; modelStr = '' + modelNum; }
addAtom(atom_site, modelStr, tokenizer, s, e);
break;
case 'M':
if (substringStartsWith(data, s, e, 'MODEL ')) {
modelNum++;
modelStr = '' + modelNum;
}
break;
}
}
const categories = {
entity: toCategory('entity', _entity(), 3),
atom_site: toCategory('atom_site', _atom_site(atom_site), atomCount)
}
return Format.mmCIF({
header: pdb.id || 'PDB',
categoryNames: Object.keys(categories),
categories
});
}
function buildModels(format: Format.PDB): Task<ReadonlyArray<Model>> {
return Task.create('Create PDB Model', async ctx => {
await ctx.update('Converting to mmCIF...');
const cif = await pdbToMmCIF(format.data);
return from_mmCIF(cif).runInContext(ctx);
});
}
export default buildModels;
\ No newline at end of file
......@@ -23,6 +23,7 @@
"mol-math": ["./mol-math"],
"mol-model": ["./mol-model"],
"mol-model-props": ["./mol-model-props", "./mol-model-props/index.ts"],
"mol-model-parsers": ["./mol-model-parsers"],
"mol-ql": ["./mol-ql"],
"mol-repr": ["./mol-repr"],
"mol-script": ["./mol-script"],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment