/**
 * Values extracted from a PDB `HEADER` record.
 *
 * Every field is optional: a field is left `undefined` when the record is
 * missing, truncated, or blank in the corresponding columns.
 */
export type PdbHeaderData = {
    id_code?: string,
    dep_date?: string,
    classification?: string
};

/**
 * Parses one PDB `HEADER` record and stores its fields on `header`.
 *
 * @param data Full text that contains the record.
 * @param s Start offset (inclusive) of the record within `data`.
 * @param e End offset (exclusive) of the record within `data`.
 * @param header Target object; its three fields are overwritten in place.
 */
export function addHeader(data: string, s: number, e: number, header: PdbHeaderData) {

    // COLUMNS       DATA  TYPE     FIELD             DEFINITION
    // ------------------------------------------------------------------------------------
    //  1 -  6       Record name    "HEADER"
    // 11 - 50       String(40)     classification    Classifies the molecule(s).
    // 51 - 59       Date           depDate           Deposition date. This is the date the
    //                                                coordinates were received at the PDB.
    // 63 - 66       IDcode         idCode            This identifier is unique within the PDB.

    // PDB to PDBx/mmCIF Data Item Correspondences
    // classification       _struct_keywords.pdbx_keywords
    // depDate              _pdbx_database_status.recvd_initial_deposition_date
    // idCode               _entry.id

    const line = data.substring(s, e);

    // Fixed-width columns are space padded and may be blank or cut off. Trim each
    // field and map an empty result to `undefined` (hence `||`, not `??`), so that
    // a whitespace-only column is never mistaken for a real value by truthiness
    // checks on the header data.
    header.id_code = line.substring(62, 66).trim() || undefined;
    header.dep_date = line.substring(50, 59).trim() || undefined;
    header.classification = line.substring(10, 50).trim() || undefined;
}
PdbFile): Promise<CifFrame> { } break; case 'H': - if (substringStartsWith(data, s, e, 'HETATM')) { + if (substringStartsWith(data, s, e, 'HEADER')) { + addHeader(data, s, e, header); + } else if (substringStartsWith(data, s, e, 'HETATM')) { if (!modelNum) { modelNum++; modelStr = '' + modelNum; } addAtom(atomSite, modelStr, tokenizer, s, e, isPdbqt); } else if (substringStartsWith(data, s, e, 'HELIX')) { @@ -169,6 +173,26 @@ export async function pdbToMmCif(pdb: PdbFile): Promise<CifFrame> { } } + // build entry, struct_keywords and pdbx_database_status + if (header.id_code) { + const entry: CifCategory.SomeFields<mmCIF_Schema['entry']> = { + id: CifField.ofString(header.id_code) + }; + helperCategories.push(CifCategory.ofFields('entry', entry)); + } + if (header.classification) { + const struct_keywords: CifCategory.SomeFields<mmCIF_Schema['struct_keywords']> = { + pdbx_keywords: CifField.ofString(header.classification) + }; + helperCategories.push(CifCategory.ofFields('struct_keywords', struct_keywords)); + } + if (header.dep_date) { + const pdbx_database_status: CifCategory.SomeFields<mmCIF_Schema['pdbx_database_status']> = { + recvd_initial_deposition_date: CifField.ofString(header.dep_date) + }; + helperCategories.push(CifCategory.ofFields('pdbx_database_status', pdbx_database_status)); + } + // build entity and chem_comp categories const seqIds = Column.ofIntTokens(atomSite.auth_seq_id); const atomIds = Column.ofStringTokens(atomSite.auth_atom_id);