diff --git a/src/mol-model-formats/structure/mmcif/parser.ts b/src/mol-model-formats/structure/mmcif/parser.ts index 60fbd2a700cc5983cc8d76bb082311a902e871f1..97d6c72c2e0444d9f81cb9aa9627138b12131106 100644 --- a/src/mol-model-formats/structure/mmcif/parser.ts +++ b/src/mol-model-formats/structure/mmcif/parser.ts @@ -374,11 +374,19 @@ function getEntities(format: mmCIF_Format): Entities { } if (assignSubtype) { + const chemCompType = new Map<string, string>() + const { id, type } = format.data.chem_comp; + for (let i = 0, il = format.data.chem_comp._rowCount; i < il; i++) { + chemCompType.set(id.value(i), type.value(i)) + } + const { label_entity_id, label_comp_id } = format.data.atom_site; for (let i = 0 as ElementIndex, il = format.data.atom_site._rowCount; i < il; i++) { const entityId = label_entity_id.value(i); if (!entityIds.has(entityId)) { - subtypes[getEntityIndex(entityId)] = getEntitySubtype(label_comp_id.value(i)) + const compId = label_comp_id.value(i) + const compType = chemCompType.get(compId) || '' + subtypes[getEntityIndex(entityId)] = getEntitySubtype(compId, compType) entityIds.add(entityId) } } diff --git a/src/mol-model/structure/model/types.ts b/src/mol-model/structure/model/types.ts index 45f4013c3339fe4a257f79e7176a100e89b4e8ca..58b63f4cf515596089de11646a591c61ea3ac2d8 100644 --- a/src/mol-model/structure/model/types.ts +++ b/src/mol-model/structure/model/types.ts @@ -113,19 +113,34 @@ export const NucleicBackboneAtoms = new Set([ 'O2*', 'O3*', 'O4*', 'O5*', 'C1*', 'C2*', 'C3*', 'C4*', 'C5*' ]) -/** Chemical component type names for protein */ -export const ProteinComponentTypeNames = new Set([ - 'D-PEPTIDE LINKING', 'L-PEPTIDE LINKING', 'D-PEPTIDE NH3 AMINO TERMINUS', - 'L-PEPTIDE NH3 AMINO TERMINUS', 'D-PEPTIDE COOH CARBOXY TERMINUS', - 'L-PEPTIDE COOH CARBOXY TERMINUS', 'PEPTIDE LINKING', 'PEPTIDE-LIKE', - 'L-GAMMA-PEPTIDE, C-DELTA LINKING', 'D-GAMMA-PEPTIDE, C-DELTA LINKING', - 'L-BETA-PEPTIDE, C-GAMMA LINKING', 'D-BETA-PEPTIDE, C-GAMMA 
LINKING', +/** Chemical component type names for D-linked protein */ +export const DProteinComponentTypeNames = new Set([ + 'D-PEPTIDE LINKING', 'D-PEPTIDE NH3 AMINO TERMINUS', + 'D-PEPTIDE COOH CARBOXY TERMINUS', 'D-GAMMA-PEPTIDE, C-DELTA LINKING', + 'D-BETA-PEPTIDE, C-GAMMA LINKING' +]) + +/** Chemical component type names for L-linked protein */ +export const LProteinComponentTypeNames = new Set([ + 'L-PEPTIDE LINKING', 'L-PEPTIDE NH3 AMINO TERMINUS', + 'L-PEPTIDE COOH CARBOXY TERMINUS', 'L-GAMMA-PEPTIDE, C-DELTA LINKING', + 'L-BETA-PEPTIDE, C-GAMMA LINKING' +]) + +/** Chemical component type names for peptide-like protein */ +export const OtherProteinComponentTypeNames = new Set([ + 'PEPTIDE LINKING', 'PEPTIDE-LIKE', ]) +/** Chemical component type names for protein */ +export const ProteinComponentTypeNames = SetUtils.unionMany( + DProteinComponentTypeNames, LProteinComponentTypeNames, OtherProteinComponentTypeNames +) + /** Chemical component type names for DNA */ -export const DNAComponentTypeNames = [ +export const DNAComponentTypeNames = new Set([ 'DNA LINKING', 'L-DNA LINKING', 'DNA OH 5 PRIME TERMINUS', 'DNA OH 3 PRIME TERMINUS', -] +]) /** Chemical component type names for RNA */ export const RNAComponentTypeNames = new Set([ @@ -200,7 +215,7 @@ export function getMoleculeType(compType: string, compId: string) { return MoleculeType.protein } else if (RNAComponentTypeNames.has(compType)) { return MoleculeType.RNA - } else if (DNAComponentTypeNames.includes(compType)) { + } else if (DNAComponentTypeNames.has(compType)) { return MoleculeType.DNA } else if (SaccharideComponentTypeNames.has(compType)) { return MoleculeType.saccharide @@ -243,9 +258,20 @@ export function getEntityType(compId: string): mmCIF_Schema['entity']['type']['T } } -export function getEntitySubtype(compId: string): EntitySubtype { +export function getEntitySubtype(compId: string, compType: string): EntitySubtype { compId = compId.toUpperCase() - if (SaccharideCompIdMap.has(compId)) { + 
compType = compType.toUpperCase() + if (LProteinComponentTypeNames.has(compType)) { + return 'polypeptide(L)' + } else if (DProteinComponentTypeNames.has(compType)) { + return 'polypeptide(D)' + } else if (RNAComponentTypeNames.has(compType)) { + return 'polyribonucleotide' + } else if (DNAComponentTypeNames.has(compType)) { + return 'polydeoxyribonucleotide' + } else if (SaccharideComponentTypeNames.has(compType)) { + return 'oligosaccharide' + } else if (SaccharideCompIdMap.has(compId)) { return 'oligosaccharide' } else if (PeptideBaseNames.has(compId)) { return 'peptide nucleic acid'