Skip to content
Snippets Groups Projects
Commit c57311d6 authored by Alexander Rose
Browse files

carbohydrate improvements, updated carb table

parent 4d786dc6
No related branches found
No related tags found
No related merge requests found
...@@ -144,10 +144,13 @@ function getChemicalComponentMap(format: mmCIF_Format): Model['properties']['che ...@@ -144,10 +144,13 @@ function getChemicalComponentMap(format: mmCIF_Format): Model['properties']['che
function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap { function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap {
const map = new Map<string, SaccharideComponent>(); const map = new Map<string, SaccharideComponent>();
const { pdbx_chem_comp_identifier } = format.data
if (pdbx_chem_comp_identifier._rowCount > 0) { if (format.data.pdbx_chem_comp_identifier._rowCount > 0) {
const { comp_id, type, identifier } = pdbx_chem_comp_identifier // note that `pdbx_chem_comp_identifier` does not contain
for (let i = 0, il = pdbx_chem_comp_identifier._rowCount; i < il; ++i) { // a 'SNFG CARB SYMBOL' entry for 'Unknown' saccharide components
// so we always need to check `chem_comp` for those
const { comp_id, type, identifier } = format.data.pdbx_chem_comp_identifier
for (let i = 0, il = comp_id.rowCount; i < il; ++i) {
if (type.value(i) === 'SNFG CARB SYMBOL') { if (type.value(i) === 'SNFG CARB SYMBOL') {
const snfgName = identifier.value(i) const snfgName = identifier.value(i)
const saccharideComp = SaccharidesSnfgMap.get(snfgName) const saccharideComp = SaccharidesSnfgMap.get(snfgName)
...@@ -158,21 +161,24 @@ function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap ...@@ -158,21 +161,24 @@ function getSaccharideComponentMap(format: mmCIF_Format): SaccharideComponentMap
} }
} }
} }
} else if (format.data.chem_comp._rowCount > 0) { }
if (format.data.chem_comp._rowCount > 0) {
const { id, type } = format.data.chem_comp const { id, type } = format.data.chem_comp
for (let i = 0, il = id.rowCount; i < il; ++i) { for (let i = 0, il = id.rowCount; i < il; ++i) {
const _id = id.value(i) const _id = id.value(i)
if (map.has(_id)) continue
const _type = type.value(i) const _type = type.value(i)
if (SaccharideCompIdMap.has(_id)) { if (SaccharideCompIdMap.has(_id)) {
map.set(_id, SaccharideCompIdMap.get(_id)!) map.set(_id, SaccharideCompIdMap.get(_id)!)
} else if (!map.has(_id) && getMoleculeType(_type, _id) === MoleculeType.saccharide) { } else if (getMoleculeType(_type, _id) === MoleculeType.saccharide) {
map.set(_id, UnknownSaccharideComponent) map.set(_id, UnknownSaccharideComponent)
} }
} }
} else { } else {
const uniqueNames = getUniqueComponentNames(format) const uniqueNames = getUniqueComponentNames(format)
SaccharideCompIdMap.forEach((v, k) => { SaccharideCompIdMap.forEach((v, k) => {
if (uniqueNames.has(k)) map.set(k, v) if (!map.has(k) && uniqueNames.has(k)) map.set(k, v)
}) })
} }
return map return map
......
...@@ -224,7 +224,12 @@ export function getMoleculeType(compType: string, compId: string) { ...@@ -224,7 +224,12 @@ export function getMoleculeType(compType: string, compId: string) {
} else if (IonNames.has(compId)) { } else if (IonNames.has(compId)) {
return MoleculeType.ion return MoleculeType.ion
} else if (OtherComponentTypeNames.has(compType)) { } else if (OtherComponentTypeNames.has(compType)) {
if (SaccharideCompIdMap.has(compId)) {
// trust our saccharide table more than given 'non-polymer' or 'other' component type
return MoleculeType.saccharide
} else {
return MoleculeType.other return MoleculeType.other
}
} else { } else {
return MoleculeType.unknown return MoleculeType.unknown
} }
......
...@@ -172,6 +172,19 @@ const Monosaccharides: SaccharideComponent[] = [ ...@@ -172,6 +172,19 @@ const Monosaccharides: SaccharideComponent[] = [
{ abbr: 'Tag', name: 'Tagatose', color: SaccharideColors.Yellow, type: SaccharideType.Assigned }, { abbr: 'Tag', name: 'Tagatose', color: SaccharideColors.Yellow, type: SaccharideType.Assigned },
{ abbr: 'Sor', name: 'Sorbose', color: SaccharideColors.Orange, type: SaccharideType.Assigned }, { abbr: 'Sor', name: 'Sorbose', color: SaccharideColors.Orange, type: SaccharideType.Assigned },
{ abbr: 'Psi', name: 'Psicose', color: SaccharideColors.Pink, type: SaccharideType.Assigned }, { abbr: 'Psi', name: 'Psicose', color: SaccharideColors.Pink, type: SaccharideType.Assigned },
{ abbr: 'Hexose', name: 'Hexose', color: SaccharideColors.Secondary, type: SaccharideType.Hexose },
{ abbr: 'HexNAc', name: 'HexNAc', color: SaccharideColors.Secondary, type: SaccharideType.HexNAc },
{ abbr: 'Hexosamine', name: 'Hexosamine', color: SaccharideColors.Secondary, type: SaccharideType.Hexosamine },
{ abbr: 'Hexuronate', name: 'Hexuronate', color: SaccharideColors.Secondary, type: SaccharideType.Hexuronate },
{ abbr: 'Deoxyhexose', name: 'Deoxyhexose', color: SaccharideColors.Secondary, type: SaccharideType.Deoxyhexose },
{ abbr: 'DeoxyhexNAc', name: 'DeoxyhexNAc', color: SaccharideColors.Secondary, type: SaccharideType.DeoxyhexNAc },
{ abbr: 'Di-deoxyhexose', name: 'Di-deoxyhexose', color: SaccharideColors.Secondary, type: SaccharideType.DiDeoxyhexose },
{ abbr: 'Pentose', name: 'Pentose', color: SaccharideColors.Secondary, type: SaccharideType.Pentose },
{ abbr: 'Deoxynonulosonate', name: 'Deoxynonulosonate', color: SaccharideColors.Secondary, type: SaccharideType.Deoxynonulosonate },
{ abbr: 'Di-deoxynonulosonate', name: 'Di-deoxynonulosonate', color: SaccharideColors.Secondary, type: SaccharideType.DiDeoxynonulosonate },
{ abbr: 'Unknown', name: 'Unknown', color: SaccharideColors.Secondary, type: SaccharideType.Unknown },
{ abbr: 'Assigned', name: 'Assigned', color: SaccharideColors.Secondary, type: SaccharideType.Assigned },
] ]
export const SaccharidesSnfgMap = (function () { export const SaccharidesSnfgMap = (function () {
...@@ -200,33 +213,27 @@ const CommonSaccharideNames: { [k: string]: string[] } = { ...@@ -200,33 +213,27 @@ const CommonSaccharideNames: { [k: string]: string[] } = {
// Hexose // Hexose
Glc: [ Glc: [
'GLC', 'BGC', 'GLC', 'BGC',
'BOG', // via GlyFinder 'TRE', // di-saccharide but homomer
'TRE', // via GlyFinder, di-saccharide but homomer 'MLR', // tri-saccharide but homomer
'MLR', // via GlyFinder, tri-saccharide but homomer
], ],
Man: ['MAN', 'BMA'], Man: ['MAN', 'BMA'],
Gal: [ Gal: ['GLA', 'GAL', 'GZL'],
'GAL', 'GLA', Gul: ['4GL', 'GL0'],
'GXL' // via PubChem Alt: ['Z6H', '3MK'],
], All: ['AFD', 'ALL'],
Gul: ['GUP', 'GL0'], Tal: [],
Alt: ['ALT'], Ido: ['Z0F', '4N2'],
All: ['ALL', 'AFD'],
Tal: ['TAL'],
Ido: ['4N2'],
// HexNAc // HexNAc
GlcNAc: ['NAG', 'NDG'], GlcNAc: ['NDG', 'NAG'],
ManNAc: ['NGA', 'A2G'], ManNAc: ['BM3', 'BM7'],
GulNAc: [], GalNAc: ['A2G', 'NGA'],
GulNAc: ['LXB'],
AltNAc: [], AltNAc: [],
AllNAc: ['NAA'], AllNAc: ['NAA'],
TalNAc: [], TalNAc: [],
IdoNAc: ['HSQ'], IdoNAc: [],
// Hexosamine // Hexosamine
GlcN: [ GlcN: ['PA1', 'GCS'],
'GCS', 'PA1',
'IDU', 'SGN', 'SUS', // via GlyFinder
],
ManN: ['95Z'], ManN: ['95Z'],
GalN: ['X6X', '1GN'], GalN: ['X6X', '1GN'],
GulN: [], GulN: [],
...@@ -237,66 +244,76 @@ const CommonSaccharideNames: { [k: string]: string[] } = { ...@@ -237,66 +244,76 @@ const CommonSaccharideNames: { [k: string]: string[] } = {
// Hexuronate // Hexuronate
GlcA: ['GCU', 'BDP'], GlcA: ['GCU', 'BDP'],
ManA: ['MAV', 'BEM'], ManA: ['MAV', 'BEM'],
GalA: ['ADA', 'GTR'], GalA: ['ADA', 'GTR', 'GTK'],
GulA: ['LGU'], GulA: [],
AltA: [], AltA: [],
AllA: [], AllA: [],
TalA: ['X0X', 'X1X'], TalA: ['X1X', 'X0X'],
IdoA: [ IdoA: ['IDR'],
'IDR',
'IDS', // via GlyFinder
],
// Deoxyhexose // Deoxyhexose
Qui: ['G6D'], Qui: ['G6D', 'YYK'],
Rha: ['RAM', 'RM4'], Rha: ['RAM', 'RM4'],
'6dGul': [], '6dGul': ['66O'],
'6dAlt': [], '6dAlt': [],
'6dTal': [], '6dTal': [],
Fuc: ['FUC', 'FUL'], Fuc: ['FUC', 'FUL'],
// DeoxyhexNAc // DeoxyhexNAc
QuiNAc: [], QuiNAc: ['Z9W'],
RhaNAc: [], RhaNAc: [],
'6dAltNAc': [], '6dAltNAc': [],
'6dTalNAc': [], '6dTalNAc': [],
FucNAc: [], FucNAc: [],
// Di-deoxyhexose // Di-deoxyhexose
Oli: ['DDA'], Oli: ['DDA', 'RAE', 'Z5J'],
Tyv: ['TYV'], Tyv: ['TYV'],
Abe: ['ABE'], Abe: ['ABE'],
Par: ['PZU'], Par: ['PZU'],
Dig: [], Dig: ['Z3U'],
Col: [], Col: [],
// Pentose // Pentose
Ara: ['ARA', 'ARB'], Ara: ['ARA', 'ARB', 'AHR', 'FUB'],
Lyx: ['LDY'], Lyx: ['LDY', 'Z4W'],
Xyl: ['XYS', 'XYP'], Xyl: ['XZS', 'XYP', 'XYZ'],
Rib: ['RIP', '0MK'], Rib: ['YYM', 'RIP', 'RIB', 'BDR'],
// Deoxynonulosonate // Deoxynonulosonate
Kdn: ['KDN', 'KDM'], Kdn: ['KDM', 'KDN'],
Neu5Ac: ['SIA', 'SLB'], Neu5Ac: ['SIA', 'SLB'],
Neu5Gc: ['NGC', 'NGE'], Neu5Gc: ['NGC', 'NGE'],
Neu: [], Neu: [],
Sia: [], Sia: [],
// Di-deoxynonulosonate // Di-deoxynonulosonate
Pse: ['6PZ'], Pse: [],
Leg: [], Leg: [],
Aci: [], Aci: [],
'4eLeg': [], '4eLeg': [],
// Unknown // Unknown
Bac: ['B6D'], Bac: [],
LDManHep: ['GMH'], LDManHep: ['GMH'],
Kdo: ['KDO'], Kdo: ['KDO'],
Dha: [], Dha: [],
DDManHep: [], DDManHep: ['289'],
MurNAc: ['AMU'], MurNAc: ['MUB', 'AMU'],
MurNGc: [], MurNGc: [],
Mur: ['MUR'], Mur: ['1S4', 'MUR'],
// Assigned // Assigned
Api: ['XXM'], Api: ['XXM'],
Fru: ['BDF'], Fru: ['BDF', 'Z9N', 'FRU'],
Tag: ['T6T'], Tag: ['T6T'],
Sor: ['SOE'], Sor: ['SOE'],
Psi: [], Psi: ['PSV'],
// Generic
Hexose: [],
HexNAc: [],
Hexosamine: [],
Hexuronate: [],
Deoxyhexose: [],
DeoxyhexNAc: [],
'Di-deoxyhexose': [],
Pentose: [],
Deoxynonulosonate: [],
'Di-deoxynonulosonate': [],
Unknown: [],
Assigned: ['PUF'],
} }
const UnknownSaccharideNames = [ const UnknownSaccharideNames = [
......
...@@ -114,8 +114,11 @@ const branchedConnectedOnly = MS.struct.modifier.union([ ...@@ -114,8 +114,11 @@ const branchedConnectedOnly = MS.struct.modifier.union([
const ligand = MS.struct.modifier.union([ const ligand = MS.struct.modifier.union([
MS.struct.generator.atomGroups({ MS.struct.generator.atomGroups({
'entity-test': MS.core.logic.and([ 'entity-test': MS.core.logic.and([
MS.core.rel.neq([MS.ammp('entityType'), 'branched']), MS.core.rel.eq([MS.ammp('entityType'), 'non-polymer']),
MS.core.rel.eq([MS.ammp('entityType'), 'non-polymer']) MS.core.logic.not([MS.core.str.match([
MS.re('oligosaccharide', 'i'),
MS.ammp('entitySubtype')
])])
]), ]),
'chain-test': MS.core.rel.eq([MS.ammp('objectPrimitive'), 'atomistic']), 'chain-test': MS.core.rel.eq([MS.ammp('objectPrimitive'), 'atomistic']),
'residue-test': MS.core.logic.not([ 'residue-test': MS.core.logic.not([
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment