Skip to content
Snippets Groups Projects
Commit c285e30e authored by Alexander Rose's avatar Alexander Rose
Browse files

improved gro format reading

parent 789a3273
No related branches found
No related tags found
No related merge requests found
/**
* Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { _parse_mmCif } from '../mmcif/parser';
import { CifCategory, CifField } from '../../../mol-io/reader/cif';
import { Table, Column } from '../../../mol-data/db';
import { mmCIF_Schema } from '../../../mol-io/reader/cif/schema/mmcif';
import { WaterNames } from '../../../mol-model/structure/model/types';
import { SetUtils } from '../../../mol-util/set';
type Component = Table.Row<Pick<mmCIF_Schema['chem_comp'], 'id' | 'name' | 'type'>>
const ProteinAtomIdsList = [
new Set([ 'CA' ]),
new Set([ 'C' ]),
new Set([ 'N' ])
]
const RnaAtomIdsList = [
new Set([ 'P', 'O3\'', 'O3*' ]),
new Set([ 'C4\'', 'C4*' ]),
new Set([ 'O2\'', 'O2*', 'F2\'', 'F2*' ])
]
const DnaAtomIdsList = [
new Set([ 'P', 'O3\'', 'O3*' ]),
new Set([ 'C3\'', 'C3*' ]),
new Set([ 'O2\'', 'O2*', 'F2\'', 'F2*' ])
]
const StandardComponents = (function() {
const map = new Map<string, Component>()
const components: Component[] = [
{ id: 'HIS', name: 'HISTIDINE', type: 'L-peptide linking' },
{ id: 'ARG', name: 'ARGININE', type: 'L-peptide linking' },
{ id: 'LYS', name: 'LYSINE', type: 'L-peptide linking' },
{ id: 'ILE', name: 'ISOLEUCINE', type: 'L-peptide linking' },
{ id: 'PHE', name: 'PHENYLALANINE', type: 'L-peptide linking' },
{ id: 'LEU', name: 'LEUCINE', type: 'L-peptide linking' },
{ id: 'TRP', name: 'TRYPTOPHAN', type: 'L-peptide linking' },
{ id: 'ALA', name: 'ALANINE', type: 'L-peptide linking' },
{ id: 'MET', name: 'METHIONINE', type: 'L-peptide linking' },
{ id: 'CYS', name: 'CYSTEINE', type: 'L-peptide linking' },
{ id: 'ASN', name: 'ASPARAGINE', type: 'L-peptide linking' },
{ id: 'VAL', name: 'VALINE', type: 'L-peptide linking' },
{ id: 'GLY', name: 'GLYCINE', type: 'peptide linking' },
{ id: 'SER', name: 'SERINE', type: 'L-peptide linking' },
{ id: 'GLN', name: 'GLUTAMINE', type: 'L-peptide linking' },
{ id: 'TYR', name: 'TYROSINE', type: 'L-peptide linking' },
{ id: 'ASP', name: 'ASPARTIC ACID', type: 'L-peptide linking' },
{ id: 'GLU', name: 'GLUTAMIC ACID', type: 'L-peptide linking' },
{ id: 'THR', name: 'THREONINE', type: 'L-peptide linking' },
{ id: 'SEC', name: 'SELENOCYSTEINE', type: 'L-peptide linking' },
{ id: 'PYL', name: 'PYRROLYSINE', type: 'L-peptide linking' },
{ id: 'A', name: 'ADENOSINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
{ id: 'C', name: 'CYTIDINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
{ id: 'T', name: 'THYMIDINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
{ id: 'G', name: 'GUANOSINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
{ id: 'I', name: 'INOSINIC ACID', type: 'RNA linking' },
{ id: 'U', name: 'URIDINE-5\'-MONOPHOSPHATE', type: 'RNA linking' },
{ id: 'DA', name: '2\'-DEOXYADENOSINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
{ id: 'DC', name: '2\'-DEOXYCYTIDINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
{ id: 'DT', name: 'THYMIDINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
{ id: 'DG', name: '2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
{ id: 'DI', name: '2\'-DEOXYINOSINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
{ id: 'DU', name: '2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE', type: 'DNA linking' },
]
components.forEach(c => map.set(c.id, c))
return map
})()
export class ComponentBuilder {
private comps = new Map<string, Component>()
private ids: string[] = []
private names: string[] = []
private types: mmCIF_Schema['chem_comp']['type']['T'][] = []
private set(c: Component) {
this.comps.set(c.id, c)
this.ids.push(c.id)
this.names.push(c.name)
this.types.push(c.type)
}
private getAtomIds(index: number) {
const atomIds = new Set<string>()
let prevSeqId = this.seqId.value(index)
while (index < this.seqId.rowCount) {
const seqId = this.seqId.value(index)
if (seqId !== prevSeqId) break
atomIds.add(this.atomId.value(index))
prevSeqId - seqId
index += 1
}
return atomIds
}
private hasAtomIds (atomIds: Set<string>, atomIdsList: Set<string>[]) {
for (let i = 0, il = atomIdsList.length; i < il; ++i) {
if (!SetUtils.areIntersecting(atomIds, atomIdsList[i])) {
return false
}
}
return true
}
private getType(atomIds: Set<string>): Component['type'] {
if (this.hasAtomIds(atomIds, ProteinAtomIdsList)) {
return 'peptide linking'
} else if (this.hasAtomIds(atomIds, RnaAtomIdsList)) {
return 'RNA linking'
} else if (this.hasAtomIds(atomIds, DnaAtomIdsList)) {
return 'DNA linking'
} else {
return 'other'
}
}
has(compId: string) { return this.comps.has(compId) }
get(compId: string) { return this.comps.get(compId) }
add(compId: string, index: number) {
if (!this.has(compId)) {
if (StandardComponents.has(compId)) {
this.set(StandardComponents.get(compId)!)
} else if (WaterNames.has(compId)) {
this.set({ id: compId, name: 'WATER', type: 'non-polymer' })
} else {
const atomIds = this.getAtomIds(index)
this.set({ id: compId, name: compId, type: this.getType(atomIds) })
}
}
return this.get(compId)!
}
getChemCompCategory() {
const chemComp: CifCategory.SomeFields<mmCIF_Schema['chem_comp']> = {
id: CifField.ofStrings(this.ids),
name: CifField.ofStrings(this.names),
type: CifField.ofStrings(this.types),
}
return CifCategory.ofFields('chem_comp', chemComp)
}
constructor(private seqId: Column<number>, private atomId: Column<string>) {
}
}
\ No newline at end of file
/**
* Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { memoize1 } from '../../../mol-util/memoize';
const ChainIdAlphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
function _getChainId (index: number) {
const n = ChainIdAlphabet.length
let j = index
let k = 0
let chainId = ChainIdAlphabet[j % n]
while (j >= n) {
j = Math.floor(j / n)
chainId += ChainIdAlphabet[j % n]
k += 1
}
if (k >= 5) {
console.warn('getChainId overflow')
}
return chainId
}
export const getChainId = memoize1(_getChainId)
\ No newline at end of file
...@@ -13,62 +13,151 @@ import { CifCategory, CifField } from '../../mol-io/reader/cif'; ...@@ -13,62 +13,151 @@ import { CifCategory, CifField } from '../../mol-io/reader/cif';
import { Column } from '../../mol-data/db'; import { Column } from '../../mol-data/db';
import { mmCIF_Schema } from '../../mol-io/reader/cif/schema/mmcif'; import { mmCIF_Schema } from '../../mol-io/reader/cif/schema/mmcif';
import { guessElementSymbolString } from './util'; import { guessElementSymbolString } from './util';
import { MoleculeType, getMoleculeType, isPolymer } from '../../mol-model/structure/model/types';
import { ComponentBuilder } from './common/component';
import { getChainId } from './common/util';
// TODO multi model files // TODO multi model files
// TODO seperate chains
// TODO better entity handling
// TODO improve performance
function _entity(): { [K in keyof mmCIF_Schema['entity']]?: CifField } { class EntityBuilder {
return { private count = 0
id: CifField.ofStrings(['1', '2', '3']), private ids: string[] = []
type: CifField.ofStrings(['polymer', 'non-polymer', 'water']) private types: string[] = []
private descriptions: string[] = []
private heteroMap = new Map<string, string>()
private chainMap = new Map<string, string>()
private waterId?: string
private set(type: string, description: string) {
this.count += 1
this.ids.push(`${this.count}`)
this.types.push(type)
this.descriptions.push(description)
}
getEntityId(compId: string, moleculeType: MoleculeType, chainId: string): string {
if (moleculeType === MoleculeType.water) {
if (this.waterId === undefined) {
this.set('water', 'Water')
this.waterId = `${this.count}`
}
return this.waterId;
} else if (isPolymer(moleculeType)) {
if (!this.chainMap.has(chainId)) {
this.set('polymer', `Polymer ${this.chainMap.size + 1}`)
this.chainMap.set(chainId, `${this.count}`)
}
return this.chainMap.get(chainId)!
} else {
if (!this.heteroMap.has(compId)) {
this.set('non-polymer', compId)
this.heteroMap.set(compId, `${this.count}`)
}
return this.heteroMap.get(compId)!
} }
} }
function _atom_site(atoms: GroAtoms): { [K in keyof mmCIF_Schema['atom_site']]?: CifField } { getEntityCategory() {
const auth_asym_id = CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)) const entity: CifCategory.SomeFields<mmCIF_Schema['entity']> = {
id: CifField.ofStrings(this.ids),
type: CifField.ofStrings(this.types),
pdbx_description: CifField.ofStrings(this.descriptions),
}
return CifCategory.ofFields('entity', entity)
}
}
function getCategories(atoms: GroAtoms) {
const auth_atom_id = CifField.ofColumn(atoms.atomName) const auth_atom_id = CifField.ofColumn(atoms.atomName)
const auth_comp_id = CifField.ofColumn(atoms.residueName) const auth_comp_id = CifField.ofColumn(atoms.residueName)
const auth_seq_id = CifField.ofColumn(atoms.residueNumber)
return { const entityIds = new Array<string>(atoms.count)
const asymIds = new Array<string>(atoms.count)
const seqIds = new Uint32Array(atoms.count)
const ids = new Uint32Array(atoms.count)
const entityBuilder = new EntityBuilder()
const componentBuilder = new ComponentBuilder(atoms.residueNumber, atoms.atomName)
let currentEntityId = ''
let currentAsymIndex = 0
let currentAsymId = ''
let currentSeqId = 0
let prevMoleculeType = MoleculeType.unknown
let prevResidueNumber = -1
for (let i = 0, il = atoms.count; i < il; ++i) {
const residueNumber = atoms.residueNumber.value(i)
if (residueNumber !== prevResidueNumber) {
const compId = atoms.residueName.value(i)
const moleculeType = getMoleculeType(componentBuilder.add(compId, i).type, compId)
if (moleculeType !== prevMoleculeType || (
residueNumber !== prevResidueNumber + 1 && !(
// gro format allows only for 5 character residueNumbers, handle overflow here
prevResidueNumber === 99999 && residueNumber === 0
)
)) {
currentAsymId = getChainId(currentAsymIndex)
currentAsymIndex += 1
currentSeqId = 0
}
currentEntityId = entityBuilder.getEntityId(compId, moleculeType, currentAsymId)
currentSeqId += 1
prevResidueNumber = residueNumber
prevMoleculeType = moleculeType
}
entityIds[i] = currentEntityId
asymIds[i] = currentAsymId
seqIds[i] = currentSeqId
ids[i] = i
}
const auth_asym_id = CifField.ofColumn(Column.ofStringArray(asymIds))
const atom_site: CifCategory.SomeFields<mmCIF_Schema['atom_site']> = {
auth_asym_id, auth_asym_id,
auth_atom_id, auth_atom_id,
auth_comp_id, auth_comp_id,
auth_seq_id, auth_seq_id: CifField.ofColumn(atoms.residueNumber),
B_iso_or_equiv: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.float)), B_iso_or_equiv: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.float)),
Cartn_x: CifField.ofNumbers(Column.mapToArray(atoms.x, x => x * 10, Float32Array)), Cartn_x: CifField.ofNumbers(Column.mapToArray(atoms.x, x => x * 10, Float32Array)),
Cartn_y: CifField.ofNumbers(Column.mapToArray(atoms.y, y => y * 10, Float32Array)), Cartn_y: CifField.ofNumbers(Column.mapToArray(atoms.y, y => y * 10, Float32Array)),
Cartn_z: CifField.ofNumbers(Column.mapToArray(atoms.z, z => z * 10, Float32Array)), Cartn_z: CifField.ofNumbers(Column.mapToArray(atoms.z, z => z * 10, Float32Array)),
group_PDB: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), group_PDB: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)),
id: CifField.ofColumn(atoms.atomNumber), id: CifField.ofColumn(Column.ofIntArray(ids)),
label_alt_id: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), label_alt_id: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)),
label_asym_id: auth_asym_id, label_asym_id: auth_asym_id,
label_atom_id: auth_atom_id, label_atom_id: auth_atom_id,
label_comp_id: auth_comp_id, label_comp_id: auth_comp_id,
label_seq_id: auth_seq_id, label_seq_id: CifField.ofColumn(Column.ofIntArray(seqIds)),
label_entity_id: CifField.ofColumn(Column.ofConst('1', atoms.count, Column.Schema.str)), label_entity_id: CifField.ofColumn(Column.ofStringArray(entityIds)),
occupancy: CifField.ofColumn(Column.ofConst(1, atoms.count, Column.Schema.float)), occupancy: CifField.ofColumn(Column.ofConst(1, atoms.count, Column.Schema.float)),
type_symbol: CifField.ofStrings(Column.mapToArray(atoms.atomName, s => guessElementSymbolString(s))), type_symbol: CifField.ofStrings(Column.mapToArray(atoms.atomName, s => guessElementSymbolString(s))),
// type_symbol: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)),
pdbx_PDB_ins_code: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)), pdbx_PDB_ins_code: CifField.ofColumn(Column.Undefined(atoms.count, Column.Schema.str)),
pdbx_PDB_model_num: CifField.ofColumn(Column.ofConst('1', atoms.count, Column.Schema.str)), pdbx_PDB_model_num: CifField.ofColumn(Column.ofConst('1', atoms.count, Column.Schema.str)),
} }
return {
entity: entityBuilder.getEntityCategory(),
chem_comp: componentBuilder.getChemCompCategory(),
atom_site: CifCategory.ofFields('atom_site', atom_site)
}
} }
async function groToMmCif(gro: GroFile) { async function groToMmCif(gro: GroFile) {
const categories = { const categories = getCategories(gro.structures[0].atoms)
entity: CifCategory.ofFields('entity', _entity()),
atom_site: CifCategory.ofFields('atom_site', _atom_site(gro.structures[0].atoms))
} as any;
return { return {
header: 'GRO', header: gro.structures[0].header.title,
categoryNames: Object.keys(categories), categoryNames: Object.keys(categories),
categories categories
}; };
......
...@@ -81,7 +81,7 @@ export class EntityBuilder { ...@@ -81,7 +81,7 @@ export class EntityBuilder {
private chainMap = new Map<string, string>() private chainMap = new Map<string, string>()
private waterId?: string private waterId?: string
private add(type: string, description: string) { private set(type: string, description: string) {
this.count += 1 this.count += 1
this.ids.push(`${this.count}`) this.ids.push(`${this.count}`)
this.types.push(type) this.types.push(type)
...@@ -92,13 +92,13 @@ export class EntityBuilder { ...@@ -92,13 +92,13 @@ export class EntityBuilder {
if (isHet) { if (isHet) {
if (WaterNames.has(residueName)) { if (WaterNames.has(residueName)) {
if (this.waterId === undefined) { if (this.waterId === undefined) {
this.add('water', 'Water') this.set('water', 'Water')
this.waterId = `${this.count}` this.waterId = `${this.count}`
} }
return this.waterId; return this.waterId;
} else { } else {
if (!this.heteroMap.has(residueName)) { if (!this.heteroMap.has(residueName)) {
this.add('non-polymer', residueName) this.set('non-polymer', residueName)
this.heteroMap.set(residueName, `${this.count}`) this.heteroMap.set(residueName, `${this.count}`)
} }
return this.heteroMap.get(residueName)! return this.heteroMap.get(residueName)!
...@@ -107,7 +107,7 @@ export class EntityBuilder { ...@@ -107,7 +107,7 @@ export class EntityBuilder {
return this.compoundsMap.get(chainId)! return this.compoundsMap.get(chainId)!
} else { } else {
if (!this.chainMap.has(chainId)) { if (!this.chainMap.has(chainId)) {
this.add('polymer', chainId) this.set('polymer', chainId)
this.chainMap.set(chainId, `${this.count}`) this.chainMap.set(chainId, `${this.count}`)
} }
return this.chainMap.get(chainId)! return this.chainMap.get(chainId)!
...@@ -126,7 +126,7 @@ export class EntityBuilder { ...@@ -126,7 +126,7 @@ export class EntityBuilder {
setCompounds(compounds: Compound[]) { setCompounds(compounds: Compound[]) {
for (let i = 0, il = compounds.length; i < il; ++i) { for (let i = 0, il = compounds.length; i < il; ++i) {
const { chains, name } = compounds[i] const { chains, name } = compounds[i]
this.add('polymer', name) this.set('polymer', name)
for (let j = 0, jl = chains.length; j < jl; ++j) { for (let j = 0, jl = chains.length; j < jl; ++j) {
this.compoundsMap.set(chains[j], `${this.count}`) this.compoundsMap.set(chains[j], `${this.count}`)
} }
......
...@@ -238,4 +238,8 @@ export namespace AtomicHierarchy { ...@@ -238,4 +238,8 @@ export namespace AtomicHierarchy {
export function chainEndResidueIndexExcl(segs: AtomicSegments, cI: ChainIndex) { export function chainEndResidueIndexExcl(segs: AtomicSegments, cI: ChainIndex) {
return segs.residueAtomSegments.index[segs.chainAtomSegments.offsets[cI + 1] - 1] + 1 as ResidueIndex; return segs.residueAtomSegments.index[segs.chainAtomSegments.offsets[cI + 1] - 1] + 1 as ResidueIndex;
} }
export function chainResidueCount(segs: AtomicSegments, cI: ChainIndex) {
return chainEndResidueIndexExcl(segs, cI) - chainStartResidueIndex(segs, cI)
}
} }
...@@ -31,6 +31,7 @@ export function getAtomicDerivedData(data: AtomicData, index: AtomicIndex, chemi ...@@ -31,6 +31,7 @@ export function getAtomicDerivedData(data: AtomicData, index: AtomicIndex, chemi
molType = getMoleculeType(chemCompMap.get(compId)!.type, compId) molType = getMoleculeType(chemCompMap.get(compId)!.type, compId)
moleculeTypeMap.set(compId, molType) moleculeTypeMap.set(compId, molType)
} else { } else {
console.log('chemComp not found', compId)
molType = getMoleculeType(getComponentType(compId), compId) molType = getMoleculeType(getComponentType(compId), compId)
// TODO if unknown molecule type, use atom names to guess molecule type // TODO if unknown molecule type, use atom names to guess molecule type
moleculeTypeMap.set(compId, molType) moleculeTypeMap.set(compId, molType)
......
...@@ -185,6 +185,8 @@ export const BaseNames = SetUtils.unionMany(RnaBaseNames, DnaBaseNames, PeptideB ...@@ -185,6 +185,8 @@ export const BaseNames = SetUtils.unionMany(RnaBaseNames, DnaBaseNames, PeptideB
export const isPurineBase = (compId: string) => PurineBaseNames.has(compId.toUpperCase()) export const isPurineBase = (compId: string) => PurineBaseNames.has(compId.toUpperCase())
export const isPyrimidineBase = (compId: string) => PyrimidineBaseNames.has(compId.toUpperCase()) export const isPyrimidineBase = (compId: string) => PyrimidineBaseNames.has(compId.toUpperCase())
export const PolymerNames = SetUtils.unionMany(AminoAcidNames, BaseNames)
/** get the molecule type from component type and id */ /** get the molecule type from component type and id */
export function getMoleculeType(compType: string, compId: string) { export function getMoleculeType(compType: string, compId: string) {
compType = compType.toUpperCase() compType = compType.toUpperCase()
...@@ -227,12 +229,12 @@ export function getComponentType(compId: string): mmCIF_Schema['chem_comp']['typ ...@@ -227,12 +229,12 @@ export function getComponentType(compId: string): mmCIF_Schema['chem_comp']['typ
export function getEntityType(compId: string): mmCIF_Schema['entity']['type']['T'] { export function getEntityType(compId: string): mmCIF_Schema['entity']['type']['T'] {
compId = compId.toUpperCase() compId = compId.toUpperCase()
if (AminoAcidNames.has(compId) || RnaBaseNames.has(compId) || DnaBaseNames.has(compId)) { if (WaterNames.has(compId)) {
return 'water'
} else if (PolymerNames.has(compId)) {
return 'polymer' return 'polymer'
} else if (SaccharideCompIdMap.has(compId)) { } else if (SaccharideCompIdMap.has(compId)) {
return 'branched' return 'branched'
} else if (WaterNames.has(compId)) {
return 'water'
} else { } else {
return 'non-polymer' return 'non-polymer'
} }
......
...@@ -50,6 +50,13 @@ export namespace SetUtils { ...@@ -50,6 +50,13 @@ export namespace SetUtils {
return intersection; return intersection;
} }
export function areIntersecting<T>(setA: Set<T>, setB: Set<T>): boolean {
for (const elem of Array.from(setB)) {
if (setA.has(elem)) return true;
}
return false;
}
/** Create set containing elements of set a that are not in set b. */ /** Create set containing elements of set a that are not in set b. */
export function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> { export function difference<T>(setA: Set<T>, setB: Set<T>): Set<T> {
const difference = new Set(setA); const difference = new Set(setA);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment