Skip to content
Snippets Groups Projects
secondary-structure.ts 16.3 KiB
Newer Older
Alexander Rose's avatar
Alexander Rose committed
/**
 * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

import { SecondaryStructure } from 'mol-model/structure/model/properties/seconday-structure';
import { ResidueIndex } from 'mol-model/structure';
import { MoleculeType, SecondaryStructureType } from 'mol-model/structure/model/types';
import { Vec3 } from 'mol-math/linear-algebra';
import { GridLookup3D } from 'mol-math/geometry';
import { SortedArray } from 'mol-data/int';
import { IntAdjacencyGraph } from 'mol-math/graph';
import { BitFlags } from 'mol-util';
import { ElementIndex } from 'mol-model/structure/model/indexing';
import { AtomicHierarchy, AtomicConformation } from '../atomic';

export function computeSecondaryStructure(hierarchy: AtomicHierarchy, conformation: AtomicConformation): SecondaryStructure {
    // TODO use Zhang-Skolnik for CA alpha only parts or for coarse parts with per-residue elements
    return computeModelDSSP(hierarchy, conformation)
}

export function computeModelDSSP(hierarchy: AtomicHierarchy, conformation: AtomicConformation) {
    const { lookup3d, proteinResidues } = calcAtomicTraceLookup3D(hierarchy, conformation)
    const backboneIndices = calcBackboneAtomIndices(hierarchy, proteinResidues)
    const hbonds = calcBackboneHbonds(hierarchy, conformation, proteinResidues, backboneIndices, lookup3d)

    const residueCount = proteinResidues.length
    const flags = new Uint32Array(residueCount)

    const ctx: DSSPContext = {
        hierarchy,
        proteinResidues,
        flags,
        hbonds
    }

    assignBends(ctx)
    assignTurns(ctx)
    assignHelices(ctx)
    assignBridges(ctx)
    assignLadders(ctx)
    assignSheets(ctx)

    const assignment = getDSSPAssignment(flags)

    const type = new Uint32Array(hierarchy.residues._rowCount) as unknown as SecondaryStructureType[]
    for (let i = 0, il = proteinResidues.length; i < il; ++i) {
        type[proteinResidues[i]] = assignment[i]
    }
Alexander Rose's avatar
Alexander Rose committed
    const secondaryStructure: SecondaryStructure = {
        type,
        key: [], // TODO
        elements: [] // TODO
    }
    return secondaryStructure
}

interface DSSPContext {
    hierarchy: AtomicHierarchy
    proteinResidues: SortedArray<ResidueIndex>
    /** flags for each residue */
    flags: Uint32Array

    hbonds: DsspHbonds
}

interface DSSPType extends BitFlags<DSSPType.Flag> { }
namespace DSSPType {
    export const is: (t: DSSPType, f: Flag) => boolean = BitFlags.has
    export const create: (f: Flag) => DSSPType = BitFlags.create
    export const enum Flag {
        _ = 0x0,
        H = 0x1,
        B = 0x2,
        E = 0x4,
        G = 0x8,
        I = 0x10,
        S = 0x20,
        T = 0x40,
        T3 = 0x80,
        T4 = 0x100,
        T5 = 0x200,
    }
}

/** max distance between two C-alpha atoms to check for hbond */
const caMaxDist = 7.0;
Alexander Rose's avatar
Alexander Rose committed

function calcAtomicTraceLookup3D(hierarchy: AtomicHierarchy, conformation: AtomicConformation) {
    const { x, y, z } = conformation;
    const { moleculeType, traceElementIndex } = hierarchy.derived.residue
    const indices: number[] = []
    const _proteinResidues: number[] = []
    for (let i = 0, il = moleculeType.length; i < il; ++i) {
        if (moleculeType[i] === MoleculeType.protein) {
            indices[indices.length] = traceElementIndex[i]
            _proteinResidues[_proteinResidues.length] = i
        }
    }
    const lookup3d = GridLookup3D({ x, y, z, indices: SortedArray.ofSortedArray(indices) });
    const proteinResidues = SortedArray.ofSortedArray<ResidueIndex>(_proteinResidues)
    return { lookup3d, proteinResidues }
}

interface BackboneAtomIndices {
    cIndices: ArrayLike<ElementIndex | -1>
    hIndices: ArrayLike<ElementIndex | -1>
    oIndices: ArrayLike<ElementIndex | -1>
    nIndices: ArrayLike<ElementIndex | -1>
}

function calcBackboneAtomIndices(hierarchy: AtomicHierarchy, proteinResidues: SortedArray<ResidueIndex>): BackboneAtomIndices {
    const residueCount = proteinResidues.length
    const { index } = hierarchy

    const c = new Int32Array(residueCount)
    const h = new Int32Array(residueCount)
    const o = new Int32Array(residueCount)
    const n = new Int32Array(residueCount)

    for (let i = 0, il = residueCount; i < il; ++i) {
        const rI = proteinResidues[i]
        c[i] = index.findAtomOnResidue(rI, 'C')
        h[i] = index.findAtomOnResidue(rI, 'H')
        o[i] = index.findAtomOnResidue(rI, 'O')
        n[i] = index.findAtomOnResidue(rI, 'N')
    }

    return {
        cIndices: c as unknown as ArrayLike<ElementIndex | -1>,
        hIndices: h as unknown as ArrayLike<ElementIndex | -1>,
        oIndices: o as unknown as ArrayLike<ElementIndex | -1>,
        nIndices: n as unknown as ArrayLike<ElementIndex | -1>,
    }
}

type DsspHbonds = IntAdjacencyGraph<{ readonly energies: ArrayLike<number> }>

function calcBackboneHbonds(hierarchy: AtomicHierarchy, conformation: AtomicConformation, proteinResidues: SortedArray<ResidueIndex>, backboneIndices: BackboneAtomIndices, lookup3d: GridLookup3D): DsspHbonds {
    const { cIndices, hIndices, nIndices, oIndices } = backboneIndices
    const { index } = hierarchy
    const { x, y, z } = conformation
    const { traceElementIndex } = hierarchy.derived.residue

    const residueCount = proteinResidues.length
    const position = (i: number, v: Vec3) => Vec3.set(v, x[i], y[i], z[i])

    const oAtomResidues: number[] = [];
    const nAtomResidues: number[] = [];
    const energies: number[] = [];

    const oPos = Vec3.zero()
    const cPos = Vec3.zero()
    const caPos = Vec3.zero()
    const nPos = Vec3.zero()
    const hPos = Vec3.zero()

    const cPosPrev = Vec3.zero()
    const oPosPrev = Vec3.zero()

    for (let i = 0, il = proteinResidues.length; i < il; ++i) {
        const oPI = i
        const oRI = proteinResidues[i]

        const oAtom = oIndices[oPI]
        const cAtom = cIndices[oPI]
        const caAtom = traceElementIndex[oRI]

        // continue if residue is missing O or C atom
        if (oAtom === -1 || cAtom === -1) continue

        // ignore C-terminal residue as acceptor
        if (index.findAtomOnResidue(oRI, 'OXT') !== -1) continue

        position(oAtom, oPos)
        position(cAtom, cPos)
        position(caAtom, caPos)

        const { indices, count } = lookup3d.find(caPos[0], caPos[1], caPos[2], caMaxDist)

        for (let j = 0; j < count; ++j) {
            const nPI = indices[j]

            // ignore bonds within a residue or to prev or next residue, TODO take chain border into account
            if (nPI === oPI || nPI - 1 === oPI || nPI + 1 === oPI) continue

            const nAtom = nIndices[nPI]
            if (nAtom === -1) continue

            position(nAtom, nPos)

            const hAtom = hIndices[nPI]
            if (hAtom === -1) {
                // approximate calculation of H position, TODO factor out
                if (nPI === 0) continue
                const nPIprev = nPI - 1

                const oAtomPrev = oIndices[nPIprev]
                const cAtomPrev = cIndices[nPIprev]
                if (oAtomPrev === -1 || cAtomPrev === -1) continue

                position(oAtomPrev, oPosPrev)
                position(cAtomPrev, cPosPrev)

                Vec3.sub(hPos, cPosPrev, oPosPrev)
                const dist = Vec3.distance(oPosPrev, cPosPrev)
                Vec3.scaleAndAdd(hPos, nPos, hPos, 1 / dist)
            } else {
                position(hAtom, hPos)
            }

            const e = calcHbondEnergy(oPos, cPos, nPos, hPos)
            if (e > hbondEnergyCutoff) continue

            oAtomResidues[oAtomResidues.length] = oPI
            nAtomResidues[nAtomResidues.length] = nPI
            energies[energies.length] = e
        }
    }

    return buildHbondGraph(residueCount, oAtomResidues, nAtomResidues, energies);
}

function buildHbondGraph(residueCount: number, oAtomResidues: number[], nAtomResidues: number[], energies: number[]) {
    const builder = new IntAdjacencyGraph.DirectedEdgeBuilder(residueCount, oAtomResidues, nAtomResidues);
    const _energies = new Float32Array(builder.slotCount);

    for (let i = 0, _i = builder.edgeCount; i < _i; i++) {
        builder.addNextEdge();
        builder.assignProperty(_energies, energies[i]);
    }

    return builder.createGraph({ energies });
}

/** Original priority: H,B,E,G,I,T,S */
function getOriginalResidueFlag(f: DSSPType) {
    if (DSSPType.is(f, DSSPType.Flag.H)) return SecondaryStructureType.SecondaryStructureDssp.H
    if (DSSPType.is(f, DSSPType.Flag.B)) return SecondaryStructureType.SecondaryStructureDssp.B
    if (DSSPType.is(f, DSSPType.Flag.E)) return SecondaryStructureType.SecondaryStructureDssp.E
    if (DSSPType.is(f, DSSPType.Flag.G)) return SecondaryStructureType.SecondaryStructureDssp.G
    if (DSSPType.is(f, DSSPType.Flag.I)) return SecondaryStructureType.SecondaryStructureDssp.I
    if (DSSPType.is(f, DSSPType.Flag.T)) return SecondaryStructureType.SecondaryStructureDssp.T
    if (DSSPType.is(f, DSSPType.Flag.S)) return SecondaryStructureType.SecondaryStructureDssp.S
    return SecondaryStructureType.Flag.None
}

/** Version 2.1.0 priority: I,H,B,E,G,T,S */
function getUpdatedResidueFlag(f: DSSPType) {
    if (DSSPType.is(f, DSSPType.Flag.I)) return SecondaryStructureType.SecondaryStructureDssp.I
    if (DSSPType.is(f, DSSPType.Flag.H)) return SecondaryStructureType.SecondaryStructureDssp.H
    if (DSSPType.is(f, DSSPType.Flag.B)) return SecondaryStructureType.SecondaryStructureDssp.B
    if (DSSPType.is(f, DSSPType.Flag.E)) return SecondaryStructureType.SecondaryStructureDssp.E
    if (DSSPType.is(f, DSSPType.Flag.G)) return SecondaryStructureType.SecondaryStructureDssp.G
    if (DSSPType.is(f, DSSPType.Flag.T)) return SecondaryStructureType.SecondaryStructureDssp.T
    if (DSSPType.is(f, DSSPType.Flag.S)) return SecondaryStructureType.SecondaryStructureDssp.S
    return SecondaryStructureType.Flag.None
}

// function geFlagName(f: DSSPType) {
//     if (DSSPType.is(f, DSSPType.Flag.I)) return 'I'
//     if (DSSPType.is(f, DSSPType.Flag.H)) return 'H'
//     if (DSSPType.is(f, DSSPType.Flag.B)) return 'B'
//     if (DSSPType.is(f, DSSPType.Flag.E)) return 'E'
//     if (DSSPType.is(f, DSSPType.Flag.G)) return 'G'
//     if (DSSPType.is(f, DSSPType.Flag.T)) return 'T'
//     if (DSSPType.is(f, DSSPType.Flag.S)) return 'S'
//     return '-'
// }

function getDSSPAssignment(flags: Uint32Array, useOriginal = false) {
    const getResidueFlag = useOriginal ? getOriginalResidueFlag : getUpdatedResidueFlag
    const type = new Uint32Array(flags.length)
    for (let i = 0, il = flags.length; i < il; ++i) {
        const f = DSSPType.create(flags[i])
        // console.log(i, geFlagName(f))
        type[i] = getResidueFlag(f)
    }
    return type as unknown as ArrayLike<SecondaryStructureType>
}

/**
 * Constant for electrostatic energy in kcal/mol
 *      f  *  q1 *   q2
 * Q = -332 * 0.42 * 0.20
 *
 * f is the dimensional factor
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 * q1 and q2 are partial charges which are placed on the C,O
 * (+q1,-q1) and N,H (-q2,+q2)
 */
const Q = -27.888

/** cutoff for hbonds in kcal/mol, must be lower to be consider as an hbond */
const hbondEnergyCutoff = -0.5

/**
 * E = Q * (1/r(ON) + l/r(CH) - l/r(OH) - l/r(CN))
 */
function calcHbondEnergy(oPos: Vec3, cPos: Vec3, nPos: Vec3, hPos: Vec3) {
    const distOH = Vec3.distance(oPos, hPos)
    const distCH = Vec3.distance(cPos, hPos)
    const distCN = Vec3.distance(cPos, nPos)
    const distON = Vec3.distance(oPos, nPos)

    const e1 = Q / distOH - Q / distCH
David Sehnal's avatar
David Sehnal committed
    const e2 = Q / distCN - Q / distON
Alexander Rose's avatar
Alexander Rose committed
    return e1 + e2
}

/**
 * The basic turn pattern is a single H bond of type (i, i + n).
 * We assign an n-turn at residue i if there is an H bond from CO(i) to NH(i + n),
 * i.e., “n-turn(i)=: Hbond(i, i + n), n = 3, 4, 5.”
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 * Type: T
 */
function assignTurns(ctx: DSSPContext) {
    const { proteinResidues, hbonds, flags, hierarchy } = ctx
    const { chains, residueAtomSegments, chainAtomSegments } = hierarchy
    const { label_asym_id } = chains

David Sehnal's avatar
David Sehnal committed
    const turnFlag = [0, 0, 0, DSSPType.Flag.T3, DSSPType.Flag.T4, DSSPType.Flag.T5]
Alexander Rose's avatar
Alexander Rose committed

    for (let i = 0, il = proteinResidues.length; i < il; ++i) {
        const rI = proteinResidues[i]
        const cI = chainAtomSegments.index[residueAtomSegments.offsets[rI]]

        // TODO should take sequence gaps into account
        for (let k = 3; k <= 5; ++k) {
            if (i + k >= proteinResidues.length) continue

            const rN = proteinResidues[i + k]
            const cN = chainAtomSegments.index[residueAtomSegments.offsets[rN]]
            // check if on same chain
            if (!label_asym_id.areValuesEqual(cI, cN)) continue

            // check if hbond exists
            if (hbonds.getDirectedEdgeIndex(i, i + k) !== -1) {
                flags[i] |= turnFlag[k] | DSSPType.Flag.T
            }
        }
    }
}

/**
 * Two nonoverlapping stretches of three residues each, i - 1, i, i + 1 and j - 1, j, j + 1,
David Sehnal's avatar
David Sehnal committed
 * form either a parallel or antiparallel bridge, depending on which of
Alexander Rose's avatar
Alexander Rose committed
 * two basic patterns is matched. We assign a bridge between residues i and j
 * if there are two H bonds characteristic of P-structure; in particular,
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 * Parallel Bridge(i, j) =:
 *      [Hbond(i - 1, j) and Hbond(j, i + 1)] or
 *      [Hbond(j - 1, i) and Hbond(i, j + 1)]
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 * Antiparallel Bridge(i, j) =:
 *      [Hbond(i, j) and Hbond(j, i)] or
 *      [Hbond(i - 1, j + 1) and Hbond(j - 1, i + l)]
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 * Type: B
 */
function assignBridges(ctx: DSSPContext) {
    const { proteinResidues, hbonds, flags } = ctx

    const { offset, b } = hbonds
    let i: number, j: number

    for (let k = 0, kl = proteinResidues.length; k < kl; ++k) {
        for (let t = offset[k], _t = offset[k + 1]; t < _t; t++) {
            const l = b[t]
            if (k > l) continue
Alexander Rose's avatar
Alexander Rose committed
            // Parallel Bridge(i, j) =: [Hbond(i - 1, j) and Hbond(j, i + 1)]
            i = k + 1 // k is i - 1
            j = l
            if (i !== j && hbonds.getDirectedEdgeIndex(j, i + 1) !== -1) {
                flags[i] |= DSSPType.Flag.B
                flags[j] |= DSSPType.Flag.B
            }

            // Parallel Bridge(i, j) =: [Hbond(j - 1, i) and Hbond(i, j + 1)]
            i = k
            j = l - 1 // l is j + 1
            if (i !== j && hbonds.getDirectedEdgeIndex(j - 1, i) !== -1) {
                flags[i] |= DSSPType.Flag.B
                flags[j] |= DSSPType.Flag.B
            }

            // Antiparallel Bridge(i, j) =: [Hbond(i, j) and Hbond(j, i)]
            i = k
            j = l
            if (i !== j && hbonds.getDirectedEdgeIndex(j, i) !== -1) {
                flags[i] |= DSSPType.Flag.B
                flags[j] |= DSSPType.Flag.B
            }

            // Antiparallel Bridge(i, j) =: [Hbond(i - 1, j + 1) and Hbond(j - 1, i + l)]
            i = k + 1
            j = l - 1
            if (i !== j && hbonds.getDirectedEdgeIndex(j - 1, i + 1) !== -1) {
                flags[i] |= DSSPType.Flag.B
                flags[j] |= DSSPType.Flag.B
            }
        }
    }
}

/**
 * A minimal helix is defined by two consecutive n-turns.
 * For example, a 4-helix, of minimal length 4 from residues i to i + 3,
 * requires 4-turns at residues i - 1 and i,
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 *      3-helix(i,i + 2)=: [3-turn(i - 1) and 3-turn(i)]
 *      4-helix(i,i + 3)=: [4-turn(i - 1) and 4-turn(i)]
 *      5-helix(i,i + 4)=: [5-turn(i - 1) and 5-turn(i)]
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 * Type: G (n=3), H (n=4), I (n=5)
 */
function assignHelices(ctx: DSSPContext) {
    const { proteinResidues, flags } = ctx
David Sehnal's avatar
David Sehnal committed

    const turnFlag = [0, 0, 0, DSSPType.Flag.T3, DSSPType.Flag.T4, DSSPType.Flag.T5]
    const helixFlag = [0, 0, 0, DSSPType.Flag.G, DSSPType.Flag.H, DSSPType.Flag.I]
Alexander Rose's avatar
Alexander Rose committed

    for (let i = 1, il = proteinResidues.length; i < il; ++i) {
        const fI = DSSPType.create(flags[i])
        const fI1 = DSSPType.create(flags[i - 1])

        for (let k = 3; k <= 5; ++k) {
            if (DSSPType.is(fI, turnFlag[k]) && DSSPType.is(fI1, turnFlag[k])) {
                for (let l = 0; l < k; ++l) {
                    flags[i + l] |= helixFlag[k]
                }
            }
        }
    }
}

/**
 * ladder=: set of one or more consecutive bridges of identical type
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 * Type: E
 */
function assignLadders(ctx: DSSPContext) {
    // TODO
}

/**
 * sheet=: set of one or more ladders connected by shared residues
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 * Type: E
 */
function assignSheets(ctx: DSSPContext) {
    // TODO
}

/**
 * Bend(i) =: [angle ((CW - Ca(i - 2)),(C"(i + 2) - C"(i))) > 70"]
David Sehnal's avatar
David Sehnal committed
 *
Alexander Rose's avatar
Alexander Rose committed
 * Type: S
 */
function assignBends(ctx: DSSPContext) {
    // TODO
}