diff --git a/src/apps/structure-info/model.ts b/src/apps/structure-info/model.ts index 13c6ed4422ff664de86e1eb801a6e224be21cb0e..0728dde5d6a95f069cc3fabc9d38037694f421ab 100644 --- a/src/apps/structure-info/model.ts +++ b/src/apps/structure-info/model.ts @@ -50,7 +50,7 @@ export function residueLabel(model: Model, rI: number) { } export function printSecStructure(model: Model) { - console.log('Secondary Structure\n============='); + console.log('\nSecondary Structure\n============='); const { residues } = model.atomicHierarchy; const { type, key } = model.properties.secondaryStructure; @@ -76,41 +76,32 @@ export function printBonds(structure: Structure) { if (!Unit.isAtomic(unit)) continue; const elements = unit.elements; - const { count, offset, neighbor } = unit.bonds; + const { a, b } = unit.bonds; const { model } = unit; - if (!count) continue; + if (!a.length) continue; - for (let j = 0; j < offset.length - 1; ++j) { - const start = offset[j]; - const end = offset[j + 1]; - - if (end <= start) continue; - - const aI = elements[j]; - for (let _bI = start; _bI < end; _bI++) { - const bI = elements[neighbor[_bI]]; - console.log(`${atomLabel(model, aI)} -- ${atomLabel(model, bI)}`); - } + for (let bI = 0, _bI = a.length; bI < _bI; bI++) { + const x = a[bI], y = b[bI]; + if (x >= y) continue; + console.log(`${atomLabel(model, elements[x])} -- ${atomLabel(model, elements[y])}`); } } } export function printSequence(model: Model) { - console.log('Sequence\n============='); + console.log('\nSequence\n============='); const { byEntityKey } = model.sequence; for (const key of Object.keys(byEntityKey)) { const seq = byEntityKey[+key]; - console.log(`${seq.entityId} (${seq.num.value(0)}, ${seq.num.value(seq.num.rowCount - 1)}) (${seq.compId.value(0)}, ${seq.compId.value(seq.compId.rowCount - 1)})`); - // for (let i = 0; i < seq.compId.rowCount; i++) { - // console.log(`${seq.entityId} ${seq.num.value(i)} ${seq.compId.value(i)}`); - // } + console.log(`${seq.entityId} 
(${seq.sequence.kind} ${seq.num.value(0)} (offset ${seq.sequence.offset}), ${seq.num.value(seq.num.rowCount - 1)}) (${seq.compId.value(0)}, ${seq.compId.value(seq.compId.rowCount - 1)})`); + console.log(`${seq.sequence.sequence}`); } console.log(); } export function printUnits(structure: Structure) { - console.log('Units\n============='); + console.log('\nUnits\n============='); const l = Element.Location(); for (const unit of structure.units) { @@ -140,10 +131,9 @@ export function printUnits(structure: Structure) { } } - export function printIHMModels(model: Model) { if (!model.coarseHierarchy.isDefined) return false; - console.log('IHM Models\n============='); + console.log('\nIHM Models\n============='); console.log(Table.formatToString(model.coarseHierarchy.models)); } @@ -151,10 +141,10 @@ async function run(mmcif: mmCIF_Database) { const models = await Model.create({ kind: 'mmCIF', data: mmcif }).run(); const structure = Structure.ofModel(models[0]); printSequence(models[0]); - printIHMModels(models[0]); + //printIHMModels(models[0]); printUnits(structure); - // printBonds(structure); - printSecStructure(models[0]); + //printBonds(structure); + //printSecStructure(models[0]); } async function runDL(pdb: string) { diff --git a/src/mol-data/db/column.ts b/src/mol-data/db/column.ts index 355c1c365a7ba7f51c1cf41f7ede0d691f25d48c..a61efa6771935c8e742d5db03d43883e9b00c67a 100644 --- a/src/mol-data/db/column.ts +++ b/src/mol-data/db/column.ts @@ -103,6 +103,15 @@ namespace Column { return lambdaColumn(spec); } + /** values [min, max] (i.e. 
include both values) */ + export function range(min: number, max: number): Column<number> { + return ofLambda({ + value: i => i + min, + rowCount: Math.max(max - min + 1, 0), + schema: Schema.int + }); + } + export function ofArray<T extends Column.Schema>(spec: Column.ArraySpec<T>): Column<T['T']> { return arrayColumn(spec); } diff --git a/src/mol-math/graph.ts b/src/mol-math/graph.ts new file mode 100644 index 0000000000000000000000000000000000000000..ce0429662fe04ce07a6a24a56e8a25c188bb4cf1 --- /dev/null +++ b/src/mol-math/graph.ts @@ -0,0 +1,7 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +export * from './graph/int/graph' \ No newline at end of file diff --git a/src/mol-math/graph/_spec/int-graph.spec.ts b/src/mol-math/graph/_spec/int-graph.spec.ts new file mode 100644 index 0000000000000000000000000000000000000000..3d75dddb0296d749b60261249cfd666ddb1d8a35 --- /dev/null +++ b/src/mol-math/graph/_spec/int-graph.spec.ts @@ -0,0 +1,36 @@ +/** + * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { IntGraph } from '../int/graph'; + +describe('IntGraph', () => { + const vc = 3; + const xs = [0, 1, 2]; + const ys = [1, 2, 0]; + const _prop = [10, 11, 12]; + + const builder = new IntGraph.EdgeBuilder(vc, xs, ys); + const prop: number[] = new Array(builder.slotCount); + for (let i = 0; i < builder.edgeCount; i++) { + builder.addNextEdge(); + builder.assignProperty(prop, _prop[i]); + } + const graph = builder.createGraph({ prop }); + + it('triangle-edgeCount', () => expect(graph.edgeCount).toBe(3)); + it('triangle-vertexEdgeCounts', () => { + expect(graph.getVertexEdgeCount(0)).toBe(2); + expect(graph.getVertexEdgeCount(1)).toBe(2); + expect(graph.getVertexEdgeCount(2)).toBe(2); + }); + + it('triangle-propAndEdgeIndex', () => { + const prop = graph.prop; + expect(prop[graph.getEdgeIndex(0, 1)]).toBe(10); + expect(prop[graph.getEdgeIndex(1, 2)]).toBe(11); + expect(prop[graph.getEdgeIndex(2, 0)]).toBe(12); + }); +}); \ No newline at end of file diff --git a/src/mol-math/graph/graph.ts b/src/mol-math/graph/graph.ts deleted file mode 100644 index 0ffdd02fcbce683e436c0030ffe0517135c6ceda..0000000000000000000000000000000000000000 --- a/src/mol-math/graph/graph.ts +++ /dev/null @@ -1 +0,0 @@ -// TODO \ No newline at end of file diff --git a/src/mol-math/graph/int/graph.ts b/src/mol-math/graph/int/graph.ts new file mode 100644 index 0000000000000000000000000000000000000000..fb50ee13f123ed9deec41270e3e81b5923878f56 --- /dev/null +++ b/src/mol-math/graph/int/graph.ts @@ -0,0 +1,137 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +/** + * Represent a graph using a vertex adjacency list. + * + * Edges of the i-th vertex are stored in the arrays a and b + * for indices in the range [offset[i], offset[i+1]). + * + * Edge properties are indexed the same way as the arrays a and b. 
+ */ +type IntGraph<EdgeProperties extends object = { }> = { + readonly offset: ArrayLike<number>, + readonly a: ArrayLike<number>, + readonly b: ArrayLike<number>, + readonly vertexCount: number, + readonly edgeCount: number, + + /** + * Get the index of the edge between the i-th and j-th vertex. + * Returns -1 if the edge does not exist. + * + * Because the a and b arrays contain each edge twice (once per direction), + * this always returns the smaller of the two indices. + */ + getEdgeIndex(i: number, j: number): number, + getVertexEdgeCount(i: number): number +} & EdgeProperties + +namespace IntGraph { + class Impl implements IntGraph<any> { + readonly vertexCount: number; + + getEdgeIndex(i: number, j: number): number { + let a, b; + if (i < j) { a = i; b = j; } + else { a = j; b = i; } + for (let t = this.offset[a], _t = this.offset[a + 1]; t < _t; t++) { + if (this.b[t] === b) return t; + } + return -1; + } + + getVertexEdgeCount(i: number): number { + return this.offset[i + 1] - this.offset[i]; + } + + constructor(public offset: ArrayLike<number>, public a: ArrayLike<number>, public b: ArrayLike<number>, public edgeCount: number, props?: any) { + this.vertexCount = offset.length - 1; + if (props) { + for (const p of Object.keys(props)) { + (this as any)[p] = props[p]; + } + } + } + } + + export function create<EdgeProps extends object = { }>(offset: ArrayLike<number>, a: ArrayLike<number>, b: ArrayLike<number>, edgeCount: number, edgeProps?: EdgeProps): IntGraph<EdgeProps> { + return new Impl(offset, a, b, edgeCount, edgeProps) as IntGraph<EdgeProps>; + } + + export class EdgeBuilder { + private bucketFill: Int32Array; + private current = 0; + private curA: number = 0; + private curB: number = 0; + + offsets: Int32Array; + edgeCount: number; + /** the size of the A and B arrays */ + slotCount: number; + a: Int32Array; + b: Int32Array; + + createGraph<EdgeProps extends object = { }>(edgeProps?: EdgeProps) { + return create(this.offsets, this.a, this.b, this.edgeCount, edgeProps); + } + + /** + * 
@example + * const property = new Int32Array(builder.slotCount); + * for (let i = 0; i < builder.edgeCount; i++) { + * builder.addNextEdge(); + * builder.assignProperty(property, srcProp[i]); + * } + * return builder.createGraph({ property }); + */ + addNextEdge() { + const a = this.xs[this.current], b = this.ys[this.current]; + + const oa = this.offsets[a] + this.bucketFill[a]; + const ob = this.offsets[b] + this.bucketFill[b]; + + this.a[oa] = a; + this.b[oa] = b; + this.bucketFill[a]++; + + this.a[ob] = b; + this.b[ob] = a; + this.bucketFill[b]++; + + this.current++; + this.curA = oa; + this.curB = ob; + } + + assignProperty<T>(prop: { [i: number]: T }, value: T) { + prop[this.curA] = value; + prop[this.curB] = value; + } + + constructor(public vertexCount: number, public xs: ArrayLike<number>, public ys: ArrayLike<number>) { + this.edgeCount = xs.length; + this.offsets = new Int32Array(this.vertexCount + 1); + this.bucketFill = new Int32Array(this.vertexCount); + + const bucketSizes = new Int32Array(this.vertexCount); + for (let i = 0, _i = this.xs.length; i < _i; i++) bucketSizes[this.xs[i]]++; + for (let i = 0, _i = this.ys.length; i < _i; i++) bucketSizes[this.ys[i]]++; + + let offset = 0; + for (let i = 0; i < this.vertexCount; i++) { + this.offsets[i] = offset; + offset += bucketSizes[i]; + } + this.offsets[this.vertexCount] = offset; + this.slotCount = offset; + this.a = new Int32Array(offset); + this.b = new Int32Array(offset); + } + } +} + +export { IntGraph } \ No newline at end of file diff --git a/src/mol-model/sequence.ts b/src/mol-model/sequence.ts new file mode 100644 index 0000000000000000000000000000000000000000..bd9af242dc8e9daecd99517add461357b0eec763 --- /dev/null +++ b/src/mol-model/sequence.ts @@ -0,0 +1,7 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. 
+ * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +export * from './sequence/sequence' \ No newline at end of file diff --git a/src/mol-model/sequence/TODO b/src/mol-model/sequence/TODO deleted file mode 100644 index a14553cb9d4b45b94a29e6b28b9c0478205867b3..0000000000000000000000000000000000000000 --- a/src/mol-model/sequence/TODO +++ /dev/null @@ -1,2 +0,0 @@ -- Support for FASTA etc.. -- Mapping/properties for 'structure' \ No newline at end of file diff --git a/src/mol-model/sequence/constants.ts b/src/mol-model/sequence/constants.ts new file mode 100644 index 0000000000000000000000000000000000000000..192d137f5bbd93804bbcdfe509d8f4cf20aacc02 --- /dev/null +++ b/src/mol-model/sequence/constants.ts @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +export type AminoAlphabet = + | 'H' | 'R' | 'K' | 'I' | 'F' | 'L' | 'W' | 'A' | 'M' | 'P' | 'C' | 'N' | 'V' | 'G' | 'S' | 'Q' | 'Y' | 'D' | 'E' | 'T' | 'U' | 'O' + | 'X' /** = Unknown */ + +export type NuclecicAlphabet = + | 'A' | 'C' | 'G' | 'T' | 'U' + | 'X' /** = Unknown */ + +// from NGL +const ProteinOneLetterCodes: { [name: string]: AminoAlphabet } = { + 'HIS': 'H', + 'ARG': 'R', + 'LYS': 'K', + 'ILE': 'I', + 'PHE': 'F', + 'LEU': 'L', + 'TRP': 'W', + 'ALA': 'A', + 'MET': 'M', + 'PRO': 'P', + 'CYS': 'C', + 'ASN': 'N', + 'VAL': 'V', + 'GLY': 'G', + 'SER': 'S', + 'GLN': 'Q', + 'TYR': 'Y', + 'ASP': 'D', + 'GLU': 'E', + 'THR': 'T', + + 'SEC': 'U', // as per IUPAC definition + 'PYL': 'O', // as per IUPAC definition +} + +const DnaOneLetterCodes: { [name: string]: NuclecicAlphabet } = { + 'DA': 'A', + 'DC': 'C', + 'DG': 'G', + 'DT': 'T', + 'DU': 'U' +} + +const RnaOneLetterCodes: { [name: string]: NuclecicAlphabet } = { + 'A': 'A', + 'C': 'C', + 'G': 'G', + 'T': 'T', + 'U': 'U' +} + +export function getProteinOneLetterCode(residueName: string): AminoAlphabet { + const code = 
ProteinOneLetterCodes[residueName]; + return code || 'X'; +} + +export function getRnaOneLetterCode(residueName: string): NuclecicAlphabet { + const code = RnaOneLetterCodes[residueName]; + return code || 'X'; +} + +export function getDnaOneLetterCode(residueName: string): NuclecicAlphabet { + const code = DnaOneLetterCodes[residueName]; + return code || 'X'; +} \ No newline at end of file diff --git a/src/mol-model/sequence/sequence.ts b/src/mol-model/sequence/sequence.ts new file mode 100644 index 0000000000000000000000000000000000000000..7f99ca54ce82ad570fb6f3d76e24d49f730b23f4 --- /dev/null +++ b/src/mol-model/sequence/sequence.ts @@ -0,0 +1,103 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { AminoAlphabet, NuclecicAlphabet, getProteinOneLetterCode, getRnaOneLetterCode, getDnaOneLetterCode } from './constants'; +import { Column } from 'mol-data/db' + +// TODO add mapping support to other sequence spaces, e.g. 
uniprot +// TODO sequence alignment (take NGL code as starting point) + +type Sequence = Sequence.Protein | Sequence.DNA | Sequence.RNA | Sequence.Generic + +namespace Sequence { + export const enum Kind { + Protein = 'protein', + RNA = 'RNA', + DNA = 'DNA', + Generic = 'generic' + } + + export interface Base<K extends Kind, Alphabet extends string> { + readonly kind: K, + readonly offset: number, + readonly sequence: ArrayLike<Alphabet> + } + + export interface Protein extends Base<Kind.Protein, AminoAlphabet> { } + export interface RNA extends Base<Kind.RNA, NuclecicAlphabet> { } + export interface DNA extends Base<Kind.DNA, NuclecicAlphabet> { } + export interface Generic extends Base<Kind.Generic, 'X'> { } + + export function create(kind: Kind, sequence: string, offset: number = 0): Sequence { + return { kind: kind as any, sequence: sequence as any, offset }; + } + + export function getSequenceString(seq: Sequence) { + return seq.sequence as string; + } + + function determineKind(names: Column<string>) { + for (let i = 0, _i = Math.min(names.rowCount, 10); i < _i; i++) { + const name = names.value(i) || ''; + if (getProteinOneLetterCode(name) !== 'X') return { kind: Kind.Protein, code: getProteinOneLetterCode }; + if (getRnaOneLetterCode(name) !== 'X') return { kind: Kind.RNA, code: getRnaOneLetterCode }; + if (getDnaOneLetterCode(name) !== 'X') return { kind: Kind.DNA, code: getDnaOneLetterCode }; + } + return { kind: Kind.Generic, code: (v: string) => 'X' }; + } + + export function ofResidueNames(residueName: Column<string>, seqId: Column<number>): Sequence { + if (seqId.rowCount === 0) throw new Error('cannot be empty'); + + const { kind, code } = determineKind(residueName); + return new Impl(kind, residueName, seqId, code) as Sequence; + } + + class Impl implements Base<any, any> { + private _offset = 0; + private _seq: string | undefined = void 0; + + get offset() { + if (this._seq !== void 0) return this._offset; + this.create(); + return this._offset; + 
} + + get sequence(): any { + if (this._seq !== void 0) return this._seq; + this.create(); + return this._seq; + } + + private create() { + let maxSeqId = 0, minSeqId = Number.MAX_SAFE_INTEGER; + for (let i = 0, _i = this.seqId.rowCount; i < _i; i++) { + const id = this.seqId.value(i); + if (maxSeqId < id) maxSeqId = id; + if (id < minSeqId) minSeqId = id; + } + + const count = maxSeqId - minSeqId + 1; + const sequenceArray = new Array(maxSeqId + 1); + for (let i = 0; i < count; i++) { + sequenceArray[i] = 'X'; + } + + for (let i = 0, _i = this.seqId.rowCount; i < _i; i++) { + sequenceArray[this.seqId.value(i) - minSeqId] = this.code(this.residueName.value(i) || ''); + } + + this._seq = sequenceArray.join(''); + this._offset = minSeqId - 1; + } + + constructor(public kind: Kind, private residueName: Column<string>, private seqId: Column<number>, private code: (name: string) => string) { + + } + } +} + +export { Sequence } diff --git a/src/mol-model/structure/model/formats/mmcif/sequence.ts b/src/mol-model/structure/model/formats/mmcif/sequence.ts index c9f0825208a9538a3bac331162776d6d84989fbc..06f0c888d9af880650668dfe69776c0308aae9ce 100644 --- a/src/mol-model/structure/model/formats/mmcif/sequence.ts +++ b/src/mol-model/structure/model/formats/mmcif/sequence.ts @@ -5,10 +5,11 @@ */ import { mmCIF_Database as mmCIF } from 'mol-io/reader/cif/schema/mmcif' -import Sequence from '../../properties/sequence' +import StructureSequence from '../../properties/sequence' import { Column } from 'mol-data/db'; import { AtomicHierarchy } from '../../properties/atomic'; import { Entities } from '../../properties/common'; +import { Sequence } from '../../../../sequence'; // TODO how to handle microheterogeneity // see http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/entity_poly_seq.html @@ -20,12 +21,12 @@ import { Entities } from '../../properties/common'; // corresponding ATOM_SITE entries should reflect this // heterogeneity. 
-export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy): Sequence { - if (!cif.entity_poly_seq._rowCount) return Sequence.fromAtomicHierarchy(entities, hierarchy); +export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHierarchy): StructureSequence { + if (!cif.entity_poly_seq._rowCount) return StructureSequence.fromAtomicHierarchy(entities, hierarchy); const { entity_id, num, mon_id } = cif.entity_poly_seq; - const byEntityKey: Sequence['byEntityKey'] = {}; + const byEntityKey: StructureSequence['byEntityKey'] = {}; const count = entity_id.rowCount; let i = 0; @@ -35,7 +36,15 @@ export function getSequence(cif: mmCIF, entities: Entities, hierarchy: AtomicHie i++; const id = entity_id.value(start); - byEntityKey[entities.getEntityIndex(id)] = { entityId: id, compId: Column.window(mon_id, start, i), num: Column.window(num, start, i) } + const _compId = Column.window(mon_id, start, i); + const _num = Column.window(num, start, i); + + byEntityKey[entities.getEntityIndex(id)] = { + entityId: id, + compId: _compId, + num: _num, + sequence: Sequence.ofResidueNames(_compId, _num) + }; } return { byEntityKey }; diff --git a/src/mol-model/structure/model/model.ts b/src/mol-model/structure/model/model.ts index 214dc8d8687ce1b59cfd871513163622b2162420..da0037d65dff36c294992e38a77c7d5d0d109b53 100644 --- a/src/mol-model/structure/model/model.ts +++ b/src/mol-model/structure/model/model.ts @@ -6,7 +6,7 @@ import UUID from 'mol-util/uuid' import Format from './format' -import Sequence from './properties/sequence' +import StructureSequence from './properties/sequence' import { AtomicHierarchy, AtomicConformation } from './properties/atomic' import { ModelSymmetry } from './properties/symmetry' import { CoarseHierarchy, CoarseConformation } from './properties/coarse' @@ -31,7 +31,7 @@ interface Model extends Readonly<{ symmetry: ModelSymmetry, entities: Entities, - sequence: Sequence, + sequence: StructureSequence, 
atomicHierarchy: AtomicHierarchy, atomicConformation: AtomicConformation, diff --git a/src/mol-model/structure/model/properties/sequence.ts b/src/mol-model/structure/model/properties/sequence.ts index f10338b174124e571eb7f069bd0d34da39eb8001..a2f53c56df7c790bbbacadc67e1d28a4f983a6ac 100644 --- a/src/mol-model/structure/model/properties/sequence.ts +++ b/src/mol-model/structure/model/properties/sequence.ts @@ -7,59 +7,55 @@ import { Column } from 'mol-data/db' import { AtomicHierarchy } from './atomic/hierarchy'; import { Entities } from './common'; +import { Sequence } from '../../../sequence'; -interface Sequence { - readonly byEntityKey: { [key: number]: Sequence.Entity } +interface StructureSequence { + readonly byEntityKey: { [key: number]: StructureSequence.Entity } } -// TODO lift to model/sequence/ folder -// TODO add one letter code sequence string -// TODO add mapping support to other sequence spaces, e.g. uniprot -// TODO add sequence kind, e.g. protein, dna, rna (alphabets?) -// TODO sequence alignment (take NGL code as starting point) - -namespace Sequence { +namespace StructureSequence { export interface Entity { readonly entityId: string, - readonly num: Column<number> - // _entity_poly_seq.mon_id - readonly compId: Column<string> + readonly num: Column<number>, + // Corresponds to _entity_poly_seq.mon_id + readonly compId: Column<string>, + readonly sequence: Sequence } - export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy): Sequence { - const { label_entity_id } = hierarchy.chains + export function fromAtomicHierarchy(entities: Entities, hierarchy: AtomicHierarchy): StructureSequence { const { label_comp_id, label_seq_id } = hierarchy.residues const { chainSegments, residueSegments } = hierarchy - const byEntityKey: Sequence['byEntityKey'] = {}; + const byEntityKey: StructureSequence['byEntityKey'] = { }; - // TODO get min/max of label_seq_id to handle missing residues at start and in between - // note that this 
assumes label_seq_id is monotonically increasing + for (let cI = 0, _cI = hierarchy.chains._rowCount; cI < _cI; cI++) { + const entityKey = hierarchy.entityKey[cI]; + // Only for polymers, trying to mirror _entity_poly_seq + if (byEntityKey[entityKey] !== void 0 || entities.data.type.value(entityKey) !== 'polymer') continue; - const chainCount = hierarchy.chains._rowCount - for (let i = 0; i < chainCount; ++i) { - const entityId = label_entity_id.value(i) - const entityIndex = entities.getEntityIndex(entityId) - // TODO only for polymers, mirroring _entity_poly_seq, ok??? - if (entities.data.type.value(i) !== 'polymer') continue + let start = cI; + cI++; + while (cI < _cI && entityKey === hierarchy.entityKey[cI] && entities.data.type.value(entityKey) !== 'polymer') { + cI++; + } + cI--; - const entityKey = hierarchy.entityKey[entityIndex] - if (byEntityKey[entityKey] !== undefined) continue + const rStart = residueSegments.segmentMap[chainSegments.segments[start]]; + const rEnd = residueSegments.segmentMap[chainSegments.segments[cI + 1]]; - const start = residueSegments.segmentMap[chainSegments.segments[i]] - let end = residueSegments.segmentMap[chainSegments.segments[i + 1]] - // TODO better way to handle end??? 
- if (end === undefined) end = hierarchy.residues._rowCount + const compId = Column.window(label_comp_id, rStart, rEnd); + const num = Column.window(label_seq_id, rStart, rEnd); byEntityKey[entityKey] = { - entityId, - compId: Column.window(label_comp_id, start, end), - num: Column.window(label_seq_id, start, end) - } + entityId: entities.data.id.value(entityKey), + compId, + num, + sequence: Sequence.ofResidueNames(compId, num) + }; } return { byEntityKey } } } -export default Sequence \ No newline at end of file +export default StructureSequence \ No newline at end of file diff --git a/src/mol-model/structure/structure/unit/bonds/intra-compute.ts b/src/mol-model/structure/structure/unit/bonds/intra-compute.ts index 816d72cb7e1fcbda3ee5f94a9e587784522598fd..ba344073c94604ba612a5dfafe4bd100770e30bb 100644 --- a/src/mol-model/structure/structure/unit/bonds/intra-compute.ts +++ b/src/mol-model/structure/structure/unit/bonds/intra-compute.ts @@ -8,6 +8,7 @@ import { BondType, ElementSymbol } from '../../../model/types' import { IntraUnitBonds } from './intra-data' import { StructConn, ComponentBondInfo } from '../../../model/formats/mmcif/bonds' import Unit from '../../unit' +import { IntGraph } from 'mol-math/graph'; export interface BondComputationParameters { maxHbondLength: number, @@ -62,48 +63,17 @@ function isHydrogen(i: number) { return i === H_ID; } -function computePerAtomBonds(atomA: number[], atomB: number[], _order: number[], _flags: number[], atomCount: number) { - const bucketSizes = new Int32Array(atomCount); - const bucketOffsets = new Int32Array(atomCount + 1) as any as number[]; - const bucketFill = new Int32Array(atomCount); - - for (const i of atomA) bucketSizes[i]++; - for (const i of atomB) bucketSizes[i]++; - - let offset = 0; - for (let i = 0; i < atomCount; i++) { - bucketOffsets[i] = offset; - offset += bucketSizes[i]; +function getGraph(atomA: number[], atomB: number[], _order: number[], _flags: number[], atomCount: number): IntraUnitBonds 
{ + const builder = new IntGraph.EdgeBuilder(atomCount, atomA, atomB); + const flags = new Uint16Array(builder.slotCount); + const order = new Int8Array(builder.slotCount); + for (let i = 0, _i = builder.edgeCount; i < _i; i++) { + builder.addNextEdge(); + builder.assignProperty(flags, _flags[i]); + builder.assignProperty(order, _order[i]); } - bucketOffsets[atomCount] = offset; - - const neighbor = new Int32Array(offset) as any as number[]; - const flags = new Uint16Array(offset) as any as number[]; - const order = new Int8Array(offset) as any as number[]; - - for (let i = 0, _i = atomA.length; i < _i; i++) { - const a = atomA[i], b = atomB[i], f = _flags[i], o = _order[i]; - - const oa = bucketOffsets[a] + bucketFill[a]; - const ob = bucketOffsets[b] + bucketFill[b]; - neighbor[oa] = b; - flags[oa] = f; - order[oa] = o; - bucketFill[a]++; - - neighbor[ob] = a; - flags[ob] = f; - order[ob] = o; - bucketFill[b]++; - } - - return { - offsets: bucketOffsets, - neighbor, - flags, - order - }; + return builder.createGraph({ flags, order }); } function _computeBonds(unit: Unit.Atomic, params: BondComputationParameters): IntraUnitBonds { @@ -231,15 +201,7 @@ function _computeBonds(unit: Unit.Atomic, params: BondComputationParameters): In } } - const bonds = computePerAtomBonds(atomA, atomB, order, flags, atomCount); - - return { - offset: bonds.offsets, - neighbor: bonds.neighbor, - flags: bonds.flags, - order: bonds.order, - count: atomA.length - }; + return getGraph(atomA, atomB, order, flags, atomCount); } function computeIntraUnitBonds(unit: Unit.Atomic, params?: Partial<BondComputationParameters>) { diff --git a/src/mol-model/structure/structure/unit/bonds/intra-data.ts b/src/mol-model/structure/structure/unit/bonds/intra-data.ts index c897e6ca63ed8dff5071f1c52def66e41f3bf935..9914145379663e38ce3882ef4634bd57ae3da57e 100644 --- a/src/mol-model/structure/structure/unit/bonds/intra-data.ts +++ b/src/mol-model/structure/structure/unit/bonds/intra-data.ts @@ -6,24 +6,13 
@@ */ import { BondType } from '../../../model/types' +import { IntGraph } from 'mol-math/graph'; -interface IntraUnitBonds { - /** - * Where bonds for atom A start and end. - * Start offset at idx, end at idx + 1 - */ - offset: ArrayLike<number>, - neighbor: ArrayLike<number>, - - order: ArrayLike<number>, - flags: ArrayLike<BondType.Flag>, - - count: number -} +type IntraUnitBonds = IntGraph<{ readonly order: ArrayLike<number>, readonly flags: ArrayLike<BondType.Flag> }> namespace IntraUnitBonds { export function createEmpty(): IntraUnitBonds { - return { offset: [], neighbor: [], order: [], flags: [], count: 0 } + return IntGraph.create([], [], [], 0, { flags: [], order: [] }); } export function isCovalent(flags: number) { return (flags & BondType.Flag.Covalent) !== 0;