From a8d43028399b03dc0f6a7efbc0f21c4f275c1af1 Mon Sep 17 00:00:00 2001 From: David Sehnal <david.sehnal@gmail.com> Date: Mon, 11 Jun 2018 15:25:37 +0200 Subject: [PATCH] sort mmCIF atom_site --- src/mol-data/util/_spec/buckets.spec.ts | 12 +++++-- src/mol-data/util/buckets.ts | 25 ++++++++++++--- .../structure/model/formats/mmcif.ts | 3 ++ .../structure/model/formats/mmcif/sort.ts | 32 +++++++++++++++++++ 4 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 src/mol-model/structure/model/formats/mmcif/sort.ts diff --git a/src/mol-data/util/_spec/buckets.spec.ts b/src/mol-data/util/_spec/buckets.spec.ts index 2207a53f9..64046ce37 100644 --- a/src/mol-data/util/_spec/buckets.spec.ts +++ b/src/mol-data/util/_spec/buckets.spec.ts @@ -18,16 +18,24 @@ describe('buckets', () => { it('full range', () => { const xs = [1, 1, 2, 2, 3, 1]; const range = createRangeArray(0, xs.length - 1); - const bs = makeBuckets(range, i => xs[i]); + const bs = makeBuckets(range, i => xs[i], false); expect(reorder(range, xs)).toEqual([1, 1, 1, 2, 2, 3]); expect(Array.from(bs)).toEqual([0, 3, 5, 6]); }); + it('sort', () => { + const xs = [3, 1, 2, 1, 2, 3]; + const range = createRangeArray(0, xs.length - 1); + makeBuckets(range, i => xs[i], true); + + expect(reorder(range, xs)).toEqual([1, 1, 2, 2, 3, 3]); + }); + it('subrange', () => { const xs = [2, 1, 2, 1, 2, 3, 1]; const range = createRangeArray(0, xs.length - 1); - const bs = makeBuckets(range, i => xs[i], 1, 5); + const bs = makeBuckets(range, i => xs[i], false, 1, 5); expect(reorder(range, xs)).toEqual([2, 1, 1, 2, 2, 3, 1]); expect(Array.from(bs)).toEqual([1, 3, 5]); diff --git a/src/mol-data/util/buckets.ts b/src/mol-data/util/buckets.ts index 58a747e9d..33bbc3d79 100644 --- a/src/mol-data/util/buckets.ts +++ b/src/mol-data/util/buckets.ts @@ -5,11 +5,12 @@ */ type Bucket = { + key: any, count: number, offset: number } -function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => any, start: number, end: number) { +function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => any, sort: boolean, start: number, end: number) { const buckets = new Map<any, Bucket>(); const bucketList: Bucket[] = []; @@ -21,7 +22,7 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => buckets.get(key)!.count++; if (prevKey !== key) isBucketed = false; } else { - const bucket: Bucket = { count: 1, offset: i }; + const bucket: Bucket = { key, count: 1, offset: i }; buckets.set(key, bucket); bucketList[bucketList.length] = bucket; } @@ -31,11 +32,25 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => const bucketOffsets = new Int32Array(bucketList.length + 1); bucketOffsets[bucketList.length] = end; - if (isBucketed) { + let sorted = true; + if (sort) { + for (let i = 1, _i = bucketList.length; i < _i; i++) { + if (bucketList[i - 1].key > bucketList[i].key) { + sorted = false; + break; + } + } + } + + if (isBucketed && sorted) { for (let i = 0; i < bucketList.length; i++) bucketOffsets[i] = bucketList[i].offset; return bucketOffsets; } + if (sort && !sorted) { + bucketList.sort((x, y) => x.key <= y.key ? -1 : 1); + } + let offset = 0; for (let i = 0; i < bucketList.length; i++) { const b = bucketList[i]; @@ -64,11 +79,11 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => * Reorders indices so that the same keys are next to each other, [start, end) * Returns the offsets of buckets. So that [offsets[i], offsets[i + 1]) determines the range. */ -export function makeBuckets<T>(indices: Helpers.ArrayLike<number>, getKey: (i: number) => string | number, start?: number, end?: number): ArrayLike<number> { +export function makeBuckets<T>(indices: Helpers.ArrayLike<number>, getKey: (i: number) => string | number, sort: boolean, start?: number, end?: number): ArrayLike<number> { const s = start || 0; const e = typeof end === 'undefined' ? indices.length : end; if (e - s <= 0) throw new Error('Can only bucket non-empty collections.'); - return _makeBuckets(indices, getKey, s, e); + return _makeBuckets(indices, getKey, sort, s, e); } \ No newline at end of file diff --git a/src/mol-model/structure/model/formats/mmcif.ts b/src/mol-model/structure/model/formats/mmcif.ts index a5c8c5008..f83a449f0 100644 --- a/src/mol-model/structure/model/formats/mmcif.ts +++ b/src/mol-model/structure/model/formats/mmcif.ts @@ -23,6 +23,7 @@ import { getSequence } from './mmcif/sequence'; import mmCIF_Format = Format.mmCIF import { Task } from 'mol-task'; import { getSecondaryStructureMmCif } from './mmcif/secondary-structure'; +import { sortAtomSite } from './mmcif/sort'; function findModelBounds({ data }: mmCIF_Format, startIndex: number) { const num = data.atom_site.pdbx_PDB_model_num; @@ -196,6 +197,8 @@ function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> { let modelStart = 0; while (modelStart < atomCount) { const bounds = findModelBounds(format, modelStart); + + // const indices = await sortAtomSite(ctx, format.data.atom_site, 0, Interval.end(bounds)); const model = createModel(format, bounds, models.length > 0 ? models[models.length - 1] : void 0); models.push(model); modelStart = Interval.end(bounds); diff --git a/src/mol-model/structure/model/formats/mmcif/sort.ts b/src/mol-model/structure/model/formats/mmcif/sort.ts new file mode 100644 index 000000000..aa071aedb --- /dev/null +++ b/src/mol-model/structure/model/formats/mmcif/sort.ts @@ -0,0 +1,32 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif'; +import { createRangeArray, makeBuckets } from 'mol-data/util'; +import { Column } from 'mol-data/db'; +import { RuntimeContext } from 'mol-task'; + +export async function sortAtomSite(ctx: RuntimeContext, atom_site: mmCIF_Database['atom_site'], start: number, end: number) { + const indices = createRangeArray(start, end - 1); + + const { label_entity_id, label_asym_id, label_seq_id } = atom_site; + const entityBuckets = makeBuckets(indices, label_entity_id.value, false); + if (ctx.shouldUpdate) await ctx.update(); + for (let ei = 0, _eI = entityBuckets.length - 1; ei < _eI; ei++) { + const chainBuckets = makeBuckets(indices, label_asym_id.value, false, entityBuckets[ei], entityBuckets[ei + 1]); + for (let cI = 0, _cI = chainBuckets.length - 1; cI < _cI; cI++) { + const aI = chainBuckets[cI]; + // are we in HETATM territory? + if (label_seq_id.valueKind(aI) !== Column.ValueKind.Present) continue; + + makeBuckets(indices, label_seq_id.value, true, aI, chainBuckets[cI + 1]); + if (ctx.shouldUpdate) await ctx.update(); + } + if (ctx.shouldUpdate) await ctx.update(); + } + + return indices; +} \ No newline at end of file -- GitLab