Skip to content
Snippets Groups Projects
Commit a8d43028 authored by David Sehnal
Browse files

sort mmCIF atom_site

parent 636f6725
No related branches found
No related tags found
No related merge requests found
...@@ -18,16 +18,24 @@ describe('buckets', () => { ...@@ -18,16 +18,24 @@ describe('buckets', () => {
it('full range', () => { it('full range', () => {
const xs = [1, 1, 2, 2, 3, 1]; const xs = [1, 1, 2, 2, 3, 1];
const range = createRangeArray(0, xs.length - 1); const range = createRangeArray(0, xs.length - 1);
const bs = makeBuckets(range, i => xs[i]); const bs = makeBuckets(range, i => xs[i], false);
expect(reorder(range, xs)).toEqual([1, 1, 1, 2, 2, 3]); expect(reorder(range, xs)).toEqual([1, 1, 1, 2, 2, 3]);
expect(Array.from(bs)).toEqual([0, 3, 5, 6]); expect(Array.from(bs)).toEqual([0, 3, 5, 6]);
}); });
it('sort', () => {
    // Keys arrive neither grouped nor ordered; with sorting enabled the
    // reordered keys must come out in ascending order.
    const keys = [3, 1, 2, 1, 2, 3];
    const indices = createRangeArray(0, keys.length - 1);
    makeBuckets(indices, idx => keys[idx], true);
    const reordered = reorder(indices, keys);
    expect(reordered).toEqual([1, 1, 2, 2, 3, 3]);
});
it('subrange', () => { it('subrange', () => {
const xs = [2, 1, 2, 1, 2, 3, 1]; const xs = [2, 1, 2, 1, 2, 3, 1];
const range = createRangeArray(0, xs.length - 1); const range = createRangeArray(0, xs.length - 1);
const bs = makeBuckets(range, i => xs[i], 1, 5); const bs = makeBuckets(range, i => xs[i], false, 1, 5);
expect(reorder(range, xs)).toEqual([2, 1, 1, 2, 2, 3, 1]); expect(reorder(range, xs)).toEqual([2, 1, 1, 2, 2, 3, 1]);
expect(Array.from(bs)).toEqual([1, 3, 5]); expect(Array.from(bs)).toEqual([1, 3, 5]);
......
...@@ -5,11 +5,12 @@ ...@@ -5,11 +5,12 @@
*/ */
type Bucket = { type Bucket = {
key: any,
count: number, count: number,
offset: number offset: number
} }
function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => any, start: number, end: number) { function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => any, sort: boolean, start: number, end: number) {
const buckets = new Map<any, Bucket>(); const buckets = new Map<any, Bucket>();
const bucketList: Bucket[] = []; const bucketList: Bucket[] = [];
...@@ -21,7 +22,7 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => ...@@ -21,7 +22,7 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) =>
buckets.get(key)!.count++; buckets.get(key)!.count++;
if (prevKey !== key) isBucketed = false; if (prevKey !== key) isBucketed = false;
} else { } else {
const bucket: Bucket = { count: 1, offset: i }; const bucket: Bucket = { key, count: 1, offset: i };
buckets.set(key, bucket); buckets.set(key, bucket);
bucketList[bucketList.length] = bucket; bucketList[bucketList.length] = bucket;
} }
...@@ -31,11 +32,25 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => ...@@ -31,11 +32,25 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) =>
const bucketOffsets = new Int32Array(bucketList.length + 1); const bucketOffsets = new Int32Array(bucketList.length + 1);
bucketOffsets[bucketList.length] = end; bucketOffsets[bucketList.length] = end;
if (isBucketed) { let sorted = true;
if (sort) {
for (let i = 1, _i = bucketList.length; i < _i; i++) {
if (bucketList[i - 1].key > bucketList[i].key) {
sorted = false;
break;
}
}
}
if (isBucketed && sorted) {
for (let i = 0; i < bucketList.length; i++) bucketOffsets[i] = bucketList[i].offset; for (let i = 0; i < bucketList.length; i++) bucketOffsets[i] = bucketList[i].offset;
return bucketOffsets; return bucketOffsets;
} }
if (sort && !sorted) {
bucketList.sort((x, y) => x.key <= y.key ? -1 : 1);
}
let offset = 0; let offset = 0;
for (let i = 0; i < bucketList.length; i++) { for (let i = 0; i < bucketList.length; i++) {
const b = bucketList[i]; const b = bucketList[i];
...@@ -64,11 +79,11 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) => ...@@ -64,11 +79,11 @@ function _makeBuckets(indices: Helpers.ArrayLike<number>, getKey: (i: number) =>
* Reorders indices so that the same keys are next to each other, [start, end) * Reorders indices so that the same keys are next to each other, [start, end)
* Returns the offsets of buckets. So that [offsets[i], offsets[i + 1]) determines the range. * Returns the offsets of buckets. So that [offsets[i], offsets[i + 1]) determines the range.
*/ */
export function makeBuckets<T>(indices: Helpers.ArrayLike<number>, getKey: (i: number) => string | number, start?: number, end?: number): ArrayLike<number> { export function makeBuckets<T>(indices: Helpers.ArrayLike<number>, getKey: (i: number) => string | number, sort: boolean, start?: number, end?: number): ArrayLike<number> {
const s = start || 0; const s = start || 0;
const e = typeof end === 'undefined' ? indices.length : end; const e = typeof end === 'undefined' ? indices.length : end;
if (e - s <= 0) throw new Error('Can only bucket non-empty collections.'); if (e - s <= 0) throw new Error('Can only bucket non-empty collections.');
return _makeBuckets(indices, getKey, s, e); return _makeBuckets(indices, getKey, sort, s, e);
} }
\ No newline at end of file
...@@ -23,6 +23,7 @@ import { getSequence } from './mmcif/sequence'; ...@@ -23,6 +23,7 @@ import { getSequence } from './mmcif/sequence';
import mmCIF_Format = Format.mmCIF import mmCIF_Format = Format.mmCIF
import { Task } from 'mol-task'; import { Task } from 'mol-task';
import { getSecondaryStructureMmCif } from './mmcif/secondary-structure'; import { getSecondaryStructureMmCif } from './mmcif/secondary-structure';
import { sortAtomSite } from './mmcif/sort';
function findModelBounds({ data }: mmCIF_Format, startIndex: number) { function findModelBounds({ data }: mmCIF_Format, startIndex: number) {
const num = data.atom_site.pdbx_PDB_model_num; const num = data.atom_site.pdbx_PDB_model_num;
...@@ -196,6 +197,8 @@ function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> { ...@@ -196,6 +197,8 @@ function buildModels(format: mmCIF_Format): Task<ReadonlyArray<Model>> {
let modelStart = 0; let modelStart = 0;
while (modelStart < atomCount) { while (modelStart < atomCount) {
const bounds = findModelBounds(format, modelStart); const bounds = findModelBounds(format, modelStart);
// const indices = await sortAtomSite(ctx, format.data.atom_site, 0, Interval.end(bounds));
const model = createModel(format, bounds, models.length > 0 ? models[models.length - 1] : void 0); const model = createModel(format, bounds, models.length > 0 ? models[models.length - 1] : void 0);
models.push(model); models.push(model);
modelStart = Interval.end(bounds); modelStart = Interval.end(bounds);
......
/**
* Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { mmCIF_Database } from 'mol-io/reader/cif/schema/mmcif';
import { createRangeArray, makeBuckets } from 'mol-data/util';
import { Column } from 'mol-data/db';
import { RuntimeContext } from 'mol-task';
/**
 * Computes an ordering of the atom_site rows in [start, end).
 *
 * The rows are first grouped by `label_entity_id`, then, inside each entity,
 * by `label_asym_id` (chain). Inside each chain whose first row has a present
 * `label_seq_id`, rows are additionally grouped and sorted ascending by
 * `label_seq_id`. Chains whose first row has no present `label_seq_id`
 * are left in their bucketed order.
 *
 * Only the index array is reordered; `atom_site` itself is just read.
 *
 * @param ctx runtime context; `ctx.update()` is awaited whenever `ctx.shouldUpdate` is set
 * @param atom_site the mmCIF atom_site category to read the keys from
 * @param start first atom_site row (inclusive)
 * @param end last atom_site row (exclusive)
 * @returns the array of atom_site row indices in the computed order
 */
export async function sortAtomSite(ctx: RuntimeContext, atom_site: mmCIF_Database['atom_site'], start: number, end: number) {
    // indices[p] is the atom_site row placed at position p of the ordering.
    const indices = createRangeArray(start, end - 1);

    const { label_entity_id, label_asym_id, label_seq_id } = atom_site;

    // Bucket offsets returned by makeBuckets are POSITIONS in `indices`, not row numbers.
    const entityBuckets = makeBuckets(indices, label_entity_id.value, false);
    if (ctx.shouldUpdate) await ctx.update();

    for (let eI = 0, _eI = entityBuckets.length - 1; eI < _eI; eI++) {
        const chainBuckets = makeBuckets(indices, label_asym_id.value, false, entityBuckets[eI], entityBuckets[eI + 1]);
        for (let cI = 0, _cI = chainBuckets.length - 1; cI < _cI; cI++) {
            const chainStart = chainBuckets[cI];
            // are we in HETATM territory?
            // `chainStart` is a position in `indices`; it must be mapped back to the
            // atom_site row before querying the column, because the bucketing above
            // may have permuted `indices` (and positions differ from rows if start != 0).
            if (label_seq_id.valueKind(indices[chainStart]) !== Column.ValueKind.Present) continue;

            makeBuckets(indices, label_seq_id.value, true, chainStart, chainBuckets[cI + 1]);
            if (ctx.shouldUpdate) await ctx.update();
        }
        if (ctx.shouldUpdate) await ctx.update();
    }
    return indices;
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment