diff --git a/src/mol-base/collections/column.ts b/src/mol-base/collections/column.ts index e510026600e82f5e08c7b7e739b159f492da262f..a66c3a2e414d7f514ca873cca308f56a9525db94 100644 --- a/src/mol-base/collections/column.ts +++ b/src/mol-base/collections/column.ts @@ -109,7 +109,7 @@ namespace Column { export default Column; -function createFirstIndexMapOfColumn<T>(c: Column<T>): Map<T, number> | undefined { +function createFirstIndexMapOfColumn<T>(c: Column<T>): Map<T, number> { const map = new Map<T, number>(); for (let i = 0, _i = c.rowCount; i < _i; i++) { const v = c.value(i); diff --git a/src/mol-base/collections/integer/impl/segmentation.ts b/src/mol-base/collections/integer/impl/segmentation.ts index 254191098fcaaf6932cb653565b3ea4f08e20e81..806ccde0d2351fdc25bce76e75cb71d5e5dfd2f2 100644 --- a/src/mol-base/collections/integer/impl/segmentation.ts +++ b/src/mol-base/collections/integer/impl/segmentation.ts @@ -94,7 +94,7 @@ class SegmentIterator implements Iterator<Segs.Segment> { } } -export function segments(segs: Segmentation, set: OrderedSet, range?: Interval) { - const int = typeof range !== 'undefined' ? range : Interval.ofBounds(0, OrderedSet.size(set)); +export function segments(segs: Segmentation, set: OrderedSet, segment?: Segs.Segment) { + const int = typeof segment !== 'undefined' ? Interval.ofBounds(segment.start, segment.end) : Interval.ofBounds(0, OrderedSet.size(set)); return new SegmentIterator(segs.segments, set, int); } \ No newline at end of file diff --git a/src/mol-base/collections/integer/segmentation.ts b/src/mol-base/collections/integer/segmentation.ts index 275b44f6dacc6e97de063b157692ef1241d6dd6b..b986fe401a3e9ee4dc1dc6075d0c7a2a65e47c93 100644 --- a/src/mol-base/collections/integer/segmentation.ts +++ b/src/mol-base/collections/integer/segmentation.ts @@ -20,7 +20,7 @@ namespace Segmentation { export const projectValue: (segs: Segmentation, set: OrderedSet, value: number) => Interval = Impl.projectValue as any; // Segment iterator that mutates a single segment object to mark all the segments. - export const transientSegments: (segs: Segmentation, set: OrderedSet, range?: Interval) => Iterator<Segment> = Impl.segments as any; + export const transientSegments: (segs: Segmentation, set: OrderedSet, segment?: Segment) => Iterator<Segment> = Impl.segments as any; } interface Segmentation { diff --git a/src/mol-data/model/properties/hierarchy.ts b/src/mol-data/model/properties/hierarchy.ts index 1e7cfd5efdf9989076f7756f432149f1828bb240..04cb70e1a71397b72e6cf3f30f96d8e1556b62e9 100644 --- a/src/mol-data/model/properties/hierarchy.ts +++ b/src/mol-data/model/properties/hierarchy.ts @@ -6,6 +6,7 @@ import Column from '../../../mol-base/collections/column' import Table from '../../../mol-base/collections/table' +import Segmentation from '../../../mol-base/collections/integer/segmentation' import { Schema as mmCIF } from '../../../mol-io/reader/cif/schema/mmcif' const _esCache = Object.create(null); @@ -35,7 +36,7 @@ export const ResiduesSchema = { pdbx_PDB_ins_code: mmCIF.atom_site.pdbx_PDB_ins_code }; -export interface Residues extends Table<typeof AtomsSchema> { } +export interface Residues extends Table<typeof ResiduesSchema> { } export const ChainsSchema = { label_asym_id: mmCIF.atom_site.label_asym_id, @@ -57,10 +58,15 @@ export interface HierarchyData { entities: Entities } +export interface HierarchySegmentation { + residues: Segmentation, + chains: Segmentation +} + export interface HierarchyKeys { // indicate whether the keys form an increasing sequence (in other words, the residues are sorted). // monotonous sequences enable for example faster secodnary structure assignment. - isMonotonous: number, + isMonotonous: boolean, // assign a key to each residue index. residue: ArrayLike<number>, @@ -71,8 +77,8 @@ export interface HierarchyKeys { entity: ArrayLike<number>, findEntity(id: string): number, - findChain(entityId: string, label_asym_id: string): number, - findResidue(entityId: string, label_asym_id: string, label_comp_id: string, label_seq_id: number, pdbx_PDB_ins_code: string): number + findChain(entityId: string, auth_asym_id: string): number, + findResidue(entityId: string, auth_asym_id: string, auth_comp_id: string, auth_seq_id: number, pdbx_PDB_ins_code: string): number } export interface Hierarchy extends HierarchyData { diff --git a/src/mol-data/model/utils/hierarchy-keys.ts b/src/mol-data/model/utils/hierarchy-keys.ts index f65eb69ab2d7a8ffa1fb22542feb87919a5a4606..2558637bea09030c295844361e4e10bd2a75c62c 100644 --- a/src/mol-data/model/utils/hierarchy-keys.ts +++ b/src/mol-data/model/utils/hierarchy-keys.ts @@ -4,11 +4,102 @@ * @author David Sehnal <david.sehnal@gmail.com> */ -import { HierarchyData, HierarchyKeys } from '../properties/hierarchy' +import Column from '../../../mol-base/collections/column' +import { HierarchyData, HierarchySegmentation, HierarchyKeys } from '../properties/hierarchy' +import Segmentation from '../../../mol-base/collections/integer/segmentation' +import Interval from '../../../mol-base/collections/integer/interval' -function create(data: HierarchyData): HierarchyKeys { +function getResidueId(comp_id: string, seq_id: number, ins_code: string) { + return `${comp_id} ${seq_id} ${ins_code}`; +} + +function getElementKey(map: Map<string, number>, key: string, counter: { index: number }) { + if (map.has(key)) return map.get(key)!; + const ret = counter.index++; + map.set(key, ret); + return ret; +} + +function getElementSubstructureKeyMap(map: Map<number, Map<string, number>>, key: number) { + if (map.has(key)) return map.get(key)!; + const ret = new Map<string, number>(); + map.set(key, ret); + return ret; +} + +function createLookUp(entity: Map<string, number>, chain: Map<number, Map<string, number>>, residue: Map<number, Map<string, number>>) { + const findEntity: HierarchyKeys['findEntity'] = (id) => entity.has(id) ? entity.get(id)! : -1; + const findChain: HierarchyKeys['findChain'] = (e, c) => { + if (!entity.has(e)) return -1; + const cm = chain.get(entity.get(e)!)!; + if (!cm.has(c)) return -1; + return cm.get(c)!; + } + const findResidue: HierarchyKeys['findResidue'] = (e, c, name, seq, ins) => { + if (!entity.has(e)) return -1; + const cm = chain.get(entity.get(e)!)!; + if (!cm.has(c)) return -1; + const rm = residue.get(cm.get(c)!)! + const id = getResidueId(name, seq, ins); + if (!rm.has(id)) return -1; + return rm.get(id)!; + } + return { findEntity, findChain, findResidue }; +} + +function isMonotonous(xs: ArrayLike<number>) { + for (let i = 1, _i = xs.length; i < _i; i++) { + if (xs[i] < xs[i - 1]) return false; + } + return true; +} + +function create(data: HierarchyData, segments: HierarchySegmentation): HierarchyKeys { + const { chains, residues, entities } = data; + + const entityMap = Column.createFirstIndexMap(entities.id); + const chainMaps = new Map<number, Map<string, number>>(), chainCounter = { index: 0 }; + const residueMaps = new Map<number, Map<string, number>>(), residueCounter = { index: 0 }; + + const residueKey = new Int32Array(residues._rowCount); + const chainKey = new Int32Array(chains._rowCount); + const entityKey = new Int32Array(chains._rowCount); + + const { auth_comp_id, auth_seq_id, pdbx_PDB_ins_code } = data.residues; + const { label_entity_id, auth_asym_id } = data.chains; + + const chainsIt = Segmentation.transientSegments(segments.chains, Interval.ofBounds(0, data.atoms._rowCount)); + while (chainsIt.hasNext) { + const chainSegment = chainsIt.move(); + const residuesIt = Segmentation.transientSegments(segments.residues, Interval.ofBounds(chainSegment.start, chainSegment.end)); + const cI = chainSegment.index; + + const eKey = entityMap.get(label_entity_id.value(cI)) || 0; + const chainMap = getElementSubstructureKeyMap(chainMaps, eKey); + const cKey = getElementKey(chainMap, auth_asym_id.value(cI), chainCounter); + + chainKey[cI] = cKey; + entityKey[cI] = eKey; + + const residueMap = getElementSubstructureKeyMap(residueMaps, cKey); + while (residuesIt.hasNext) { + const rI = residuesIt.move().index; + const residueId = getResidueId(auth_comp_id.value(rI), auth_seq_id.value(rI), pdbx_PDB_ins_code.value(rI)); + residueKey[rI] = getElementKey(residueMap, residueId, residueCounter); + } + } + + const { findEntity, findChain, findResidue } = createLookUp(entityMap, chainMaps, residueMaps); - return 0 as any; + return { + isMonotonous: isMonotonous(entityKey) && isMonotonous(chainKey) && isMonotonous(residueKey), + residue: residueKey, + chain: chainKey, + entity: entityKey, + findEntity, + findChain, + findResidue + }; } export default create; \ No newline at end of file diff --git a/src/perf-tests/sets.ts b/src/perf-tests/sets.ts index 6d3a2cbf1a6487c702fe0ccfc77eb7cd755eb113..b94f63786ea06b283a03884c5f5341a5feefd61f 100644 --- a/src/perf-tests/sets.ts +++ b/src/perf-tests/sets.ts @@ -284,7 +284,56 @@ export function testSegments() { } } -Build.run(); +export namespace ObjectVsMap { + function objCreate(keys: string[]) { + const m = Object.create(null); + m.x = 0; + delete m.x; + for (let i = 0, _i = keys.length; i < _i; i++) { + m[keys[i]] = i * i; + } + return m; + } + + function mapCreate(keys: string[]) { + const m = new Map<string, number>(); + for (let i = 0, _i = keys.length; i < _i; i++) { + m.set(keys[i], i * i); + } + return m; + } + + function objQuery(keys: string[], n: number, obj: any) { + let ret = 0; + for (let i = 0; i < n; i++) ret += obj[keys[i % n]]; + return ret; + } + + function mapQuery(keys: string[], n: number, map: Map<string, number>) { + let ret = 0; + for (let i = 0; i < n; i++) ret += map.get(keys[i % n])!; + return ret; + } + + export function run() { + const suite = new B.Suite(); + const keys: string[] = []; + for (let i = 0; i < 1000; i++) keys[i] = 'k' + i; + + const N = 100000; + const obj = objCreate(keys); + const map = mapCreate(keys); + suite + .add('c obj', () => objCreate(keys)) + .add('c map', () => mapCreate(keys)) + .add('q obj', () => objQuery(keys, N, obj)) + .add('q map', () => mapQuery(keys, N, map)) + .on('cycle', (e: any) => console.log(String(e.target))) + .run(); + } +} + +ObjectVsMap.run(); //testSegments();