From 530a86dc9207f3fc781fb807dabbb687f34777cc Mon Sep 17 00:00:00 2001 From: David Sehnal <david.sehnal@gmail.com> Date: Tue, 31 Oct 2017 19:46:57 +0100 Subject: [PATCH] structure data model 1st somewhat working version --- src/mol-base/collections/_spec/table.spec.ts | 12 +++++ src/mol-base/collections/column.ts | 8 +-- .../integer/_spec/segmentation.spec.ts | 50 ++++++++++++++++++- .../collections/integer/impl/segmentation.ts | 20 +++++--- src/mol-base/collections/table.ts | 13 ++++- src/mol-data/model.ts | 7 +-- src/mol-data/model/builders/mmcif.ts | 46 +++++++++++++---- src/mol-data/model/properties/hierarchy.ts | 23 +++++---- src/mol-data/model/utils/hierarchy-keys.ts | 44 +++++++++------- src/mol-data/structure/base.ts | 2 +- src/script.ts | 10 +++- 11 files changed, 173 insertions(+), 62 deletions(-) diff --git a/src/mol-base/collections/_spec/table.spec.ts b/src/mol-base/collections/_spec/table.spec.ts index aa7662ab4..4d79f445b 100644 --- a/src/mol-base/collections/_spec/table.spec.ts +++ b/src/mol-base/collections/_spec/table.spec.ts @@ -96,6 +96,18 @@ describe('table', () => { expect(picked.y.toArray()).toEqual([3, 4]); }); + it('view', () => { + const t = Table.ofColumns<typeof schema>({ + x: Column.ofArray({ array: [10, -1], type: Column.Type.int }), + n: Column.ofArray({ array: ['row1', 'row2'], type: Column.Type.str }), + }); + const s = { x: Column.Type.int }; + const view = Table.view(t, s, [1]); + expect(view._columns).toEqual(['x']); + expect(view._rowCount).toEqual(1); + expect(view.x.toArray()).toEqual([-1]); + }); + it('sort', () => { const t = Table.ofColumns<typeof schema>({ x: Column.ofArray({ array: [10, -1], type: Column.Type.int }), diff --git a/src/mol-base/collections/column.ts b/src/mol-base/collections/column.ts index a66c3a2e4..887a9b60b 100644 --- a/src/mol-base/collections/column.ts +++ b/src/mol-base/collections/column.ts @@ -87,7 +87,7 @@ namespace Column { } export function view<T>(column: Column<T>, indices: ArrayLike<number>, checkIndentity = true) { - return columnPermutation(column, indices, checkIndentity); + return columnView(column, indices, checkIndentity); } /** A map of the 1st occurence of each value. */ @@ -192,7 +192,7 @@ function arrayColumn<T extends Column.Type>({ array, type, valueKind }: Column.A function windowColumn<T>(column: Column<T>, start: number, end: number) { if (!column.isDefined) return Column.Undefined(end - start, column['@type']); - if (column['@array'] && ColumnHelpers.isTypedArray(column['@array'])) return windowTyped(column, start, end); + if (!!column['@array'] && ColumnHelpers.isTypedArray(column['@array'])) return windowTyped(column, start, end); return windowFull(column, start, end); } @@ -229,10 +229,10 @@ function isIdentity(map: ArrayLike<number>, rowCount: number) { return true; } -function columnPermutation<T>(c: Column<T>, map: ArrayLike<number>, checkIdentity: boolean): Column<T> { +function columnView<T>(c: Column<T>, map: ArrayLike<number>, checkIdentity: boolean): Column<T> { if (!c.isDefined) return c; if (checkIdentity && isIdentity(map, c.rowCount)) return c; - if (!c['@array']) return arrayView(c, map); + if (!!c['@array']) return arrayView(c, map); return viewFull(c, map); } diff --git a/src/mol-base/collections/integer/_spec/segmentation.spec.ts b/src/mol-base/collections/integer/_spec/segmentation.spec.ts index 2c08ce929..b3a232026 100644 --- a/src/mol-base/collections/integer/_spec/segmentation.spec.ts +++ b/src/mol-base/collections/integer/_spec/segmentation.spec.ts @@ -11,7 +11,7 @@ import Segmentation from '../segmentation' describe('segments', () => { const data = OrderedSet.ofSortedArray([4, 9, 10, 11, 14, 15, 16]); const segs = Segmentation.create([0, 4, 10, 12, 13, 15, 25]); - + it('size', () => expect(Segmentation.count(segs)).toBe(6)); it('project', () => { @@ -35,7 +35,9 @@ describe('segments', () => { const it = Segmentation.transientSegments(segs, data); const t = Object.create(null); + let count = 0; while (it.hasNext) { + count++; const s = it.move(); for (let j = s.start; j < s.end; j++) { const x = t[s.index]; @@ -44,7 +46,51 @@ describe('segments', () => { else x[x.length] = v; } } - + expect(count).toBe(4); expect(t).toEqual({ 1: [4, 9], 2: [10, 11], 4: [14], 5: [15, 16] }); }); + + it('iteration range', () => { + const segs = Segmentation.create([0, 2, 4]); + const dataRange = OrderedSet.ofBounds(0, 4); + + const it = Segmentation.transientSegments(segs, dataRange); + + const t = Object.create(null); + let count = 0; + while (it.hasNext) { + count++; + const s = it.move(); + for (let j = s.start; j < s.end; j++) { + const x = t[s.index]; + const v = OrderedSet.getAt(dataRange, j); + if (!x) t[s.index] = [v]; + else x[x.length] = v; + } + } + expect(count).toBe(2); + expect(t).toEqual({ 0: [0, 1], 1: [2, 3] }); + }); + + it('iteration range 1', () => { + const segs = Segmentation.create([0, 2, 4]); + const dataRange = OrderedSet.ofBounds(0, 4); + + const it = Segmentation.transientSegments(segs, dataRange, { index: 0, start: 2, end: 4 }); + + const t = Object.create(null); + let count = 0; + while (it.hasNext) { + count++; + const s = it.move(); + for (let j = s.start; j < s.end; j++) { + const x = t[s.index]; + const v = OrderedSet.getAt(dataRange, j); + if (!x) t[s.index] = [v]; + else x[x.length] = v; + } + } + expect(count).toBe(1); + expect(t).toEqual({ 1: [2, 3] }); + }); }); diff --git a/src/mol-base/collections/integer/impl/segmentation.ts b/src/mol-base/collections/integer/impl/segmentation.ts index 806ccde0d..9c8acd16f 100644 --- a/src/mol-base/collections/integer/impl/segmentation.ts +++ b/src/mol-base/collections/integer/impl/segmentation.ts @@ -54,12 +54,12 @@ class SegmentIterator implements Iterator<Segs.Segment> { move() { while (this.hasNext) { - if (!this.updateValue()) { - this.updateSegmentRange(); - } else { + if (this.updateValue()) { this.value.index = this.segmentStart++; - this.hasNext = this.segmentEnd > this.segmentStart; + this.hasNext = this.segmentEnd >= this.segmentStart && Interval.size(this.setRange) > 0; break; + } else { + this.updateSegmentRange(); } } return this.value; @@ -75,16 +75,24 @@ class SegmentIterator implements Iterator<Segs.Segment> { const setEnd = OrderedSet.findPredecessorIndexInRange(this.set, segmentEnd, this.setRange); this.value.start = Interval.start(this.setRange); this.value.end = setEnd; - this.setRange = Interval.ofBounds(setEnd, Interval.end(this.setRange)) + const rEnd = Interval.end(this.setRange); + this.setRange = Interval.ofBounds(setEnd, rEnd); return setEnd > this.value.start; } private updateSegmentRange() { + const sMin = Interval.min(this.setRange), sMax = Interval.max(this.setRange); + if (sMax < sMin) { + this.hasNext = false; + return; + } + const min = OrderedSet.getAt(this.set, Interval.min(this.setRange)); const max = OrderedSet.getAt(this.set, Interval.max(this.setRange)); + this.segmentStart = this.getSegmentIndex(min); this.segmentEnd = this.getSegmentIndex(max) + 1; - this.hasNext = this.segmentEnd > this.segmentStart; + this.hasNext = this.segmentEnd >= this.segmentStart && Interval.size(this.setRange) > 0; } constructor(private segments: OrderedSet, private set: OrderedSet, inputRange: Interval) { diff --git a/src/mol-base/collections/table.ts b/src/mol-base/collections/table.ts index fad818853..fa2386f05 100644 --- a/src/mol-base/collections/table.ts +++ b/src/mol-base/collections/table.ts @@ -58,12 +58,23 @@ namespace Table { const columns = Object.keys(schema); ret._rowCount = arrays[columns[0]].length; ret._columns = columns; - for (const k of Object.keys(schema)) { + for (const k of columns) { (ret as any)[k] = Column.ofArray({ array: arrays[k], type: schema[k] }) } return ret as R; } + export function view<S extends Schema, R extends Schema>(table: Table<S>, schema: R, view: ArrayLike<number>) { + const ret = Object.create(null); + const columns = Object.keys(schema); + ret._rowCount = view.length; + ret._columns = columns; + for (const k of columns) { + (ret as any)[k] = Column.view(table[k], view); + } + return ret as Table<R>; + } + /** Sort and return a new table */ export function sort<T extends Table<S>, S extends Schema>(table: T, cmp: (i: number, j: number) => number) { const indices = new Int32Array(table._rowCount); diff --git a/src/mol-data/model.ts b/src/mol-data/model.ts index a6177c117..f04f3dd12 100644 --- a/src/mol-data/model.ts +++ b/src/mol-data/model.ts @@ -7,7 +7,6 @@ import * as Formats from './model/formats' import HierarchyProperties from './model/properties/hierarchy' import ConformationProperties from './model/properties/conformation' -import Segmentation from '../mol-base/collections/integer/segmentation' /** * Interface to the "source data" of the molecule. @@ -30,11 +29,7 @@ interface Model extends Readonly<{ conformation: number }, - atomCount: number, - segments: Readonly<{ - chains: Segmentation, - residues: Segmentation - }> + atomCount: number }> { } export default Model \ No newline at end of file diff --git a/src/mol-data/model/builders/mmcif.ts b/src/mol-data/model/builders/mmcif.ts index b3dad75ba..0f8ca14da 100644 --- a/src/mol-data/model/builders/mmcif.ts +++ b/src/mol-data/model/builders/mmcif.ts @@ -7,10 +7,13 @@ import { RawData } from '../formats' import { Frame as mmCIF } from '../../../mol-io/reader/cif/schema/mmcif' import Model from '../../model' -//import Column from '../../../mol-base/collections/column' +import Column from '../../../mol-base/collections/column' +import Table from '../../../mol-base/collections/table' import Interval from '../../../mol-base/collections/integer/interval' import Segmentation from '../../../mol-base/collections/integer/segmentation' import uuId from '../../../mol-base/utils/uuid' +import * as Hierarchy from '../properties/hierarchy' +import findHierarchyKeys from '../utils/hierarchy-keys' function findModelBounds(data: mmCIF, startIndex: number) { const num = data.atom_site.pdbx_PDB_model_num; @@ -21,7 +24,7 @@ function findModelBounds(data: mmCIF, startIndex: number) { return Interval.ofBounds(startIndex, endIndex); } -function segmentOffsets(data: mmCIF, bounds: Interval) { +function findHierarchyOffsets(data: mmCIF, bounds: Interval) { const start = Interval.start(bounds), end = Interval.end(bounds); const residues = [start], chains = [start]; @@ -41,21 +44,42 @@ function segmentOffsets(data: mmCIF, bounds: Interval) { return { residues, chains }; } +function createHierarchyData(data: mmCIF, bounds: Interval, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): Hierarchy.HierarchyData { + const { atom_site } = data; + const start = Interval.start(bounds), end = Interval.end(bounds); + const atoms = Table.ofColumns<Hierarchy.AtomsSchema>({ + id: Column.window(atom_site.id, start, end), + type_symbol: Column.ofArray({ array: Column.mapToArray(Column.window(atom_site.type_symbol, start, end), Hierarchy.ElementSymbol), type: Column.Type.aliased<Hierarchy.ElementSymbol>(Column.Type.str) }), + label_atom_id: Column.window(atom_site.label_atom_id, start, end), + auth_atom_id: Column.window(atom_site.auth_atom_id, start, end), + label_alt_id: Column.window(atom_site.label_alt_id, start, end), + pdbx_formal_charge: Column.window(atom_site.pdbx_formal_charge, start, end), + occupancy: Column.window(atom_site.occupancy, start, end), + B_iso_or_equiv: Column.window(atom_site.B_iso_or_equiv, start, end), + }); + const residues = Table.view(atom_site, Hierarchy.ResiduesSchema, offsets.residues); + const chains = Table.view(atom_site, Hierarchy.ChainsSchema, offsets.chains); + return { atoms, residues, chains, entities: data.entity }; +} + function createModel(raw: RawData, data: mmCIF, bounds: Interval): Model { - const segments = segmentOffsets(data, bounds); + const hierarchyOffsets = findHierarchyOffsets(data, bounds); + + const hierarchySegments: Hierarchy.HierarchySegmentation = { + residueSegments: Segmentation.ofOffsets(hierarchyOffsets.residues, bounds), + chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, bounds), + } + const hierarchyData = createHierarchyData(data, bounds, hierarchyOffsets); + const hierarchyKeys = findHierarchyKeys(hierarchyData, hierarchySegments); + return { id: uuId(), sourceData: raw, - model_num: 0, // TODO: fix - //common: 0 as any, - hierarchy: 0 as any, + model_num: data.atom_site.pdbx_PDB_model_num.value(Interval.start(bounds)), + hierarchy: { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments }, conformation: 0 as any, version: { data: 0, conformation: 0 }, - atomCount: Interval.size(bounds), - segments: { - residues: Segmentation.ofOffsets(segments.residues, bounds), - chains: Segmentation.ofOffsets(segments.chains, bounds), - } + atomCount: Interval.size(bounds) }; } diff --git a/src/mol-data/model/properties/hierarchy.ts b/src/mol-data/model/properties/hierarchy.ts index 04cb70e1a..9f13f137e 100644 --- a/src/mol-data/model/properties/hierarchy.ts +++ b/src/mol-data/model/properties/hierarchy.ts @@ -16,6 +16,7 @@ export function ElementSymbol(s: string): ElementSymbol { } export const AtomsSchema = { + id: mmCIF.atom_site.id, type_symbol: Column.Type.aliased<ElementSymbol>(mmCIF.atom_site.type_symbol), label_atom_id: mmCIF.atom_site.label_atom_id, auth_atom_id: mmCIF.atom_site.auth_atom_id, @@ -25,6 +26,7 @@ export const AtomsSchema = { B_iso_or_equiv: mmCIF.atom_site.B_iso_or_equiv }; +export type AtomsSchema = typeof AtomsSchema export interface Atoms extends Table<typeof AtomsSchema> { } export const ResiduesSchema = { @@ -59,8 +61,8 @@ export interface HierarchyData { } export interface HierarchySegmentation { - residues: Segmentation, - chains: Segmentation + residueSegments: Segmentation, + chainSegments: Segmentation } export interface HierarchyKeys { @@ -69,20 +71,19 @@ export interface HierarchyKeys { isMonotonous: boolean, // assign a key to each residue index. - residue: ArrayLike<number>, + residueKey: ArrayLike<number>, // assign a key to each chain index - chain: ArrayLike<number>, + chainKey: ArrayLike<number>, // assigne a key to each chain index // also index to the Entities table. - entity: ArrayLike<number>, + entityKey: ArrayLike<number>, - findEntity(id: string): number, - findChain(entityId: string, auth_asym_id: string): number, - findResidue(entityId: string, auth_asym_id: string, auth_comp_id: string, auth_seq_id: number, pdbx_PDB_ins_code: string): number + findEntityKey(id: string): number, + findChainKey(entityId: string, label_asym_id: string): number, + findResidueKey(entityId: string, label_asym_id: string, label_comp_id: string, auth_seq_id: number, pdbx_PDB_ins_code: string): number } -export interface Hierarchy extends HierarchyData { - keys: HierarchyKeys -} +type _Hierarchy = HierarchyData & HierarchySegmentation & HierarchyKeys +export interface Hierarchy extends _Hierarchy { } export default Hierarchy \ No newline at end of file diff --git a/src/mol-data/model/utils/hierarchy-keys.ts b/src/mol-data/model/utils/hierarchy-keys.ts index 2558637be..806b024fb 100644 --- a/src/mol-data/model/utils/hierarchy-keys.ts +++ b/src/mol-data/model/utils/hierarchy-keys.ts @@ -28,14 +28,14 @@ function getElementSubstructureKeyMap(map: Map<number, Map<string, number>>, key } function createLookUp(entity: Map<string, number>, chain: Map<number, Map<string, number>>, residue: Map<number, Map<string, number>>) { - const findEntity: HierarchyKeys['findEntity'] = (id) => entity.has(id) ? entity.get(id)! : -1; - const findChain: HierarchyKeys['findChain'] = (e, c) => { + const findEntityKey: HierarchyKeys['findEntityKey'] = (id) => entity.has(id) ? entity.get(id)! : -1; + const findChainKey: HierarchyKeys['findChainKey'] = (e, c) => { if (!entity.has(e)) return -1; const cm = chain.get(entity.get(e)!)!; if (!cm.has(c)) return -1; return cm.get(c)!; } - const findResidue: HierarchyKeys['findResidue'] = (e, c, name, seq, ins) => { + const findResidueKey: HierarchyKeys['findResidueKey'] = (e, c, name, seq, ins) => { if (!entity.has(e)) return -1; const cm = chain.get(entity.get(e)!)!; if (!cm.has(c)) return -1; @@ -44,12 +44,14 @@ function createLookUp(entity: Map<string, number>, chain: Map<number, Map<string if (!rm.has(id)) return -1; return rm.get(id)!; } - return { findEntity, findChain, findResidue }; + return { findEntityKey, findChainKey, findResidueKey }; } function isMonotonous(xs: ArrayLike<number>) { for (let i = 1, _i = xs.length; i < _i; i++) { - if (xs[i] < xs[i - 1]) return false; + if (xs[i] < xs[i - 1]) { + return false; + } } return true; } @@ -65,40 +67,44 @@ function create(data: HierarchyData, segments: HierarchySegmentation): Hierarchy const chainKey = new Int32Array(chains._rowCount); const entityKey = new Int32Array(chains._rowCount); - const { auth_comp_id, auth_seq_id, pdbx_PDB_ins_code } = data.residues; - const { label_entity_id, auth_asym_id } = data.chains; + const { label_comp_id, auth_seq_id, pdbx_PDB_ins_code } = data.residues; + const { label_entity_id, label_asym_id } = data.chains; + + const atomSet = Interval.ofBounds(0, data.atoms._rowCount); + + const chainsIt = Segmentation.transientSegments(segments.chainSegments,atomSet); - const chainsIt = Segmentation.transientSegments(segments.chains, Interval.ofBounds(0, data.atoms._rowCount)); while (chainsIt.hasNext) { const chainSegment = chainsIt.move(); - const residuesIt = Segmentation.transientSegments(segments.residues, Interval.ofBounds(chainSegment.start, chainSegment.end)); const cI = chainSegment.index; const eKey = entityMap.get(label_entity_id.value(cI)) || 0; const chainMap = getElementSubstructureKeyMap(chainMaps, eKey); - const cKey = getElementKey(chainMap, auth_asym_id.value(cI), chainCounter); + const cKey = getElementKey(chainMap, label_asym_id.value(cI), chainCounter); chainKey[cI] = cKey; entityKey[cI] = eKey; const residueMap = getElementSubstructureKeyMap(residueMaps, cKey); + const residuesIt = Segmentation.transientSegments(segments.residueSegments, atomSet, chainSegment); while (residuesIt.hasNext) { - const rI = residuesIt.move().index; - const residueId = getResidueId(auth_comp_id.value(rI), auth_seq_id.value(rI), pdbx_PDB_ins_code.value(rI)); + const residueSegment = residuesIt.move(); + const rI = residueSegment.index; + const residueId = getResidueId(label_comp_id.value(rI), auth_seq_id.value(rI), pdbx_PDB_ins_code.value(rI)); residueKey[rI] = getElementKey(residueMap, residueId, residueCounter); } } - const { findEntity, findChain, findResidue } = createLookUp(entityMap, chainMaps, residueMaps); + const { findEntityKey, findChainKey, findResidueKey } = createLookUp(entityMap, chainMaps, residueMaps); return { isMonotonous: isMonotonous(entityKey) && isMonotonous(chainKey) && isMonotonous(residueKey), - residue: residueKey, - chain: chainKey, - entity: entityKey, - findEntity, - findChain, - findResidue + residueKey: residueKey, + chainKey: chainKey, + entityKey: entityKey, + findEntityKey, + findChainKey, + findResidueKey }; } diff --git a/src/mol-data/structure/base.ts b/src/mol-data/structure/base.ts index 81a18c790..1ef19f3db 100644 --- a/src/mol-data/structure/base.ts +++ b/src/mol-data/structure/base.ts @@ -24,7 +24,7 @@ class Builder { export const Empty: Structure = { units: {}, atoms: AtomSet.Empty }; export function ofModel(model: Model): Structure { - const chains = model.segments.chains; + const chains = model.hierarchy.chainSegments; const builder = new Builder(); for (let c = 0; c < chains.count; c++) { diff --git a/src/script.ts b/src/script.ts index dbe952e16..c91664f98 100644 --- a/src/script.ts +++ b/src/script.ts @@ -117,7 +117,15 @@ async function runCIF(input: string | Uint8Array) { console.time('createModels'); const models = buildModels(mmcif); console.timeEnd('createModels'); - console.log(models[0].id); + + console.log(models[0].hierarchy.isMonotonous); + console.log(models[0].hierarchy.atoms.type_symbol.value(0)); + console.log(models[0].hierarchy.atoms.id.value(1)); + console.log(models[0].hierarchy.residues.auth_comp_id.value(0)); + console.log(models[0].hierarchy.residues.auth_comp_id.value(1)); + console.log(models[0].hierarchy.chains.auth_asym_id.value(0)); + console.log(models[0].hierarchy.chains.auth_asym_id.value(1)); + console.log(models[0].hierarchy.chains.label_asym_id.value(1)); // const schema = await _dic() // if (schema) { -- GitLab