diff --git a/src/apps/combine-mmcif/index.ts b/src/apps/combine-mmcif/index.ts index d43d85917587b0559024a6ed498968e697f5db03..dd13d652deb5bebb6cbc153ed86bdf8d061ac8cd 100644 --- a/src/apps/combine-mmcif/index.ts +++ b/src/apps/combine-mmcif/index.ts @@ -208,8 +208,7 @@ export function getBirdBonds(mmcif: mmCIF_Database) { } export function getCcdBonds(mmcif: mmCIF_Database) { - const bonds: PartialStructConnRow[] = [] - + // const bonds: PartialStructConnRow[] = [] } async function run(pdb: string, out?: string) { diff --git a/src/apps/structure-info/index.ts b/src/apps/structure-info/index.ts new file mode 100644 index 0000000000000000000000000000000000000000..c52e4006e47f6a6b99679273e2bb77e7b95197b2 --- /dev/null +++ b/src/apps/structure-info/index.ts @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import * as argparse from 'argparse' +import fetch from 'node-fetch' +require('util.promisify').shim(); + +import CIF from 'mol-io/reader/cif' +import Computation from 'mol-util/computation' +import { Model, Structure } from 'mol-model/structure' + +function showProgress(tag: string, p: Computation.Progress) { + console.log(`[${tag}] ${p.message} ${p.isIndeterminate ? '' : (p.current / p.max * 100).toFixed(2) + '% '}(${p.elapsedMs | 0}ms)`) +} + +async function parseCif(data: string|Uint8Array) { + const comp = CIF.parse(data) + const ctx = Computation.observable({ + updateRateMs: 250, + observer: p => showProgress(`cif parser ${typeof data === 'string' ? 'string' : 'binary'}`, p) + }); + console.time('parse cif') + const parsed = await comp(ctx); + console.timeEnd('parse cif') + if (parsed.isError) throw parsed; + return parsed +} + +export async function getPdb(pdb: string) { + console.log(`downloading ${pdb}...`) + const data = await fetch(`https://files.rcsb.org/download/${pdb}.cif`) + console.log(`done downloading ${pdb}`) + + const parsed = await parseCif(await data.text()) + return CIF.schema.mmCIF(parsed.result.blocks[0]) +} + +async function run(pdb: string, out?: string) { + const mmcif = await getPdb(pdb) + + const models = Model.create({ kind: 'mmCIF', data: mmcif }); + const structure = Structure.ofModel(models[0]) + + console.log(structure) + console.log(Model.bonds(models[0])) +} + +const parser = new argparse.ArgumentParser({ + addHelp: true, + description: 'Print info about a structure' +}); +parser.addArgument([ '--pdb', '-p' ], { + help: 'Pdb entry id' +}); +interface Args { + pdb: string +} +const args: Args = parser.parseArgs(); + +run(args.pdb) diff --git a/src/mol-math/geometry/grid-lookup.ts b/src/mol-math/geometry/grid-lookup.ts index f7723b745c8d2abec8f865fcd6f3f5db2d370353..fa703c242f34fbfefb2cb120559fde5f445dbfec 100644 --- a/src/mol-math/geometry/grid-lookup.ts +++ b/src/mol-math/geometry/grid-lookup.ts @@ -1,2 +1,291 @@ +/* + * Copyright (c) 2017 MolQL contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + // TODO: 3d grid lookup (use single cell for small sets), make bounding sphere part of the structure -// TODO: assign radius to points? \ No newline at end of file +// TODO: assign radius to points? + +/** + * A "masked" 3D spatial lookup structure. + */ + +import Mask from 'mol-util/mask' + +export type FindFunc = (x: number, y: number, z: number, radius: number) => Result +export type CheckFunc = (x: number, y: number, z: number, radius: number) => boolean + +export interface Result { + readonly count: number, + readonly indices: number[], + readonly squaredDistances: number[] +} + +export interface Positions { x: ArrayLike<number>, y: ArrayLike<number>, z: ArrayLike<number> } + +interface GridLookup { find: (mask: Mask) => FindFunc, check: (mask: Mask) => CheckFunc } + +function GridLookup(positions: Positions): GridLookup { + const tree = build(createInputData(positions)); + return { + find(mask) { + const ctx = QueryContext.create(tree, mask, false); + return function (x: number, y: number, z: number, radius: number) { + QueryContext.update(ctx, x, y, z, radius); + nearest(ctx); + return ctx.buffer; + } + }, + check(mask) { + const ctx = QueryContext.create(tree, mask, true); + return function (x: number, y: number, z: number, radius: number) { + QueryContext.update(ctx, x, y, z, radius); + return nearest(ctx); + } + } + } +} + +interface InputData { + bounds: Box3D, + count: number, + positions: Positions +} + +interface Box3D { + min: number[], + max: number[] +} + +namespace Box3D { + export function createInfinite(): Box3D { + return { + min: [Number.MAX_VALUE, Number.MAX_VALUE, Number.MAX_VALUE], + max: [-Number.MAX_VALUE, -Number.MAX_VALUE, -Number.MAX_VALUE] + } + } +} + +/** +* Query context. Handles the actual querying. +*/ +interface QueryContext<T> { + structure: T, + pivot: number[], + radius: number, + radiusSq: number, + buffer: QueryContext.Buffer, + mask: Mask, + isCheck: boolean +} + +namespace QueryContext { + export interface Buffer extends Result { + count: number; + indices: any[]; + squaredDistances: number[]; + } + + export function add<T>(ctx: QueryContext<T>, distSq: number, index: number) { + const buffer = ctx.buffer; + buffer.squaredDistances[buffer.count] = distSq; + buffer.indices[buffer.count++] = index; + } + + function resetBuffer(buffer: Buffer) { buffer.count = 0; } + + function createBuffer(): Buffer { + return { + indices: [], + count: 0, + squaredDistances: [] + } + } + + /** + * Query the tree and store the result to this.buffer. Overwrites the old result. + */ + export function update<T>(ctx: QueryContext<T>, x: number, y: number, z: number, radius: number) { + ctx.pivot[0] = x; + ctx.pivot[1] = y; + ctx.pivot[2] = z; + ctx.radius = radius; + ctx.radiusSq = radius * radius; + resetBuffer(ctx.buffer); + } + + export function create<T>(structure: T, mask: Mask, isCheck: boolean): QueryContext<T> { + return { + structure, + buffer: createBuffer(), + pivot: [0.1, 0.1, 0.1], + radius: 1.1, + radiusSq: 1.1 * 1.1, + mask, + isCheck + } + } +} + +function createInputData(positions: { x: ArrayLike<number>, y: ArrayLike<number>, z: ArrayLike<number> }): InputData { + const { x, y, z } = positions; + const bounds = Box3D.createInfinite(); + const count = x.length; + const { min, max } = bounds; + + for (let i = 0; i < count; i++) { + min[0] = Math.min(x[i], min[0]); + min[1] = Math.min(y[i], min[1]); + min[2] = Math.min(z[i], min[2]); + max[0] = Math.max(x[i], max[0]); + max[1] = Math.max(y[i], max[1]); + max[2] = Math.max(z[i], max[2]); + } + + return { positions, bounds, count }; +} + +/** + * Adapted from https://github.com/arose/ngl + * MIT License Copyright (C) 2014+ Alexander Rose + */ + +interface SpatialHash { + size: number[], + min: number[], + grid: Uint32Array, + bucketOffset: Uint32Array, + bucketCounts: Int32Array, + bucketArray: Int32Array, + positions: Positions +} + +interface State { + size: number[], + positions: Positions, + bounds: Box3D, + count: number +} + +const enum Constants { Exp = 3 } + +function nearest(ctx: QueryContext<SpatialHash>): boolean { + const { min: [minX, minY, minZ], size: [sX, sY, sZ], bucketOffset, bucketCounts, bucketArray, grid, positions: { x: px, y: py, z: pz } } = ctx.structure; + const { radius: r, radiusSq: rSq, pivot: [x, y, z], isCheck, mask } = ctx; + + const loX = Math.max(0, (x - r - minX) >> Constants.Exp); + const loY = Math.max(0, (y - r - minY) >> Constants.Exp); + const loZ = Math.max(0, (z - r - minZ) >> Constants.Exp); + + const hiX = Math.min(sX, (x + r - minX) >> Constants.Exp); + const hiY = Math.min(sY, (y + r - minY) >> Constants.Exp); + const hiZ = Math.min(sZ, (z + r - minZ) >> Constants.Exp); + + for (let ix = loX; ix <= hiX; ix++) { + for (let iy = loY; iy <= hiY; iy++) { + for (let iz = loZ; iz <= hiZ; iz++) { + const bucketIdx = grid[(((ix * sY) + iy) * sZ) + iz]; + + if (bucketIdx > 0) { + const k = bucketIdx - 1; + const offset = bucketOffset[k]; + const count = bucketCounts[k]; + const end = offset + count; + + for (let i = offset; i < end; i++) { + const idx = bucketArray[i]; + if (!mask.has(idx)) continue; + + const dx = px[idx] - x; + const dy = py[idx] - y; + const dz = pz[idx] - z; + const distSq = dx * dx + dy * dy + dz * dz; + + if (distSq <= rSq) { + if (isCheck) return true; + QueryContext.add(ctx, distSq, idx) + } + } + } + } + } + } + return ctx.buffer.count > 0; +} + +function _build(state: State): SpatialHash { + const { bounds, size: [sX, sY, sZ], positions: { x: px, y: py, z: pz }, count } = state; + const n = sX * sY * sZ; + const { min: [minX, minY, minZ] } = bounds; + + let bucketCount = 0; + const grid = new Uint32Array(n); + const bucketIndex = new Int32Array(count); + for (let i = 0; i < count; i++) { + const x = (px[i] - minX) >> Constants.Exp; + const y = (py[i] - minY) >> Constants.Exp; + const z = (pz[i] - minZ) >> Constants.Exp; + const idx = (((x * sY) + y) * sZ) + z; + if ((grid[idx] += 1) === 1) { + bucketCount += 1 + } + bucketIndex[i] = idx; + } + + const bucketCounts = new Int32Array(bucketCount); + for (let i = 0, j = 0; i < n; i++) { + const c = grid[i]; + if (c > 0) { + grid[i] = j + 1; + bucketCounts[j] = c; + j += 1; + } + } + + const bucketOffset = new Uint32Array(count); + for (let i = 1; i < count; ++i) { + bucketOffset[i] += bucketOffset[i - 1] + bucketCounts[i - 1]; + } + + const bucketFill = new Int32Array(bucketCount); + const bucketArray = new Int32Array(count); + for (let i = 0; i < count; i++) { + const bucketIdx = grid[bucketIndex[i]] + if (bucketIdx > 0) { + const k = bucketIdx - 1; + bucketArray[bucketOffset[k] + bucketFill[k]] = i; + bucketFill[k] += 1; + } + } + + return { + size: state.size, + bucketArray, + bucketCounts, + bucketOffset, + grid, + min: state.bounds.min, + positions: state.positions + } +} + +function build({ positions, bounds, count }: InputData): SpatialHash { + const size = [ + ((bounds.max[0] - bounds.min[0]) >> Constants.Exp) + 1, + ((bounds.max[1] - bounds.min[1]) >> Constants.Exp) + 1, + ((bounds.max[2] - bounds.min[2]) >> Constants.Exp) + 1 + ]; + + const state: State = { + size, + positions, + bounds, + count + } + + return _build(state); +} + +export default GridLookup; \ No newline at end of file diff --git a/src/mol-model/structure/model/formats/mmcif/bonds.ts b/src/mol-model/structure/model/formats/mmcif/bonds.ts new file mode 100644 index 0000000000000000000000000000000000000000..2a123a82d881f882362c9aaf27f2f001cb957964 --- /dev/null +++ b/src/mol-model/structure/model/formats/mmcif/bonds.ts @@ -0,0 +1,258 @@ +/** + * Copyright (c) 2017-2018 MolQL contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import Model from '../../model' +import Bonds from '../../properties/bonds' +import { BondType } from '../../types' +import { findEntityIdByAsymId, findAtomIndexByLabelName } from './util' +import { Column } from 'mol-data/db' + +export class StructConn implements Bonds.StructConn { + private _residuePairIndex: Map<string, StructConn.Entry[]> | undefined = void 0; + private _atomIndex: Map<number, StructConn.Entry[]> | undefined = void 0; + + private static _resKey(rA: number, rB: number) { + if (rA < rB) return `${rA}-${rB}`; + return `${rB}-${rA}`; + } + + private getResiduePairIndex() { + if (this._residuePairIndex) return this._residuePairIndex; + this._residuePairIndex = new Map(); + for (const e of this.entries) { + const ps = e.partners; + const l = ps.length; + for (let i = 0; i < l - 1; i++) { + for (let j = i + i; j < l; j++) { + const key = StructConn._resKey(ps[i].residueIndex, ps[j].residueIndex); + if (this._residuePairIndex.has(key)) { + this._residuePairIndex.get(key)!.push(e); + } else { + this._residuePairIndex.set(key, [e]); + } + } + } + } + return this._residuePairIndex; + } + + private getAtomIndex() { + if (this._atomIndex) return this._atomIndex; + this._atomIndex = new Map(); + for (const e of this.entries) { + for (const p of e.partners) { + const key = p.atomIndex; + if (this._atomIndex.has(key)) { + this._atomIndex.get(key)!.push(e); + } else { + this._atomIndex.set(key, [e]); + } + } + } + return this._atomIndex; + } + + private static _emptyEntry = []; + + getResidueEntries(residueAIndex: number, residueBIndex: number): ReadonlyArray<StructConn.Entry> { + return this.getResiduePairIndex().get(StructConn._resKey(residueAIndex, residueBIndex)) || StructConn._emptyEntry; + } + + getAtomEntries(atomIndex: number): ReadonlyArray<StructConn.Entry> { + return this.getAtomIndex().get(atomIndex) || StructConn._emptyEntry; + } + + constructor(public entries: StructConn.Entry[]) { + } +} + +export namespace StructConn { + export interface Entry extends Bonds.StructConnEntry { + distance: number, + order: number, + flags: number, + partners: { residueIndex: number, atomIndex: number, symmetry: string }[] + } + + type StructConnType = + | 'covale' + | 'covale_base' + | 'covale_phosphate' + | 'covale_sugar' + | 'disulf' + | 'hydrog' + | 'metalc' + | 'mismat' + | 'modres' + | 'saltbr' + + export function create(model: Model): StructConn | undefined { + if (model.sourceData.kind !== 'mmCIF') return + const { struct_conn } = model.sourceData.data; + if (!struct_conn._rowCount) return void 0; + + const { conn_type_id, pdbx_dist_value, pdbx_value_order } = struct_conn; + const p1 = { + label_asym_id: struct_conn.ptnr1_label_asym_id, + label_comp_id: struct_conn.ptnr1_label_comp_id, + label_seq_id: struct_conn.ptnr1_label_seq_id, + label_atom_id: struct_conn.ptnr1_label_atom_id, + label_alt_id: struct_conn.pdbx_ptnr1_label_alt_id, + ins_code: struct_conn.pdbx_ptnr1_PDB_ins_code, + symmetry: struct_conn.ptnr1_symmetry + }; + const p2: typeof p1 = { + label_asym_id: struct_conn.ptnr2_label_asym_id, + label_comp_id: struct_conn.ptnr2_label_comp_id, + label_seq_id: struct_conn.ptnr2_label_seq_id, + label_atom_id: struct_conn.ptnr2_label_atom_id, + label_alt_id: struct_conn.pdbx_ptnr2_label_alt_id, + ins_code: struct_conn.pdbx_ptnr2_PDB_ins_code, + symmetry: struct_conn.ptnr2_symmetry + }; + + const _p = (row: number, ps: typeof p1) => { + if (ps.label_asym_id.valueKind(row) !== Column.ValueKind.Present) return void 0; + const asymId = ps.label_asym_id.value(row) + const residueIndex = model.hierarchy.findResidueKey( + findEntityIdByAsymId(model, asymId), + ps.label_comp_id.value(row), + asymId, + ps.label_seq_id.value(row), + ps.ins_code.value(row) + ); + if (residueIndex < 0) return void 0; + const atomName = ps.label_atom_id.value(row); + // turns out "mismat" records might not have atom name value + if (!atomName) return void 0; + const atomIndex = findAtomIndexByLabelName(model, residueIndex, atomName, ps.label_alt_id.value(row)); + if (atomIndex < 0) return void 0; + return { residueIndex, atomIndex, symmetry: ps.symmetry.value(row) || '1_555' }; + } + + const _ps = (row: number) => { + const ret = []; + let p = _p(row, p1); + if (p) ret.push(p); + p = _p(row, p2); + if (p) ret.push(p); + return ret; + } + + const entries: StructConn.Entry[] = []; + for (let i = 0; i < struct_conn._rowCount; i++) { + const partners = _ps(i); + if (partners.length < 2) continue; + + const type = conn_type_id.value(i)! as StructConnType; + const orderType = (pdbx_value_order.value(i) || '').toLowerCase(); + let flags = BondType.Flag.None; + let order = 1; + + switch (orderType) { + case 'sing': order = 1; break; + case 'doub': order = 2; break; + case 'trip': order = 3; break; + case 'quad': order = 4; break; + } + + switch (type) { + case 'covale': + case 'covale_base': + case 'covale_phosphate': + case 'covale_sugar': + case 'modres': + flags = BondType.Flag.Covalent; + break; + case 'disulf': flags = BondType.Flag.Covalent | BondType.Flag.Sulfide; break; + case 'hydrog': flags = BondType.Flag.Hydrogen; break; + case 'metalc': flags = BondType.Flag.MetallicCoordination; break; + case 'saltbr': flags = BondType.Flag.Ion; break; + } + + entries.push({ flags, order, distance: pdbx_dist_value.value(i), partners }); + } + + return new StructConn(entries); + } +} + +export class ComponentBondInfo implements Bonds.ComponentBondInfo { + entries: Map<string, ComponentBondInfo.Entry> = new Map(); + + newEntry(id: string) { + let e = new ComponentBondInfo.Entry(id); + this.entries.set(id, e); + return e; + } +} + +export namespace ComponentBondInfo { + export class Entry implements Bonds.ComponentBondInfoEntry { + map: Map<string, Map<string, { order: number, flags: number }>> = new Map(); + + add(a: string, b: string, order: number, flags: number, swap = true) { + let e = this.map.get(a); + if (e !== void 0) { + let f = e.get(b); + if (f === void 0) { + e.set(b, { order, flags }); + } + } else { + let map = new Map<string, { order: number, flags: number }>(); + map.set(b, { order, flags }); + this.map.set(a, map); + } + + if (swap) this.add(b, a, order, flags, false); + } + + constructor(public id: string) { + } + } + + export function create(model: Model): ComponentBondInfo | undefined { + if (model.sourceData.kind !== 'mmCIF') return + const { chem_comp_bond } = model.sourceData.data; + if (!chem_comp_bond._rowCount) return void 0; + + let info = new ComponentBondInfo(); + + const { comp_id, atom_id_1, atom_id_2, value_order, pdbx_aromatic_flag, _rowCount: rowCount } = chem_comp_bond; + + let entry = info.newEntry(comp_id.value(0)!); + + for (let i = 0; i < rowCount; i++) { + + const id = comp_id.value(i)!; + const nameA = atom_id_1.value(i)!; + const nameB = atom_id_2.value(i)!; + const order = value_order.value(i)!; + const aromatic = pdbx_aromatic_flag.value(i) === 'Y'; + + if (entry.id !== id) { + entry = info.newEntry(id); + } + + let flags: number = BondType.Flag.Covalent; + let ord = 1; + if (aromatic) flags |= BondType.Flag.Aromatic; + switch (order.toLowerCase()) { + case 'doub': + case 'delo': + ord = 2; + break; + case 'trip': ord = 3; break; + case 'quad': ord = 4; break; + } + + entry.add(nameA, nameB, ord, flags); + } + + return info; + } +} \ No newline at end of file diff --git a/src/mol-model/structure/model/formats/mmcif/util.ts b/src/mol-model/structure/model/formats/mmcif/util.ts new file mode 100644 index 0000000000000000000000000000000000000000..a03d511f54dd47643b4cc8b2189fdb532e994e7e --- /dev/null +++ b/src/mol-model/structure/model/formats/mmcif/util.ts @@ -0,0 +1,26 @@ +/** + * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import Model from '../../model' + +export function findEntityIdByAsymId(model: Model, asymId: string) { + if (model.sourceData.kind !== 'mmCIF') return '' + const { struct_asym } = model.sourceData.data + for (let i = 0, n = struct_asym._rowCount; i < n; ++i) { + if (struct_asym.id.value(i) === asymId) return struct_asym.entity_id.value(i) + } + return '' +} + +export function findAtomIndexByLabelName(model: Model, residueIndex: number, atomName: string, altLoc: string | null) { + const { segmentMap, segments } = model.hierarchy.residueSegments + const idx = segmentMap[residueIndex] + const { label_atom_id, label_alt_id } = model.hierarchy.atoms; + for (let i = segments[idx], n = segments[idx + 1]; i <= n; ++i) { + if (label_atom_id.value(i) === atomName && (!altLoc || label_alt_id.value(i) === altLoc)) return i; + } + return -1; +} \ No newline at end of file diff --git a/src/mol-model/structure/model/model.ts b/src/mol-model/structure/model/model.ts index 7c9f435aa36331da1b4154d917604bf972b498c5..00d105267df695aa8267a55617966725059f840d 100644 --- a/src/mol-model/structure/model/model.ts +++ b/src/mol-model/structure/model/model.ts @@ -5,10 +5,14 @@ */ import UUID from 'mol-util/uuid' +import GridLookup from 'mol-math/geometry/grid-lookup' import Format from './format' import Hierarchy from './properties/hierarchy' import Conformation from './properties/conformation' import Symmetry from './properties/symmetry' +import Bonds from './properties/bonds' + +import computeBonds from './utils/compute-bonds' import from_gro from './formats/gro' import from_mmCIF from './formats/mmcif' @@ -30,8 +34,11 @@ interface Model extends Readonly<{ conformation: Conformation, symmetry: Symmetry, - atomCount: number -}> { } + atomCount: number, +}> { + '@spatialLookup'?: GridLookup, + '@bonds'?: Bonds +} { } namespace Model { export function create(format: Format) { @@ -40,6 +47,18 @@ namespace Model { case 'mmCIF': return from_mmCIF(format); } } + export function spatialLookup(model: Model): GridLookup { + if (model['@spatialLookup']) return model['@spatialLookup']!; + const lookup = GridLookup(model.conformation); + model['@spatialLookup'] = lookup; + return lookup; + } + export function bonds(model: Model): Bonds { + if (model['@bonds']) return model['@bonds']!; + const bonds = computeBonds(model); + model['@bonds'] = bonds; + return bonds; + } } export default Model \ No newline at end of file diff --git a/src/mol-model/structure/model/properties/bonds.ts b/src/mol-model/structure/model/properties/bonds.ts new file mode 100644 index 0000000000000000000000000000000000000000..706202d4d4693a3df4d235969a5e8f7a8fafeaae --- /dev/null +++ b/src/mol-model/structure/model/properties/bonds.ts @@ -0,0 +1,51 @@ +/** + * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { BondType } from '../types' + +interface Bonds { + /** + * Where bonds for atom A start and end. + * Start offset at idx, end at idx + 1 + */ + offset: ArrayLike<number>, + neighbor: ArrayLike<number>, + + order: ArrayLike<number>, + flags: ArrayLike<BondType.Flag>, + + count: number +} + +namespace Bonds { + export function createEmpty(): Bonds { + return { offset: [], neighbor: [], order: [], flags: [], count: 0 } + } + export function isCovalent(flags: number) { + return (flags & BondType.Flag.Covalent) !== 0; + } + export interface StructConnEntry { + flags: BondType.Flag, + order: number, + distance: number, + partners: { residueIndex: number, atomIndex: number, symmetry: string }[] + } + export interface StructConn { + getResidueEntries(residueAIndex: number, residueBIndex: number): ReadonlyArray<StructConnEntry> + getAtomEntries(atomIndex: number): ReadonlyArray<StructConnEntry> + } + export interface ComponentBondInfoEntry { + map: Map<string, Map<string, { order: number, flags: number }>> + } + export interface ComponentBondInfo { + entries: Map<string, ComponentBondInfoEntry> + } +} + + + +export default Bonds \ No newline at end of file diff --git a/src/mol-model/structure/model/properties/seconday-structure.ts b/src/mol-model/structure/model/properties/seconday-structure.ts new file mode 100644 index 0000000000000000000000000000000000000000..04eef7fb927781a87617b89a5d6b008e483ff527 --- /dev/null +++ b/src/mol-model/structure/model/properties/seconday-structure.ts @@ -0,0 +1,14 @@ +/** + * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +/** Secondary structure "indexed" by residues. */ +export interface SecondaryStructure { + type: number[], + index: number[], + flags: number[], + /** unique value for each "element". This is because single sheet is speficied by multiple records. */ + key: number[] +} \ No newline at end of file diff --git a/src/mol-model/structure/model/types.ts b/src/mol-model/structure/model/types.ts index d105c1f81c8e0e59ea64430ae12e815bc71a2a28..caa1ee2c85af1c68e2bdca9a9e0fdd90de87b23b 100644 --- a/src/mol-model/structure/model/types.ts +++ b/src/mol-model/structure/model/types.ts @@ -337,4 +337,20 @@ export const VdwRadii = { 'LV': 2.0, 'UUH': 2.0 } -export const DefaultVdwRadius = 2.0 \ No newline at end of file +export const DefaultVdwRadius = 2.0 + +export interface BondType extends BitFlags<BondType.Flag> { } +export namespace BondType { + export const is: (b: BondType, f: Flag) => boolean = BitFlags.has + export const enum Flag { + None = 0x0, + Covalent = 0x1, + MetallicCoordination = 0x2, + Hydrogen = 0x4, + Ion = 0x8, + Sulfide = 0x10, + Aromatic = 0x20, + Computed = 0x40 + // currently at most 16 flags are supported!! + } +} \ No newline at end of file diff --git a/src/mol-model/structure/model/utils/compute-bonds.ts b/src/mol-model/structure/model/utils/compute-bonds.ts new file mode 100644 index 0000000000000000000000000000000000000000..9b91705743317ca8016f7726bf949b163c80f359 --- /dev/null +++ b/src/mol-model/structure/model/utils/compute-bonds.ts @@ -0,0 +1,253 @@ +/** + * Copyright (c) 2017 MolQL contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +import Mask from 'mol-util/mask' +import Model from '../model' +import { BondType, ElementSymbol } from '../types' +import Bonds from '../properties/bonds' +import { StructConn, ComponentBondInfo } from '../formats/mmcif/bonds' + +export interface BondComputationParameters { + maxHbondLength: number, + forceCompute: boolean +} + +// H,D,T are all mapped to H +const __ElementIndex: { [e: string]: number | undefined } = { 'H': 0, 'h': 0, 'D': 0, 'd': 0, 'T': 0, 't': 0, 'He': 2, 'HE': 2, 'he': 2, 'Li': 3, 'LI': 3, 'li': 3, 'Be': 4, 'BE': 4, 'be': 4, 'B': 5, 'b': 5, 'C': 6, 'c': 6, 'N': 7, 'n': 7, 'O': 8, 'o': 8, 'F': 9, 'f': 9, 'Ne': 10, 'NE': 10, 'ne': 10, 'Na': 11, 'NA': 11, 'na': 11, 'Mg': 12, 'MG': 12, 'mg': 12, 'Al': 13, 'AL': 13, 'al': 13, 'Si': 14, 'SI': 14, 'si': 14, 'P': 15, 'p': 15, 'S': 16, 's': 16, 'Cl': 17, 'CL': 17, 'cl': 17, 'Ar': 18, 'AR': 18, 'ar': 18, 'K': 19, 'k': 19, 'Ca': 20, 'CA': 20, 'ca': 20, 'Sc': 21, 'SC': 21, 'sc': 21, 'Ti': 22, 'TI': 22, 'ti': 22, 'V': 23, 'v': 23, 'Cr': 24, 'CR': 24, 'cr': 24, 'Mn': 25, 'MN': 25, 'mn': 25, 'Fe': 26, 'FE': 26, 'fe': 26, 'Co': 27, 'CO': 27, 'co': 27, 'Ni': 28, 'NI': 28, 'ni': 28, 'Cu': 29, 'CU': 29, 'cu': 29, 'Zn': 30, 'ZN': 30, 'zn': 30, 'Ga': 31, 'GA': 31, 'ga': 31, 'Ge': 32, 'GE': 32, 'ge': 32, 'As': 33, 'AS': 33, 'as': 33, 'Se': 34, 'SE': 34, 'se': 34, 'Br': 35, 'BR': 35, 'br': 35, 'Kr': 36, 'KR': 36, 'kr': 36, 'Rb': 37, 'RB': 37, 'rb': 37, 'Sr': 38, 'SR': 38, 'sr': 38, 'Y': 39, 'y': 39, 'Zr': 40, 'ZR': 40, 'zr': 40, 'Nb': 41, 'NB': 41, 'nb': 41, 'Mo': 42, 'MO': 42, 'mo': 42, 'Tc': 43, 'TC': 43, 'tc': 43, 'Ru': 44, 'RU': 44, 'ru': 44, 'Rh': 45, 'RH': 45, 'rh': 45, 'Pd': 46, 'PD': 46, 'pd': 46, 'Ag': 47, 'AG': 47, 'ag': 47, 'Cd': 48, 'CD': 48, 'cd': 48, 'In': 49, 'IN': 49, 'in': 49, 'Sn': 50, 'SN': 50, 'sn': 50, 'Sb': 51, 'SB': 51, 'sb': 51, 'Te': 52, 'TE': 52, 'te': 52, 'I': 53, 'i': 53, 'Xe': 54, 'XE': 54, 'xe': 54, 'Cs': 55, 'CS': 55, 'cs': 55, 'Ba': 56, 'BA': 56, 'ba': 56, 'La': 57, 'LA': 57, 'la': 57, 'Ce': 58, 'CE': 58, 'ce': 58, 'Pr': 59, 'PR': 59, 'pr': 59, 'Nd': 60, 'ND': 60, 'nd': 60, 'Pm': 61, 'PM': 61, 'pm': 61, 'Sm': 62, 'SM': 62, 'sm': 62, 'Eu': 63, 'EU': 63, 'eu': 63, 'Gd': 64, 'GD': 64, 'gd': 64, 'Tb': 65, 'TB': 65, 'tb': 65, 'Dy': 66, 'DY': 66, 'dy': 66, 'Ho': 67, 'HO': 67, 'ho': 67, 'Er': 68, 'ER': 68, 'er': 68, 'Tm': 69, 'TM': 69, 'tm': 69, 'Yb': 70, 'YB': 70, 'yb': 70, 'Lu': 71, 'LU': 71, 'lu': 71, 'Hf': 72, 'HF': 72, 'hf': 72, 'Ta': 73, 'TA': 73, 'ta': 73, 'W': 74, 'w': 74, 'Re': 75, 'RE': 75, 're': 75, 'Os': 76, 'OS': 76, 'os': 76, 'Ir': 77, 'IR': 77, 'ir': 77, 'Pt': 78, 'PT': 78, 'pt': 78, 'Au': 79, 'AU': 79, 'au': 79, 'Hg': 80, 'HG': 80, 'hg': 80, 'Tl': 81, 'TL': 81, 'tl': 81, 'Pb': 82, 'PB': 82, 'pb': 82, 'Bi': 83, 'BI': 83, 'bi': 83, 'Po': 84, 'PO': 84, 'po': 84, 'At': 85, 'AT': 85, 'at': 85, 'Rn': 86, 'RN': 86, 'rn': 86, 'Fr': 87, 'FR': 87, 'fr': 87, 'Ra': 88, 'RA': 88, 'ra': 88, 'Ac': 89, 'AC': 89, 'ac': 89, 'Th': 90, 'TH': 90, 'th': 90, 'Pa': 91, 'PA': 91, 'pa': 91, 'U': 92, 'u': 92, 'Np': 93, 'NP': 93, 'np': 93, 'Pu': 94, 'PU': 94, 'pu': 94, 'Am': 95, 'AM': 95, 'am': 95, 'Cm': 96, 'CM': 96, 'cm': 96, 'Bk': 97, 'BK': 97, 'bk': 97, 'Cf': 98, 'CF': 98, 'cf': 98, 'Es': 99, 'ES': 99, 'es': 99, 'Fm': 100, 'FM': 100, 'fm': 100, 'Md': 101, 'MD': 101, 'md': 101, 'No': 102, 'NO': 102, 'no': 102, 'Lr': 103, 'LR': 103, 'lr': 103, 'Rf': 104, 'RF': 104, 'rf': 104, 'Db': 105, 'DB': 105, 'db': 105, 'Sg': 106, 'SG': 106, 'sg': 106, 'Bh': 107, 'BH': 107, 'bh': 107, 'Hs': 108, 'HS': 108, 'hs': 108, 'Mt': 109, 'MT': 109, 'mt': 109 }; + +const __ElementBondThresholds: { [e: number]: number | undefined } = { 0: 1.42, 1: 1.42, 3: 2.7, 4: 2.7, 6: 1.75, 7: 1.6, 8: 1.52, 11: 2.7, 12: 2.7, 13: 2.7, 14: 1.9, 15: 1.9, 16: 1.9, 17: 1.8, 19: 2.7, 20: 2.7, 21: 2.7, 22: 2.7, 23: 2.7, 24: 2.7, 25: 2.7, 26: 2.7, 27: 2.7, 28: 2.7, 29: 2.7, 30: 2.7, 31: 2.7, 33: 2.68, 37: 2.7, 38: 2.7, 39: 2.7, 40: 2.7, 41: 2.7, 42: 2.7, 43: 2.7, 44: 2.7, 45: 2.7, 46: 2.7, 47: 2.7, 48: 2.7, 49: 2.7, 50: 2.7, 55: 2.7, 56: 2.7, 57: 2.7, 58: 2.7, 59: 2.7, 60: 2.7, 61: 2.7, 62: 2.7, 63: 2.7, 64: 2.7, 65: 2.7, 66: 2.7, 67: 2.7, 68: 2.7, 69: 2.7, 70: 2.7, 71: 2.7, 72: 2.7, 73: 2.7, 74: 2.7, 75: 2.7, 76: 2.7, 77: 2.7, 78: 2.7, 79: 2.7, 80: 2.7, 81: 2.7, 82: 2.7, 83: 2.7, 87: 2.7, 88: 2.7, 89: 2.7, 90: 2.7, 91: 2.7, 92: 2.7, 93: 2.7, 94: 2.7, 95: 2.7, 96: 2.7, 97: 2.7, 98: 2.7, 99: 2.7, 100: 2.7, 101: 2.7, 102: 2.7, 103: 2.7, 104: 2.7, 105: 2.7, 106: 2.7, 107: 2.7, 108: 2.7, 109: 2.88 }; + +const __ElementPairThresholds: { [e: number]: number | undefined } = { 0: 0.8, 20: 1.31, 27: 1.3, 35: 1.3, 44: 1.05, 54: 1, 60: 1.84, 72: 1.88, 84: 1.75, 85: 1.56, 86: 1.76, 98: 1.6, 99: 1.68, 100: 1.63, 112: 1.55, 113: 1.59, 114: 1.36, 129: 1.45, 144: 1.6, 170: 1.4, 180: 1.55, 202: 2.4, 222: 2.24, 224: 1.91, 225: 1.98, 243: 2.02, 269: 2, 293: 1.9, 480: 2.3, 512: 2.3, 544: 2.3, 612: 2.1, 629: 1.54, 665: 1, 813: 2.6, 854: 2.27, 894: 1.93, 896: 2.1, 937: 2.05, 938: 2.06, 981: 1.62, 1258: 2.68, 1309: 2.33, 1484: 1, 1763: 2.14, 1823: 2.48, 1882: 2.1, 1944: 1.72, 2380: 2.34, 3367: 2.44, 3733: 2.11, 3819: 2.6, 3821: 2.36, 4736: 2.75, 5724: 2.73, 5959: 2.63, 6519: 2.84, 6750: 2.87, 8991: 2.81 }; + +const __DefaultBondingRadius = 2.001; + +const MetalsSet = (function () { + const metals = ['LI', 'NA', 'K', 'RB', 'CS', 'FR', 'BE', 'MG', 'CA', 'SR', 'BA', 'RA', 'AL', 'GA', 'IN', 'SN', 'TL', 'PB', 'BI', 'SC', 'TI', 'V', 'CR', 'MN', 'FE', 'CO', 'NI', 'CU', 'ZN', 'Y', 'ZR', 'NB', 'MO', 'TC', 'RU', 'RH', 'PD', 'AG', 'CD', 'LA', 'HF', 'TA', 'W', 'RE', 'OS', 'IR', 'PT', 'AU', 'HG', 'AC', 'RF', 'DB', 'SG', 'BH', 'HS', 'MT', 'CE', 'PR', 'ND', 'PM', 'SM', 'EU', 'GD', 'TB', 'DY', 'HO', 'ER', 'TM', 'YB', 'LU', 'TH', 'PA', 'U', 'NP', 'PU', 'AM', 'CM', 'BK', 'CF', 'ES', 'FM', 'MD', 'NO', 'LR']; + const set = new Set<number>(); + for (const m of metals) { + set.add(__ElementIndex[m]!); + } + return set; +})(); + +function pair(a: number, b: number) { + if (a < b) return (a + b) * (a + b + 1) / 2 + b; + else return (a + b) * (a + b + 1) / 2 + a; +} + +function idx(e: ElementSymbol) { + const i = __ElementIndex[e as any as string]; + if (i === void 0) return -1; + return i; +} + +function pairThreshold(i: number, j: number) { + if (i < 0 || j < 0) return -1; + const r = __ElementPairThresholds[pair(i, j)]; + if (r === void 0) return -1; + return r; +} + +function threshold(i: number) { + if (i < 0) return __DefaultBondingRadius; + const r = __ElementBondThresholds[i]; + if (r === void 0) return __DefaultBondingRadius; + return r; +} + +const H_ID = __ElementIndex['H']!; +function isHydrogen(i: number) { + return i === H_ID; +} + +function computePerAtomBonds(atomA: number[], atomB: number[], _order: number[], _flags: number[], atomCount: number) { + const bucketSizes = new Int32Array(atomCount); + const bucketOffsets = new Int32Array(atomCount + 1) as any as number[]; + const bucketFill = new Int32Array(atomCount); + + for (const i of atomA) bucketSizes[i]++; + for (const i of atomB) bucketSizes[i]++; + + let offset = 0; + for (let i = 0; i < atomCount; i++) { + bucketOffsets[i] = offset; + offset += bucketSizes[i]; + } + bucketOffsets[atomCount] = offset; + + const neighbor = new Int32Array(offset) as any as number[]; + const flags = new Uint16Array(offset) as any as number[]; + const order = new Int8Array(offset) as any as number[]; + + for (let i = 0, _i = atomA.length; i < _i; i++) { + const a = atomA[i], b = atomB[i], f = _flags[i], o = _order[i]; + + const oa = bucketOffsets[a] + bucketFill[a]; + const ob = bucketOffsets[b] + bucketFill[b]; + + neighbor[oa] = b; + flags[oa] = f; + order[oa] = o; + bucketFill[a]++; + + neighbor[ob] = a; + flags[ob] = f; + order[ob] = o; + bucketFill[b]++; + } + + return { + offsets: bucketOffsets, + neighbor, + flags, + order + }; +} + +function _computeBonds(model: Model, params: BondComputationParameters): Bonds { + const MAX_RADIUS = 3; + + const { x, y, z } = model.conformation; + const { atomCount } = model; + const { residueKey } = model.hierarchy + const { type_symbol, label_atom_id, label_alt_id } = model.hierarchy.atoms; + const { label_comp_id } = model.hierarchy.residues; + const query3d = Model.spatialLookup(model).find(Mask.always(atomCount)); + + const structConn = model.sourceData.kind === 'mmCIF' ? StructConn.create(model) : void 0 + const component = model.sourceData.kind === 'mmCIF' ? ComponentBondInfo.create(model) : void 0 + + const atomA: number[] = []; + const atomB: number[] = []; + const flags: number[] = []; + const order: number[] = []; + + let lastResidue = -1; + let componentMap: Map<string, Map<string, { flags: number, order: number }>> | undefined = void 0; + + for (let aI = 0; aI < atomCount; aI++) { + const raI = residueKey.value(aI); + // const rowA = dataIndex[aI]; // TODO + const rowA = aI; + + if (!params.forceCompute && raI !== lastResidue) { + const resn = label_comp_id.value(rowA)!; + if (!!component && component.entries.has(resn)) { + componentMap = component.entries.get(resn)!.map; + } else { + componentMap = void 0; + } + } + lastResidue = raI; + + const componentPairs = componentMap ? componentMap.get(label_atom_id.value(rowA)) : void 0; + + const aeI = idx(type_symbol.value(rowA)!); + + const { indices, count, squaredDistances } = query3d(x[aI], y[aI], z[aI], MAX_RADIUS); + const isHa = isHydrogen(aeI); + const thresholdA = threshold(aeI); + const altA = label_alt_id.value(rowA); + const metalA = MetalsSet.has(aeI); + const structConnEntries = params.forceCompute ? void 0 : structConn && structConn.getAtomEntries(aI); + + for (let ni = 0; ni < count; ni++) { + const bI = indices[ni]; + if (bI <= aI) continue; + + // const rowB = dataIndex[bI]; // TODO + const rowB = bI; + + const altB = label_alt_id.value(rowB); + if (altA && altB && altA !== altB) continue; + + const beI = idx(type_symbol.value(rowB)!); + const isMetal = metalA || MetalsSet.has(beI); + + const rbI = residueKey.value(bI); + // handle "component dictionary" bonds. + if (raI === rbI && componentPairs) { + const e = componentPairs.get(label_atom_id.value(rowB)!); + if (e) { + atomA[atomA.length] = aI; + atomB[atomB.length] = bI; + order[order.length] = e.order; + let flag = e.flags; + if (isMetal) { + if (flag | BondType.Flag.Covalent) flag ^= BondType.Flag.Covalent; + flag |= BondType.Flag.MetallicCoordination; + } + flags[flags.length] = flag; + } + continue; + } + + const isHb = isHydrogen(beI); + if (isHa && isHb) continue; + + const dist = Math.sqrt(squaredDistances[ni]); + if (dist === 0) continue; + + // handle "struct conn" bonds. + if (structConnEntries && structConnEntries.length) { + let added = false; + for (const se of structConnEntries) { + for (const p of se.partners) { + if (p.atomIndex === bI) { + atomA[atomA.length] = aI; + atomB[atomB.length] = bI; + flags[flags.length] = se.flags; + order[order.length] = se.order; + added = true; + break; + } + } + if (added) break; + } + if (added) continue; + } + + if (isHa || isHb) { + if (dist < params.maxHbondLength) { + atomA[atomA.length] = aI; + atomB[atomB.length] = bI; + order[order.length] = 1; + flags[flags.length] = BondType.Flag.Covalent | BondType.Flag.Computed; // TODO: check if correct + } + continue; + } + + const thresholdAB = pairThreshold(aeI, beI); + const pairingThreshold = thresholdAB > 0 + ? thresholdAB + : beI < 0 ? thresholdA : Math.max(thresholdA, threshold(beI)); + + + if (dist <= pairingThreshold) { + atomA[atomA.length] = aI; + atomB[atomB.length] = bI; + order[order.length] = 1; + flags[flags.length] = (isMetal ? BondType.Flag.MetallicCoordination : BondType.Flag.Covalent) | BondType.Flag.Computed; + } + } + } + + const bonds = computePerAtomBonds(atomA, atomB, order, flags, atomCount); + return { + offset: bonds.offsets, + neighbor: bonds.neighbor, + flags: bonds.flags, + order: bonds.order, + count: atomA.length + }; +} + +export default function computeBonds(model: Model, params?: Partial<BondComputationParameters>) { + return _computeBonds(model, { + maxHbondLength: (params && params.maxHbondLength) || 1.15, + forceCompute: !!(params && params.forceCompute), + }); +} \ No newline at end of file diff --git a/src/mol-util/mask.ts b/src/mol-util/mask.ts new file mode 100644 index 0000000000000000000000000000000000000000..8b333231e3ecc160d65a092f10e67bb8c10b9300 --- /dev/null +++ b/src/mol-util/mask.ts @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2017 MolQL contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal <david.sehnal@gmail.com> + */ + +// TODO check if the removal of FastSet and the removal of the context object for forEach +// have any performance implications + +function _ascSort(a: number, b: number) { + return a - b; +} + +export function sortAsc<T extends ArrayLike<number>>(array: T): T { + Array.prototype.sort.call(array, _ascSort); + return array; +} + +interface Mask { + '@type': 'mask' + size: number; + has(i: number): boolean; + /** in-order iteration of all "masked elements". */ + forEach<Ctx>(f: (i: number, ctx?: Ctx) => void, ctx?: Ctx): Ctx | undefined; +} + +namespace Mask { + class EmptyMask implements Mask { + '@type': 'mask' + size = 0; + has(i: number) { return false; } + forEach<Ctx>(f: (i: number, ctx?: Ctx) => void, ctx: Ctx) { return ctx; } + constructor() { } + } + + class SingletonMask implements Mask { + '@type': 'mask' + size = 1; + has(i: number) { return i === this.idx; } + forEach<Ctx>(f: (i: number, ctx?: Ctx) => void, ctx?: Ctx) { f(this.idx, ctx); return ctx; } + constructor(private idx: number) { } + } + + class BitMask implements Mask { + '@type': 'mask' + private length: number; + has(i: number) { return i < this.length && !!this.mask[i] as any; } + + private _forEach<Ctx>(f: (i: number, ctx?: Ctx) => void, ctx: Ctx | undefined) { + for (let i = 0; i < this.length; i++) { + if (this.mask[i]) f(i, ctx); + } + } + forEach<Ctx>(f: (i: number, ctx?: Ctx) => void, ctx?: Ctx) { + this._forEach(f, ctx); + return ctx; + } + constructor(private mask: boolean[], public size: number) { this.length = mask.length; } + } + + class AllMask implements Mask { + '@type': 'mask' + has(i: number) { return true; } + private _forEach<Ctx>(f: (i: number, ctx?: Ctx) => void, ctx: Ctx | undefined) { + for (let i = 0; i < this.size; i++) { + f(i, ctx); + } + } + forEach<Ctx>(f: (i: number, ctx?: Ctx) => void, ctx?: Ctx) { + this._forEach(f, ctx); + return ctx; + } + constructor(public size: number) { } + } + + class SetMask implements Mask { + '@type': 'mask' + private _flat: number[] | undefined = void 0; + size: number; + has(i: number) { return this.set.has(i); } + + private _forEach<Ctx>(f: (i: number, ctx: Ctx) => void, ctx: Ctx) { + for (const idx of this.flatten()) { + f(idx, ctx); + } + } + private flatten() { + if (this._flat) return this._flat; + const indices = new Int32Array(this.size); + let offset = 0 + this.set.forEach(i => indices[offset++] = i); + sortAsc(indices); + this._flat = indices as any as number[]; + return this._flat; + } + forEach<Ctx>(f: (i: number, ctx: Ctx) => void, ctx: Ctx) { + this._forEach(f, ctx); + return ctx; + } + constructor(private set: Set<number>) { + this.size = set.size; + } + } + + export function always(size: number) { return new AllMask(size); } + export const never = new EmptyMask(); + + export function ofSet(set: Set<number>): Mask { + return new SetMask(set); + } + + export function singleton(i: number) { + return new SingletonMask(i); + } + + export function ofUniqueIndices(indices: ArrayLike<number>): Mask { + const len = indices.length; + if (len === 0) return new EmptyMask(); + if (len === 1) return new SingletonMask(indices[0]); + + let max = 0; + for (const i of (indices as number[])) { + if (i > max) max = i; + } + if (len === max) return new AllMask(len); + + const f = len / max; + if (f < 1 / 12) { + const set = new Set<number>(); + for (const i of (indices as number[])) set.add(i); + return new SetMask(set); + } + + const mask = new Int8Array(max + 1); + for (const i of (indices as number[])) { + mask[i] = 1; + } + return new BitMask(mask as any as boolean[], indices.length); + } + + export function ofMask(mask: boolean[], size: number): Mask { + return new BitMask(mask, size); + } + + export function hasAny(mask: Mask, xs: number[]) { + for (const x of xs) { + if (mask.has(x)) return true; + } + return false; + } + + export function complement(mask: Mask, against: Mask) { + let count = 0 + let max = 0 + against.forEach(i => { + if (!mask.has(i)) { + count++; + if (i > max) max = i; + } + }); + + if (count / max < 1 / 12) { + // set based + const set = new Set<number>() + against.forEach(i => { + if (!mask.has(i)) { + set.add(i); + } + }); + return ofSet(set); + } else { + // mask based + const target = new Uint8Array(max + 1); + against.forEach(i => { + if (!mask.has(i)) { + target[i] = 1; + } + }); + return ofMask(target as any as boolean[], count); + } + } +} + +export default Mask; \ No newline at end of file diff --git a/src/script.ts b/src/script.ts deleted file mode 100644 index 5eee1e26d2f092072357a59efe72b32a80d831ae..0000000000000000000000000000000000000000 --- a/src/script.ts +++ /dev/null @@ -1,207 +0,0 @@ -/** - * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info. - * - * @author Alexander Rose <alexander.rose@weirdbyte.de> - * @author David Sehnal <david.sehnal@gmail.com> - */ - -import * as util from 'util' -import * as fs from 'fs' - -require('util.promisify').shim(); -const readFileAsync = util.promisify(fs.readFile); - -import Gro from 'mol-io/reader/gro/parser' -import Csv from 'mol-io/reader/csv/parser' -import CIF from 'mol-io/reader/cif' - -import Computation from 'mol-util/computation' - -import { Model } from 'mol-model/structure' - -const file = '1crn.gro' -// const file = 'water.gro' -// const file = 'test.gro' -// const file = 'md_1u19_trj.gro' - -function showProgress(tag: string, p: Computation.Progress) { - console.log(`[${tag}] ${p.message} ${p.isIndeterminate ? '' : (p.current / p.max * 100).toFixed(2) + '% '}(${p.elapsedMs | 0}ms)`) -} - -async function runGro(input: string) { - console.time('parseGro'); - const comp = Gro(input); - - const ctx = Computation.observable({ updateRateMs: 150, observer: p => showProgress('GRO', p) }); - const parsed = await comp(ctx); - console.timeEnd('parseGro'); - - if (parsed.isError) { - console.log(parsed); - return; - } - - const groFile = parsed.result - - console.log('structure count: ', groFile.structures.length); - - const data = groFile.structures[0]; - - // const header = groFile.blocks[0].getCategory('header') - const { header, atoms } = data; - console.log(JSON.stringify(header, null, 2)); - console.log('number of atoms:', atoms.count); - - console.log(`'${atoms.residueNumber.value(1)}'`) - console.log(`'${atoms.residueName.value(1)}'`) - console.log(`'${atoms.atomName.value(1)}'`) - console.log(atoms.z.value(1)) - console.log(`'${atoms.z.value(1)}'`) - - const n = atoms.count; - console.log('rowCount', n) - - console.time('getFloatArray x') - const x = atoms.x.toArray({ array: Float32Array }) - console.timeEnd('getFloatArray x') - console.log(x.length, x[0], x[x.length - 1]) - - console.time('getFloatArray y') - const y = atoms.y.toArray({ array: Float32Array }) - console.timeEnd('getFloatArray y') - console.log(y.length, y[0], y[y.length - 1]) - - console.time('getFloatArray z') - const z = atoms.z.toArray({ array: Float32Array }) - console.timeEnd('getFloatArray z') - console.log(z.length, z[0], z[z.length - 1]) - - console.time('getIntArray residueNumber') - const residueNumber = atoms.residueNumber.toArray({ array: Int32Array }) - console.timeEnd('getIntArray residueNumber') - console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1]) -} - -export async function _gro() { - const input = await readFileAsync(`./examples/${file}`, 'utf8') - runGro(input) -} - -// _gro() - -async function runCIF(input: string | Uint8Array) { - console.time('parseCIF'); - const comp = typeof input === 'string' ? CIF.parseText(input) : CIF.parseBinary(input); - - const ctx = Computation.observable({ updateRateMs: 250, observer: p => showProgress('CIF', p) }); - const parsed = await comp(ctx); - console.timeEnd('parseCIF'); - if (parsed.isError) { - console.log(parsed); - return; - } - - const data = parsed.result.blocks[0]; - const atom_site = data.categories._atom_site; - console.log(atom_site.getField('Cartn_x')!.float(0)); - //console.log(atom_site.getField('label_atom_id')!.toStringArray()); - - const mmcif = CIF.schema.mmCIF(data); - console.log(mmcif.atom_site.Cartn_x.value(0)); - console.log(mmcif.entity.type.toArray()); - console.log(mmcif.pdbx_struct_oper_list.matrix.value(0)); - - console.time('createModels'); - const models = Model.create({ kind: 'mmCIF', data: mmcif }); - console.timeEnd('createModels'); - - for (let i = 0; i < models.length; i++) { - console.log(models[i].id, models[i].conformation.id); - } - - // console.log(models[0].hierarchy.isMonotonous); - // console.log(models[0].hierarchy.atoms.type_symbol.value(0)); - // console.log(models[0].hierarchy.residues.auth_comp_id.value(0)); - // console.log(models[0].hierarchy.residues.auth_comp_id.value(1)); - // console.log(models[0].hierarchy.chains.auth_asym_id.value(0)); - // console.log(models[0].hierarchy.chains.auth_asym_id.value(1)); - // console.log(models[0].hierarchy.chains.label_asym_id.value(1)); - // console.log(models[0].conformation.x[0]); - // console.log(models[0].conformation.y[0]); - // console.log(models[0].conformation.z[0]); - - // const schema = await _dic() - // if (schema) { - // const mmcif2 = applySchema(schema, data) - // // console.log(util.inspect(mmcif2.atom_site, {showHidden: false, depth: 3})) - // console.log(mmcif2.atom_site.Cartn_x.value(0)); - // console.log(mmcif2.entity.type.toArray()); - // // console.log(mmcif2.pdbx_struct_oper_list.matrix.value(0)); // TODO - // } else { - // console.log('error getting mmcif schema from dic') - // } -} - -export async function _cif() { - let path = `./examples/1grm_updated.cif`; - // path = '../test/3j3q.cif' // lets have a relative path for big test files - // path = 'e:/test/quick/3j3q_updated.cif'; - const input = await readFileAsync(path, 'utf8') - console.log('------------------'); - console.log('Text CIF:'); - runCIF(input); - - path = `./examples/1cbs_full.bcif`; - - const input2 = await readFileAsync(path) - console.log('------------------'); - console.log('BinaryCIF:'); - const data = new Uint8Array(input2.byteLength); - for (let i = 0; i < input2.byteLength; i++) data[i] = input2[i]; - runCIF(input2); -} - -// _cif(); - -const comp = Computation.create(async ctx => { - for (let i = 0; i < 0; i++) { - await new Promise(res => setTimeout(res, 500)); - if (ctx.requiresUpdate) await ctx.update({ message: 'working', current: i, max: 2 }); - } - return 42; -}); -export async function testComp() { - const ctx = Computation.observable({ observer: p => showProgress('test', p) }); - const ret = await comp(ctx); - console.log('computation returned', ret); -} -// testComp(); - - -const csvString = ` Year,Make,Model,Length -1997,Ford,"E350 - -MOIN",2.34 -2000,Mercury, Cougar,2.38` - -export async function testCsv () { - const parsed = await Csv(csvString)(); - - if (parsed.isError) { - console.log(parsed) - return; - } - - const csvFile = parsed.result; - csvFile.table.columnNames.forEach(name => { - const col = csvFile.table.getColumn(name) - if (col) console.log(name, col.toStringArray()) - }) - - const year = csvFile.table.getColumn('Year') - if (year) console.log('(int)Year', year.toIntArray()) - - const length = csvFile.table.getColumn('Length') - if (length) console.log('(float)Length', length.toFloatArray()) -} -testCsv() \ No newline at end of file