From a81bcf4311624b15525cfe4b376c667fbf40cbdd Mon Sep 17 00:00:00 2001 From: David Sehnal <david.sehnal@gmail.com> Date: Sat, 2 Mar 2019 12:57:58 +0100 Subject: [PATCH] mol-model: added sourceIndex to AtomicHierarchy --- package-lock.json | Bin 609362 -> 607562 bytes .../structure/mmcif/atomic.ts | 12 +++-- src/mol-model-formats/structure/mmcif/ihm.ts | 1 + .../structure/mmcif/parser.ts | 41 +++++++++++++----- src/mol-model-formats/structure/mmcif/sort.ts | 9 +++- .../model/properties/atomic/hierarchy.ts | 9 +++- 6 files changed, 50 insertions(+), 22 deletions(-) diff --git a/package-lock.json b/package-lock.json index 4444de29a715c5abccb6351afde6e97dd2a96c96..00f2081bcfdf7d63fc7301f534af8309da642912 100644 GIT binary patch delta 1504 zcmcaKL-kab>IQYM$qRJlHoJH|+dKUME2GSIR}Mz$rpXH~h)r)`XJef%@QYP+`jsWD zg3~{6F^O*9-o<F{1(My)dX>?bV|qaVi^O*SyNm)Z(-lmaB(|3+GOdo7zCnXkV)`ys zX3^;a^O*!DCuDO^SD4PhGToqtDSEn&F`MxAA5~28jFT_?7oT2W#l$*&LJd>?WQPsX z)8FNBXl#$KWzvC2?yqA?0ISmK=a8D7Th9~>=KZzd;GeEl#KF5=pn+)yH%M&zoo*%$ zMz9%q*32r~9eSC>Al$w_ra2%L(-RgjvP}Qp%po$}po@`j`hQ1e!Rh)9Os3Nn%s92C zPuR`MI$eJalhX9pD_Oaw3)nNWOcwYf0nue+&nz<i!yZPy$q%ORPEV*~@|b>LI+Nyf z0Xt^y>Hk5J4Yf=X(_`xxIj1`~F!N47Fo}_Gdb|U(;PeX}Os3lpOk{H706Ri=9y|NA znQWZX=a{i^PT%0g%rg1GKfdV`Y*~b+KbXlBJzZ-ehcMWx>HZTr1VKS&3ig%Wc2<?i z2mbL)xAI|@nl8}5R5iWpH9OCAcOPb6Fn9ZjIZTo4(+~KtOH6-d#>_X}U=9Z_$g2v| z4H}rDw!dD&w1a8+f#pn0lN~mMPUrv6%sT!3A`aQ<0#{j~O3tlfs$-sf;lJJV3Gdmt zrz?oDv4ZrfO@1J&yj^xZ(|z{o8#b})Pe0qux^?=2Tg(#Mzi(%<&;X@}>H8Njc2D=X z&&0d^-UFuDF4Gmdnfa$b$l?&(J~x}gmkGkZpwBEYy|0f+X!_h5Cf?}*_RK=lqw_d| zr$1=oP}u%HkE5LtNnt`Q6Yul~TFmiCV)fe0l}OxmtJwLsYZY;n^Fjm{gs=#1|8L2x z&p5q7msx&$o;7nGBgpj2VEJiw%styDv~aL6O>Zz@R-V2gkePk^{WgwhPO$WJu=~zi zva@Y(n82Y3F=P2ZZ)PqeX@#}yEYmO4aBy!on8R_L8Ent=+sm2Mr%h(zn4Z(aB)+|N z0mpNQ=ILiO7!{{0_%NwV_cLZ@+x~tL$2Jxu!{XwZxu(Bg&H?tKar^!x=Jx$bEZg@d zu?ELa7qDlM*sghqRYV3B!Xn!<?y<IWfVkV)p0iFAo9@udsJVUrUsgZS=>n6ORX6`w zo5VPMK?tMz<c9Uu(-k@y*|z@{VVfWTj=1e-yID6dPB-|-$g|z=AoDS%?SF5u#xZW6 zaE#fQar+q^wxgQU6$;oSwyy|dlYa}=0*dQ{(@(i`@=xzq=j5EeeiD<wWPu~r({HPD z%4|1vWtSD0J|TxuV*0#~jFQ`%LfM^iK&}HNhq`I4?2^-;2Qf-c_kYU5vwiv@_LFSW z1^%&1Y%f2_{&XQE7)Fx_Nl9e-N`DT%?Hg8b{Hg?p^L=Gb_U$K>IAz7a+=RufV%vo* HIUR%n)*d77 delta 1330 zcmX@rrFv<G>IQYM=@Tw9iA}CIV&ClQ^=vPgH+{kv4$ke~9E{>klLdIir~P7O-Co<p zSmZVR!gfZ9?QB;WjX1yxxBK5^6m)?w%N3c{L`)ZGWHQ-qRmH@_1ZGYT&E$}up0J!< zaC&DZ2mkc{txUPoe`IoKOkdE##Il{QmPvyVqR_XFDFMRTtH&la{k#S<@AMC~Ol}ad zD1T=D=`otjywk3*C`>P?Vv5>+ubxSc8zP%t#UVWXf)<Cs^u8(%{^|YgOfl1UTd})M zYh%ipE?~lGGrj66lk)TiR#p+P-sula*;%Lan=-Rc=kH}ooPJ?0lg0E0s?0*uA6PPr zOg>j*Gu^?8S$J9>lRCtL2d3;S)6cGCVxPXoikWx%gBoUm=>c}kQ2!XLVlvzQyN@ZG z5n|E?Csx+U=egLXA86nZ-adZ<6EDZK*-WO>Wp=XhPxtEL;GO<p2Gb*m+Pp3%gYCSt zm>M`BHfaSh%WOZeh{=c<EWe$18Pij!u?|6utkX9rvYAf5pw7rT{lWwef$cFXm{?gL zIxE&N#e!HG+aIi9`p!OmLIS(^bOQ~>C)+P?WwOwiu28`&Io-jGy=MA>nT$N!SKnh= z=`uYaox^c@oIW$h_8MhoSElI>q0A1TkZ75{z>e8r`;SbH4n~OJ{A`XK2y4%DCUE@3 zL%0SDnE1EHXfl`cLil>~nAo=GR&i)Cg3Xw|rjtWpx`GL($n*tX%qrXeRdc98WzJbL zZ-MC9;Ka%@JwJ*W9KMARefOf6t+v0e<5&$*z1_i<IgJydobN6R+jfU84n>I6@`i2> zE|84l_61Wpt}=u5Z<h~a)@7P*;Kb}Wy?-l{0>t(W5zKp7KuR5^E#$}qF$|_ZxWcTm z-FG#!7*o4#0&}}<0?T&W1XkNvNV1-|msL~-9(Y3AFWh8p=K%2~w&y)!ogfAYXZ|2| z!R_ilS^Y%8x~2>KWais`pouk!ar+;BwjTlzRR$W2XQnGGV&a|NSIH(Y-NBSaa=JnX z>rqII+*e|g(T8Z)+s!JoT`!1@<t>uN1U*Kc>8`?@{L{nkvC2+g5XNM_z14x;5^DGa z5e~`i?*rJKazL&FNA=Y0_x7^CV*{JH{qYg@XA4IooEaM7wh7D+n6?+(<6vdoo*&J` m&$xZUDh`pt=?`Q%<+dM`=9Co!F(sy-oX0A<oy&;RK^OpVug+Ki diff --git a/src/mol-model-formats/structure/mmcif/atomic.ts b/src/mol-model-formats/structure/mmcif/atomic.ts index b43be7fe7..38e39f5b6 100644 --- a/src/mol-model-formats/structure/mmcif/atomic.ts +++ b/src/mol-model-formats/structure/mmcif/atomic.ts @@ -16,11 +16,8 @@ import { ElementSymbol } from 'mol-model/structure/model/types'; import { Entities } from 'mol-model/structure/model/properties/common'; import { getAtomicRanges } from 'mol-model/structure/model/properties/utils/atomic-ranges'; import { getAtomicDerivedData } from 'mol-model/structure/model/properties/utils/atomic-derived'; -import { ModelFormat } from '../format'; -import mmCIF_Format = ModelFormat.mmCIF import { FormatData } from './parser'; - type AtomSite = mmCIF_Database['atom_site'] function findHierarchyOffsets(atom_site: AtomSite) { @@ -45,13 +42,14 @@ function findHierarchyOffsets(atom_site: AtomSite) { return { residues, chains }; } -function createHierarchyData(atom_site: AtomSite, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData { +function createHierarchyData(atom_site: AtomSite, sourceIndex: Column<number>, offsets: { residues: ArrayLike<number>, chains: ArrayLike<number> }): AtomicData { const atoms = Table.ofColumns(AtomsSchema, { type_symbol: Column.ofArray({ array: Column.mapToArray(atom_site.type_symbol, ElementSymbol), schema: Column.Schema.Aliased<ElementSymbol>(Column.Schema.str) }), label_atom_id: atom_site.label_atom_id, auth_atom_id: atom_site.auth_atom_id, label_alt_id: atom_site.label_alt_id, - pdbx_formal_charge: atom_site.pdbx_formal_charge + pdbx_formal_charge: atom_site.pdbx_formal_charge, + sourceIndex }); const residues = Table.view(atom_site, ResiduesSchema, offsets.residues); // Optimize the numeric columns @@ -80,9 +78,9 @@ function isHierarchyDataEqual(a: AtomicData, b: AtomicData) { && Table.areEqual(a.atoms as Table<AtomsSchema>, b.atoms as Table<AtomsSchema>) } -export function getAtomicHierarchyAndConformation(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model) { +export function getAtomicHierarchyAndConformation(atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, formatData: FormatData, previous?: Model) { const hierarchyOffsets = findHierarchyOffsets(atom_site); - const hierarchyData = createHierarchyData(atom_site, hierarchyOffsets); + const hierarchyData = createHierarchyData(atom_site, sourceIndex, hierarchyOffsets); if (previous && isHierarchyDataEqual(previous.atomicHierarchy, hierarchyData)) { return { diff --git a/src/mol-model-formats/structure/mmcif/ihm.ts b/src/mol-model-formats/structure/mmcif/ihm.ts index 731af9e3a..405fec8cf 100644 --- a/src/mol-model-formats/structure/mmcif/ihm.ts +++ b/src/mol-model-formats/structure/mmcif/ihm.ts @@ -21,6 +21,7 @@ export interface IHMData { model_name: string, entities: Entities, atom_site: mmCIF['atom_site'], + atom_site_sourceIndex: Column<number>, ihm_sphere_obj_site: mmCIF['ihm_sphere_obj_site'], ihm_gaussian_obj_site: mmCIF['ihm_gaussian_obj_site'] } diff --git a/src/mol-model-formats/structure/mmcif/parser.ts b/src/mol-model-formats/structure/mmcif/parser.ts index 17d4f3a5b..44835f9e9 100644 --- a/src/mol-model-formats/structure/mmcif/parser.ts +++ b/src/mol-model-formats/structure/mmcif/parser.ts @@ -169,8 +169,8 @@ function getFormatData(format: mmCIF_Format): FormatData { } } -function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities: Entities, formatData: FormatData, previous?: Model): Model { - const atomic = getAtomicHierarchyAndConformation(format, atom_site, entities, formatData, previous); +function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, sourceIndex: Column<number>, entities: Entities, formatData: FormatData, previous?: Model): Model { + const atomic = getAtomicHierarchyAndConformation(atom_site, sourceIndex, entities, formatData, previous); if (previous && atomic.sameAsPrevious) { return { ...previous, @@ -209,7 +209,7 @@ function createStandardModel(format: mmCIF_Format, atom_site: AtomSite, entities } function createModelIHM(format: mmCIF_Format, data: IHMData, formatData: FormatData): Model { - const atomic = getAtomicHierarchyAndConformation(format, data.atom_site, data.entities, formatData); + const atomic = getAtomicHierarchyAndConformation(data.atom_site, data.atom_site_sourceIndex, data.entities, formatData); const coarse = getIHMCoarse(data, formatData); return { @@ -255,8 +255,8 @@ async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatDat let modelStart = 0; while (modelStart < atomCount) { const modelEnd = findModelEnd(format.data.atom_site.pdbx_PDB_model_num, modelStart); - const atom_site = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd); - const model = createStandardModel(format, atom_site, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0); + const { atom_site, sourceIndex } = await sortAtomSite(ctx, format.data.atom_site, modelStart, modelEnd); + const model = createStandardModel(format, atom_site, sourceIndex, entities, formatData, models.length > 0 ? models[models.length - 1] : void 0); attachProps(model); models.push(model); modelStart = modelEnd; @@ -265,14 +265,17 @@ async function readStandard(ctx: RuntimeContext, format: mmCIF_Format, formatDat } function splitTable<T extends Table<any>>(table: T, col: Column<number>) { - const ret = new Map<number, T>() + const ret = new Map<number, { table: T, start: number, end: number }>() const rowCount = table._rowCount; let modelStart = 0; while (modelStart < rowCount) { const modelEnd = findModelEnd(col, modelStart); const id = col.value(modelStart); - const window = Table.window(table, table._schema, modelStart, modelEnd) as T; - ret.set(id, window); + ret.set(id, { + table: Table.window(table, table._schema, modelStart, modelEnd) as T, + start: modelStart, + end: modelEnd + }); modelStart = modelEnd; } return ret; @@ -286,8 +289,9 @@ async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: Fo throw new Error('expected _atom_site.ihm_model_id to be defined') } - // TODO: will IHM require sorting or will we trust it? const atom_sites = splitTable(format.data.atom_site, format.data.atom_site.ihm_model_id); + // TODO: will coarse IHM records require sorting or will we trust it? + // ==> Probably implement a sort as as well and store the sourceIndex same as with atomSite const sphere_sites = splitTable(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site.model_id); const gauss_sites = splitTable(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site.model_id); @@ -296,13 +300,26 @@ async function readIHM(ctx: RuntimeContext, format: mmCIF_Format, formatData: Fo const { model_id, model_name } = ihm_model_list; for (let i = 0; i < ihm_model_list._rowCount; i++) { const id = model_id.value(i); + + let atom_site, atom_site_sourceIndex; + if (atom_sites.has(id)) { + const e = atom_sites.get(id)!; + const { atom_site: sorted, sourceIndex } = await sortAtomSite(ctx, e.table, e.start, e.end); + atom_site = sorted; + atom_site_sourceIndex = sourceIndex; + } else { + atom_site = Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0); + atom_site_sourceIndex = Column.ofIntArray([]); + } + const data: IHMData = { model_id: id, model_name: model_name.value(i), entities: entities, - atom_site: atom_sites.has(id) ? atom_sites.get(id)! : Table.window(format.data.atom_site, format.data.atom_site._schema, 0, 0), - ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)! : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0), - ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)! : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0) + atom_site, + atom_site_sourceIndex, + ihm_sphere_obj_site: sphere_sites.has(id) ? sphere_sites.get(id)!.table : Table.window(format.data.ihm_sphere_obj_site, format.data.ihm_sphere_obj_site._schema, 0, 0), + ihm_gaussian_obj_site: gauss_sites.has(id) ? gauss_sites.get(id)!.table : Table.window(format.data.ihm_gaussian_obj_site, format.data.ihm_gaussian_obj_site._schema, 0, 0) }; const model = createModelIHM(format, data, formatData); attachProps(model); diff --git a/src/mol-model-formats/structure/mmcif/sort.ts b/src/mol-model-formats/structure/mmcif/sort.ts index 7cbee9e5b..868cd5dd8 100644 --- a/src/mol-model-formats/structure/mmcif/sort.ts +++ b/src/mol-model-formats/structure/mmcif/sort.ts @@ -9,6 +9,8 @@ import { createRangeArray, makeBuckets } from 'mol-data/util'; import { Column, Table } from 'mol-data/db'; import { RuntimeContext } from 'mol-task'; +export type SortedAtomSite = mmCIF_Database['atom_site'] & { sourceIndex: Column<number> } + function isIdentity(xs: ArrayLike<number>) { for (let i = 0, _i = xs.length; i < _i; i++) { if (xs[i] !== i) return false; @@ -36,8 +38,11 @@ export async function sortAtomSite(ctx: RuntimeContext, atom_site: mmCIF_Databas } if (isIdentity(indices) && indices.length === atom_site._rowCount) { - return atom_site; + return { atom_site, sourceIndex: Column.ofIntArray(indices) }; } - return Table.view(atom_site, atom_site._schema, indices) as mmCIF_Database['atom_site']; + return { + atom_site: Table.view(atom_site, atom_site._schema, indices) as mmCIF_Database['atom_site'], + sourceIndex: Column.ofIntArray(indices) + }; } \ No newline at end of file diff --git a/src/mol-model/structure/model/properties/atomic/hierarchy.ts b/src/mol-model/structure/model/properties/atomic/hierarchy.ts index 33c599324..7262fe28a 100644 --- a/src/mol-model/structure/model/properties/atomic/hierarchy.ts +++ b/src/mol-model/structure/model/properties/atomic/hierarchy.ts @@ -38,7 +38,14 @@ export const AtomsSchema = { * The net integer charge assigned to this atom. * This is the formal charge assignment normally found in chemical diagrams. */ - pdbx_formal_charge: mmCIF.atom_site.pdbx_formal_charge + pdbx_formal_charge: mmCIF.atom_site.pdbx_formal_charge, + + /** + * The index of this atom in the input data. + * Required because of sorting of atoms. + */ + sourceIndex: Column.Schema.int + // id, occupancy and B_iso_or_equiv are part of conformation }; -- GitLab