diff --git a/data/mmcif-field-names.csv b/data/mmcif-field-names.csv index f05f299f39864c70572447630a3bf934951ef3cc..137c35a8cdcf492b237cbd5eab50c3a44da46e16 100644 --- a/data/mmcif-field-names.csv +++ b/data/mmcif-field-names.csv @@ -93,12 +93,24 @@ entity_poly_seq.hetero entity_src_gen.entity_id entity_src_gen.pdbx_src_id -entity_src_gen.pdbx_alt_source_flag -entity_src_gen.pdbx_seq_type +entity_src_gen.pdbx_alt_source_flag +entity_src_gen.pdbx_seq_type entity_src_gen.pdbx_beg_seq_num entity_src_gen.pdbx_end_seq_num entity_src_gen.pdbx_gene_src_gene +entity_src_nat.entity_id +entity_src_nat.pdbx_src_id +entity_src_nat.pdbx_alt_source_flag +entity_src_nat.pdbx_beg_seq_num +entity_src_nat.pdbx_end_seq_num + +pdbx_entity_src_syn.entity_id +pdbx_entity_src_syn.pdbx_src_id +pdbx_entity_src_syn.pdbx_alt_source_flag +pdbx_entity_src_syn.pdbx_beg_seq_num +pdbx_entity_src_syn.pdbx_end_seq_num + pdbx_entity_branch.entity_id pdbx_entity_branch.type diff --git a/src/mol-io/reader/cif/schema/mmcif.ts b/src/mol-io/reader/cif/schema/mmcif.ts index 39d20bcd83d4312a874eb2598fe6132005316e87..dc6c7747ae0f7c4de6a86bd4d4669857cfe24ece 100644 --- a/src/mol-io/reader/cif/schema/mmcif.ts +++ b/src/mol-io/reader/cif/schema/mmcif.ts @@ -1,7 +1,7 @@ /** * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info. * - * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.309, IHM 0.141, CARB draft. + * Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.310, IHM 0.141, CARB draft. * * @author mol-star package (src/apps/schema-generator/generate) */ @@ -1964,6 +1964,40 @@ export const mmCIF_Schema = { */ details: str, }, + /** + * Data items in the ENTITY_SRC_NAT category record details of + * the source from which the entity was obtained in cases + * where the entity was isolated directly from a natural tissue. + */ + entity_src_nat: { + /** + * This data item is a pointer to _entity.id in the ENTITY category. + */ + entity_id: str, + /** + * This data item is an ordinal identifier for entity_src_nat data records. + */ + pdbx_src_id: int, + /** + * This data item identifies cases in which an alternative source + * modeled. + */ + pdbx_alt_source_flag: Aliased<'sample' | 'model'>(str), + /** + * The beginning polymer sequence position for the polymer section corresponding + * to this source. + * + * A reference to the sequence position in the entity_poly category. + */ + pdbx_beg_seq_num: int, + /** + * The ending polymer sequence position for the polymer section corresponding + * to this source. + * + * A reference to the sequence position in the entity_poly category. + */ + pdbx_end_seq_num: int, + }, /** * Data items in the ENTITY_SRC_GEN category record details of * the source from which the entity was obtained in cases @@ -2010,6 +2044,39 @@ export const mmCIF_Schema = { */ pdbx_end_seq_num: int, }, + /** + * The data items in category PDBX_ENTITY_SRC_SYN record the source details + * about chemically synthesized molecules. + */ + pdbx_entity_src_syn: { + /** + * This data item is a pointer to _entity.id in the ENTITY category. + */ + entity_id: str, + /** + * This data item is an ordinal identifier for pdbx_entity_src_syn data records. + */ + pdbx_src_id: int, + /** + * This data item identifies cases in which an alternative source + * modeled. + */ + pdbx_alt_source_flag: Aliased<'sample' | 'model'>(str), + /** + * The beginning polymer sequence position for the polymer section corresponding + * to this source. + * + * A reference to the sequence position in the entity_poly category. + */ + pdbx_beg_seq_num: int, + /** + * The ending polymer sequence position for the polymer section corresponding + * to this source. + * + * A reference to the sequence position in the entity_poly category. + */ + pdbx_end_seq_num: int, + }, /** * Data items in the PDBX_ENTITY_DESCRIPTOR category provide * string descriptors of entity chemical structure. diff --git a/src/mol-theme/color.ts b/src/mol-theme/color.ts index b82996be6a92aa90ee0bd5101e55c1ca462fb3a2..bab8e130c4fbb0ce67bf44413004a83d355dbfad 100644 --- a/src/mol-theme/color.ts +++ b/src/mol-theme/color.ts @@ -27,7 +27,7 @@ import { UnitIndexColorThemeProvider } from './color/unit-index'; import { ScaleLegend } from 'mol-util/color/scale'; import { TableLegend } from 'mol-util/color/tables'; import { UncertaintyColorThemeProvider } from './color/uncertainty'; -import { GeneColorThemeProvider } from './color/gene'; +import { EntitySourceColorThemeProvider } from './color/entity-source'; import { IllustrativeColorThemeProvider } from './color/illustrative'; import { HydrophobicityColorThemeProvider } from './color/hydrophobicity'; @@ -76,7 +76,7 @@ export const BuiltInColorThemes = { 'cross-link': CrossLinkColorThemeProvider, 'element-index': ElementIndexColorThemeProvider, 'element-symbol': ElementSymbolColorThemeProvider, - 'gene': GeneColorThemeProvider, + 'entity-source': EntitySourceColorThemeProvider, 'hydrophobicity': HydrophobicityColorThemeProvider, 'illustrative': IllustrativeColorThemeProvider, 'molecule-type': MoleculeTypeColorThemeProvider, diff --git a/src/mol-theme/color/entity-source.ts b/src/mol-theme/color/entity-source.ts new file mode 100644 index 0000000000000000000000000000000000000000..828f0b824a957d4d49f7f172ab388cc9b302f565 --- /dev/null +++ b/src/mol-theme/color/entity-source.ts @@ -0,0 +1,143 @@ +/** + * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + */ + +import { StructureProperties, StructureElement, Link, Model } from 'mol-model/structure'; +import { ColorScale, Color } from 'mol-util/color'; +import { Location } from 'mol-model/location'; +import { ColorTheme, LocationColor } from '../color'; +import { ParamDefinition as PD } from 'mol-util/param-definition' +import { ThemeDataContext } from 'mol-theme/theme'; +import { ColorListOptions, ColorListName } from 'mol-util/color/scale'; +import { Table, Column } from 'mol-data/db'; +import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif'; + +const DefaultColor = Color(0xCCCCCC) +const Description = 'Gives ranges of a polymer chain a color based on the entity source it originates from. Genes get the same color per entity' + +export const EntitySourceColorThemeParams = { + list: PD.ColorScale<ColorListName>('RedYellowBlue', ColorListOptions), +} +export type EntitySourceColorThemeParams = typeof EntitySourceColorThemeParams +export function getEntitySourceColorThemeParams(ctx: ThemeDataContext) { + return EntitySourceColorThemeParams // TODO return copy +} + +function modelEntityKey(modelIndex: number, entityId: string) { + return `${modelIndex}|${entityId}` +} + +type EntitySrc = Table<{ + entity_id: mmCIF_Schema['entity_src_gen']['entity_id'], + pdbx_src_id: mmCIF_Schema['entity_src_gen']['pdbx_src_id'], + pdbx_beg_seq_num: mmCIF_Schema['entity_src_gen']['pdbx_beg_seq_num'], + pdbx_end_seq_num: mmCIF_Schema['entity_src_gen']['pdbx_end_seq_num'], +}> +type GeneSrcGene = Column<mmCIF_Schema['entity_src_gen']['pdbx_gene_src_gene']['T']> + +function srcKey(modelIndex: number, entityId: string, srcId: number, gene: string) { + return `${modelIndex}|${entityId}|${gene ? gene : srcId}` +} + +function addSrc(seqToSrcByModelEntity: Map<string, Int16Array>, srcKeySerialMap: Map<string, number>, modelIndex: number, model: Model, entity_src: EntitySrc, gene_src_gene?: GeneSrcGene) { + const { entity_id, pdbx_src_id, pdbx_beg_seq_num, pdbx_end_seq_num } = entity_src + for (let j = 0, jl = entity_src._rowCount; j < jl; ++j) { + const entityId = entity_id.value(j) + const mK = modelEntityKey(modelIndex, entityId) + let seqToSrc: Int16Array + if (!seqToSrcByModelEntity.has(mK)) { + const entityIndex = model.entities.getEntityIndex(entityId) + const seq = model.sequence.sequences[entityIndex].sequence + seqToSrc = new Int16Array(seq.sequence.length) + seqToSrcByModelEntity.set(mK, seqToSrc) + } else { + seqToSrc = seqToSrcByModelEntity.get(mK)! + } + const sK = srcKey(modelIndex, entityId, pdbx_src_id.value(j), gene_src_gene ? gene_src_gene.value(j).join(',') : '') + + // may not be given (= 0) indicating src is for the whole seq + const beg = pdbx_beg_seq_num.valueKind(j) === Column.ValueKind.Present ? pdbx_beg_seq_num.value(j) : 1 + const end = pdbx_end_seq_num.valueKind(j) === Column.ValueKind.Present ? pdbx_end_seq_num.value(j) : seqToSrc.length + + let srcIndex: number // serial no starting from 1 + if (srcKeySerialMap.has(sK)) { + srcIndex = srcKeySerialMap.get(sK)! + } else { + srcIndex = srcKeySerialMap.size + 1 + srcKeySerialMap.set(sK, srcIndex) + } + // set src index + for (let i = beg, il = end; i <= il; ++i) { + seqToSrc[i - 1] = srcIndex + } + } +} + +export function EntitySourceColorTheme(ctx: ThemeDataContext, props: PD.Values<EntitySourceColorThemeParams>): ColorTheme<EntitySourceColorThemeParams> { + let color: LocationColor + const scale = ColorScale.create({ listOrName: props.list, minLabel: 'Start', maxLabel: 'End' }) + const { structure } = ctx + + if (structure) { + const l = StructureElement.create() + const { models } = structure + const seqToSrcByModelEntity = new Map<string, Int16Array>() + const srcKeySerialMap = new Map<string, number>() // serial no starting from 1 + + for (let i = 0, il = models.length; i <il; ++i) { + const m = models[i] + if (m.sourceData.kind !== 'mmCIF') continue + const { entity_src_gen, entity_src_nat, pdbx_entity_src_syn } = m.sourceData.data + addSrc(seqToSrcByModelEntity, srcKeySerialMap, i, m, entity_src_gen, entity_src_gen.pdbx_gene_src_gene) + addSrc(seqToSrcByModelEntity, srcKeySerialMap, i, m, entity_src_nat) + addSrc(seqToSrcByModelEntity, srcKeySerialMap, i, m, pdbx_entity_src_syn) + } + scale.setDomain(1, srcKeySerialMap.size) + const scaleColor = scale.color + + const getSrcColor = (location: StructureElement) => { + const modelIndex = structure.models.indexOf(location.unit.model) + const entityId = StructureProperties.entity.id(location) + const mK = modelEntityKey(modelIndex, entityId) + const seqToSrc = seqToSrcByModelEntity.get(mK) + if (seqToSrc) { + // minus 1 to convert seqId to array index + return scaleColor(seqToSrc[StructureProperties.residue.label_seq_id(location) - 1]) + } else { + return DefaultColor + } + } + + color = (location: Location): Color => { + if (StructureElement.isLocation(location)) { + return getSrcColor(location) + } else if (Link.isLocation(location)) { + l.unit = location.aUnit + l.element = location.aUnit.elements[location.aIndex] + return getSrcColor(l) + } + return DefaultColor + } + } else { + color = () => DefaultColor + } + + return { + factory: EntitySourceColorTheme, + granularity: 'group', + color, + props, + description: Description, + legend: scale ? scale.legend : undefined + } +} + +export const EntitySourceColorThemeProvider: ColorTheme.Provider<EntitySourceColorThemeParams> = { + label: 'Entity Source', + factory: EntitySourceColorTheme, + getParams: getEntitySourceColorThemeParams, + defaultValues: PD.getDefaultValues(EntitySourceColorThemeParams), + isApplicable: (ctx: ThemeDataContext) => !!ctx.structure +} \ No newline at end of file diff --git a/src/mol-theme/color/gene.ts b/src/mol-theme/color/gene.ts deleted file mode 100644 index 1f0110e846f700fdb7e0a3e01319248d35b0b585..0000000000000000000000000000000000000000 --- a/src/mol-theme/color/gene.ts +++ /dev/null @@ -1,127 +0,0 @@ -/** - * Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info. - * - * @author Alexander Rose <alexander.rose@weirdbyte.de> - */ - -import { StructureProperties, StructureElement, Link } from 'mol-model/structure'; -import { ColorScale, Color } from 'mol-util/color'; -import { Location } from 'mol-model/location'; -import { ColorTheme, LocationColor } from '../color'; -import { ParamDefinition as PD } from 'mol-util/param-definition' -import { ThemeDataContext } from 'mol-theme/theme'; -import { ColorListOptions, ColorListName } from 'mol-util/color/scale'; -import { NumberArray } from 'mol-util/type-helpers'; - -const DefaultColor = Color(0xCCCCCC) -const Description = 'Gives ranges of a polymer chain a color based on the gene (or linker/terminal extension) it originates from.' - -export const GeneColorThemeParams = { - list: PD.ColorScale<ColorListName>('RedYellowBlue', ColorListOptions), -} -export type GeneColorThemeParams = typeof GeneColorThemeParams -export function getGeneColorThemeParams(ctx: ThemeDataContext) { - return GeneColorThemeParams // TODO return copy -} - -function modelEntityKey(modelIndex: number, entityId: string) { - return `${modelIndex}|${entityId}` -} - -function addGene(geneSerialMap: Map<string, number>, geneNames: string[], beg: number, end: number, seqToSrcGen: NumberArray) { - const gene = geneNames.map(s => s.toUpperCase()).sort().join(',') - let geneIndex = 0 // serial no starting from 1 - if (gene === '') { - geneIndex = geneSerialMap.size + 1 - geneSerialMap.set(`UNKNOWN${geneIndex}`, geneIndex) - } else if (geneSerialMap.has(gene)) { - geneIndex = geneSerialMap.get(gene)! - } else { - geneIndex = geneSerialMap.size + 1 - geneSerialMap.set(gene, geneIndex) - } - for (let i = beg, il = end; i <= il; ++i) { - seqToSrcGen[i - 1] = geneIndex - } -} - -export function GeneColorTheme(ctx: ThemeDataContext, props: PD.Values<GeneColorThemeParams>): ColorTheme<GeneColorThemeParams> { - let color: LocationColor - const scale = ColorScale.create({ listOrName: props.list, minLabel: 'Start', maxLabel: 'End' }) - const { structure } = ctx - - if (structure) { - const l = StructureElement.create() - const { models } = structure - const seqToSrcGenByModelEntity = new Map<string, NumberArray>() - const geneSerialMap = new Map<string, number>() // serial no starting from 1 - for (let i = 0, il = models.length; i <il; ++i) { - const m = models[i] - if (m.sourceData.kind !== 'mmCIF') continue - const { entity_src_gen } = m.sourceData.data - - const { entity_id, pdbx_beg_seq_num, pdbx_end_seq_num, pdbx_gene_src_gene } = entity_src_gen - for (let j = 0, jl = entity_src_gen._rowCount; j < jl; ++j) { - const entityId = entity_id.value(j) - const k = modelEntityKey(i, entityId) - if (!seqToSrcGenByModelEntity.has(k)) { - const entityIndex = m.entities.getEntityIndex(entityId) - const seq = m.sequence.sequences[entityIndex].sequence - const seqLength = seq.sequence.length - const seqToGene = new Int16Array(seqLength) - addGene(geneSerialMap, pdbx_gene_src_gene.value(j), pdbx_beg_seq_num.value(j), pdbx_end_seq_num.value(j), seqToGene) - seqToSrcGenByModelEntity.set(k, seqToGene) - } else { - const seqToGene = seqToSrcGenByModelEntity.get(k)! - addGene(geneSerialMap, pdbx_gene_src_gene.value(j), pdbx_beg_seq_num.value(j), pdbx_end_seq_num.value(j), seqToGene) - seqToSrcGenByModelEntity.set(k, seqToGene) - } - } - } - scale.setDomain(1, geneSerialMap.size) - const scaleColor = scale.color - - const getGeneColor = (location: StructureElement) => { - const modelIndex = structure.models.indexOf(location.unit.model) - const entityId = StructureProperties.entity.id(location) - const k = modelEntityKey(modelIndex, entityId) - const seqToGene = seqToSrcGenByModelEntity.get(k) - if (seqToGene) { - // minus 1 to convert seqId to array index - return scaleColor(seqToGene[StructureProperties.residue.label_seq_id(location) - 1]) - } else { - return DefaultColor - } - } - - color = (location: Location): Color => { - if (StructureElement.isLocation(location)) { - return getGeneColor(location) - } else if (Link.isLocation(location)) { - l.unit = location.aUnit - l.element = location.aUnit.elements[location.aIndex] - return getGeneColor(l) - } - return DefaultColor - } - } else { - color = () => DefaultColor - } - - return { - factory: GeneColorTheme, - granularity: 'group', - color, - props, - description: Description, - legend: scale ? scale.legend : undefined - } -} - -export const GeneColorThemeProvider: ColorTheme.Provider<GeneColorThemeParams> = { - label: 'Gene', - factory: GeneColorTheme, - getParams: getGeneColorThemeParams, - defaultValues: PD.getDefaultValues(GeneColorThemeParams), - isApplicable: (ctx: ThemeDataContext) => !!ctx.structure -} \ No newline at end of file