Skip to content
Snippets Groups Projects
Unverified Commit e94ecf2a authored by David Sehnal's avatar David Sehnal Committed by GitHub
Browse files

Merge pull request #314 from ptourlas/feature/formal-charge-labels

Feature/formal charge labels
parents 9bd60f8e 1bd4d841
No related branches found
No related tags found
No related merge requests found
/**
* Copyright (c) 2019-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
* @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
*/
import { parseMol } from '../mol/parser';
import { parseMol, formalChargeMapper } from '../mol/parser';
const MolString = `2244
-OEChem-04072009073D
......@@ -49,6 +56,48 @@ const MolString = `2244
13 20 1 0 0 0 0
M END`;
/**
 * Fixture: V2000 molfile with four atoms (C, N, S, P) and three bonds.
 * The only charge information is the non-zero charge field on the first
 * atom line; there is no `M CHG` line in the properties block.
 */
const MolStringWithAtomBlockCharge = `
Ketcher 1 72215442D 1 1.00000 0.00000 0
4 3 0 0 0 0 999 V2000
0.0000 0.0000 0.0000 C 0 1 0 0 0 0 0 0 0 0 0 0
0.8660 0.5000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-0.8660 0.5000 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0
0.0000 -1.0000 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0
1 4 2 0 0 0 0
3 1 1 0 0 0 0
2 1 1 0 0 0 0
M END`;
/**
 * Fixture: V2000 molfile with four atoms (C, N, S, P) whose atom-block
 * charge fields are all zero. Charges are declared by a single `M CHG`
 * line carrying three (atom index, charge) entries: (2, -1), (3, 1), (4, 1).
 */
const MolStringWithPropertyBlockCharge = `
Ketcher 1 72215442D 1 1.00000 0.00000 0
4 3 0 0 0 0 999 V2000
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.8660 0.5000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-0.8660 0.5000 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0
0.0000 -1.0000 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0
1 4 2 0 0 0 0
3 1 1 0 0 0 0
2 1 1 0 0 0 0
M CHG 3 2 -1 3 1 4 1
M END`;
/**
 * Fixture: V2000 molfile with four atoms (C, N, S, P) whose atom-block
 * charge fields are all zero. The charges are split over two `M CHG`
 * lines: the first carries one entry (2, -1), the second carries two
 * entries (3, 1) and (4, 1).
 */
const MolStringWithMultipleChargeLines = `
Ketcher 1 72215442D 1 1.00000 0.00000 0
4 3 0 0 0 0 999 V2000
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.8660 0.5000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
-0.8660 0.5000 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0
0.0000 -1.0000 0.0000 P 0 0 0 0 0 0 0 0 0 0 0 0
1 4 2 0 0 0 0
3 1 1 0 0 0 0
2 1 1 0 0 0 0
M CHG 1 2 -1
M CHG 2 3 1 4 1
M END`;
describe('mol reader', () => {
it('basic', async () => {
const parsed = await parseMol(MolString).run();
......@@ -70,4 +119,63 @@ describe('mol reader', () => {
expect(bonds.atomIdxB.value(20)).toBe(20);
expect(bonds.order.value(20)).toBe(1);
});
/* A single `M CHG` line with three entries must yield three rows, and every
   row (not only the first two) has to carry the atom index and charge
   written in the fixture. */
it('property block charges', async () => {
    const parsed = await parseMol(MolStringWithPropertyBlockCharge).run();
    if (parsed.isError) {
        throw new Error(parsed.message);
    }
    const { formalCharges } = parsed.result;

    /* (atom index, charge) pairs in the order they appear on the `M CHG` line. */
    const expected: ReadonlyArray<[number, number]> = [[2, -1], [3, 1], [4, 1]];

    expect(formalCharges.atomIdx.rowCount).toBe(expected.length);
    expect(formalCharges.charge.rowCount).toBe(expected.length);

    for (let row = 0; row < expected.length; row++) {
        const [atomIdx, charge] = expected[row];
        expect(formalCharges.atomIdx.value(row)).toBe(atomIdx);
        expect(formalCharges.charge.value(row)).toBe(charge);
    }
});
/* Entries spread over several `M CHG` lines must be accumulated in file
   order. The last row comes from the second pair of the second line, so it
   is checked explicitly as well. */
it('multiple charge lines', async () => {
    const parsed = await parseMol(MolStringWithMultipleChargeLines).run();
    if (parsed.isError) {
        throw new Error(parsed.message);
    }
    const { formalCharges } = parsed.result;

    /* (atom index, charge) pairs: one from the first line, two from the second. */
    const expected: ReadonlyArray<[number, number]> = [[2, -1], [3, 1], [4, 1]];

    expect(formalCharges.atomIdx.rowCount).toBe(expected.length);
    expect(formalCharges.charge.rowCount).toBe(expected.length);

    for (let row = 0; row < expected.length; row++) {
        const [atomIdx, charge] = expected[row];
        expect(formalCharges.atomIdx.value(row)).toBe(atomIdx);
        expect(formalCharges.charge.value(row)).toBe(charge);
    }
});
/* Every atom-block charge code from 0 to 7 must translate to its formal charge. */
it('atom block charge mapping', async () => {
    /* [code found in the atom block, expected formal charge] */
    const codeToCharge: ReadonlyArray<[number, number]> = [
        [7, -3],
        [6, -2],
        [5, -1],
        [0, 0],
        [3, 1],
        [2, 2],
        [1, 3],
        [4, 0],
    ];
    for (const [code, charge] of codeToCharge) {
        expect(formalChargeMapper(code)).toBe(charge);
    }
});
/* Without an `M CHG` line the property-block columns stay empty and the
   per-atom column exposes the charge fields of the atom lines. */
it('atom block charges', async () => {
    const parsed = await parseMol(MolStringWithAtomBlockCharge).run();
    if (parsed.isError) {
        throw new Error(parsed.message);
    }
    const { atoms, formalCharges } = parsed.result;

    /* No property block charges */
    expect(formalCharges.atomIdx.rowCount).toBe(0);
    expect(formalCharges.charge.rowCount).toBe(0);

    /* Expected values are the charge fields exactly as written in the
       fixture's four atom lines. */
    const atomLineCharges = [1, 0, 0, 0];
    for (let atom = 0; atom < atomLineCharges.length; atom++) {
        expect(atoms.formal_charge.value(atom)).toBe(atomLineCharges[atom]);
    }
});
});
/**
* Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
* @author David Sehnal <david.sehnal@gmail.com>
* @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
*/
import { parseSdf } from '../sdf/parser';
......@@ -458,6 +465,38 @@ describe('sdf reader', () => {
expect(compound3.dataItems.data.value(21)).toBe('2\n5\n10');
});
/* Each compound of the SDF fixture must expose the expected number of
   property-block charges, with `atomIdx` and `charge` columns of equal
   length. Counts are compared directly (instead of asserting on a boolean)
   so that a failure reports the actual row counts. */
it('charge parsing in V2000', async () => {
    const parsed = await parseSdf(SdfString).run();
    if (parsed.isError) {
        throw new Error(parsed.message);
    }
    const { compounds } = parsed.result;

    /* Expected number of formal charge rows for compounds 0, 1 and 2. */
    const expectedRowCounts = [3, 3, 0];

    for (let i = 0; i < expectedRowCounts.length; i++) {
        const { atomIdx, charge } = compounds[i].molFile.formalCharges;
        expect(atomIdx.rowCount).toBe(expectedRowCounts[i]);
        expect(charge.rowCount).toBe(atomIdx.rowCount);
    }
});
it('v3000', async () => {
const parsed = await parseSdf(V3000SdfString).run();
if (parsed.isError) {
......@@ -486,6 +525,11 @@ describe('sdf reader', () => {
expect(compound1.molFile.bonds.atomIdxB.value(10)).toBe(9);
expect(compound1.molFile.bonds.order.value(10)).toBe(2);
expect(compound1.molFile.formalCharges.atomIdx.rowCount).toBe(13);
for (let i = 0; i < compound1.molFile.atoms.count; i++) {
expect(compound1.molFile.formalCharges.charge.value(i)).toBe(0);
}
expect(compound1.dataItems.dataHeader.rowCount).toBe(2);
expect(compound1.dataItems.data.rowCount).toBe(2);
......
/**
* Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
* Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
* @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
*/
import { Column } from '../../../mol-data/db';
......@@ -10,6 +11,7 @@ import { TokenColumnProvider as TokenColumn } from '../common/text/column/token'
import { TokenBuilder, Tokenizer } from '../common/text/tokenizer';
import { ReaderResult as Result } from '../result';
/** Subset of the MolFile V2000 format */
export interface MolFile {
readonly title: string,
......@@ -20,7 +22,8 @@ export interface MolFile {
readonly x: Column<number>,
readonly y: Column<number>,
readonly z: Column<number>,
readonly type_symbol: Column<string>
readonly type_symbol: Column<string>,
readonly formal_charge: Column<number>
},
readonly bonds: {
readonly count: number
......@@ -28,6 +31,57 @@ export interface MolFile {
readonly atomIdxB: Column<number>,
readonly order: Column<number>
}
readonly formalCharges: {
readonly atomIdx: Column<number>;
readonly charge: Column<number>;
}
}
/*
The atom lines in a .mol file have the following structure:
xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
---------------------------------------------------------------------
Below is a breakdown of each component and its start/end indices (0-based, end exclusive):
xxxxx.xxxx (X COORDINATE, 0-10)
yyyyy.yyyy (Y COORDINATE, 10-20)
zzzzz.zzzz (Z COORDINATE, 20-30)
_ (30 IS EMPTY)
aaa (ATOM SYMBOL, 31-34)
dd (MASS DIFF, 34-36)
ccc (FORMAL CHARGE, 36-39)
sss (ATOM STEREO PARITY, 39-42)
hhh (HYDROGEN COUNT+1, 42-45)
bbb (STEREO CARE BOX, 45-48)
vvv (VALENCE, 48-51)
HHH (H0 DESIGNATOR, 51-54)
rrr (UNUSED, 54-57)
iii (UNUSED, 57-60)
mmm (ATOM-ATOM MAPPING NUMBER, 60-63)
nnn (INVERSION/RETENTION FLAG, 63-66)
eee (EXACT CHANGE FLAG, 66-69)
*/
/**
 * Charge codes that may appear in the charge field of a V2000 atom line,
 * keyed to the formal charge each one stands for. Codes 0 and 4 both
 * resolve to a charge of 0 (in the CTfile format 4 marks a doublet radical).
 */
const ATOM_BLOCK_CHARGE_BY_CODE: ReadonlyMap<number, number> = new Map([
    [0, 0],
    [1, 3],
    [2, 2],
    [3, 1],
    [4, 0],
    [5, -1],
    [6, -2],
    [7, -3],
]);

/**
 * Translates the charge code of an atom line into the formal charge it
 * represents. Any value that is not one of the eight known codes is
 * reported through `console.error` and treated as uncharged.
 *
 * @param key - The value found at the atom block.
 * @returns The actual formal charge based on the mapping.
 */
export function formalChargeMapper(key: number) {
    const charge = ATOM_BLOCK_CHARGE_BY_CODE.get(key);
    if (charge !== undefined) return charge;

    console.error(`Value ${key} is outside the 0-7 range, defaulting to 0.`);
    return 0;
}
export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms'] {
......@@ -35,6 +89,7 @@ export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms
const y = TokenBuilder.create(tokenizer.data, count * 2);
const z = TokenBuilder.create(tokenizer.data, count * 2);
const type_symbol = TokenBuilder.create(tokenizer.data, count * 2);
const formal_charge = TokenBuilder.create(tokenizer.data, count * 2);
for (let i = 0; i < count; ++i) {
Tokenizer.markLine(tokenizer);
......@@ -47,6 +102,8 @@ export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms
TokenBuilder.addUnchecked(z, tokenizer.tokenStart, tokenizer.tokenEnd);
Tokenizer.trim(tokenizer, s + 31, s + 34);
TokenBuilder.addUnchecked(type_symbol, tokenizer.tokenStart, tokenizer.tokenEnd);
Tokenizer.trim(tokenizer, s + 36, s + 39);
TokenBuilder.addUnchecked(formal_charge, tokenizer.tokenStart, tokenizer.tokenEnd);
tokenizer.position = position;
}
......@@ -55,7 +112,8 @@ export function handleAtoms(tokenizer: Tokenizer, count: number): MolFile['atoms
x: TokenColumn(x)(Column.Schema.float),
y: TokenColumn(y)(Column.Schema.float),
z: TokenColumn(z)(Column.Schema.float),
type_symbol: TokenColumn(type_symbol)(Column.Schema.str)
type_symbol: TokenColumn(type_symbol)(Column.Schema.str),
formal_charge: TokenColumn(formal_charge)(Column.Schema.int)
};
}
......@@ -84,6 +142,76 @@ export function handleBonds(tokenizer: Tokenizer, count: number): MolFile['bonds
};
}
/**
 * Mutable accumulator for charges read from `M CHG` property lines.
 * The two arrays are filled in lockstep: entry `i` of `charge` belongs to
 * the atom index stored at entry `i` of `atomIdx`.
 */
interface FormalChargesRawData {
    /** Atom indices exactly as read from the file. */
    atomIdx: number[];
    /** Charge values exactly as read from the file. */
    charge: number[];
}
/**
 * Reads one `M CHG` property line and appends every (atom index, charge)
 * entry it declares to `formalCharges`.
 *
 * Layout, as offsets from `lineStart`:
 *   [6, 9)                  number of entries on the line
 *   [9 + 8*i, 13 + 8*i)     atom index of entry i
 *   [13 + 8*i, 17 + 8*i)    charge of entry i
 *
 * @param tokenizer - Tokenizer over the file being parsed.
 * @param lineStart - Absolute offset of the first character of the `M CHG` line.
 * @param formalCharges - Accumulator the parsed values are pushed onto.
 */
export function handleFormalCharges(tokenizer: Tokenizer, lineStart: number, formalCharges: FormalChargesRawData) {
    const { atomIdx, charge } = formalCharges;

    Tokenizer.trim(tokenizer, lineStart + 6, lineStart + 9);
    const entryCount = parseInt(Tokenizer.getTokenString(tokenizer));

    /* Each entry is two consecutive 4-character fields: atom index, then charge. */
    let fieldStart = lineStart + 9;
    for (let entry = 0; entry < entryCount; ++entry) {
        const chargeStart = fieldStart + 4;
        const entryEnd = fieldStart + 8;

        Tokenizer.trim(tokenizer, fieldStart, chargeStart);
        atomIdx.push(+Tokenizer.getTokenString(tokenizer));

        Tokenizer.trim(tokenizer, chargeStart, entryEnd);
        charge.push(+Tokenizer.getTokenString(tokenizer));

        fieldStart = entryEnd;
    }

    /* The whole line has been consumed; continue with the next one. */
    Tokenizer.eatLine(tokenizer);
}
/**
 * Scans the properties block line by line, dispatching each line to the
 * handler for its property type, and stops at the `END` line or at the end
 * of the data.
 * (Only `CHG` lines have a handler so far; lines of any other type are
 * skipped. Further handlers can be added for other properties.)
 *
 * @param tokenizer - Tokenizer positioned at the first line of the properties block.
 * @returns Columns holding the atom indices and charges collected from all `CHG` lines.
 */
export function handlePropertiesBlock(tokenizer: Tokenizer): MolFile['formalCharges'] {
    const collected: FormalChargesRawData = { atomIdx: [], charge: [] };

    while (tokenizer.position < tokenizer.length) {
        const lineStart = tokenizer.position;

        /* The property type is the token at offsets [3, 6) of the line. */
        Tokenizer.trim(tokenizer, lineStart + 3, lineStart + 6);
        const propertyType = Tokenizer.getTokenString(tokenizer);
        if (propertyType === 'END') break;

        /* The line is eaten before dispatching; the handler addresses the
           line again through the absolute `lineStart` offset. */
        Tokenizer.eatLine(tokenizer);
        if (propertyType === 'CHG') {
            handleFormalCharges(tokenizer, lineStart, collected);
        }
    }

    return {
        atomIdx: Column.ofIntArray(collected.atomIdx),
        charge: Column.ofIntArray(collected.charge)
    };
}
function parseInternal(data: string): Result<MolFile> {
const tokenizer = Tokenizer(data);
......@@ -98,12 +226,15 @@ function parseInternal(data: string): Result<MolFile> {
const atoms = handleAtoms(tokenizer, atomCount);
const bonds = handleBonds(tokenizer, bondCount);
const formalCharges = handlePropertiesBlock(tokenizer);
const result: MolFile = {
title,
program,
comment,
atoms,
bonds
bonds,
formalCharges,
};
return Result.success(result);
}
......
/**
* Copyright (c) 2021-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Jason Pattle <jpattle@exscientia.co.uk>
* @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
*/
import { Column } from '../../../mol-data/db';
import { MolFile } from '../mol/parser';
import { Tokenizer, TokenBuilder, Tokens } from '../common/text/tokenizer';
......@@ -61,6 +68,9 @@ export function handleAtomsV3(
y: TokenColumn(y)(Column.Schema.float),
z: TokenColumn(z)(Column.Schema.float),
type_symbol: TokenColumn(type_symbol)(Column.Schema.str),
/* No support for formal charge parsing in V3000 molfiles at the moment,
so all charges default to 0.*/
formal_charge: Column.ofConst(0, atomCount, Column.Schema.int)
};
}
......
/**
* Copyright (c) 2020-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
* Copyright (c) 2020-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Sebastian Bittrich <sebastian.bittrich@rcsb.org>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
* @author Jason Pattle <jpattle@exscientia.co.uk>
* @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
*/
import { Column } from '../../../mol-data/db';
import { MolFile, handleAtoms, handleBonds } from '../mol/parser';
import { MolFile, handleAtoms, handleBonds, handlePropertiesBlock } from '../mol/parser';
import { Task } from '../../../mol-task';
import { ReaderResult as Result } from '../result';
import { Tokenizer, TokenBuilder } from '../common/text/tokenizer';
......@@ -29,6 +31,7 @@ export interface SdfFile {
const delimiter = '$$$$';
function handleDataItems(tokenizer: Tokenizer): { dataHeader: Column<string>, data: Column<string> } {
const dataHeader = TokenBuilder.create(tokenizer.data, 32);
const data = TokenBuilder.create(tokenizer.data, 32);
......@@ -93,12 +96,20 @@ function handleMolFile(tokenizer: Tokenizer) {
return;
}
/* No support for formal charge parsing in V3000 molfiles at the moment,
so all charges default to 0.*/
const nullFormalCharges: MolFile['formalCharges'] = {
atomIdx: Column.ofConst(0, atomCount, Column.Schema.int),
charge: Column.ofConst(0, atomCount, Column.Schema.int)
};
const atoms = molIsV3 ? handleAtomsV3(tokenizer, atomCount) : handleAtoms(tokenizer, atomCount);
const bonds = molIsV3 ? handleBondsV3(tokenizer, bondCount) : handleBonds(tokenizer, bondCount);
const formalCharges = molIsV3 ? nullFormalCharges : handlePropertiesBlock(tokenizer);
const dataItems = handleDataItems(tokenizer);
return {
molFile: { title, program, comment, atoms, bonds },
molFile: { title, program, comment, atoms, bonds, formalCharges },
dataItems
};
}
......
/**
* Copyright (c) 2019-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
* Copyright (c) 2019-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
* @author Panagiotis Tourlas <panagiot_tourlov@hotmail.com>
*/
import { Column, Table } from '../../mol-data/db';
import { MolFile } from '../../mol-io/reader/mol/parser';
import { MolFile, formalChargeMapper } from '../../mol-io/reader/mol/parser';
import { MoleculeType } from '../../mol-model/structure/model/types';
import { RuntimeContext, Task } from '../../mol-task';
import { createModels } from './basic/parser';
......@@ -18,13 +19,24 @@ import { IndexPairBonds } from './property/bonds/index-pair';
import { Trajectory } from '../../mol-model/structure';
export async function getMolModels(mol: MolFile, format: ModelFormat<any> | undefined, ctx: RuntimeContext) {
const { atoms, bonds } = mol;
const { atoms, bonds, formalCharges } = mol;
const MOL = Column.ofConst('MOL', mol.atoms.count, Column.Schema.str);
const A = Column.ofConst('A', mol.atoms.count, Column.Schema.str);
const type_symbol = Column.asArrayColumn(atoms.type_symbol);
const seq_id = Column.ofConst(1, atoms.count, Column.Schema.int);
const computedFormalCharges = new Int32Array(mol.atoms.count);
if (formalCharges.atomIdx.rowCount > 0) {
for (let i = 0; i < formalCharges.atomIdx.rowCount; i++) {
computedFormalCharges[formalCharges.atomIdx.value(i) - 1] = formalCharges.charge.value(i);
}
} else {
for (let i = 0; i < mol.atoms.count; i++) {
computedFormalCharges[i] = formalChargeMapper(atoms.formal_charge.value(i));
}
}
const atom_site = Table.ofPartialColumns(BasicSchema.atom_site, {
auth_asym_id: A,
auth_atom_id: type_symbol,
......@@ -45,6 +57,7 @@ export async function getMolModels(mol: MolFile, format: ModelFormat<any> | unde
type_symbol,
pdbx_PDB_model_num: Column.ofConst(1, atoms.count, Column.Schema.int),
pdbx_formal_charge: Column.ofIntArray(computedFormalCharges)
}, atoms.count);
const entityBuilder = new EntityBuilder();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment