Skip to content
Snippets Groups Projects
Commit b64faed5 authored by Alexander Rose
Browse files

fixed mol2 parser

parent b2c1012c
No related branches found
No related tags found
No related merge requests found
...@@ -62,126 +62,126 @@ GASTEIGER ...@@ -62,126 +62,126 @@ GASTEIGER
25 13 23 1 25 13 23 1
26 13 24 1` 26 13 24 1`
// const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE
// 5816 5816
// 26 26 0 0 0 26 26 0 0 0
// SMALL SMALL
// GASTEIGER GASTEIGER
// @<TRIPOS>ATOM @<TRIPOS>ATOM
// 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859
// 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033
// 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033
// 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162
// 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927
// 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143
// 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258
// 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109
// 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524
// 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586
// 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162
// 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582
// 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157
// 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656
// 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453
// 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453
// 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659
// 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622
// 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217
// 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655
// 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103
// 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388
// 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388
// 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388
// 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923
// 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923
// @<TRIPOS>BOND @<TRIPOS>BOND
// 1 1 5 1 1 1 5 1
// 2 1 21 1 2 1 21 1
// 3 2 10 1 3 2 10 1
// 4 2 25 1 4 2 25 1
// 5 3 12 1 5 3 12 1
// 6 3 26 1 6 3 26 1
// 7 4 7 1 7 4 7 1
// 8 4 13 1 8 4 13 1
// 9 4 19 1 9 4 19 1
// 10 5 6 1 10 5 6 1
// 11 5 7 1 11 5 7 1
// 12 5 14 1 12 5 14 1
// 13 6 8 ar 13 6 8 ar
// 14 6 9 ar 14 6 9 ar
// 15 7 15 1 15 7 15 1
// 16 7 16 1 16 7 16 1
// 17 8 10 ar 17 8 10 ar
// 18 8 17 1 18 8 17 1
// 19 9 11 ar 19 9 11 ar
// 20 9 18 1 20 9 18 1
// 21 10 12 ar 21 10 12 ar
// 22 11 12 ar 22 11 12 ar
// 23 11 20 1 23 11 20 1
// 24 13 22 1 24 13 22 1
// 25 13 23 1 25 13 23 1
// 26 13 24 1 26 13 24 1
// @<TRIPOS>MOLECULE @<TRIPOS>MOLECULE
// 5816 5816
// 26 26 0 0 0 26 26 0 0 0
// SMALL SMALL
// GASTEIGER GASTEIGER
// @<TRIPOS>ATOM @<TRIPOS>ATOM
// 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859
// 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033
// 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033
// 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162
// 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927
// 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143
// 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258
// 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109
// 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524
// 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586
// 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162
// 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582
// 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157
// 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656
// 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453
// 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453
// 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659
// 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622
// 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217
// 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655
// 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103
// 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388
// 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388
// 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388
// 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923
// 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923
// @<TRIPOS>BOND @<TRIPOS>BOND
// 1 1 5 1 1 1 5 1
// 2 1 21 1 2 1 21 1
// 3 2 10 1 3 2 10 1
// 4 2 25 1 4 2 25 1
// 5 3 12 1 5 3 12 1
// 6 3 26 1 6 3 26 1
// 7 4 7 1 7 4 7 1
// 8 4 13 1 8 4 13 1
// 9 4 19 1 9 4 19 1
// 10 5 6 1 10 5 6 1
// 11 5 7 1 11 5 7 1
// 12 5 14 1 12 5 14 1
// 13 6 8 ar 13 6 8 ar
// 14 6 9 ar 14 6 9 ar
// 15 7 15 1 15 7 15 1
// 16 7 16 1 16 7 16 1
// 17 8 10 ar 17 8 10 ar
// 18 8 17 1 18 8 17 1
// 19 9 11 ar 19 9 11 ar
// 20 9 18 1 20 9 18 1
// 21 10 12 ar 21 10 12 ar
// 22 11 12 ar 22 11 12 ar
// 23 11 20 1 23 11 20 1
// 24 13 22 1 24 13 22 1
// 25 13 23 1 25 13 23 1
// 26 13 24 1` 26 13 24 1`
const Mol2StringMinimal = `@<TRIPOS>MOLECULE const Mol2StringMinimal = `@<TRIPOS>MOLECULE
5816 5816
...@@ -251,6 +251,10 @@ describe('mol2 reader', () => { ...@@ -251,6 +251,10 @@ describe('mol2 reader', () => {
throw new Error(parsed.message); throw new Error(parsed.message);
} }
const mol2File = parsed.result; const mol2File = parsed.result;
// number of structures
expect(mol2File.structures.length).toBe(1);
const data = mol2File.structures[0]; const data = mol2File.structures[0];
const { molecule, atoms, bonds } = data; const { molecule, atoms, bonds } = data;
...@@ -292,52 +296,56 @@ describe('mol2 reader', () => { ...@@ -292,52 +296,56 @@ describe('mol2 reader', () => {
expect(bonds.status_bits.value(0)).toBe(''); expect(bonds.status_bits.value(0)).toBe('');
}); });
// it('multiblocks', async () => { it('multiblocks', async () => {
// const parsed = await Mol2(Mol2StringMultiBlocks)(); const parsed = await Mol2(Mol2StringMultiBlocks)();
// if (parsed.isError) { if (parsed.isError) {
// throw new Error(parsed.message); throw new Error(parsed.message);
// } }
// const mol2File = parsed.result; const mol2File = parsed.result;
// const data = mol2File.structures[1];
// const { molecule, atoms, bonds } = data; // number of structures
expect(mol2File.structures.length).toBe(2);
// // molecule fields
// expect(molecule.mol_name).toBe('5816') const data = mol2File.structures[1];
// expect(molecule.num_atoms).toBe(26) const { molecule, atoms, bonds } = data;
// expect(molecule.num_bonds).toBe(26);
// expect(molecule.num_subst).toBe(0); // molecule fields
// expect(molecule.num_feat).toBe(0); expect(molecule.mol_name).toBe('5816')
// expect(molecule.num_sets).toBe(0); expect(molecule.num_atoms).toBe(26)
// expect(molecule.mol_type).toBe("SMALL") expect(molecule.num_bonds).toBe(26);
// expect(molecule.charge_type).toBe("GASTEIGER"); expect(molecule.num_subst).toBe(0);
// expect(molecule.status_bits).toBe(""); expect(molecule.num_feat).toBe(0);
// expect(molecule.mol_comment).toBe(""); expect(molecule.num_sets).toBe(0);
expect(molecule.mol_type).toBe("SMALL")
// // required atom fields expect(molecule.charge_type).toBe("GASTEIGER");
// expect(atoms.count).toBe(26); expect(molecule.status_bits).toBe("");
// expect(atoms.atom_id.value(0)).toBe(1); expect(molecule.mol_comment).toBe("");
// expect(atoms.atom_name.value(0)).toBe('O');
// expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001); // required atom fields
// expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001); expect(atoms.count).toBe(26);
// expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001); expect(atoms.atom_id.value(0)).toBe(1);
// expect(atoms.atom_type.value(0)).toBe("O.3"); expect(atoms.atom_name.value(0)).toBe('O');
expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
// // optional atom fields expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
// expect(atoms.subst_id.value(0)).toBe(1); expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
// expect(atoms.subst_name.value(0)).toBe('LIG1'); expect(atoms.atom_type.value(0)).toBe("O.3");
// expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
// expect(atoms.status_bit.value(0)).toBe(''); // optional atom fields
expect(atoms.subst_id.value(0)).toBe(1);
// // required bond fields expect(atoms.subst_name.value(0)).toBe('LIG1');
// expect(bonds.count).toBe(26); expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
// expect(bonds.bond_id.value(0)).toBe(1); expect(atoms.status_bit.value(0)).toBe('');
// expect(bonds.origin_atom_id.value(0)).toBe(1);
// expect(bonds.target_atom_id.value(0)).toBe(5); // required bond fields
// expect(bonds.bond_type.value(0)).toBe('1'); expect(bonds.count).toBe(26);
expect(bonds.bond_id.value(0)).toBe(1);
// // optional bond fields expect(bonds.origin_atom_id.value(0)).toBe(1);
// expect(bonds.status_bits.value(0)).toBe(''); expect(bonds.target_atom_id.value(0)).toBe(5);
// }); expect(bonds.bond_type.value(0)).toBe('1');
// optional bond fields
expect(bonds.status_bits.value(0)).toBe('');
});
it('minimal', async () => { it('minimal', async () => {
const parsed = await Mol2(Mol2StringMinimal)(); const parsed = await Mol2(Mol2StringMinimal)();
...@@ -345,6 +353,10 @@ describe('mol2 reader', () => { ...@@ -345,6 +353,10 @@ describe('mol2 reader', () => {
throw new Error(parsed.message); throw new Error(parsed.message);
} }
const mol2File = parsed.result; const mol2File = parsed.result;
// number of structures
expect(mol2File.structures.length).toBe(1);
const data = mol2File.structures[0]; const data = mol2File.structures[0];
const { molecule, atoms, bonds } = data; const { molecule, atoms, bonds } = data;
......
...@@ -53,17 +53,21 @@ const reWhitespace = /\s+/g; ...@@ -53,17 +53,21 @@ const reWhitespace = /\s+/g;
function handleMolecule(state: State) { function handleMolecule(state: State) {
const { tokenizer, molecule } = state; const { tokenizer, molecule } = state;
markLine(tokenizer);
while(getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE'){
markLine(tokenizer);
}
markLine(tokenizer); markLine(tokenizer);
molecule.mol_name = getTokenString(tokenizer); molecule.mol_name = getTokenString(tokenizer);
markLine(tokenizer); markLine(tokenizer);
const values = getTokenString(tokenizer).trim().split(reWhitespace); const values = getTokenString(tokenizer).trim().split(reWhitespace);
molecule.num_atoms = parseInt(values[0]) ? parseInt(values[0]) : 0; molecule.num_atoms = parseInt(values[0]);
molecule.num_bonds = parseInt(values[1]) ? parseInt(values[1]) : 0; molecule.num_bonds = parseInt(values[1]);
molecule.num_subst = parseInt(values[2]) ? parseInt(values[2]) : 0; molecule.num_subst = parseInt(values[2]);
molecule.num_feat = parseInt(values[3]) ? parseInt(values[3]) : 0; molecule.num_feat = parseInt(values[3]);
molecule.num_sets = parseInt(values[4]) ? parseInt(values[4]) : 0; molecule.num_sets = parseInt(values[4]);
markLine(tokenizer); markLine(tokenizer);
molecule.mol_type = getTokenString(tokenizer); molecule.mol_type = getTokenString(tokenizer);
...@@ -252,20 +256,20 @@ async function handleBonds(state: State): Promise<Schema.Bonds> { ...@@ -252,20 +256,20 @@ async function handleBonds(state: State): Promise<Schema.Bonds> {
hasStatus_bit = true; hasStatus_bit = true;
} }
// required columns
const bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); const bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
const origin_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); const origin_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
const target_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); const target_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
const bondTypeTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); const bondTypeTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
// optional
const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
const bond_idTokenColumn = TokenColumn(bond_idTokens); const bond_idTokenColumn = TokenColumn(bond_idTokens);
const origin_bond_idTokenColumn = TokenColumn(origin_bond_idTokens); const origin_bond_idTokenColumn = TokenColumn(origin_bond_idTokens);
const target_bond_idTokenColumn = TokenColumn(target_bond_idTokens); const target_bond_idTokenColumn = TokenColumn(target_bond_idTokens);
const bondTypeTokenColumn = TokenColumn(bondTypeTokens); const bondTypeTokenColumn = TokenColumn(bondTypeTokens);
// optional
const status_bitTokenColumn = TokenColumn(status_bitTokens);
// optional columns
const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
const status_bitTokenColumn = TokenColumn(status_bitTokens);
const undefStr = Column.Undefined(molecule.num_bonds, Column.Schema.str); const undefStr = Column.Undefined(molecule.num_bonds, Column.Schema.str);
let numberOfColumn = 4; let numberOfColumn = 4;
...@@ -312,6 +316,8 @@ async function handleBonds(state: State): Promise<Schema.Bonds> { ...@@ -312,6 +316,8 @@ async function handleBonds(state: State): Promise<Schema.Bonds> {
origin_atom_id: origin_bond_idTokenColumn(Column.Schema.int), origin_atom_id: origin_bond_idTokenColumn(Column.Schema.int),
target_atom_id: target_bond_idTokenColumn(Column.Schema.int), target_atom_id: target_bond_idTokenColumn(Column.Schema.int),
bond_type: bondTypeTokenColumn(Column.Schema.str), bond_type: bondTypeTokenColumn(Column.Schema.str),
// optional columns
status_bits: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr, status_bits: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr,
}; };
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment