diff --git a/src/mol-io/reader/_spec/mol2.spec.ts b/src/mol-io/reader/_spec/mol2.spec.ts index bb792004a96763a6ab87674d7aa823050ba8be55..c329f61a4e5c66a28588d8424eb4327dbafb25ff 100644 --- a/src/mol-io/reader/_spec/mol2.spec.ts +++ b/src/mol-io/reader/_spec/mol2.spec.ts @@ -62,126 +62,126 @@ GASTEIGER 25 13 23 1 26 13 24 1` -// const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE -// 5816 -// 26 26 0 0 0 -// SMALL -// GASTEIGER - -// @<TRIPOS>ATOM -// 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 -// 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 -// 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 -// 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 -// 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 -// 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 -// 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 -// 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 -// 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 -// 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 -// 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 -// 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 -// 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 -// 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 -// 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 -// 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 -// 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 -// 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 -// 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 -// 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 -// 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 -// 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 -// 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 -// 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 -// 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 -// 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 -// @<TRIPOS>BOND -// 1 1 5 1 -// 2 1 21 1 -// 3 2 10 1 -// 4 2 25 1 -// 5 3 12 1 -// 6 3 26 1 -// 7 4 7 1 -// 8 4 13 1 -// 9 4 19 1 -// 10 5 6 1 -// 11 5 7 1 -// 12 5 14 1 -// 13 6 8 ar -// 14 6 9 ar -// 15 7 15 1 -// 16 7 16 1 -// 17 8 10 ar -// 18 8 17 1 -// 19 9 11 ar -// 20 9 18 1 -// 21 10 12 ar -// 22 11 12 ar -// 23 11 20 1 -// 24 13 22 1 -// 25 13 23 1 -// 26 13 24 1 -// @<TRIPOS>MOLECULE -// 5816 -// 26 26 0 0 0 -// SMALL -// GASTEIGER - -// @<TRIPOS>ATOM -// 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 -// 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 -// 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 -// 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 -// 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 -// 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 -// 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 -// 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 -// 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 -// 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 -// 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 -// 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 -// 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 -// 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 -// 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 -// 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 -// 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 -// 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 -// 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 -// 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 -// 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 -// 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 -// 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 -// 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 -// 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 -// 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 -// @<TRIPOS>BOND -// 1 1 5 1 -// 2 1 21 1 -// 3 2 10 1 -// 4 2 25 1 -// 5 3 12 1 -// 6 3 26 1 -// 7 4 7 1 -// 8 4 13 1 -// 9 4 19 1 -// 10 5 6 1 -// 11 5 7 1 -// 12 5 14 1 -// 13 6 8 ar -// 14 6 9 ar -// 15 7 15 1 -// 16 7 16 1 -// 17 8 10 ar -// 18 8 17 1 -// 19 9 11 ar -// 20 9 18 1 -// 21 10 12 ar -// 22 11 12 ar -// 23 11 20 1 -// 24 13 22 1 -// 25 13 23 1 -// 26 13 24 1` +const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE +5816 + 26 26 0 0 0 +SMALL +GASTEIGER + +@<TRIPOS>ATOM + 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 + 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 + 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 + 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 + 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 + 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 + 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 + 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 + 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 + 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 + 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 + 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 + 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 + 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 + 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 + 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 + 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 + 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 + 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 + 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 + 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 + 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 + 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 + 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 + 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 + 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 +@<TRIPOS>BOND + 1 1 5 1 + 2 1 21 1 + 3 2 10 1 + 4 2 25 1 + 5 3 12 1 + 6 3 26 1 + 7 4 7 1 + 8 4 13 1 + 9 4 19 1 + 10 5 6 1 + 11 5 7 1 + 12 5 14 1 + 13 6 8 ar + 14 6 9 ar + 15 7 15 1 + 16 7 16 1 + 17 8 10 ar + 18 8 17 1 + 19 9 11 ar + 20 9 18 1 + 21 10 12 ar + 22 11 12 ar + 23 11 20 1 + 24 13 22 1 + 25 13 23 1 + 26 13 24 1 +@<TRIPOS>MOLECULE +5816 + 26 26 0 0 0 +SMALL +GASTEIGER + +@<TRIPOS>ATOM + 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 + 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 + 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 + 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 + 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 + 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 + 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 + 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 + 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 + 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 + 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 + 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 + 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 + 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 + 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 + 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 + 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 + 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 + 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 + 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 + 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 + 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 + 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 + 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 + 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 + 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 +@<TRIPOS>BOND + 1 1 5 1 + 2 1 21 1 + 3 2 10 1 + 4 2 25 1 + 5 3 12 1 + 6 3 26 1 + 7 4 7 1 + 8 4 13 1 + 9 4 19 1 + 10 5 6 1 + 11 5 7 1 + 12 5 14 1 + 13 6 8 ar + 14 6 9 ar + 15 7 15 1 + 16 7 16 1 + 17 8 10 ar + 18 8 17 1 + 19 9 11 ar + 20 9 18 1 + 21 10 12 ar + 22 11 12 ar + 23 11 20 1 + 24 13 22 1 + 25 13 23 1 + 26 13 24 1` const Mol2StringMinimal = `@<TRIPOS>MOLECULE 5816 @@ -251,6 +251,10 @@ describe('mol2 reader', () => { throw new Error(parsed.message); } const mol2File = parsed.result; + + // number of structures + expect(mol2File.structures.length).toBe(1); + const data = mol2File.structures[0]; const { molecule, atoms, bonds } = data; @@ -292,52 +296,56 @@ describe('mol2 reader', () => { expect(bonds.status_bits.value(0)).toBe(''); }); - // it('multiblocks', async () => { - // const parsed = await Mol2(Mol2StringMultiBlocks)(); - // if (parsed.isError) { - // throw new Error(parsed.message); - // } - // const mol2File = parsed.result; - // const data = mol2File.structures[1]; - // const { molecule, atoms, bonds } = data; - - // // molecule fields - // expect(molecule.mol_name).toBe('5816') - // expect(molecule.num_atoms).toBe(26) - // expect(molecule.num_bonds).toBe(26); - // expect(molecule.num_subst).toBe(0); - // expect(molecule.num_feat).toBe(0); - // expect(molecule.num_sets).toBe(0); - // expect(molecule.mol_type).toBe("SMALL") - // expect(molecule.charge_type).toBe("GASTEIGER"); - // expect(molecule.status_bits).toBe(""); - // expect(molecule.mol_comment).toBe(""); - - // // required atom fields - // expect(atoms.count).toBe(26); - // expect(atoms.atom_id.value(0)).toBe(1); - // expect(atoms.atom_name.value(0)).toBe('O'); - // expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001); - // expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001); - // expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001); - // expect(atoms.atom_type.value(0)).toBe("O.3"); - - // // optional atom fields - // expect(atoms.subst_id.value(0)).toBe(1); - // expect(atoms.subst_name.value(0)).toBe('LIG1'); - // expect(atoms.charge.value(0)).toBeCloseTo(-0.3859); - // expect(atoms.status_bit.value(0)).toBe(''); - - // // required bond fields - // expect(bonds.count).toBe(26); - // expect(bonds.bond_id.value(0)).toBe(1); - // expect(bonds.origin_atom_id.value(0)).toBe(1); - // expect(bonds.target_atom_id.value(0)).toBe(5); - // expect(bonds.bond_type.value(0)).toBe('1'); - - // // optional bond fields - // expect(bonds.status_bits.value(0)).toBe(''); - // }); + it('multiblocks', async () => { + const parsed = await Mol2(Mol2StringMultiBlocks)(); + if (parsed.isError) { + throw new Error(parsed.message); + } + const mol2File = parsed.result; + + // number of structures + expect(mol2File.structures.length).toBe(2); + + const data = mol2File.structures[1]; + const { molecule, atoms, bonds } = data; + + // molecule fields + expect(molecule.mol_name).toBe('5816') + expect(molecule.num_atoms).toBe(26) + expect(molecule.num_bonds).toBe(26); + expect(molecule.num_subst).toBe(0); + expect(molecule.num_feat).toBe(0); + expect(molecule.num_sets).toBe(0); + expect(molecule.mol_type).toBe("SMALL") + expect(molecule.charge_type).toBe("GASTEIGER"); + expect(molecule.status_bits).toBe(""); + expect(molecule.mol_comment).toBe(""); + + // required atom fields + expect(atoms.count).toBe(26); + expect(atoms.atom_id.value(0)).toBe(1); + expect(atoms.atom_name.value(0)).toBe('O'); + expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001); + expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001); + expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001); + expect(atoms.atom_type.value(0)).toBe("O.3"); + + // optional atom fields + expect(atoms.subst_id.value(0)).toBe(1); + expect(atoms.subst_name.value(0)).toBe('LIG1'); + expect(atoms.charge.value(0)).toBeCloseTo(-0.3859); + expect(atoms.status_bit.value(0)).toBe(''); + + // required bond fields + expect(bonds.count).toBe(26); + expect(bonds.bond_id.value(0)).toBe(1); + expect(bonds.origin_atom_id.value(0)).toBe(1); + expect(bonds.target_atom_id.value(0)).toBe(5); + expect(bonds.bond_type.value(0)).toBe('1'); + + // optional bond fields + expect(bonds.status_bits.value(0)).toBe(''); + }); it('minimal', async () => { const parsed = await Mol2(Mol2StringMinimal)(); @@ -345,6 +353,10 @@ describe('mol2 reader', () => { throw new Error(parsed.message); } const mol2File = parsed.result; + + // number of structures + expect(mol2File.structures.length).toBe(1); + const data = mol2File.structures[0]; const { molecule, atoms, bonds } = data; diff --git a/src/mol-io/reader/mol2/parser.ts b/src/mol-io/reader/mol2/parser.ts index 5613570b979740aa13252591a7e60b3af4117714..f807f4ff7b78d1c3f7a051fbba88a0a69c6216de 100644 --- a/src/mol-io/reader/mol2/parser.ts +++ b/src/mol-io/reader/mol2/parser.ts @@ -53,17 +53,21 @@ const reWhitespace = /\s+/g; function handleMolecule(state: State) { const { tokenizer, molecule } = state; - markLine(tokenizer); + + while(getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE'){ + markLine(tokenizer); + } + markLine(tokenizer); molecule.mol_name = getTokenString(tokenizer); markLine(tokenizer); const values = getTokenString(tokenizer).trim().split(reWhitespace); - molecule.num_atoms = parseInt(values[0]) ? parseInt(values[0]) : 0; - molecule.num_bonds = parseInt(values[1]) ? parseInt(values[1]) : 0; - molecule.num_subst = parseInt(values[2]) ? parseInt(values[2]) : 0; - molecule.num_feat = parseInt(values[3]) ? parseInt(values[3]) : 0; - molecule.num_sets = parseInt(values[4]) ? parseInt(values[4]) : 0; + molecule.num_atoms = parseInt(values[0]); + molecule.num_bonds = parseInt(values[1]); + molecule.num_subst = parseInt(values[2]); + molecule.num_feat = parseInt(values[3]); + molecule.num_sets = parseInt(values[4]); markLine(tokenizer); molecule.mol_type = getTokenString(tokenizer); @@ -252,20 +256,20 @@ async function handleBonds(state: State): Promise<Schema.Bonds> { hasStatus_bit = true; } + // required columns const bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); const origin_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); const target_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); const bondTypeTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); - // optional - const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); const bond_idTokenColumn = TokenColumn(bond_idTokens); const origin_bond_idTokenColumn = TokenColumn(origin_bond_idTokens); const target_bond_idTokenColumn = TokenColumn(target_bond_idTokens); const bondTypeTokenColumn = TokenColumn(bondTypeTokens); - // optional - const status_bitTokenColumn = TokenColumn(status_bitTokens); + // optional columns + const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); + const status_bitTokenColumn = TokenColumn(status_bitTokens); const undefStr = Column.Undefined(molecule.num_bonds, Column.Schema.str); let numberOfColumn = 4; @@ -312,6 +316,8 @@ async function handleBonds(state: State): Promise<Schema.Bonds> { origin_atom_id: origin_bond_idTokenColumn(Column.Schema.int), target_atom_id: target_bond_idTokenColumn(Column.Schema.int), bond_type: bondTypeTokenColumn(Column.Schema.str), + + // optional columns status_bits: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr, };