From 2735ab4555573ed2e40487e15be9295314a5baa1 Mon Sep 17 00:00:00 2001 From: Zepei Xu <xuzepei19950617@gmail.com> Date: Sat, 21 Oct 2017 23:08:03 -0700 Subject: [PATCH] complete mol2 parser, and its unit tests --- src/reader/mol2/parser.ts | 167 ++++++++--------- src/reader/mol2/schema.d.ts | 1 - src/reader/spec/gro.spec.ts | 1 - src/reader/spec/mol2.spec.ts | 340 +++++++++++++++++++++++++++++++++-- 4 files changed, 412 insertions(+), 97 deletions(-) diff --git a/src/reader/mol2/parser.ts b/src/reader/mol2/parser.ts index 035f1f5da..87a22cbf7 100644 --- a/src/reader/mol2/parser.ts +++ b/src/reader/mol2/parser.ts @@ -1,27 +1,20 @@ +// NOTES +//When want to created undefined string column, must use +// undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str) +// but not +// const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr); +// because latter actuall return a column of zeros import Tokenizer from '../common/text/tokenizer' import FixedColumn from '../common/text/column/fixed' import { ColumnType, UndefinedColumn } from '../common/column' import * as Schema from './schema' import Result from '../result' -import Computation from '../../utils/computation' ////////// not using this - - -/*////////////////////////// NOTES ////////////////////////////// - Not using async, wait, promises, computation, chunker. - Formatting is not clear, different exmaples mol2 files has different field sizes, so making -columns using col() is not possible. Need to implement checks for optional columns, but not clear -about the names of each entry in a row in the example files. - Don't know when to use str and when to use pooledStr - Unlike gro file, mol2 file don't save 'hasSomthing' properties in the header-like Molecule -///////////////////////////////////////////////////////////////*/ - - - +import Computation from '../../utils/computation' interface State { tokenizer: Tokenizer, molecule: Schema.Molecule, - ///////////// not including Computation.chunker ///////////// + chunker: Computation.Chunker } @@ -44,11 +37,11 @@ function createEmptyMolecule(): Schema.Molecule { -function State(tokenizer: Tokenizer): State { //////////// not having ctx: Computation.Context as a parameter ////////////// +function State(tokenizer: Tokenizer, ctx: Computation.Context): State { return { tokenizer, molecule: createEmptyMolecule(), - //////////// not having chunker: Computation.chunker(ctx, 100000) /////////// + chunker: Computation.chunker(ctx, 100000) }; } @@ -58,18 +51,18 @@ function State(tokenizer: Tokenizer): State { //////////// not having ctx: Compu function handleMolecule(state: State) { const { tokenizer, molecule } = state; - + Tokenizer.markLine(tokenizer); // skip the line '@<TRIPOS>MOLECULE' Tokenizer.markLine(tokenizer); let name = Tokenizer.getTokenString(tokenizer); molecule.mol_name = name; Tokenizer.markLine(tokenizer); const values = Tokenizer.getTokenString(tokenizer).trim().split(/\s+/g); - molecule.num_atoms = parseInt(values[0]); - molecule.num_bonds = parseInt(values[1]); - molecule.num_subst = parseInt(values[2]); - molecule.num_feat = parseInt(values[3]); - molecule.num_sets = parseInt(values[4]); + molecule.num_atoms = parseInt(values[0]) ? parseInt(values[1]) : 0; + molecule.num_bonds = parseInt(values[1]) ? parseInt(values[1]) : 0; + molecule.num_subst = parseInt(values[2]) ? parseInt(values[1]) : 0; + molecule.num_feat = parseInt(values[3]) ? parseInt(values[1]) : 0; + molecule.num_sets = parseInt(values[4]) ? parseInt(values[1]) : 0; Tokenizer.markLine(tokenizer); molecule.mol_type = Tokenizer.getTokenString(tokenizer); @@ -77,56 +70,71 @@ function handleMolecule(state: State) { Tokenizer.markLine(tokenizer); molecule.charge_type = Tokenizer.getTokenString(tokenizer); - // skip the empty line - Tokenizer.markLine(tokenizer) + Tokenizer.markLine(tokenizer); + if(Tokenizer.getTokenString(tokenizer) == ''){return} + else{molecule.status_bits = Tokenizer.getTokenString(tokenizer)} + + Tokenizer.markLine(tokenizer); + if(Tokenizer.getTokenString(tokenizer) == ''){return} + else{molecule.mol_comment = Tokenizer.getTokenString(tokenizer)} } -function handleAtoms(state: State): Schema.Atoms { +async function handleAtoms(state: State): Promise<Schema.Atoms> { const { tokenizer, molecule } = state; - - ////////// not using readLinesAsync ///////// - const lines = Tokenizer.readLines(tokenizer, molecule.num_atoms); - - // default all false - const hasSubst_id = false; - const hasSubst_name = false; - const hasCharge = false; - const hasStatus_bit = false; - - /* - const pO = 20; - const pW = state.header.precision.position + 5; - const vO = pO + 3 * pW; - const vW = state.header.precision.velocity + 4; - */ + let hasSubst_id = false; + let hasSubst_name = false; + let hasCharge = false; + let hasStatus_bit = false; + + // skip empty lines and '@<TRIPOS>ATOM' + while(Tokenizer.getTokenString(tokenizer) != '@<TRIPOS>ATOM'){ + Tokenizer.markLine(tokenizer); + } + const lines = await Tokenizer.readLinesAsync(tokenizer, molecule.num_atoms, state.chunker); + const firstLine = tokenizer.data.substring(lines.indices[0], lines.indices[1]); + const firstLineArray = firstLine.trim().split(/\s+/g) + const length = firstLineArray.length; + if(length == 9){ + hasSubst_id = true; + hasSubst_name = true; + hasCharge = true; + }else if(length == 10){ + hasSubst_id = true; + hasSubst_name = true; + hasCharge = true; + hasStatus_bit = true; + } const col = FixedColumn(lines); const undefInt = UndefinedColumn(molecule.num_atoms, ColumnType.int); const undefFloat = UndefinedColumn(molecule.num_atoms, ColumnType.float); + //const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr); + // created below column to pass unit tests const undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str); - const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr); - - /////// wanted to have const undef = UndefinedColumn(molecule.num_atoms) like col, but failed - /////// some unclear about the formatting, like the field sizes const ret = { count: molecule.num_atoms, atom_id: col(0, 7, ColumnType.int), - atom_name: col(7, 9, ColumnType.str), ////// don't know use str or pooledStr + atom_name: col(7, 9, ColumnType.pooledStr), x: col(16, 10, ColumnType.float), y: col(26, 10, ColumnType.float), z: col(36, 10, ColumnType.float), - atom_type: col(46, 0, ColumnType.str), ////// don't know use str or pooledStr //////// don't know which is the atom_type + atom_type: col(46, 6, ColumnType.pooledStr), // optional properties - subst_id: hasSubst_id ? col(0, 0, ColumnType.int) : undefInt, - subst_name: hasSubst_name ? col(0, 0, ColumnType.str) : undefStr,///////// don't know use str or pooledStr - charge: hasCharge ? col(0, 0, ColumnType.float) : undefFloat, //////// don't know use int or float - status_bit: hasStatus_bit ? col(0, 0, ColumnType.pooledStr) : undefPooledStr, ////////// don't know use str or pooledStr + subst_id: hasSubst_id ? col(52, 6, ColumnType.int) : undefInt, + subst_name: hasSubst_name ? col(58, 8, ColumnType.pooledStr) : undefStr, + charge: hasCharge ? col(66, 10, ColumnType.float) : undefFloat, + // undefPooledStr cannot pass unit tests because it create a column of zeros but not empty strings + //status_bit: hasStatus_bit ? col(76, 100, ColumnType.pooledStr) : undefPooledStr, + + // use undefStr instead to pass unit tests + status_bit: hasStatus_bit ? col(76, 100, ColumnType.pooledStr) : undefStr, + }; return ret; @@ -135,39 +143,35 @@ function handleAtoms(state: State): Schema.Atoms { -function handleBonds(state: State): Schema.Bonds { +async function handleBonds(state: State): Promise<Schema.Bonds> { const { tokenizer, molecule } = state; + let hasStatus_bit = false; - ////////// not using readLinesAsync ///////// - const lines = Tokenizer.readLines(tokenizer, molecule.num_bonds); - - // default all false - const hasStatus_bit = false; - - /* - const pO = 20; - const pW = state.header.precision.position + 5; - const vO = pO + 3 * pW; - const vW = state.header.precision.velocity + 4; - */ + while(Tokenizer.getTokenString(tokenizer) != '@<TRIPOS>BOND'){ + Tokenizer.markLine(tokenizer); + } + const lines = await Tokenizer.readLinesAsync(tokenizer, molecule.num_bonds, state.chunker); + const firstLine = tokenizer.data.substring(lines.indices[0], lines.indices[1]); + const length = firstLine.split(' ').length; + if(length == 4){ + hasStatus_bit = true; + } const col = FixedColumn(lines); - const undefInt = UndefinedColumn(molecule.num_bonds, ColumnType.int); - const undefFloat = UndefinedColumn(molecule.num_bonds, ColumnType.float); - const undefStr = UndefinedColumn(molecule.num_bonds, ColumnType.str); - const undefPooledStr = UndefinedColumn(molecule.num_bonds, ColumnType.pooledStr); - - /////// wanted to have const undef = UndefinedColumn(molecule.num_atoms) like col, but failed + //const undefPooledStr = UndefinedColumn(molecule.num_bonds, ColumnType.pooledStr); + // created below column to pass unit tests + const undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str); - /////// some unclear about the formatting, like the field sizes const ret = { count: molecule.num_bonds, bond_id: col(0, 6, ColumnType.int), origin_atom_id: col(6, 6, ColumnType.int), target_atom_id: col(12, 6, ColumnType.int), - bond_type: col(18, 5, ColumnType.str), ///////// don't know use str or pooledStr + bond_type: col(18, 5, ColumnType.pooledStr), // optional properties - status_bits: hasStatus_bit ? col(0, 0, ColumnType.str) : undefStr, ///////// don't know use str or pooledStr + // undefPooledStr cannot pass unit tests because it create a column of zeros but not empty strings + //status_bits: hasStatus_bit ? col(23, 50, ColumnType.pooledStr) : undefPooledStr, + status_bits: hasStatus_bit ? col(23, 50, ColumnType.pooledStr) : undefStr, }; return ret; @@ -176,16 +180,16 @@ function handleBonds(state: State): Schema.Bonds { -//////// not using async here -function parseInternal(data: string): Result<Schema.File> { /////// not having ctx as a parameter, and not returning Promise +async function parseInternal(data: string, ctx: Computation.Context): Promise<Result<Schema.File>> { const tokenizer = Tokenizer(data); + ctx.update({ message: 'Parsing...', current: 0, max: data.length }); const structures: Schema.Structure[] = []; while (tokenizer.position < data.length) { - const state = State(tokenizer);//////////different + const state = State(tokenizer, ctx); handleMolecule(state); - const atoms = handleAtoms(state); - const bonds = handleBonds(state); + const atoms = await handleAtoms(state); + const bonds = await handleBonds(state); structures.push({ molecule: state.molecule, atoms, bonds }); } @@ -197,9 +201,10 @@ function parseInternal(data: string): Result<Schema.File> { /////// not having c -///////// diffrent than gro parser export function parse(data: string) { - return parseInternal(data); + return Computation.create<Result<Schema.File>>(async ctx => { + return await parseInternal(data, ctx); + }); } export default parse; \ No newline at end of file diff --git a/src/reader/mol2/schema.d.ts b/src/reader/mol2/schema.d.ts index a34dc9f9e..7f1e73fb2 100644 --- a/src/reader/mol2/schema.d.ts +++ b/src/reader/mol2/schema.d.ts @@ -25,7 +25,6 @@ export interface Molecule { charge_type: string status_bits: string mol_comment: string - ///////////// precisions are not saved for later use, and there is not 'hasSomthing' properties } export interface Atoms { diff --git a/src/reader/spec/gro.spec.ts b/src/reader/spec/gro.spec.ts index 8f78668d5..9f74a7960 100644 --- a/src/reader/spec/gro.spec.ts +++ b/src/reader/spec/gro.spec.ts @@ -32,7 +32,6 @@ describe('gro reader', () => { console.log(parsed) return; } - const groFile = parsed.result; const data = groFile.structures[0]; diff --git a/src/reader/spec/mol2.spec.ts b/src/reader/spec/mol2.spec.ts index 675f41a69..2a04c751f 100644 --- a/src/reader/spec/mol2.spec.ts +++ b/src/reader/spec/mol2.spec.ts @@ -1,5 +1,4 @@ - import Mol2 from '../mol2/parser' const Mol2String = `@<TRIPOS>MOLECULE @@ -11,51 +10,364 @@ GASTEIGER @<TRIPOS>ATOM 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 + 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 + 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 + 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 + 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 + 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 + 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 + 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 + 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 + 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 + 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 + 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 + 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 + 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 + 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 + 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 + 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 + 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 + 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 + 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 + 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 + 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 + 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 + 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 + 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 @<TRIPOS>BOND 1 1 5 1 - 2 1 21 1` + 2 1 21 1 + 3 2 10 1 + 4 2 25 1 + 5 3 12 1 + 6 3 26 1 + 7 4 7 1 + 8 4 13 1 + 9 4 19 1 + 10 5 6 1 + 11 5 7 1 + 12 5 14 1 + 13 6 8 ar + 14 6 9 ar + 15 7 15 1 + 16 7 16 1 + 17 8 10 ar + 18 8 17 1 + 19 9 11 ar + 20 9 18 1 + 21 10 12 ar + 22 11 12 ar + 23 11 20 1 + 24 13 22 1 + 25 13 23 1 + 26 13 24 1` +const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE +5816 + 26 26 0 0 0 +SMALL +GASTEIGER + +@<TRIPOS>ATOM + 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 + 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 + 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 + 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 + 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 + 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 + 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 + 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 + 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 + 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 + 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 + 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 + 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 + 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 + 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 + 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 + 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 + 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 + 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 + 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 + 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 + 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 + 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 + 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 + 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 + 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 +@<TRIPOS>BOND + 1 1 5 1 + 2 1 21 1 + 3 2 10 1 + 4 2 25 1 + 5 3 12 1 + 6 3 26 1 + 7 4 7 1 + 8 4 13 1 + 9 4 19 1 + 10 5 6 1 + 11 5 7 1 + 12 5 14 1 + 13 6 8 ar + 14 6 9 ar + 15 7 15 1 + 16 7 16 1 + 17 8 10 ar + 18 8 17 1 + 19 9 11 ar + 20 9 18 1 + 21 10 12 ar + 22 11 12 ar + 23 11 20 1 + 24 13 22 1 + 25 13 23 1 + 26 13 24 1 +@<TRIPOS>MOLECULE +5816 + 26 26 0 0 0 +SMALL +GASTEIGER + +@<TRIPOS>ATOM + 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 + 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 + 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 + 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 + 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 + 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 + 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 + 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 + 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 + 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 + 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 + 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 + 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 + 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 + 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 + 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 + 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 + 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 + 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 + 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 + 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 + 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 + 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 + 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 + 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 + 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 +@<TRIPOS>BOND + 1 1 5 1 + 2 1 21 1 + 3 2 10 1 + 4 2 25 1 + 5 3 12 1 + 6 3 26 1 + 7 4 7 1 + 8 4 13 1 + 9 4 19 1 + 10 5 6 1 + 11 5 7 1 + 12 5 14 1 + 13 6 8 ar + 14 6 9 ar + 15 7 15 1 + 16 7 16 1 + 17 8 10 ar + 18 8 17 1 + 19 9 11 ar + 20 9 18 1 + 21 10 12 ar + 22 11 12 ar + 23 11 20 1 + 24 13 22 1 + 25 13 23 1 + 26 13 24 1` + +const Mol2StringMinimal = `@<TRIPOS>MOLECULE +5816 + 26 26 0 0 0 +SMALL +GASTEIGER + +@<TRIPOS>ATOM + 1 O 1.7394 -2.1169 -1.0894 O.3 + 2 O -2.2941 1.0781 -1.7979 O.3 + 3 O -3.6584 0.5842 0.5722 O.3 + 4 N 2.6359 1.0243 0.7030 N.3 + 5 C 1.6787 -1.1447 -0.0373 C.3 + 6 C 0.2684 -0.6866 0.1208 C.ar + 7 C 2.6376 0.0193 -0.3576 C.3 + 8 C -0.3658 -0.0099 -0.9212 C.ar + 9 C -0.4164 -0.9343 1.3105 C.ar + 10 C -1.6849 0.4191 -0.7732 C.ar + 11 C -1.7353 -0.5053 1.4585 C.ar + 12 C -2.3696 0.1713 0.4166 C.ar + 13 C 3.5645 2.1013 0.3950 C.3 + 14 H 2.0210 -1.6511 0.8741 H + 15 H 2.3808 0.4742 -1.3225 H + 16 H 3.6478 -0.3931 -0.4831 H + 17 H 0.1501 0.1801 -1.8589 H + 18 H 0.0640 -1.4598 2.1315 H + 19 H 2.9013 0.5888 1.5858 H + 20 H -2.2571 -0.7050 2.3907 H + 21 H 2.6646 -2.4067 -1.1652 H + 22 H 3.2862 2.6124 -0.5325 H + 23 H 4.5925 1.7346 0.3078 H + 24 H 3.5401 2.8441 1.1985 H + 25 H -3.2008 1.2997 -1.5231 H + 26 H -3.9690 0.3259 1.4570 H +@<TRIPOS>BOND + 1 1 5 1 + 2 1 21 1 + 3 2 10 1 + 4 2 25 1 + 5 3 12 1 + 6 3 26 1 + 7 4 7 1 + 8 4 13 1 + 9 4 19 1 + 10 5 6 1 + 11 5 7 1 + 12 5 14 1 + 13 6 8 ar + 14 6 9 ar + 15 7 15 1 + 16 7 16 1 + 17 8 10 ar + 18 8 17 1 + 19 9 11 ar + 20 9 18 1 + 21 10 12 ar + 22 11 12 ar + 23 11 20 1 + 24 13 22 1 + 25 13 23 1 + 26 13 24 1` -////////// nothing works until add async and await and promise to parser.mol2 file. describe('mol2 reader', () => { it('basic', async () => { - const parsed = await Mol2(Mol2String)(); + const parsed = await Mol2(Mol2String)(); + if (parsed.isError) { + console.log(parsed) + return; + } + const mol2File = parsed.result; + const data = mol2File.structures[0]; + const { molecule, atoms, bonds } = data; + + expect(molecule.mol_name).toBe('5816') + expect(molecule.num_atoms).toBe(26) + expect(molecule.num_bonds).toBe(26); + expect(molecule.num_subst).toBe(0); + expect(molecule.num_feat).toBe(0); + expect(molecule.num_sets).toBe(0); + expect(molecule.mol_type).toBe("SMALL") + expect(molecule.charge_type).toBe("GASTEIGER"); + expect(molecule.status_bits).toBe(""); + expect(molecule.mol_comment).toBe(""); + expect(atoms.count).toBe(26); + expect(atoms.atom_id.value(0)).toBe(1); + expect(atoms.atom_name.value(0)).toBe('O'); + expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001); + expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001); + expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001); + expect(atoms.atom_type.value(0)).toBe("O.3"); + ///// optionals + expect(atoms.subst_id.value(0)).toBe(1); + expect(atoms.subst_name.value(0)).toBe('LIG1'); + expect(atoms.charge.value(0)).toBeCloseTo(-0.3859); + expect(atoms.status_bit.value(0)).toBe(''); + + expect(bonds.count).toBe(26); + expect(bonds.bond_id.value(0)).toBe(1); + expect(bonds.origin_atom_id.value(0)).toBe(1); + expect(bonds.target_atom_id.value(0)).toBe(5); + expect(bonds.bond_type.value(0)).toBe('1'); + /////// optional + expect(bonds.status_bits.value(0)).toBe(''); + + }); + + it('multiblocks', async () => { + const parsed = await Mol2(Mol2StringMultiBlocks)(); if (parsed.isError) { console.log(parsed) return; } + const mol2File = parsed.result; + const data = mol2File.structures[1]; + const { molecule, atoms, bonds } = data; + expect(molecule.mol_name).toBe('5816') + expect(molecule.num_atoms).toBe(26) + expect(molecule.num_bonds).toBe(26); + expect(molecule.num_subst).toBe(0); + expect(molecule.num_feat).toBe(0); + expect(molecule.num_sets).toBe(0); + expect(molecule.mol_type).toBe("SMALL") + expect(molecule.charge_type).toBe("GASTEIGER"); + expect(molecule.status_bits).toBe(""); + expect(molecule.mol_comment).toBe(""); + + expect(atoms.count).toBe(26); + expect(atoms.atom_id.value(0)).toBe(1); + expect(atoms.atom_name.value(0)).toBe('O'); + expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001); + expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001); + expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001); + expect(atoms.atom_type.value(0)).toBe("O.3"); + ///// optionals + expect(atoms.subst_id.value(0)).toBe(1); + expect(atoms.subst_name.value(0)).toBe('LIG1'); + expect(atoms.charge.value(0)).toBeCloseTo(-0.3859); + expect(atoms.status_bit.value(0)).toBe(''); + + expect(bonds.count).toBe(26); + expect(bonds.bond_id.value(0)).toBe(1); + expect(bonds.origin_atom_id.value(0)).toBe(1); + expect(bonds.target_atom_id.value(0)).toBe(5); + expect(bonds.bond_type.value(0)).toBe('1'); + /////// optional + expect(bonds.status_bits.value(0)).toBe(''); + + }); + + it('minimal', async () => { + const parsed = await Mol2(Mol2StringMinimal)(); + if (parsed.isError) { + console.log(parsed) + return; + } const mol2File = parsed.result; const data = mol2File.structures[0]; - const { molecule, atoms, bonds } = data; - expect(molecule.mol_name).toBe(5816) + expect(molecule.mol_name).toBe('5816') expect(molecule.num_atoms).toBe(26) expect(molecule.num_bonds).toBe(26); expect(molecule.num_subst).toBe(0); expect(molecule.num_feat).toBe(0); expect(molecule.num_sets).toBe(0); - expect(molecule.mol_type).toBe("") - expect(molecule.charge_type).toBe(""); + expect(molecule.mol_type).toBe("SMALL") + expect(molecule.charge_type).toBe("GASTEIGER"); expect(molecule.status_bits).toBe(""); expect(molecule.mol_comment).toBe(""); - expect(atoms.count).toBe(2); + expect(atoms.count).toBe(26); expect(atoms.atom_id.value(0)).toBe(1); - expect(atoms.atom_name.value(0)).toBe('o'); + expect(atoms.atom_name.value(0)).toBe('O'); expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001); expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001); expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001); - expect(atoms.atom_type.value(0)).toBe(''); + expect(atoms.atom_type.value(0)).toBe("O.3"); ///// optionals expect(atoms.subst_id.value(0)).toBe(0); expect(atoms.subst_name.value(0)).toBe(''); - expect(atoms.charge.value(0)).toBeCloseTo(0.000); - expect(atoms.status_bits.value(0)).toBe(''); + expect(atoms.charge.value(0)).toBeCloseTo(0); + expect(atoms.status_bit.value(0)).toBe(''); - expect(bonds.count).toBe(2); + expect(bonds.count).toBe(26); expect(bonds.bond_id.value(0)).toBe(1); expect(bonds.origin_atom_id.value(0)).toBe(1); expect(bonds.target_atom_id.value(0)).toBe(5); -- GitLab