Skip to content
Snippets Groups Projects
Commit 923404c2 authored by Alexander Rose's avatar Alexander Rose
Browse files

wip, fixing mol2 parser (multi model still broken)

parent 86b1643a
No related branches found
No related tags found
No related merge requests found
import Mol2 from '../mol2/parser' import Mol2 from '../mol2/parser'
// const Mol2String = `@<TRIPOS>MOLECULE const Mol2String = `@<TRIPOS>MOLECULE
// 5816 5816
// 26 26 0 0 0 26 26 0 0 0
// SMALL SMALL
// GASTEIGER GASTEIGER
// @<TRIPOS>ATOM @<TRIPOS>ATOM
// 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859
// 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033
// 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033
// 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162
// 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927
// 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143
// 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258
// 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109
// 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524
// 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586
// 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162
// 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582
// 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157
// 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656
// 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453
// 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453
// 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659
// 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622
// 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217
// 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655
// 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103
// 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388
// 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388
// 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388
// 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923
// 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923
// @<TRIPOS>BOND @<TRIPOS>BOND
// 1 1 5 1 1 1 5 1
// 2 1 21 1 2 1 21 1
// 3 2 10 1 3 2 10 1
// 4 2 25 1 4 2 25 1
// 5 3 12 1 5 3 12 1
// 6 3 26 1 6 3 26 1
// 7 4 7 1 7 4 7 1
// 8 4 13 1 8 4 13 1
// 9 4 19 1 9 4 19 1
// 10 5 6 1 10 5 6 1
// 11 5 7 1 11 5 7 1
// 12 5 14 1 12 5 14 1
// 13 6 8 ar 13 6 8 ar
// 14 6 9 ar 14 6 9 ar
// 15 7 15 1 15 7 15 1
// 16 7 16 1 16 7 16 1
// 17 8 10 ar 17 8 10 ar
// 18 8 17 1 18 8 17 1
// 19 9 11 ar 19 9 11 ar
// 20 9 18 1 20 9 18 1
// 21 10 12 ar 21 10 12 ar
// 22 11 12 ar 22 11 12 ar
// 23 11 20 1 23 11 20 1
// 24 13 22 1 24 13 22 1
// 25 13 23 1 25 13 23 1
// 26 13 24 1` 26 13 24 1`
// const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE // const Mol2StringMultiBlocks = `@<TRIPOS>MOLECULE
// 5816 // 5816
...@@ -245,60 +245,63 @@ GASTEIGER ...@@ -245,60 +245,63 @@ GASTEIGER
26 13 24 1` 26 13 24 1`
describe('mol2 reader', () => { describe('mol2 reader', () => {
it('basic', async () => {
    // Parse the single-molecule fixture; a parser error must fail the test
    // instead of being logged and silently skipped.
    const parsed = await Mol2(Mol2String)();
    if (parsed.isError) {
        throw new Error(parsed.message);
    }
    const mol2File = parsed.result;
    const data = mol2File.structures[0];
    const { molecule, atoms, bonds } = data;

    // molecule fields
    expect(molecule.mol_name).toBe('5816');
    expect(molecule.num_atoms).toBe(26);
    expect(molecule.num_bonds).toBe(26);
    expect(molecule.num_subst).toBe(0);
    expect(molecule.num_feat).toBe(0);
    expect(molecule.num_sets).toBe(0);
    expect(molecule.mol_type).toBe("SMALL");
    expect(molecule.charge_type).toBe("GASTEIGER");
    expect(molecule.status_bits).toBe("");
    expect(molecule.mol_comment).toBe("");

    // required atom fields
    expect(atoms.count).toBe(26);
    expect(atoms.atom_id.value(0)).toBe(1);
    expect(atoms.atom_name.value(0)).toBe('O');
    // The second argument of toBeCloseTo is the number of decimal digits to
    // compare, not a tolerance; the fixture stores four decimals per coordinate.
    expect(atoms.x.value(0)).toBeCloseTo(1.7394, 4);
    expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 4);
    // First atom record in the fixture has z = -1.0894.
    expect(atoms.z.value(0)).toBeCloseTo(-1.0894, 4);
    expect(atoms.atom_type.value(0)).toBe("O.3");

    // optional atom fields
    expect(atoms.subst_id.value(0)).toBe(1);
    expect(atoms.subst_name.value(0)).toBe('LIG1');
    expect(atoms.charge.value(0)).toBeCloseTo(-0.3859, 4);
    expect(atoms.status_bit.value(0)).toBe('');

    // required bond fields
    expect(bonds.count).toBe(26);
    expect(bonds.bond_id.value(0)).toBe(1);
    expect(bonds.origin_atom_id.value(0)).toBe(1);
    expect(bonds.target_atom_id.value(0)).toBe(5);
    expect(bonds.bond_type.value(0)).toBe('1');

    // optional bond fields
    expect(bonds.status_bits.value(0)).toBe('');
});
// it('multiblocks', async () => { // it('multiblocks', async () => {
// const parsed = await Mol2(Mol2StringMultiBlocks)(); // const parsed = await Mol2(Mol2StringMultiBlocks)();
// if (parsed.isError) { // if (parsed.isError) {
// console.log(parsed) // throw new Error(parsed.message);
// return;
// } // }
// const mol2File = parsed.result; // const mol2File = parsed.result;
// const data = mol2File.structures[1]; // const data = mol2File.structures[1];
// const { molecule, atoms, bonds } = data; // const { molecule, atoms, bonds } = data;
// // molecule fields
// expect(molecule.mol_name).toBe('5816') // expect(molecule.mol_name).toBe('5816')
// expect(molecule.num_atoms).toBe(26) // expect(molecule.num_atoms).toBe(26)
// expect(molecule.num_bonds).toBe(26); // expect(molecule.num_bonds).toBe(26);
...@@ -310,6 +313,7 @@ describe('mol2 reader', () => { ...@@ -310,6 +313,7 @@ describe('mol2 reader', () => {
// expect(molecule.status_bits).toBe(""); // expect(molecule.status_bits).toBe("");
// expect(molecule.mol_comment).toBe(""); // expect(molecule.mol_comment).toBe("");
// // required atom fields
// expect(atoms.count).toBe(26); // expect(atoms.count).toBe(26);
// expect(atoms.atom_id.value(0)).toBe(1); // expect(atoms.atom_id.value(0)).toBe(1);
// expect(atoms.atom_name.value(0)).toBe('O'); // expect(atoms.atom_name.value(0)).toBe('O');
...@@ -317,32 +321,34 @@ describe('mol2 reader', () => { ...@@ -317,32 +321,34 @@ describe('mol2 reader', () => {
// expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001); // expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
// expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001); // expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
// expect(atoms.atom_type.value(0)).toBe("O.3"); // expect(atoms.atom_type.value(0)).toBe("O.3");
// ///// optionals
// // optional atom fields
// expect(atoms.subst_id.value(0)).toBe(1); // expect(atoms.subst_id.value(0)).toBe(1);
// expect(atoms.subst_name.value(0)).toBe('LIG1'); // expect(atoms.subst_name.value(0)).toBe('LIG1');
// expect(atoms.charge.value(0)).toBeCloseTo(-0.3859); // expect(atoms.charge.value(0)).toBeCloseTo(-0.3859);
// expect(atoms.status_bit.value(0)).toBe(''); // expect(atoms.status_bit.value(0)).toBe('');
// // required bond fields
// expect(bonds.count).toBe(26); // expect(bonds.count).toBe(26);
// expect(bonds.bond_id.value(0)).toBe(1); // expect(bonds.bond_id.value(0)).toBe(1);
// expect(bonds.origin_atom_id.value(0)).toBe(1); // expect(bonds.origin_atom_id.value(0)).toBe(1);
// expect(bonds.target_atom_id.value(0)).toBe(5); // expect(bonds.target_atom_id.value(0)).toBe(5);
// expect(bonds.bond_type.value(0)).toBe('1'); // expect(bonds.bond_type.value(0)).toBe('1');
// /////// optional
// expect(bonds.status_bits.value(0)).toBe('');
// // optional bond fields
// expect(bonds.status_bits.value(0)).toBe('');
// }); // });
it('minimal', async () => { it('minimal', async () => {
const parsed = await Mol2(Mol2StringMinimal)(); const parsed = await Mol2(Mol2StringMinimal)();
if (parsed.isError) { if (parsed.isError) {
console.log(parsed) throw new Error(parsed.message);
return;
} }
const mol2File = parsed.result; const mol2File = parsed.result;
const data = mol2File.structures[0]; const data = mol2File.structures[0];
const { molecule, atoms, bonds } = data; const { molecule, atoms, bonds } = data;
// molecule fields
expect(molecule.mol_name).toBe('5816') expect(molecule.mol_name).toBe('5816')
expect(molecule.num_atoms).toBe(26) expect(molecule.num_atoms).toBe(26)
expect(molecule.num_bonds).toBe(26); expect(molecule.num_bonds).toBe(26);
...@@ -354,26 +360,29 @@ describe('mol2 reader', () => { ...@@ -354,26 +360,29 @@ describe('mol2 reader', () => {
expect(molecule.status_bits).toBe(""); expect(molecule.status_bits).toBe("");
expect(molecule.mol_comment).toBe(""); expect(molecule.mol_comment).toBe("");
// required atom fields
expect(atoms.count).toBe(26); expect(atoms.count).toBe(26);
// expect(atoms.atom_id.value(0)).toBe(1); expect(atoms.atom_id.value(0)).toBe(1);
// expect(atoms.atom_name.value(0)).toBe('O'); expect(atoms.atom_name.value(0)).toBe('O');
// expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001); expect(atoms.x.value(0)).toBeCloseTo(1.7394, 0.001);
// expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001); expect(atoms.y.value(0)).toBeCloseTo(-2.1169, 0.0001);
// expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001); expect(atoms.z.value(0)).toBeCloseTo(-1.0893, 0.0001);
// expect(atoms.atom_type.value(0)).toBe("O.3"); expect(atoms.atom_type.value(0)).toBe("O.3");
///// optionals
// expect(atoms.subst_id.value(0)).toBe(0);
// expect(atoms.subst_name.value(0)).toBe('');
// expect(atoms.charge.value(0)).toBeCloseTo(0);
// expect(atoms.status_bit.value(0)).toBe('');
// optional atom fields
expect(atoms.subst_id.value(0)).toBe(0);
expect(atoms.subst_name.value(0)).toBe('');
expect(atoms.charge.value(0)).toBeCloseTo(0);
expect(atoms.status_bit.value(0)).toBe('');
// required bond fields
expect(bonds.count).toBe(26); expect(bonds.count).toBe(26);
// expect(bonds.bond_id.value(0)).toBe(1); expect(bonds.bond_id.value(0)).toBe(1);
// expect(bonds.origin_atom_id.value(0)).toBe(1); expect(bonds.origin_atom_id.value(0)).toBe(1);
// expect(bonds.target_atom_id.value(0)).toBe(5); expect(bonds.target_atom_id.value(0)).toBe(5);
// expect(bonds.bond_type.value(0)).toBe('1'); expect(bonds.bond_type.value(0)).toBe('1');
// /////// optional
// expect(bonds.status_bits.value(0)).toBe('');
// optional bond fields
expect(bonds.status_bits.value(0)).toBe('');
}); });
}); });
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Zepei Xu <xuzepei19950617@gmail.com>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
// NOTES // NOTES
// When you want to create an undefined string column, you must use // When you want to create an undefined string column, you must use
// undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str) // undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str)
...@@ -11,6 +18,8 @@ import * as Schema from './schema' ...@@ -11,6 +18,8 @@ import * as Schema from './schema'
import Result from '../result' import Result from '../result'
import Computation from 'mol-util/computation' import Computation from 'mol-util/computation'
const { skipWhitespace, eatValue, markLine, getTokenString, readLine } = Tokenizer;
interface State { interface State {
tokenizer: Tokenizer, tokenizer: Tokenizer,
molecule: Schema.Molecule, molecule: Schema.Molecule,
...@@ -40,36 +49,37 @@ function State(tokenizer: Tokenizer, ctx: Computation.Context): State { ...@@ -40,36 +49,37 @@ function State(tokenizer: Tokenizer, ctx: Computation.Context): State {
}; };
} }
const reWhitespace = /\s+/g;
/**
 * Reads the lines of a molecule record from the tokenizer into `state.molecule`.
 *
 * Lines are consumed in this order: one line that is skipped (presumably the
 * `@<TRIPOS>MOLECULE` record header - confirm against the caller), the
 * molecule name, the whitespace-separated count line
 * (atoms, bonds, substructures, features, sets), the molecule type and the
 * charge type. The two following lines (status bits, comment) are optional:
 * as soon as one of them is empty the function returns and leaves the
 * remaining fields of `state.molecule` untouched.
 *
 * @param state parser state; `state.tokenizer` is advanced and
 *              `state.molecule` is filled in place
 */
function handleMolecule(state: State) {
    const { tokenizer, molecule } = state;

    // A count that is missing, not numeric, or zero is stored as 0.
    const toCount = (value: string | undefined): number => {
        if (value === undefined) return 0;
        const parsed = parseInt(value, 10);
        return parsed ? parsed : 0;
    };

    Tokenizer.markLine(tokenizer); // skipped line (not stored)
    Tokenizer.markLine(tokenizer);
    molecule.mol_name = Tokenizer.getTokenString(tokenizer);

    // Each count is read from its own column of the count line.
    Tokenizer.markLine(tokenizer);
    const values = Tokenizer.getTokenString(tokenizer).trim().split(/\s+/);
    molecule.num_atoms = toCount(values[0]);
    molecule.num_bonds = toCount(values[1]);
    molecule.num_subst = toCount(values[2]);
    molecule.num_feat = toCount(values[3]);
    molecule.num_sets = toCount(values[4]);

    Tokenizer.markLine(tokenizer);
    molecule.mol_type = Tokenizer.getTokenString(tokenizer);

    Tokenizer.markLine(tokenizer);
    molecule.charge_type = Tokenizer.getTokenString(tokenizer);

    // Optional line: status bits.
    Tokenizer.markLine(tokenizer);
    const statusBits = Tokenizer.getTokenString(tokenizer);
    if (statusBits === '') return;
    molecule.status_bits = statusBits;

    // Optional line: comment.
    Tokenizer.markLine(tokenizer);
    const comment = Tokenizer.getTokenString(tokenizer);
    if (comment === '') return;
    molecule.mol_comment = comment;
}
function isStatus_bit(aString: String): Boolean{ function isStatus_bit(aString: String): Boolean{
if(aString.includes('DSPMOD') || aString.includes('TYPECOL') || aString.includes('CAP') if(aString.includes('DSPMOD') || aString.includes('TYPECOL') || aString.includes('CAP')
|| aString.includes('BACKBONE') || aString.includes('DICT') || aString.includes('ESSENTIAL') || aString.includes('BACKBONE') || aString.includes('DICT') || aString.includes('ESSENTIAL')
...@@ -79,7 +89,6 @@ function isStatus_bit(aString: String): Boolean{ ...@@ -79,7 +89,6 @@ function isStatus_bit(aString: String): Boolean{
return false; return false;
} }
async function handleAtoms(state: State): Promise<Schema.Atoms> { async function handleAtoms(state: State): Promise<Schema.Atoms> {
const { tokenizer, molecule } = state; const { tokenizer, molecule } = state;
let hasSubst_id = false; let hasSubst_id = false;
...@@ -88,17 +97,17 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> { ...@@ -88,17 +97,17 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
let hasStatus_bit = false; let hasStatus_bit = false;
// skip empty lines and '@<TRIPOS>ATOM' // skip empty lines and '@<TRIPOS>ATOM'
while(Tokenizer.getTokenString(tokenizer) != '@<TRIPOS>ATOM'){ while(getTokenString(tokenizer) != '@<TRIPOS>ATOM'){
Tokenizer.markLine(tokenizer); markLine(tokenizer);
} }
const initialTokenizerPosition = tokenizer.position; const initialTokenizerPosition = tokenizer.position;
const initialTokenizerLineNumber = tokenizer.lineNumber; const initialTokenizerLineNumber = tokenizer.lineNumber;
const firstLine = Tokenizer.readLine(tokenizer); const firstLine = readLine(tokenizer);
const firstLineArray = firstLine.trim().split(/\s+/g) const firstLineArray = firstLine.trim().split(/\s+/g)
const firstLineLength = firstLineArray.length; const firstLineLength = firstLineArray.length;
// optionals are in order "integer string float string". // optional columns are in order "integer string float string".
// Use this to find out which column is missing or empty // Use this to find out which column is missing or empty
for(let i = 6; i < firstLineLength; i++){ for(let i = 6; i < firstLineLength; i++){
if(!isNaN(Number(firstLineArray[i]))){ if(!isNaN(Number(firstLineArray[i]))){
...@@ -116,17 +125,13 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> { ...@@ -116,17 +125,13 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
} }
} }
// required columns
const atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); const atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const atom_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);; const atom_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);;
const xTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); const xTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const yTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); const yTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const zTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); const zTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const atom_typeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); const atom_typeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
// optionals
const subst_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const subst_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const chargeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const atom_idTokenColumn = TokenColumn(atom_idTokens); const atom_idTokenColumn = TokenColumn(atom_idTokens);
const atom_nameTokenColumn = TokenColumn(atom_nameTokens); const atom_nameTokenColumn = TokenColumn(atom_nameTokens);
...@@ -134,7 +139,13 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> { ...@@ -134,7 +139,13 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
const yTokenColumn = TokenColumn(yTokens); const yTokenColumn = TokenColumn(yTokens);
const zTokenColumn = TokenColumn(zTokens); const zTokenColumn = TokenColumn(zTokens);
const atom_typeColumn = TokenColumn(atom_typeTokens); const atom_typeColumn = TokenColumn(atom_typeTokens);
// optionals
// optional columns
const subst_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const subst_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const chargeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
const subst_idTokenColumn = TokenColumn(subst_idTokens); const subst_idTokenColumn = TokenColumn(subst_idTokens);
const subst_nameTokenColumn = TokenColumn(subst_nameTokens); const subst_nameTokenColumn = TokenColumn(subst_nameTokens);
const chargeTokenColumn = TokenColumn(chargeTokens); const chargeTokenColumn = TokenColumn(chargeTokens);
...@@ -144,7 +155,7 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> { ...@@ -144,7 +155,7 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
const undefInt = Column.Undefined(molecule.num_atoms, Column.Schema.int); const undefInt = Column.Undefined(molecule.num_atoms, Column.Schema.int);
const undefStr = Column.Undefined(molecule.num_atoms, Column.Schema.str); const undefStr = Column.Undefined(molecule.num_atoms, Column.Schema.str);
let numOfColumn = 5; let numOfColumn = 6;
if(hasSubst_id){numOfColumn++} if(hasSubst_id){numOfColumn++}
if(hasSubst_name){numOfColumn++} if(hasSubst_name){numOfColumn++}
if(hasCharge){numOfColumn++} if(hasCharge){numOfColumn++}
...@@ -163,8 +174,9 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> { ...@@ -163,8 +174,9 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
let chargeWritten = false; let chargeWritten = false;
let status_bitWritten = false; let status_bitWritten = false;
for(let j = 0; j < numOfColumn; j++){ for(let j = 0; j < numOfColumn; j++){
Tokenizer.skipWhitespace(tokenizer); skipWhitespace(tokenizer);
Tokenizer.eatValue(tokenizer); tokenizer.tokenStart = tokenizer.position;
eatValue(tokenizer);
switch(j){ switch(j){
case 0: case 0:
TokenBuilder.addUnchecked(atom_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd); TokenBuilder.addUnchecked(atom_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
...@@ -185,16 +197,16 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> { ...@@ -185,16 +197,16 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
TokenBuilder.addUnchecked(atom_typeTokens, tokenizer.tokenStart, tokenizer.tokenEnd); TokenBuilder.addUnchecked(atom_typeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
break; break;
default: default:
if(hasSubst_id == true && subst_idWritten == false){ if(hasSubst_id === true && subst_idWritten === false){
TokenBuilder.addUnchecked(subst_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd); TokenBuilder.addUnchecked(subst_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
subst_idWritten = true; subst_idWritten = true;
}else if(hasSubst_name == true && subst_nameWritten == false){ }else if(hasSubst_name === true && subst_nameWritten === false){
TokenBuilder.addUnchecked(subst_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd); TokenBuilder.addUnchecked(subst_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
subst_nameWritten = true; subst_nameWritten = true;
}else if(hasCharge == true && chargeWritten == false){ }else if(hasCharge === true && chargeWritten === false){
TokenBuilder.addUnchecked(chargeTokens, tokenizer.tokenStart, tokenizer.tokenEnd); TokenBuilder.addUnchecked(chargeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
chargeWritten = true; chargeWritten = true;
}else if(hasStatus_bit == true && status_bitWritten == false){ }else if(hasStatus_bit === true && status_bitWritten === false){
TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd); TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
status_bitWritten = true; status_bitWritten = true;
} }
...@@ -213,12 +225,12 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> { ...@@ -213,12 +225,12 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> {
y: yTokenColumn(Column.Schema.float), y: yTokenColumn(Column.Schema.float),
z: zTokenColumn(Column.Schema.float), z: zTokenColumn(Column.Schema.float),
atom_type: atom_typeColumn(Column.Schema.str), atom_type: atom_typeColumn(Column.Schema.str),
// optional properties
// optional columns
subst_id: hasSubst_id ? subst_idTokenColumn(Column.Schema.int) : undefInt, subst_id: hasSubst_id ? subst_idTokenColumn(Column.Schema.int) : undefInt,
subst_name: hasSubst_name ? subst_nameTokenColumn(Column.Schema.str) : undefStr, subst_name: hasSubst_name ? subst_nameTokenColumn(Column.Schema.str) : undefStr,
charge: hasCharge ? chargeTokenColumn(Column.Schema.float) : undefFloat, charge: hasCharge ? chargeTokenColumn(Column.Schema.float) : undefFloat,
status_bit: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr, status_bit: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr,
}; };
return ret; return ret;
} }
...@@ -227,13 +239,13 @@ async function handleBonds(state: State): Promise<Schema.Bonds> { ...@@ -227,13 +239,13 @@ async function handleBonds(state: State): Promise<Schema.Bonds> {
const { tokenizer, molecule } = state; const { tokenizer, molecule } = state;
let hasStatus_bit = false; let hasStatus_bit = false;
while(Tokenizer.getTokenString(tokenizer) != '@<TRIPOS>BOND'){ while(getTokenString(tokenizer) !== '@<TRIPOS>BOND'){
Tokenizer.markLine(tokenizer); markLine(tokenizer);
} }
const initialTokenizerPosition = tokenizer.position; const initialTokenizerPosition = tokenizer.position;
const initialTokenizerLineNumber = tokenizer.lineNumber; const initialTokenizerLineNumber = tokenizer.lineNumber;
const firstLine = Tokenizer.readLine(tokenizer); const firstLine = readLine(tokenizer);
const firstLineArray = firstLine.trim().split(/\s+/g) const firstLineArray = firstLine.trim().split(/\s+/g)
const firstLineLength = firstLineArray.length; const firstLineLength = firstLineArray.length;
if(firstLineLength === 5){ if(firstLineLength === 5){
...@@ -268,8 +280,9 @@ async function handleBonds(state: State): Promise<Schema.Bonds> { ...@@ -268,8 +280,9 @@ async function handleBonds(state: State): Promise<Schema.Bonds> {
const linesToRead = Math.min(molecule.num_bonds - linesAlreadyRead, chunkSize); const linesToRead = Math.min(molecule.num_bonds - linesAlreadyRead, chunkSize);
for(let i = 0; i < linesToRead; i++){ for(let i = 0; i < linesToRead; i++){
for(let j = 0; j < numberOfColumn; j++){ for(let j = 0; j < numberOfColumn; j++){
Tokenizer.skipWhitespace(tokenizer); skipWhitespace(tokenizer);
Tokenizer.eatValue(tokenizer); tokenizer.tokenStart = tokenizer.position;
eatValue(tokenizer);
switch(j){ switch(j){
case 0: case 0:
TokenBuilder.addUnchecked(bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd); TokenBuilder.addUnchecked(bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment