diff --git a/src/mol-io/reader/mol2/parser.ts b/src/mol-io/reader/mol2/parser.ts index e856a87e89194796c20ee35ff5994dfc3c5cc5f6..9f40147fb008e9f0cd4c5bbab97adc87830d7499 100644 --- a/src/mol-io/reader/mol2/parser.ts +++ b/src/mol-io/reader/mol2/parser.ts @@ -5,8 +5,8 @@ // const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr); // because latter actuall return a column of zeros import { Column } from 'mol-data/db' -import Tokenizer from '../common/text/tokenizer' -import FixedColumn from '../common/text/column/fixed' +import { TokenBuilder, Tokenizer } from '../common/text/tokenizer' +import TokenColumn from '../common/text/column/token' import * as Schema from './schema' import Result from '../result' import Computation from 'mol-util/computation' @@ -81,7 +81,14 @@ function handleMolecule(state: State) { } - +function isStatus_bit(aString: String): Boolean{ + if(aString.includes('DSPMOD') || aString.includes('TYPECOL') || aString.includes('CAP') + || aString.includes('BACKBONE') || aString.includes('DICT') || aString.includes('ESSENTIAL') + || aString.includes('WATER') || aString.includes('DIRECT')){ + return true; + } + return false; +} async function handleAtoms(state: State): Promise<Schema.Atoms> { @@ -95,44 +102,123 @@ async function handleAtoms(state: State): Promise<Schema.Atoms> { while(Tokenizer.getTokenString(tokenizer) != '@<TRIPOS>ATOM'){ Tokenizer.markLine(tokenizer); } - const lines = await Tokenizer.readLinesAsync(tokenizer, molecule.num_atoms, state.chunker); - const firstLine = tokenizer.data.substring(lines.indices[0], lines.indices[1]); + + const initialTokenizerPosition = tokenizer.position; + const initialTOkenizerLineNumber = tokenizer.lineNumber; + const firstLine = Tokenizer.readLine(tokenizer); const firstLineArray = firstLine.trim().split(/\s+/g) - const length = firstLineArray.length; - if(length == 9){ - hasSubst_id = true; - hasSubst_name = true; - hasCharge = true; - }else if(length == 10){ - hasSubst_id = true; - hasSubst_name = true; - hasCharge = true; - hasStatus_bit = true; - } + const firstLineLength = firstLineArray.length; + + + // optionals are in order "integer string float string". Use this to find out which column is missing or empty + for(let i = 6; i < firstLineLength; i++){ + if(!isNaN(Number(firstLineArray[i]))){ + if(firstLineArray[i].indexOf('.') == -1){ + hasSubst_id = true; + }else{ + hasCharge = true; + } + }else if(isNaN(Number(firstLineArray[i]))){ + if(!isStatus_bit(firstLineArray[i])){ + hasSubst_name = true; + }else{ + hasStatus_bit = true; + } + } - const col = FixedColumn(lines); - const undefInt = Column.Undefined(molecule.num_atoms, Column.Schema.int); + const atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); + const atom_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);; + const xTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); + const yTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); + const zTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); + const atom_typeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); + // optionals + const subst_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); + const subst_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); + const chargeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); + const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2); + + + const atom_idTokenColumn = TokenColumn(atom_idTokens); + const atom_nameTokenColumn = TokenColumn(atom_nameTokens); + const xTokenColumn = TokenColumn(xTokens); + const yTokenColumn = TokenColumn(yTokens); + const zTokenColumn = TokenColumn(zTokens); + const atom_typeColumn = TokenColumn(atom_typeTokens); + // optionals + const subst_idTokenColumn = TokenColumn(subst_idTokens); + const subst_nameTokenColumn = TokenColumn(subst_nameTokens); + const chargeTokenColumn = TokenColumn(chargeTokens); + const status_bitTokenColumn = TokenColumn(status_bitTokens); + + const undefFloat = Column.Undefined(molecule.num_atoms, Column.Schema.float); - //const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr); - // created below column to pass unit tests + const undefInt = Column.Undefined(molecule.num_atoms, Column.Schema.int); const undefStr = Column.Undefined(molecule.num_atoms, Column.Schema.str); + let numOfColumn = 5; + if(hasSubst_id){numOfColumn++} + if(hasSubst_name){numOfColumn++} + if(hasCharge){numOfColumn++} + if(hasStatus_bit){numOfColumn++} + + tokenizer.position = initialTokenizerPosition; + tokenizer.lineNumber = initialTOkenizerLineNumber; + + for(let i = 0; i < molecule.num_atoms; i++){ + let subst_idWritten = false; + let subst_nameWritten = false; + let chargeWritten = false; + let status_bitWritten = false; + for(let j = 0; j < numOfColumn; j++){ + Tokenizer.skipWhitespace(tokenizer); + Tokenizer.eatValue(tokenizer); + switch(j){ + case 0: + TokenBuilder.addUnchecked(atom_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + case 1: + TokenBuilder.addUnchecked(atom_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + case 2: + TokenBuilder.addUnchecked(xTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + case 3: + TokenBuilder.addUnchecked(yTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + case 4: + TokenBuilder.addUnchecked(zTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + case 5: + TokenBuilder.addUnchecked(atom_typeTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + default: + if(hasSubst_id == true && subst_idWritten == false){ + TokenBuilder.addUnchecked(subst_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + subst_idWritten = true; + }else if(hasSubst_name == true && subst_nameWritten == false){ + TokenBuilder.addUnchecked(subst_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + subst_nameWritten = true; + }else if(hasCharge == true && chargeWritten == false){ + TokenBuilder.addUnchecked(chargeTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + chargeWritten = true; + }else if(hasStatus_bit == true && status_bitWritten == false){ + TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + status_bitWritten = true; + } + } + } + } + const ret = { count: molecule.num_atoms, - atom_id: col(0, 7, Column.Schema.int), - atom_name: col(7, 9, Column.Schema.str), - x: col(16, 10, Column.Schema.float), - y: col(26, 10, Column.Schema.float), - z: col(36, 10, Column.Schema.float), - atom_type: col(46, 6, Column.Schema.str), + atom_id: atom_idTokenColumn(Column.Schema.int), + atom_name: atom_nameTokenColumn(Column.Schema.str), + x: xTokenColumn(Column.Schema.float), + y: yTokenColumn(Column.Schema.float), + z: zTokenColumn(Column.Schema.float), + atom_type: atom_typeColumn(Column.Schema.str), // optional properties - subst_id: hasSubst_id ? col(52, 6, Column.Schema.int) : undefInt, - subst_name: hasSubst_name ? col(58, 8, Column.Schema.str) : undefStr, - charge: hasCharge ? col(66, 10, Column.Schema.float) : undefFloat, - status_bit: hasStatus_bit ? col(76, 100, Column.Schema.str) : undefStr, + subst_id: hasSubst_id ? subst_idTokenColumn(Column.Schema.int) : undefInt, + subst_name: hasSubst_name ? subst_nameTokenColumn(Column.Schema.str) : undefStr, + charge: hasCharge ? chargeTokenColumn(Column.Schema.float) : undefFloat, + status_bit: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr, }; - return ret; } @@ -146,32 +232,64 @@ async function handleBonds(state: State): Promise<Schema.Bonds> { while(Tokenizer.getTokenString(tokenizer) != '@<TRIPOS>BOND'){ Tokenizer.markLine(tokenizer); } - const lines = await Tokenizer.readLinesAsync(tokenizer, molecule.num_bonds, state.chunker); - const firstLine = tokenizer.data.substring(lines.indices[0], lines.indices[1]); - const length = firstLine.split(' ').length; - if(length == 4){ + + const initialTokenizerPosition = tokenizer.position; + const initialTokenizerLineNumber = tokenizer.lineNumber; + const firstLine = Tokenizer.readLine(tokenizer); + const firstLineArray = firstLine.trim().split(/\s+/g) + const firstLineLength = firstLineArray.length; + if(firstLineLength == 5){ hasStatus_bit = true; } - const col = FixedColumn(lines); - //const undefPooledStr = UndefinedColumn(molecule.num_bonds, ColumnType.pooledStr); - // created below column to pass unit tests - const undefStr = Column.Undefined(molecule.num_atoms, Column.Schema.str); - - // if don't want to assume a fixed format, we can access a value of a column at a certain row by - // parse out the whole line at row x, then split the line, and return the yth value that is at the - // index of wanted value. + const bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); + const origin_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); + const target_bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); + const bondTypeTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); + // optional + const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2); + + const bond_idTokenColumn = TokenColumn(bond_idTokens); + const origin_bond_idTokenColumn = TokenColumn(origin_bond_idTokens); + const target_bond_idTokenColumn = TokenColumn(target_bond_idTokens); + const bondTypeTokenColumn = TokenColumn(bondTypeTokens); + // optional + const status_bitTokenColumn = TokenColumn(status_bitTokens); + + const undefStr = Column.Undefined(molecule.num_bonds, Column.Schema.str); + + let numberOfColumn = 4; + if(hasStatus_bit){numberOfColumn++} + + tokenizer.position = initialTokenizerPosition; + tokenizer.lineNumber = initialTokenizerLineNumber; + + for(let i = 0; i < molecule.num_bonds; i++){ + for(let j = 0; j < numberOfColumn; j++){ + Tokenizer.skipWhitespace(tokenizer); + Tokenizer.eatValue(tokenizer); + switch(j){ + case 0: + TokenBuilder.addUnchecked(bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + case 1: + TokenBuilder.addUnchecked(origin_bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + case 2: + TokenBuilder.addUnchecked(target_bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + case 3: + TokenBuilder.addUnchecked(bondTypeTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + default: + TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd); + } + } + } const ret = { count: molecule.num_bonds, - bond_id: col(0, 6, Column.Schema.int), - origin_atom_id: col(6, 6, Column.Schema.int), - target_atom_id: col(12, 6, Column.Schema.int), - bond_type: col(18, 5, Column.Schema.str), - // optional properties - // undefPooledStr cannot pass unit tests because it create a column of zeros but not empty strings - //status_bits: hasStatus_bit ? col(23, 50, ColumnType.pooledStr) : undefPooledStr, - status_bits: hasStatus_bit ? col(23, 50, Column.Schema.str) : undefStr, + bond_id: bond_idTokenColumn(Column.Schema.int), + origin_atom_id: origin_bond_idTokenColumn(Column.Schema.int), + target_atom_id: target_bond_idTokenColumn(Column.Schema.int), + bond_type: bondTypeTokenColumn(Column.Schema.str), + status_bits: hasStatus_bit ? status_bitTokenColumn(Column.Schema.str) : undefStr, }; return ret;