diff --git a/examples/adrenalin.mol2 b/examples/adrenalin.mol2 new file mode 100644 index 0000000000000000000000000000000000000000..44d5736be0aec82be8822d3a604fee1c120fe8a9 --- /dev/null +++ b/examples/adrenalin.mol2 @@ -0,0 +1,60 @@ +@<TRIPOS>MOLECULE +5816 + 26 26 0 0 0 +SMALL +GASTEIGER + +@<TRIPOS>ATOM + 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 + 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 + 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 + 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 + 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 + 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 + 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 + 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 + 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 + 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 + 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 + 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 + 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 + 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 + 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 + 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 + 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 + 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 + 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 + 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 + 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 + 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 + 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 + 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 + 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 + 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 +@<TRIPOS>BOND + 1 1 5 1 + 2 1 21 1 + 3 2 10 1 + 4 2 25 1 + 5 3 12 1 + 6 3 26 1 + 7 4 7 1 + 8 4 13 1 + 9 4 19 1 + 10 5 6 1 + 11 5 7 1 + 12 5 14 1 + 13 6 8 ar + 14 6 9 ar + 15 7 15 1 + 16 7 16 1 + 17 8 10 ar + 18 8 17 1 + 19 9 11 ar + 20 9 18 1 + 21 10 12 ar + 22 11 12 ar + 23 11 20 1 + 24 13 22 1 + 25 13 23 1 + 26 13 24 1 \ No newline at end of file diff --git a/examples/sp-after.mol2 b/examples/sp-after.mol2 new file mode 100644 index 0000000000000000000000000000000000000000..d09597b336fcebf017796a609a970840c06c2225 --- /dev/null +++ b/examples/sp-after.mol2 @@ -0,0 +1,71 @@ +@<TRIPOS>MOLECULE +ZINC12921206_1_1 + 30 33 0 0 0 +SMALL +USER_CHARGES + +@<TRIPOS>ATOM + 1 C1 10.8630 31.3859 32.2468 1 CAN 0.0000 + 2 C2 11.5701 31.4080 33.4817 1 CAN 0.0000 + 3 C3 17.5821 22.4473 38.5639 1 CAN 0.0000 + 4 C4 17.9273 24.7114 37.0200 1 CAN 0.0000 + 5 C5 10.5243 30.1190 31.8528 1 CAN 0.0000 + 6 C6 12.3942 31.2627 36.0033 1 CAN 0.0000 + 7 C7 12.3703 30.0954 35.2910 1 CAN 0.0000 + 8 C8 12.7652 28.9134 36.0072 1 CAN 0.0000 + 9 C9 16.4753 23.1064 38.0609 1 CAN 0.0000 + 10 C10 16.6480 24.2419 37.2829 1 CAN 0.0000 + 11 C11 18.8581 22.9199 38.3189 1 CAN 0.0000 + 12 C12 19.0369 24.0561 37.5370 1 CAN 0.0000 + 13 C13 11.8010 30.1533 33.9700 1 CAN 0.0000 + 14 C14 12.9774 29.2537 37.3208 1 CAN 0.0000 + 15 C15 13.3581 27.0671 37.7950 1 CAN 0.0000 + 16 C16 13.0045 27.5630 35.4958 1 CAN 0.0000 + 17 C17 14.5101 24.5014 36.0984 1 CAN 0.0000 + 18 C18 15.0909 22.6148 38.4220 1 CAN 0.0000 + 19 C19 19.6116 21.0974 39.7116 1 CAN 0.0000 + 20 C20 21.4990 23.9541 37.7136 1 CAN 0.0000 + 21 C21 13.2073 25.2763 36.0792 1 CAN 0.0000 + 22 N1 13.3019 28.3134 38.2813 1 CAN 0.0000 + 23 N2 13.2604 26.6887 36.4706 1 CAN 0.0000 + 24 N3 15.5062 24.9862 36.8657 1 CAN 0.0000 + 25 O1 13.0317 27.1909 34.3441 1 CAN 0.0000 + 26 O2 14.6486 23.4801 35.4573 1 CAN 0.0000 + 27 O3 19.9073 22.2497 38.8981 1 CAN 0.0000 + 28 O4 20.2810 24.5742 37.2617 1 CAN 0.0000 + 29 S1 11.0122 28.9270 33.0152 1 CAN 0.0000 + 30 S2 12.7971 30.9513 37.6624 1 CAN 0.0000 +@<TRIPOS>BOND + 1 1 2 0 + 2 1 5 0 + 3 2 13 0 + 4 3 11 0 + 5 3 9 0 + 6 4 12 0 + 7 4 10 0 + 8 5 29 0 + 9 6 7 0 + 10 6 30 0 + 11 7 8 0 + 12 7 13 0 + 13 8 14 0 + 14 8 16 0 + 15 9 10 0 + 16 9 18 0 + 17 10 24 0 + 18 11 27 0 + 19 11 12 0 + 20 12 28 0 + 21 13 29 0 + 22 14 22 0 + 23 14 30 0 + 24 15 22 0 + 25 15 23 0 + 26 16 23 0 + 27 16 25 0 + 28 17 21 0 + 29 17 24 0 + 30 17 26 0 + 31 19 27 0 + 32 20 28 0 + 33 21 23 0 diff --git a/examples/sp-ido40.mol2 b/examples/sp-ido40.mol2 new file mode 100644 index 0000000000000000000000000000000000000000..52836aa1a7d909083ac60aae6b7c91691c83b200 --- /dev/null +++ b/examples/sp-ido40.mol2 @@ -0,0 +1,109 @@ +@<TRIPOS>MOLECULE +ZINC12921206_1 + 49 52 0 0 0 +SMALL +GASTEIGER + +@<TRIPOS>ATOM + 1 C1 0.6216 -0.2760 -6.8683 C.ar 1 <1> -0.0541 + 2 C2 0.3683 -0.8994 -5.6124 C.ar 1 <1> -0.0469 + 3 C3 -0.4563 -9.3380 -7.8499 C.ar 1 <1> -0.0116 + 4 C4 -1.3323 -7.7730 -5.7131 C.ar 1 <1> 0.0052 + 5 C5 1.6788 0.6049 -6.8049 C.ar 1 <1> -0.0237 + 6 C6 1.0293 -0.0678 -2.1894 C.ar 1 <1> -0.0133 + 7 C7 1.2229 -0.9407 -3.2374 C.ar 1 <1> 0.0102 + 8 C8 1.3908 -2.2806 -2.7857 C.ar 1 <1> 0.0803 + 9 C9 0.4711 -8.6946 -7.0304 C.ar 1 <1> -0.0256 + 10 C10 0.0332 -7.9122 -5.9620 C.ar 1 <1> 0.0401 + 11 C11 -1.8217 -9.1987 -7.6011 C.ar 1 <1> 0.1619 + 12 C12 -2.2597 -8.4163 -6.5327 C.ar 1 <1> 0.1633 + 13 C13 1.2418 -0.4712 -4.6368 C.ar 1 <1> 0.0206 + 14 C14 1.3196 -2.3849 -1.4088 C.ar 1 <1> 0.1157 + 15 C15 1.6389 -4.5865 -1.3837 C.ar 1 <1> 0.1003 + 16 C16 1.6164 -3.4830 -3.5741 C.ar 1 <1> 0.2632 + 17 C17 0.6789 -6.4424 -4.0211 C.2 1 <1> 0.2371 + 18 C18 1.9274 -8.8602 -7.3213 C.3 1 <1> -0.0378 + 19 C19 -2.1874 -10.6131 -9.4777 C.3 1 <1> 0.0790 + 20 C20 -3.9546 -7.4559 -5.1686 C.3 1 <1> 0.0790 + 21 C21 1.9636 -5.9327 -3.3970 C.3 1 <1> 0.1040 + 22 N1 1.4391 -3.5205 -0.6696 N.ar 1 <1> -0.2298 + 23 N2 1.7329 -4.6371 -2.7763 N.ar 1 <1> -0.2898 + 24 N3 0.9585 -7.2517 -5.1191 N.am 1 <1> -0.2837 + 25 O1 1.6923 -3.4619 -4.7990 O.2 1 <1> -0.2669 + 26 O2 -0.4256 -6.1563 -3.5700 O.2 1 <1> -0.2735 + 27 O3 -2.7176 -9.8330 -8.4079 O.3 1 <1> -0.4914 + 28 O4 -3.5906 -8.2727 -6.2796 O.3 1 <1> -0.4914 + 29 S1 2.3612 0.6778 -5.2360 S.2 1 <1> -0.0966 + 30 S2 1.0512 -0.8647 -0.6747 S.2 1 <1> -0.0763 + 31 H1 0.0574 -0.4648 -7.7724 H 1 <1> 0.0623 + 32 H2 -0.4177 -1.6249 -5.4418 H 1 <1> 0.0629 + 33 H3 -0.1187 -9.9489 -8.6838 H 1 <1> 0.0658 + 34 H4 -1.6794 -7.1646 -4.8809 H 1 <1> 0.0674 + 35 H5 2.0837 1.2088 -7.6047 H 1 <1> 0.0693 + 36 H6 0.8746 1.0019 -2.2311 H 1 <1> 0.0700 + 37 H7 1.7520 -5.5673 -0.8940 H 1 <1> 0.1030 + 38 H8 2.2679 -8.0781 -8.0093 H 1 <1> 0.0278 + 39 H9 2.5235 -8.8023 -6.4025 H 1 <1> 0.0278 + 40 H10 2.1317 -9.8382 -7.7740 H 1 <1> 0.0278 + 41 H11 -3.0074 -11.0637 -10.0422 H 1 <1> 0.0660 + 42 H12 -1.5373 -11.3932 -9.0735 H 1 <1> 0.0660 + 43 H13 -1.6073 -9.9638 -10.1376 H 1 <1> 0.0660 + 44 H14 -5.0433 -7.4258 -5.0787 H 1 <1> 0.0660 + 45 H15 -3.5623 -6.4462 -5.3135 H 1 <1> 0.0660 + 46 H16 -3.5278 -7.8834 -4.2582 H 1 <1> 0.0660 + 47 H17 2.7361 -5.8372 -4.1665 H 1 <1> 0.0589 + 48 H18 2.3025 -6.6506 -2.6435 H 1 <1> 0.0589 + 49 H19 1.9385 -7.3938 -5.3543 H 1 <1> 0.1549 +@<TRIPOS>BOND + 1 1 2 ar + 2 1 5 ar + 3 2 13 ar + 4 3 9 ar + 5 3 11 ar + 6 4 10 ar + 7 4 12 ar + 8 5 29 ar + 9 6 7 ar + 10 6 30 ar + 11 7 8 ar + 12 7 13 1 + 13 8 14 ar + 14 8 16 ar + 15 9 10 ar + 16 9 18 1 + 17 10 24 1 + 18 11 12 ar + 19 11 27 1 + 20 12 28 1 + 21 13 29 ar + 22 14 22 ar + 23 14 30 ar + 24 15 22 ar + 25 15 23 ar + 26 16 23 ar + 27 16 25 2 + 28 17 21 1 + 29 17 24 am + 30 17 26 2 + 31 19 27 1 + 32 20 28 1 + 33 21 23 1 + 34 1 31 1 + 35 2 32 1 + 36 3 33 1 + 37 4 34 1 + 38 5 35 1 + 39 6 36 1 + 40 15 37 1 + 41 18 38 1 + 42 18 39 1 + 43 18 40 1 + 44 19 41 1 + 45 19 42 1 + 46 19 43 1 + 47 20 44 1 + 48 20 45 1 + 49 20 46 1 + 50 21 47 1 + 51 21 48 1 + 52 24 49 1 diff --git a/src/reader/mol2/parser.ts b/src/reader/mol2/parser.ts new file mode 100644 index 0000000000000000000000000000000000000000..b97792a6f5d50cecdee11d1bef3861b27c9d3c57 --- /dev/null +++ b/src/reader/mol2/parser.ts @@ -0,0 +1,138 @@ +import Tokenizer from '../common/text/tokenizer' +import FixedColumn from '../common/text/column/fixed' +import { ColumnType, UndefinedColumn } from '../common/column' +import * as Schema from './schema' +import Result from '../result' +//import Computation from '../../utils/computation' + +interface State { + tokenizer: Tokenizer, + molecule: Schema.Molecule, + ///////////// not including Computation.chunker ///////////// +} + +function createEmptyMolecule(): Schema.Molecule { + return { + mol_name: '', + num_atoms: 0, + num_bonds: 0, + num_subst: 0, + num_feat: 0, + num_sets: 0, + mol_type: '', + charge_type: '', + status_bits:'', + mol_comment: '' + }; +} + +function State(tokenizer: Tokenizer): State { //////////// not having ctx: Computation.Context as a parameter ////////////// + return { + tokenizer, + molecule: createEmptyMolecule(), + //////////// not having chunker: Computation.chunker(ctx, 100000) /////////// + }; +} + +/** + * title string (free format string, optional time in ps after 't=') + */ +function handleMolecule(state: State) { + const { tokenizer, molecule } = state; + + Tokenizer.markLine(tokenizer); + let name = Tokenizer.getTokenString(tokenizer); + molecule.mol_name = name; + + Tokenizer.markLine(tokenizer); + const values = Tokenizer.getTokenString(tokenizer).trim().split(/\s+/g); + molecule.num_atoms = parseInt(values[0]); + molecule.num_bonds = parseInt(values[1]); + molecule.num_subst = parseInt(values[2]); + molecule.num_feat = parseInt(values[3]); + molecule.num_sets = parseInt(values[4]); + + Tokenizer.markLine(tokenizer); + molecule.mol_type = Tokenizer.getTokenString(tokenizer); + + Tokenizer.markLine(tokenizer); + molecule.charge_type = Tokenizer.getTokenString(tokenizer); + + // skip the empty line + Tokenizer.markLine(tokenizer) + +} + +/** + * This format is fixed, ie. all columns are in a fixed position. + * Optionally (for now only yet with trjconv) you can write gro files + * with any number of decimal places, the format will then be n+5 + * positions with n decimal places (n+1 for velocities) in stead + * of 8 with 3 (with 4 for velocities). Upon reading, the precision + * will be inferred from the distance between the decimal points + * (which will be n+5). Columns contain the following information + * (from left to right): + * residue number (5 positions, integer) + * residue name (5 characters) + * atom name (5 characters) + * atom number (5 positions, integer) + * position (in nm, x y z in 3 columns, each 8 positions with 3 decimal places) + * velocity (in nm/ps (or km/s), x y z in 3 columns, each 8 positions with 4 decimal places) + */ +function handleAtoms(state: State): Schema.Atoms { + const { tokenizer, molecule } = state; + + ////////// not using readLinesAsync ///////// + const lines = Tokenizer.readLines(tokenizer, molecule.num_atoms); + + const pO = 20; + const pW = state.header.precision.position + 5; + const vO = pO + 3 * pW; + const vW = state.header.precision.velocity + 4; + + const col = FixedColumn({ data: tokenizer.data, lines, rowCount: state.numberOfAtoms }); + + const ret = { + count: molecule.num_atoms, + atom_id: col(0, 0, ColumnType.int), + atom_name: col(0, 0, ColumnType.str), + x: col(0, 0, ColumnType.float), + }; + + return ret; +} + +/** + * box vectors (free format, space separated reals), values: + * v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y), + * the last 6 values may be omitted (they will be set to zero). + * Gromacs only supports boxes with v1(y)=v1(z)=v2(z)=0. + */ +function handleBoxVectors(state: State) { + const { tokenizer } = state; + markLine(tokenizer); + const values = getTokenString(tokenizer).trim().split(/\s+/g); + state.header.box = [+values[0], +values[1], +values[2]]; +} + +function parseInternal(data: string): Result<Schema.File> { + const tokenizer = TokenizerState(data); + + const structures: Schema.Structure[] = []; + while (tokenizer.position < data.length) { + const state = createState(tokenizer); + handleMolecule(state); + const atoms = handleAtoms(state); + handleBoxVectors(state); + structures.push({ header: state.header, atoms }); + } + + const result: Schema.File = { structures }; + return Result.success(result); +} + +export function parse(data: string) { + return parseInternal(data); +} + +export default parse; \ No newline at end of file