diff --git a/examples/adrenalin.mol2 b/examples/adrenalin.mol2 new file mode 100644 index 0000000000000000000000000000000000000000..44d5736be0aec82be8822d3a604fee1c120fe8a9 --- /dev/null +++ b/examples/adrenalin.mol2 @@ -0,0 +1,60 @@ +@<TRIPOS>MOLECULE +5816 + 26 26 0 0 0 +SMALL +GASTEIGER + +@<TRIPOS>ATOM + 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859 + 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033 + 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033 + 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162 + 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927 + 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143 + 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258 + 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109 + 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524 + 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586 + 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162 + 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582 + 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157 + 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656 + 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453 + 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453 + 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659 + 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622 + 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217 + 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655 + 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103 + 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388 + 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388 + 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388 + 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923 + 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923 +@<TRIPOS>BOND + 1 1 5 1 + 2 1 21 1 + 3 2 10 1 + 4 2 25 1 + 5 3 12 1 + 6 3 26 1 + 7 4 7 1 + 8 4 13 1 + 9 4 19 1 + 10 5 6 1 + 11 5 7 1 + 12 5 14 1 + 13 6 8 ar + 14 6 9 ar + 15 7 15 1 + 16 7 16 1 + 17 8 10 ar + 18 8 17 1 + 19 9 11 ar + 20 9 18 1 + 21 10 12 ar + 22 11 12 ar + 23 11 20 1 + 24 13 22 1 + 25 13 23 1 + 26 13 24 1 \ No newline at end of file diff --git a/examples/sp-after.mol2 b/examples/sp-after.mol2 new file mode 100644 index 
0000000000000000000000000000000000000000..d09597b336fcebf017796a609a970840c06c2225 --- /dev/null +++ b/examples/sp-after.mol2 @@ -0,0 +1,71 @@ +@<TRIPOS>MOLECULE +ZINC12921206_1_1 + 30 33 0 0 0 +SMALL +USER_CHARGES + +@<TRIPOS>ATOM + 1 C1 10.8630 31.3859 32.2468 1 CAN 0.0000 + 2 C2 11.5701 31.4080 33.4817 1 CAN 0.0000 + 3 C3 17.5821 22.4473 38.5639 1 CAN 0.0000 + 4 C4 17.9273 24.7114 37.0200 1 CAN 0.0000 + 5 C5 10.5243 30.1190 31.8528 1 CAN 0.0000 + 6 C6 12.3942 31.2627 36.0033 1 CAN 0.0000 + 7 C7 12.3703 30.0954 35.2910 1 CAN 0.0000 + 8 C8 12.7652 28.9134 36.0072 1 CAN 0.0000 + 9 C9 16.4753 23.1064 38.0609 1 CAN 0.0000 + 10 C10 16.6480 24.2419 37.2829 1 CAN 0.0000 + 11 C11 18.8581 22.9199 38.3189 1 CAN 0.0000 + 12 C12 19.0369 24.0561 37.5370 1 CAN 0.0000 + 13 C13 11.8010 30.1533 33.9700 1 CAN 0.0000 + 14 C14 12.9774 29.2537 37.3208 1 CAN 0.0000 + 15 C15 13.3581 27.0671 37.7950 1 CAN 0.0000 + 16 C16 13.0045 27.5630 35.4958 1 CAN 0.0000 + 17 C17 14.5101 24.5014 36.0984 1 CAN 0.0000 + 18 C18 15.0909 22.6148 38.4220 1 CAN 0.0000 + 19 C19 19.6116 21.0974 39.7116 1 CAN 0.0000 + 20 C20 21.4990 23.9541 37.7136 1 CAN 0.0000 + 21 C21 13.2073 25.2763 36.0792 1 CAN 0.0000 + 22 N1 13.3019 28.3134 38.2813 1 CAN 0.0000 + 23 N2 13.2604 26.6887 36.4706 1 CAN 0.0000 + 24 N3 15.5062 24.9862 36.8657 1 CAN 0.0000 + 25 O1 13.0317 27.1909 34.3441 1 CAN 0.0000 + 26 O2 14.6486 23.4801 35.4573 1 CAN 0.0000 + 27 O3 19.9073 22.2497 38.8981 1 CAN 0.0000 + 28 O4 20.2810 24.5742 37.2617 1 CAN 0.0000 + 29 S1 11.0122 28.9270 33.0152 1 CAN 0.0000 + 30 S2 12.7971 30.9513 37.6624 1 CAN 0.0000 +@<TRIPOS>BOND + 1 1 2 0 + 2 1 5 0 + 3 2 13 0 + 4 3 11 0 + 5 3 9 0 + 6 4 12 0 + 7 4 10 0 + 8 5 29 0 + 9 6 7 0 + 10 6 30 0 + 11 7 8 0 + 12 7 13 0 + 13 8 14 0 + 14 8 16 0 + 15 9 10 0 + 16 9 18 0 + 17 10 24 0 + 18 11 27 0 + 19 11 12 0 + 20 12 28 0 + 21 13 29 0 + 22 14 22 0 + 23 14 30 0 + 24 15 22 0 + 25 15 23 0 + 26 16 23 0 + 27 16 25 0 + 28 17 21 0 + 29 17 24 0 + 30 17 26 0 + 31 19 27 0 + 32 20 28 0 + 33 21 23 0 
diff --git a/examples/sp-ido40.mol2 b/examples/sp-ido40.mol2 new file mode 100644 index 0000000000000000000000000000000000000000..52836aa1a7d909083ac60aae6b7c91691c83b200 --- /dev/null +++ b/examples/sp-ido40.mol2 @@ -0,0 +1,109 @@ +@<TRIPOS>MOLECULE +ZINC12921206_1 + 49 52 0 0 0 +SMALL +GASTEIGER + +@<TRIPOS>ATOM + 1 C1 0.6216 -0.2760 -6.8683 C.ar 1 <1> -0.0541 + 2 C2 0.3683 -0.8994 -5.6124 C.ar 1 <1> -0.0469 + 3 C3 -0.4563 -9.3380 -7.8499 C.ar 1 <1> -0.0116 + 4 C4 -1.3323 -7.7730 -5.7131 C.ar 1 <1> 0.0052 + 5 C5 1.6788 0.6049 -6.8049 C.ar 1 <1> -0.0237 + 6 C6 1.0293 -0.0678 -2.1894 C.ar 1 <1> -0.0133 + 7 C7 1.2229 -0.9407 -3.2374 C.ar 1 <1> 0.0102 + 8 C8 1.3908 -2.2806 -2.7857 C.ar 1 <1> 0.0803 + 9 C9 0.4711 -8.6946 -7.0304 C.ar 1 <1> -0.0256 + 10 C10 0.0332 -7.9122 -5.9620 C.ar 1 <1> 0.0401 + 11 C11 -1.8217 -9.1987 -7.6011 C.ar 1 <1> 0.1619 + 12 C12 -2.2597 -8.4163 -6.5327 C.ar 1 <1> 0.1633 + 13 C13 1.2418 -0.4712 -4.6368 C.ar 1 <1> 0.0206 + 14 C14 1.3196 -2.3849 -1.4088 C.ar 1 <1> 0.1157 + 15 C15 1.6389 -4.5865 -1.3837 C.ar 1 <1> 0.1003 + 16 C16 1.6164 -3.4830 -3.5741 C.ar 1 <1> 0.2632 + 17 C17 0.6789 -6.4424 -4.0211 C.2 1 <1> 0.2371 + 18 C18 1.9274 -8.8602 -7.3213 C.3 1 <1> -0.0378 + 19 C19 -2.1874 -10.6131 -9.4777 C.3 1 <1> 0.0790 + 20 C20 -3.9546 -7.4559 -5.1686 C.3 1 <1> 0.0790 + 21 C21 1.9636 -5.9327 -3.3970 C.3 1 <1> 0.1040 + 22 N1 1.4391 -3.5205 -0.6696 N.ar 1 <1> -0.2298 + 23 N2 1.7329 -4.6371 -2.7763 N.ar 1 <1> -0.2898 + 24 N3 0.9585 -7.2517 -5.1191 N.am 1 <1> -0.2837 + 25 O1 1.6923 -3.4619 -4.7990 O.2 1 <1> -0.2669 + 26 O2 -0.4256 -6.1563 -3.5700 O.2 1 <1> -0.2735 + 27 O3 -2.7176 -9.8330 -8.4079 O.3 1 <1> -0.4914 + 28 O4 -3.5906 -8.2727 -6.2796 O.3 1 <1> -0.4914 + 29 S1 2.3612 0.6778 -5.2360 S.2 1 <1> -0.0966 + 30 S2 1.0512 -0.8647 -0.6747 S.2 1 <1> -0.0763 + 31 H1 0.0574 -0.4648 -7.7724 H 1 <1> 0.0623 + 32 H2 -0.4177 -1.6249 -5.4418 H 1 <1> 0.0629 + 33 H3 -0.1187 -9.9489 -8.6838 H 1 <1> 0.0658 + 34 H4 -1.6794 -7.1646 -4.8809 H 1 <1> 0.0674 + 35 H5 
2.0837 1.2088 -7.6047 H 1 <1> 0.0693 + 36 H6 0.8746 1.0019 -2.2311 H 1 <1> 0.0700 + 37 H7 1.7520 -5.5673 -0.8940 H 1 <1> 0.1030 + 38 H8 2.2679 -8.0781 -8.0093 H 1 <1> 0.0278 + 39 H9 2.5235 -8.8023 -6.4025 H 1 <1> 0.0278 + 40 H10 2.1317 -9.8382 -7.7740 H 1 <1> 0.0278 + 41 H11 -3.0074 -11.0637 -10.0422 H 1 <1> 0.0660 + 42 H12 -1.5373 -11.3932 -9.0735 H 1 <1> 0.0660 + 43 H13 -1.6073 -9.9638 -10.1376 H 1 <1> 0.0660 + 44 H14 -5.0433 -7.4258 -5.0787 H 1 <1> 0.0660 + 45 H15 -3.5623 -6.4462 -5.3135 H 1 <1> 0.0660 + 46 H16 -3.5278 -7.8834 -4.2582 H 1 <1> 0.0660 + 47 H17 2.7361 -5.8372 -4.1665 H 1 <1> 0.0589 + 48 H18 2.3025 -6.6506 -2.6435 H 1 <1> 0.0589 + 49 H19 1.9385 -7.3938 -5.3543 H 1 <1> 0.1549 +@<TRIPOS>BOND + 1 1 2 ar + 2 1 5 ar + 3 2 13 ar + 4 3 9 ar + 5 3 11 ar + 6 4 10 ar + 7 4 12 ar + 8 5 29 ar + 9 6 7 ar + 10 6 30 ar + 11 7 8 ar + 12 7 13 1 + 13 8 14 ar + 14 8 16 ar + 15 9 10 ar + 16 9 18 1 + 17 10 24 1 + 18 11 12 ar + 19 11 27 1 + 20 12 28 1 + 21 13 29 ar + 22 14 22 ar + 23 14 30 ar + 24 15 22 ar + 25 15 23 ar + 26 16 23 ar + 27 16 25 2 + 28 17 21 1 + 29 17 24 am + 30 17 26 2 + 31 19 27 1 + 32 20 28 1 + 33 21 23 1 + 34 1 31 1 + 35 2 32 1 + 36 3 33 1 + 37 4 34 1 + 38 5 35 1 + 39 6 36 1 + 40 15 37 1 + 41 18 38 1 + 42 18 39 1 + 43 18 40 1 + 44 19 41 1 + 45 19 42 1 + 46 19 43 1 + 47 20 44 1 + 48 20 45 1 + 49 20 46 1 + 50 21 47 1 + 51 21 48 1 + 52 24 49 1 diff --git a/src/mol-io/reader/_spec/gro.spec.ts b/src/mol-io/reader/_spec/gro.spec.ts index e13fcf713a9b0fa7daec2d452e3c75c1ac9b3b1c..888f23c83369fa8fc27e4adec486aab86003e7ab 100644 --- a/src/mol-io/reader/_spec/gro.spec.ts +++ b/src/mol-io/reader/_spec/gro.spec.ts @@ -32,7 +32,6 @@ describe('gro reader', () => { console.log(parsed) return; } - const groFile = parsed.result; const data = groFile.structures[0]; diff --git a/src/mol-io/reader/_spec/mol2.spec.ts b/src/mol-io/reader/_spec/mol2.spec.ts new file mode 100644 index 0000000000000000000000000000000000000000..2a04c751f6710e990ea684635868752ec3c1e046 --- 
import Mol2 from '../mol2/parser'

// Fixtures: the adrenalin example (examples/adrenalin.mol2), split into its three
// records so that every variant below shares one copy of the unchanged parts.

/** MOLECULE record; ends with a line break so that a blank line follows it when embedded. */
const MoleculeRecord = `@<TRIPOS>MOLECULE
5816
 26 26 0 0 0
SMALL
GASTEIGER
`

/** BOND record with the four required columns only (no status bits). */
const BondRecord = `@<TRIPOS>BOND
 1 1 5 1
 2 1 21 1
 3 2 10 1
 4 2 25 1
 5 3 12 1
 6 3 26 1
 7 4 7 1
 8 4 13 1
 9 4 19 1
 10 5 6 1
 11 5 7 1
 12 5 14 1
 13 6 8 ar
 14 6 9 ar
 15 7 15 1
 16 7 16 1
 17 8 10 ar
 18 8 17 1
 19 9 11 ar
 20 9 18 1
 21 10 12 ar
 22 11 12 ar
 23 11 20 1
 24 13 22 1
 25 13 23 1
 26 13 24 1`

/** Single molecule whose atom records carry subst_id, subst_name and charge. */
const Mol2String = `${MoleculeRecord}
@<TRIPOS>ATOM
 1 O 1.7394 -2.1169 -1.0894 O.3 1 LIG1 -0.3859
 2 O -2.2941 1.0781 -1.7979 O.3 1 LIG1 -0.5033
 3 O -3.6584 0.5842 0.5722 O.3 1 LIG1 -0.5033
 4 N 2.6359 1.0243 0.7030 N.3 1 LIG1 -0.3162
 5 C 1.6787 -1.1447 -0.0373 C.3 1 LIG1 0.0927
 6 C 0.2684 -0.6866 0.1208 C.ar 1 LIG1 -0.0143
 7 C 2.6376 0.0193 -0.3576 C.3 1 LIG1 0.0258
 8 C -0.3658 -0.0099 -0.9212 C.ar 1 LIG1 -0.0109
 9 C -0.4164 -0.9343 1.3105 C.ar 1 LIG1 -0.0524
 10 C -1.6849 0.4191 -0.7732 C.ar 1 LIG1 0.1586
 11 C -1.7353 -0.5053 1.4585 C.ar 1 LIG1 -0.0162
 12 C -2.3696 0.1713 0.4166 C.ar 1 LIG1 0.1582
 13 C 3.5645 2.1013 0.3950 C.3 1 LIG1 -0.0157
 14 H 2.0210 -1.6511 0.8741 H 1 LIG1 0.0656
 15 H 2.3808 0.4742 -1.3225 H 1 LIG1 0.0453
 16 H 3.6478 -0.3931 -0.4831 H 1 LIG1 0.0453
 17 H 0.1501 0.1801 -1.8589 H 1 LIG1 0.0659
 18 H 0.0640 -1.4598 2.1315 H 1 LIG1 0.0622
 19 H 2.9013 0.5888 1.5858 H 1 LIG1 0.1217
 20 H -2.2571 -0.7050 2.3907 H 1 LIG1 0.0655
 21 H 2.6646 -2.4067 -1.1652 H 1 LIG1 0.2103
 22 H 3.2862 2.6124 -0.5325 H 1 LIG1 0.0388
 23 H 4.5925 1.7346 0.3078 H 1 LIG1 0.0388
 24 H 3.5401 2.8441 1.1985 H 1 LIG1 0.0388
 25 H -3.2008 1.2997 -1.5231 H 1 LIG1 0.2923
 26 H -3.9690 0.3259 1.4570 H 1 LIG1 0.2923
${BondRecord}`

/** The same molecule twice in one file, to exercise multi-structure parsing. */
const Mol2StringMultiBlocks = `${Mol2String}
${Mol2String}`

/** Same molecule with only the six required atom columns. */
const Mol2StringMinimal = `${MoleculeRecord}
@<TRIPOS>ATOM
 1 O 1.7394 -2.1169 -1.0894 O.3
 2 O -2.2941 1.0781 -1.7979 O.3
 3 O -3.6584 0.5842 0.5722 O.3
 4 N 2.6359 1.0243 0.7030 N.3
 5 C 1.6787 -1.1447 -0.0373 C.3
 6 C 0.2684 -0.6866 0.1208 C.ar
 7 C 2.6376 0.0193 -0.3576 C.3
 8 C -0.3658 -0.0099 -0.9212 C.ar
 9 C -0.4164 -0.9343 1.3105 C.ar
 10 C -1.6849 0.4191 -0.7732 C.ar
 11 C -1.7353 -0.5053 1.4585 C.ar
 12 C -2.3696 0.1713 0.4166 C.ar
 13 C 3.5645 2.1013 0.3950 C.3
 14 H 2.0210 -1.6511 0.8741 H
 15 H 2.3808 0.4742 -1.3225 H
 16 H 3.6478 -0.3931 -0.4831 H
 17 H 0.1501 0.1801 -1.8589 H
 18 H 0.0640 -1.4598 2.1315 H
 19 H 2.9013 0.5888 1.5858 H
 20 H -2.2571 -0.7050 2.3907 H
 21 H 2.6646 -2.4067 -1.1652 H
 22 H 3.2862 2.6124 -0.5325 H
 23 H 4.5925 1.7346 0.3078 H
 24 H 3.5401 2.8441 1.1985 H
 25 H -3.2008 1.2997 -1.5231 H
 26 H -3.9690 0.3259 1.4570 H
${BondRecord}`

/**
 * Number of decimal digits checked by `toBeCloseTo`. The second argument of
 * `toBeCloseTo` is a digit count, not an absolute tolerance; the fixture
 * values are written with four decimals.
 */
const COORD_DIGITS = 4;

/** Expected values of the optional atom columns for the first atom. */
interface ExpectedOptionals {
    subst_id: number,
    subst_name: string,
    charge: number
}

/**
 * Parses `source` and checks the structure at `structureIndex` against the
 * adrenalin fixture.
 *
 * A parser error fails the test instead of being logged and ignored, so a
 * broken parser can no longer produce a green run.
 */
async function expectAdrenalin(source: string, structureIndex: number, optionals: ExpectedOptionals) {
    const parsed = await Mol2(source)();
    if (parsed.isError) {
        console.log(parsed)
        throw new Error('mol2 parser reported an error');
    }
    const { molecule, atoms, bonds } = parsed.result.structures[structureIndex];

    expect(molecule.mol_name).toBe('5816');
    expect(molecule.num_atoms).toBe(26);
    expect(molecule.num_bonds).toBe(26);
    expect(molecule.num_subst).toBe(0);
    expect(molecule.num_feat).toBe(0);
    expect(molecule.num_sets).toBe(0);
    expect(molecule.mol_type).toBe("SMALL");
    expect(molecule.charge_type).toBe("GASTEIGER");
    expect(molecule.status_bits).toBe("");
    expect(molecule.mol_comment).toBe("");

    expect(atoms.count).toBe(26);
    expect(atoms.atom_id.value(0)).toBe(1);
    expect(atoms.atom_name.value(0)).toBe('O');
    expect(atoms.x.value(0)).toBeCloseTo(1.7394, COORD_DIGITS);
    expect(atoms.y.value(0)).toBeCloseTo(-2.1169, COORD_DIGITS);
    expect(atoms.z.value(0)).toBeCloseTo(-1.0894, COORD_DIGITS);
    expect(atoms.atom_type.value(0)).toBe("O.3");
    // optional columns
    expect(atoms.subst_id.value(0)).toBe(optionals.subst_id);
    expect(atoms.subst_name.value(0)).toBe(optionals.subst_name);
    expect(atoms.charge.value(0)).toBeCloseTo(optionals.charge, COORD_DIGITS);
    expect(atoms.status_bit.value(0)).toBe('');

    expect(bonds.count).toBe(26);
    expect(bonds.bond_id.value(0)).toBe(1);
    expect(bonds.origin_atom_id.value(0)).toBe(1);
    expect(bonds.target_atom_id.value(0)).toBe(5);
    expect(bonds.bond_type.value(0)).toBe('1');
    // optional column
    expect(bonds.status_bits.value(0)).toBe('');
}

describe('mol2 reader', () => {
    it('basic', () =>
        expectAdrenalin(Mol2String, 0, { subst_id: 1, subst_name: 'LIG1', charge: -0.3859 }));

    // The second structure is checked to prove the parser moves past the first block.
    it('multiblocks', () =>
        expectAdrenalin(Mol2StringMultiBlocks, 1, { subst_id: 1, subst_name: 'LIG1', charge: -0.3859 }));

    // Absent optional columns are reported as 0 / '' by the undefined columns.
    it('minimal', () =>
        expectAdrenalin(Mol2StringMinimal, 0, { subst_id: 0, subst_name: '', charge: 0 }));
});
// NOTES
// To create an undefined string column, use
//     undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str)
// and not
//     const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr);
// because the latter actually returns a column of zeros.
import { Column } from 'mol-data/db'
import { TokenBuilder, Tokenizer } from '../common/text/tokenizer'
import TokenColumn from '../common/text/column/token'
import * as Schema from './schema'
import Result from '../result'
import Computation from 'mol-util/computation'

/** Record type indicators of the sections this parser understands. */
const RECORD_PREFIX = '@<TRIPOS>';
const MOLECULE_TAG = '@<TRIPOS>MOLECULE';
const ATOM_TAG = '@<TRIPOS>ATOM';
const BOND_TAG = '@<TRIPOS>BOND';

/** Keywords that identify a token of an atom record as the status_bit column. */
const ATOM_STATUS_FLAGS = ['DSPMOD', 'TYPECOL', 'CAP', 'BACKBONE', 'DICT', 'ESSENTIAL', 'WATER', 'DIRECT'];

/** atom_id, atom_name, x, y, z, atom_type */
const REQUIRED_ATOM_COLUMNS = 6;

/** Parsing state for a single molecule block; the tokenizer is shared between blocks. */
interface State {
    tokenizer: Tokenizer,
    molecule: Schema.Molecule,
    chunker: Computation.Chunker
}

/** Returns a molecule record with every count set to 0 and every string empty. */
function createEmptyMolecule(): Schema.Molecule {
    return {
        mol_name: '',
        num_atoms: 0,
        num_bonds: 0,
        num_subst: 0,
        num_feat: 0,
        num_sets: 0,
        mol_type: '',
        charge_type: '',
        status_bits: '',
        mol_comment: ''
    };
}

/** Creates the state for the next molecule block read from `tokenizer`. */
function State(tokenizer: Tokenizer, ctx: Computation.Context): State {
    return {
        tokenizer,
        molecule: createEmptyMolecule(),
        chunker: Computation.chunker(ctx, 100000)
    };
}

/**
 * Marks lines until the current token equals `tag`.
 *
 * @returns `true` when the tag is the current token (the tokenizer is then
 * positioned after that line), `false` when the input ended first. Without
 * the end-of-input check a missing record would never terminate the scan.
 */
function seekRecord(tokenizer: Tokenizer, tag: string): boolean {
    while (Tokenizer.getTokenString(tokenizer) !== tag) {
        if (tokenizer.position >= tokenizer.length) return false;
        Tokenizer.markLine(tokenizer);
    }
    return true;
}

/** Parses one field of the counts line; a missing or non-numeric field counts as 0. */
function parseCount(field: string | undefined): number {
    const value = field === undefined ? NaN : parseInt(field, 10);
    return isNaN(value) ? 0 : value;
}

/** True when `line` cannot be an optional molecule line: it is blank or starts the next record. */
function endsMoleculeRecord(line: string): boolean {
    return line === '' || line.startsWith(RECORD_PREFIX);
}

/**
 * Reads the next MOLECULE record into `state.molecule`.
 *
 * Layout: name, counts (`num_atoms num_bonds num_subst num_feat num_sets`),
 * molecule type, charge type, then optionally status bits and a comment.
 *
 * @returns `false` when no further MOLECULE record exists, e.g. when only a
 * trailing line break remains after the last block.
 */
function handleMolecule(state: State): boolean {
    const { tokenizer, molecule } = state;

    // Search for the tag instead of assuming it is the next line: after a previous
    // block the tokenizer still sits at the end of that block's last bond line.
    if (!seekRecord(tokenizer, MOLECULE_TAG)) return false;

    Tokenizer.markLine(tokenizer);
    molecule.mol_name = Tokenizer.getTokenString(tokenizer);

    Tokenizer.markLine(tokenizer);
    const values = Tokenizer.getTokenString(tokenizer).trim().split(/\s+/g);
    // Each count is read from its own field.
    molecule.num_atoms = parseCount(values[0]);
    molecule.num_bonds = parseCount(values[1]);
    molecule.num_subst = parseCount(values[2]);
    molecule.num_feat = parseCount(values[3]);
    molecule.num_sets = parseCount(values[4]);

    Tokenizer.markLine(tokenizer);
    molecule.mol_type = Tokenizer.getTokenString(tokenizer);

    Tokenizer.markLine(tokenizer);
    molecule.charge_type = Tokenizer.getTokenString(tokenizer);

    // Optional lines. A record tag found here stays the current token, so the
    // following seekRecord call recognises it without consuming another line.
    Tokenizer.markLine(tokenizer);
    const statusBits = Tokenizer.getTokenString(tokenizer);
    if (endsMoleculeRecord(statusBits)) return true;
    molecule.status_bits = statusBits;

    Tokenizer.markLine(tokenizer);
    const comment = Tokenizer.getTokenString(tokenizer);
    if (endsMoleculeRecord(comment)) return true;
    molecule.mol_comment = comment;

    return true;
}

/** True when `aString` contains one of the atom status keywords. */
function isStatus_bit(aString: string): boolean {
    return ATOM_STATUS_FLAGS.some(flag => aString.includes(flag));
}

/**
 * Reads the ATOM record of the current molecule.
 *
 * The first atom line decides which optional columns are present; every
 * following line is expected to have the same columns.
 *
 * @throws Error when the input ends before an ATOM record is found.
 */
async function handleAtoms(state: State): Promise<Schema.Atoms> {
    const { tokenizer, molecule } = state;
    let hasSubst_id = false;
    let hasSubst_name = false;
    let hasCharge = false;
    let hasStatus_bit = false;

    // skip empty lines and '@<TRIPOS>ATOM'
    if (!seekRecord(tokenizer, ATOM_TAG)) {
        throw new Error(`mol2: no ${ATOM_TAG} record found for molecule '${molecule.mol_name}'`);
    }

    // Peek at the first atom line; the position is restored before the rows are read.
    const initialTokenizerPosition = tokenizer.position;
    const initialTokenizerLineNumber = tokenizer.lineNumber;
    const firstLineArray = Tokenizer.readLine(tokenizer).trim().split(/\s+/g);

    // optionals are in order "integer string float string". Use this to find out which column is missing or empty
    for (let i = REQUIRED_ATOM_COLUMNS; i < firstLineArray.length; i++) {
        const field = firstLineArray[i];
        if (!isNaN(Number(field))) {
            if (field.indexOf('.') === -1) {
                hasSubst_id = true;
            } else {
                hasCharge = true;
            }
        } else if (isStatus_bit(field)) {
            hasStatus_bit = true;
        } else {
            hasSubst_name = true;
        }
    }

    const atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
    const atom_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
    const xTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
    const yTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
    const zTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
    const atom_typeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
    // optionals
    const subst_idTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
    const subst_nameTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
    const chargeTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);
    const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_atoms * 2);

    // Builders in the order the columns appear on a line: the six required
    // columns, then only the optional columns detected above. Its length is
    // the number of tokens per row, so all six required tokens are consumed.
    const columnTokens = [atom_idTokens, atom_nameTokens, xTokens, yTokens, zTokens, atom_typeTokens];
    if (hasSubst_id) columnTokens.push(subst_idTokens);
    if (hasSubst_name) columnTokens.push(subst_nameTokens);
    if (hasCharge) columnTokens.push(chargeTokens);
    if (hasStatus_bit) columnTokens.push(status_bitTokens);
    const numOfColumn = columnTokens.length;

    const undefFloat = Column.Undefined(molecule.num_atoms, Column.Schema.float);
    const undefInt = Column.Undefined(molecule.num_atoms, Column.Schema.int);
    const undefStr = Column.Undefined(molecule.num_atoms, Column.Schema.str);

    tokenizer.position = initialTokenizerPosition;
    tokenizer.lineNumber = initialTokenizerLineNumber;

    const { length } = tokenizer;
    let linesAlreadyRead = 0;
    await state.chunker.process(chunkSize => {
        const linesToRead = Math.min(molecule.num_atoms - linesAlreadyRead, chunkSize);
        for (let i = 0; i < linesToRead; i++) {
            for (let j = 0; j < numOfColumn; j++) {
                Tokenizer.skipWhitespace(tokenizer);
                Tokenizer.eatValue(tokenizer);
                TokenBuilder.addUnchecked(columnTokens[j], tokenizer.tokenStart, tokenizer.tokenEnd);
            }
        }
        linesAlreadyRead += linesToRead;
        return linesToRead;
    }, update => update({ message: 'Parsing...', current: tokenizer.position, max: length }));

    return {
        count: molecule.num_atoms,
        atom_id: TokenColumn(atom_idTokens)(Column.Schema.int),
        atom_name: TokenColumn(atom_nameTokens)(Column.Schema.str),
        x: TokenColumn(xTokens)(Column.Schema.float),
        y: TokenColumn(yTokens)(Column.Schema.float),
        z: TokenColumn(zTokens)(Column.Schema.float),
        atom_type: TokenColumn(atom_typeTokens)(Column.Schema.str),
        // optional properties
        subst_id: hasSubst_id ? TokenColumn(subst_idTokens)(Column.Schema.int) : undefInt,
        subst_name: hasSubst_name ? TokenColumn(subst_nameTokens)(Column.Schema.str) : undefStr,
        charge: hasCharge ? TokenColumn(chargeTokens)(Column.Schema.float) : undefFloat,
        status_bit: hasStatus_bit ? TokenColumn(status_bitTokens)(Column.Schema.str) : undefStr,
    };
}

/**
 * Reads the BOND record of the current molecule.
 *
 * A first bond line with five fields means the optional status_bits column
 * is present on every line.
 *
 * @throws Error when the input ends before a BOND record is found.
 */
async function handleBonds(state: State): Promise<Schema.Bonds> {
    const { tokenizer, molecule } = state;

    if (!seekRecord(tokenizer, BOND_TAG)) {
        throw new Error(`mol2: no ${BOND_TAG} record found for molecule '${molecule.mol_name}'`);
    }

    // Peek at the first bond line; the position is restored before the rows are read.
    const initialTokenizerPosition = tokenizer.position;
    const initialTokenizerLineNumber = tokenizer.lineNumber;
    const firstLineArray = Tokenizer.readLine(tokenizer).trim().split(/\s+/g);
    const hasStatus_bit = firstLineArray.length === 5;

    const bond_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
    const origin_atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
    const target_atom_idTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
    const bondTypeTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);
    // optional
    const status_bitTokens = TokenBuilder.create(tokenizer, molecule.num_bonds * 2);

    // Builders in line order; the optional column is appended only when present.
    const columnTokens = [bond_idTokens, origin_atom_idTokens, target_atom_idTokens, bondTypeTokens];
    if (hasStatus_bit) columnTokens.push(status_bitTokens);
    const numberOfColumn = columnTokens.length;

    const undefStr = Column.Undefined(molecule.num_bonds, Column.Schema.str);

    tokenizer.position = initialTokenizerPosition;
    tokenizer.lineNumber = initialTokenizerLineNumber;

    const { length } = tokenizer;
    let linesAlreadyRead = 0;
    await state.chunker.process(chunkSize => {
        const linesToRead = Math.min(molecule.num_bonds - linesAlreadyRead, chunkSize);
        for (let i = 0; i < linesToRead; i++) {
            for (let j = 0; j < numberOfColumn; j++) {
                Tokenizer.skipWhitespace(tokenizer);
                Tokenizer.eatValue(tokenizer);
                TokenBuilder.addUnchecked(columnTokens[j], tokenizer.tokenStart, tokenizer.tokenEnd);
            }
        }
        linesAlreadyRead += linesToRead;
        return linesToRead;
    }, update => update({ message: 'Parsing...', current: tokenizer.position, max: length }));

    return {
        count: molecule.num_bonds,
        bond_id: TokenColumn(bond_idTokens)(Column.Schema.int),
        origin_atom_id: TokenColumn(origin_atom_idTokens)(Column.Schema.int),
        target_atom_id: TokenColumn(target_atom_idTokens)(Column.Schema.int),
        bond_type: TokenColumn(bondTypeTokens)(Column.Schema.str),
        status_bits: hasStatus_bit ? TokenColumn(status_bitTokens)(Column.Schema.str) : undefStr,
    };
}

/**
 * Parses every molecule block in `data`.
 *
 * Parsing stops when no further MOLECULE record is found, so text after the
 * last block (typically the final line break) does not start another
 * structure.
 */
async function parseInternal(data: string, ctx: Computation.Context): Promise<Result<Schema.File>> {
    const tokenizer = Tokenizer(data);

    ctx.update({ message: 'Parsing...', current: 0, max: data.length });
    const structures: Schema.Structure[] = [];
    while (tokenizer.position < data.length) {
        const state = State(tokenizer, ctx);
        if (!handleMolecule(state)) break;
        const atoms = await handleAtoms(state);
        const bonds = await handleBonds(state);
        structures.push({ molecule: state.molecule, atoms, bonds });
    }

    const result: Schema.File = { structures };
    return Result.success(result);
}

/** Creates a computation that parses mol2 text into a `Schema.File` with one structure per molecule block. */
export function parse(data: string) {
    return Computation.create<Result<Schema.File>>(async ctx => {
        return await parseInternal(data, ctx);
    });
}

export default parse;