/**
 * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

import { deflate, inflate, parse, encode } from '../zip/zip'

// Round-trip tests for the zip helpers. Each test asserts on the decoded
// bytes so that a regression actually fails the suite instead of only
// printing values to the console.
describe('zip', () => {
    it('roundtrip deflate/inflate', () => {
        const data = new Uint8Array([1, 2, 3, 4, 5, 6, 7])
        const deflated = deflate(data)
        const inflated = inflate(deflated)
        // compare as plain arrays so a failure prints readable values
        expect(Array.from(inflated)).toEqual(Array.from(data))
    })

    it('roundtrip zip', () => {
        const data = new Uint8Array([1, 2, 3, 4, 5, 6, 7])
        const zipped = encode({
            'test.foo': data
        })
        const unzipped = parse(zipped)
        expect(Object.keys(unzipped)).toEqual(['test.foo'])
        // without `onlyNames`, `parse` stores the file content as a Uint8Array
        expect(Array.from(unzipped['test.foo'] as Uint8Array)).toEqual(Array.from(data))
    })
})
// src/mol-util/zip/bin.ts — little-endian integer and UTF-8 helpers operating on Uint8Array.

/** Reinterprets `x` as an unsigned 32-bit integer. */
export function toUint32(x: number) {
    return x >>> 0;
}

/** Reinterprets `x` as a signed 32-bit integer. */
export function toInt32(x: number) {
    return x >> 0;
}

/** Reads an unsigned 16-bit little-endian integer starting at byte `p`. */
export function readUshort(buff: Uint8Array, p: number) {
    return (buff[p]) | (buff[p + 1] << 8);
}

/** Writes the low 16 bits of `n` in little-endian order starting at byte `p`. */
export function writeUshort(buff: Uint8Array, p: number, n: number) {
    buff[p] = n & 255;
    buff[p + 1] = (n >> 8) & 255;
}

/**
 * Reads an unsigned 32-bit little-endian integer starting at byte `p`.
 * The top byte is multiplied instead of shifted so that the result stays
 * non-negative (a `<< 24` would produce a signed 32-bit value).
 */
export function readUint(buff: Uint8Array, p: number) {
    return (buff[p + 3] * (256 * 256 * 256)) + ((buff[p + 2] << 16) | (buff[p + 1] << 8) | buff[p]);
}

/** Writes the low 32 bits of `n` in little-endian order starting at byte `p`. */
export function writeUint(buff: Uint8Array, p: number, n: number) {
    buff[p] = n & 255;
    buff[p + 1] = (n >> 8) & 255;
    buff[p + 2] = (n >> 16) & 255;
    buff[p + 3] = (n >> 24) & 255;
}

/** Decodes `l` bytes starting at `p`, mapping every byte to one UTF-16 code unit. */
function readASCII(buff: Uint8Array, p: number, l: number) {
    let s = '';
    for (let i = 0; i < l; i++) s += String.fromCharCode(buff[p + i]);
    return s;
}

/** Left-pads a hex string to two characters. */
function pad(n: string) {
    return n.length < 2 ? '0' + n : n;
}

/**
 * Decodes `l` bytes starting at `p` as UTF-8.
 *
 * The bytes are percent-encoded and handed to `decodeURIComponent`; when that
 * throws (byte sequence is not valid UTF-8) the bytes are decoded one
 * character per byte via `readASCII` instead.
 */
export function readUTF8(buff: Uint8Array, p: number, l: number) {
    let s = '', ns;
    for (let i = 0; i < l; i++) s += '%' + pad(buff[p + i].toString(16));
    try {
        ns = decodeURIComponent(s);
    } catch (e) {
        return readASCII(buff, p, l);
    }
    return ns;
}

/** Number of UTF-8 bytes needed for a single code point. */
function utf8Length(code: number) {
    if (code < 0x80) return 1;
    if (code < 0x800) return 2;
    if (code < 0x10000) return 3;
    return 4;
}

/**
 * Encodes `str` as UTF-8 into `buff` starting at byte `p`.
 *
 * Iterates by code point (not by UTF-16 code unit) so that characters outside
 * the Basic Multilingual Plane are written as one 4-byte sequence rather than
 * as two 3-byte encoded surrogates. An unpaired surrogate is still written as
 * a 3-byte sequence.
 *
 * @returns the number of bytes written; equals `sizeUTF8(str)`
 */
export function writeUTF8(buff: Uint8Array, p: number, str: string) {
    const strl = str.length;
    let i = 0;
    for (let ci = 0; ci < strl; ci++) {
        const code = str.codePointAt(ci)!;
        switch (utf8Length(code)) {
            case 1:
                buff[p + i] = code;
                i++;
                break;
            case 2:
                buff[p + i] = 192 | (code >> 6);
                buff[p + i + 1] = 128 | (code & 63);
                i += 2;
                break;
            case 3:
                buff[p + i] = 224 | (code >> 12);
                buff[p + i + 1] = 128 | ((code >> 6) & 63);
                buff[p + i + 2] = 128 | (code & 63);
                i += 3;
                break;
            default:
                buff[p + i] = 240 | (code >> 18);
                buff[p + i + 1] = 128 | ((code >> 12) & 63);
                buff[p + i + 2] = 128 | ((code >> 6) & 63);
                buff[p + i + 3] = 128 | (code & 63);
                i += 4;
                ci++; // the code point occupied two UTF-16 code units
        }
    }
    return i;
}

/**
 * Returns the number of bytes `writeUTF8` produces for `str`.
 * Counts by code point, see `writeUTF8`.
 */
export function sizeUTF8(str: string) {
    const strl = str.length;
    let i = 0;
    for (let ci = 0; ci < strl; ci++) {
        const n = utf8Length(str.codePointAt(ci)!);
        i += n;
        if (n === 4) ci++; // skip the low surrogate of the pair
    }
    return i;
}
// src/mol-util/zip/checksum.ts — CRC-32 and Adler-32 checksums.

/** Builds the 256-entry CRC lookup table for the reflected polynomial 0xedb88320. */
function buildCrcTable() {
    const table = new Uint32Array(256);
    for (let byte = 0; byte < 256; byte++) {
        let value = byte;
        for (let bit = 0; bit < 8; bit++) {
            value = (value & 1) ? (0xedb88320 ^ (value >>> 1)) : (value >>> 1);
        }
        table[byte] = value;
    }
    return table;
}

const CrcTable = buildCrcTable();

/** Feeds `len` bytes of `buf`, starting at `off`, into the running CRC state `c`. */
function _crc(c: number, buf: Uint8Array, off: number, len: number) {
    const stop = off + len;
    for (let idx = off; idx < stop; idx++) {
        c = CrcTable[(c ^ buf[idx]) & 0xff] ^ (c >>> 8);
    }
    return c;
}

/**
 * CRC-32 of `l` bytes of `b` starting at offset `o`.
 * The final xor is a 32-bit bitwise operation, so the result is a *signed*
 * 32-bit integer (negative when the top bit is set).
 */
export function crc(b: Uint8Array, o: number, l: number) {
    const state = _crc(0xffffffff, b, o, l);
    return state ^ 0xffffffff;
}

/**
 * Adler-32 of `len` bytes of `data` starting at offset `o`.
 * The two sums are only reduced modulo 65521 once per chunk of at most
 * 5552 bytes; the result packs the second sum into the upper 16 bits.
 */
export function adler(data: Uint8Array, o: number, len: number) {
    const MOD = 65521;
    const CHUNK = 5552;
    const end = o + len;
    let lo = 1;
    let hi = 0;
    for (let off = o; off < end;) {
        const stop = Math.min(off + CHUNK, end);
        for (; off < stop; off++) {
            lo += data[off];
            hi += lo;
        }
        lo %= MOD;
        hi %= MOD;
    }
    return (hi << 16) | lo;
}
// src/mol-util/zip/deflate.ts — DEFLATE compressor (ported from UZIP.js).

import { NumberArray } from '../type-helpers';
import { _hufTree } from './huffman';
import { U, revCodes, makeCodes } from './util';

/**
 * Compresses `data` into `out` as a raw DEFLATE stream.
 *
 * Bits are OR-ed into `out` (see `_putsE`/`_putsF`), so the region of `out`
 * that is written must be zero on entry.
 *
 * @param data input bytes
 * @param out  output buffer, written starting at byte `opos`
 * @param opos byte offset in `out` at which to start writing
 * @param lvl  index into the option table below (0-9); 0 writes stored blocks only
 * @returns byte offset in `out` just past the last written byte
 */
export function _deflateRaw(data: Uint8Array, out: Uint8Array, opos: number, lvl: number) {
    const opts = [
        /*
            ush good_length; /* reduce lazy search above this match length
            ush max_lazy;    /* do not perform lazy search above this match length
            ush nice_length; /* quit search above this match length
        */
        /*      good lazy nice chain */
        /* 0 */ [ 0,   0,   0,    0, 0], /* store only */
        /* 1 */ [ 4,   4,   8,    4, 0], /* max speed, no lazy matches */
        /* 2 */ [ 4,   5,  16,    8, 0],
        /* 3 */ [ 4,   6,  16,   16, 0],

        /* 4 */ [ 4,  10,  16,   32, 0], /* lazy matches */
        /* 5 */ [ 8,  16,  32,   32, 0],
        /* 6 */ [ 8,  16, 128,  128, 0],
        /* 7 */ [ 8,  32, 128,  256, 0],
        /* 8 */ [32, 128, 258, 1024, 1],
        /* 9 */ [32, 258, 258, 4096, 1]  /* max compression */
    ];

    const opt = opts[lvl];

    let i = 0, pos = opos << 3, cvrd = 0;
    const dlen = data.length;

    if (lvl === 0) {
        // do/while: empty input must still produce one (final, empty) stored
        // block, otherwise the output is not a valid DEFLATE stream
        do {
            const len = Math.min(0xffff, dlen - i);
            _putsE(out, pos, (i + len === dlen ? 1 : 0));
            pos = _copyExact(data, i, len, out, pos + 8);
            i += len;
        } while (i < dlen);
        return pos >>> 3;
    }

    const { lits, strt, prev } = U;
    let li = 0, lc = 0, bs = 0, ebits = 0, c = 0, nc = 0; // last_item, literal_count, block_start
    if (dlen > 2) {
        nc = _hash(data, 0);
        strt[nc] = 0;
    }

    for (i = 0; i < dlen; i++) {
        c = nc;
        // hash the next position and link it into the chain of earlier positions with the same hash
        if (i + 1 < dlen - 2) {
            nc = _hash(data, i + 1);
            const ii = ((i + 1) & 0x7fff);
            prev[ii] = strt[nc];
            strt[nc] = ii;
        }
        // positions below `cvrd` are already covered by an emitted match
        if (cvrd <= i) {
            // flush a block when the item / literal counters get large and enough input remains
            if ((li > 14000 || lc > 26697) && (dlen - i) > 100) {
                if (cvrd < i) {
                    lits[li] = i - cvrd;
                    li += 2;
                    cvrd = i;
                }
                pos = _writeBlock(((i === dlen - 1) || (cvrd === dlen)) ? 1 : 0, lits, li, ebits, data, bs, i - bs, out, pos);
                li = lc = ebits = 0;
                bs = i;
            }

            let mch = 0;
            if (i < dlen - 2) {
                mch = _bestMatch(data, i, prev, c, Math.min(opt[2], dlen - i), opt[3]);
            }
            if (mch !== 0) {
                const len = mch >>> 16, dst = mch & 0xffff;
                const lgi = _goodIndex(len, U.of0); U.lhst[257 + lgi]++;
                const dgi = _goodIndex(dst, U.df0); U.dhst[dgi]++; ebits += U.exb[lgi] + U.dxb[dgi];
                // item = [match length << 23 | preceding literal count, distance << 16 | length index << 8 | distance index]
                lits[li] = (len << 23) | (i - cvrd); lits[li + 1] = (dst << 16) | (lgi << 8) | dgi; li += 2;
                cvrd = i + len;
            } else {
                U.lhst[data[i]]++;
            }
            lc++;
        }
    }
    if (bs !== i || data.length === 0) {
        if (cvrd < i) {
            lits[li] = i - cvrd;
            li += 2;
            cvrd = i;
        }
        pos = _writeBlock(1, lits, li, ebits, data, bs, i - bs, out, pos);
    }
    // round up to the next byte boundary
    while ((pos & 7) !== 0) pos++;
    return pos >>> 3;
}

/**
 * Walks the `prev` chain of earlier positions for position `i` and returns the
 * longest match found, packed as `(length << 16) | distance`, or 0 for none.
 *
 * @param c     hash of the data at `i`, used to reject a stale chain head
 * @param nice  stop searching once a match of at least this length is found
 * @param chain limit on the number of chain steps
 */
function _bestMatch(data: Uint8Array, i: number, prev: Uint16Array, c: number, nice: number, chain: number) {
    let ci = (i & 0x7fff), pi = prev[ci];
    let dif = ((ci - pi + (1 << 15)) & 0x7fff);
    if (pi === ci || c !== _hash(data, i - dif)) return 0;
    let tl = 0, td = 0; // top length, top distance
    const dlim = Math.min(0x7fff, i);
    while (dif <= dlim && --chain !== 0 && pi !== ci) {
        if (tl === 0 || (data[i + tl] === data[i + tl - dif])) {
            let cl = _howLong(data, i, dif);
            if (cl > tl) {
                tl = cl; td = dif; if (tl >= nice) break;
                if (dif + 2 < cl) cl = dif + 2;
                let maxd = 0; // pi does not point to the start of the word
                for (let j = 0; j < cl - 2; j++) {
                    const ei = (i - dif + j + (1 << 15)) & 0x7fff;
                    const li = prev[ei];
                    const curd = (ei - li + (1 << 15)) & 0x7fff;
                    if (curd > maxd) { maxd = curd; pi = ei; }
                }
            }
        }

        ci = pi; pi = prev[ci];
        dif += ((ci - pi + (1 << 15)) & 0x7fff);
    }
    return (tl << 16) | td;
}

/**
 * Length of the match between the data at `i` and the data `dif` bytes earlier.
 * Returns 0 unless the first three bytes match; the scan stops at 258 bytes or
 * at the end of `data`.
 */
function _howLong(data: Uint8Array, i: number, dif: number) {
    if (data[i] !== data[i - dif] || data[i + 1] !== data[i + 1 - dif] || data[i + 2] !== data[i + 2 - dif]) return 0;
    const oi = i, l = Math.min(data.length, i + 258);
    i += 3;
    while (i < l && data[i] === data[i - dif]) i++;
    return i - oi;
}

/** 16-bit hash of the three bytes starting at `i`. */
function _hash(data: Uint8Array, i: number) {
    return (((data[i] << 8) | data[i + 1]) + (data[i + 2] << 4)) & 0xffff;
}

/**
 * Writes one DEFLATE block and returns the new bit position.
 *
 * Estimates the bit size of a stored, a fixed-Huffman and a dynamic-Huffman
 * encoding from the histograms in `U` and emits the smallest one. The
 * histograms `U.lhst`, `U.dhst` and `U.ihst` are reset as a side effect.
 *
 * @param BFINAL 1 when this is the last block of the stream
 * @param lits   item list filled by `_deflateRaw` (two entries per item)
 * @param li     number of used entries in `lits`
 * @param ebits  total number of extra bits needed by the matches
 * @param o0     offset in `data` of the first byte of the block
 * @param l0     number of input bytes covered by the block
 * @param pos    bit position in `out`
 */
function _writeBlock(BFINAL: number, lits: Uint32Array, li: number, ebits: number, data: Uint8Array, o0: number, l0: number, out: Uint8Array, pos: number) {
    U.lhst[256]++; // every block contains one end-of-block symbol
    const [ML, MD, MH, numl, numd, numh, lset, dset] = getTrees();

    const cstSize = (((pos + 3) & 7) === 0 ? 0 : 8 - ((pos + 3) & 7)) + 32 + (l0 << 3);
    const fxdSize = ebits + contSize(U.fltree, U.lhst) + contSize(U.fdtree, U.dhst);
    let dynSize = ebits + contSize(U.ltree, U.lhst) + contSize(U.dtree, U.dhst);
    dynSize += 14 + 3 * numh + contSize(U.itree, U.ihst) + (U.ihst[16] * 2 + U.ihst[17] * 3 + U.ihst[18] * 7);

    for (let j = 0; j < 286; j++) U.lhst[j] = 0;
    for (let j = 0; j < 30; j++) U.dhst[j] = 0;
    for (let j = 0; j < 19; j++) U.ihst[j] = 0;

    const BTYPE = (cstSize < fxdSize && cstSize < dynSize) ? 0 : (fxdSize < dynSize ? 1 : 2);
    _putsF(out, pos, BFINAL);
    _putsF(out, pos + 1, BTYPE);
    pos += 3;

    if (BTYPE === 0) {
        while ((pos & 7) !== 0) pos++;
        pos = _copyExact(data, o0, l0, out, pos);
    } else {
        let ltree: number[], dtree: number[];
        if (BTYPE === 1) {
            ltree = U.fltree; dtree = U.fdtree;
        } else if (BTYPE === 2) {
            makeCodes(U.ltree, ML); revCodes(U.ltree, ML);
            makeCodes(U.dtree, MD); revCodes(U.dtree, MD);
            makeCodes(U.itree, MH); revCodes(U.itree, MH);

            ltree = U.ltree; dtree = U.dtree;

            _putsE(out, pos, numl - 257); pos += 5; // 286
            _putsE(out, pos, numd - 1); pos += 5; // 30
            _putsE(out, pos, numh - 4); pos += 4; // 19

            for (let i = 0; i < numh; i++) _putsE(out, pos + i * 3, U.itree[(U.ordr[i] << 1) + 1]);
            pos += 3 * numh;
            pos = _codeTiny(lset, U.itree, out, pos);
            pos = _codeTiny(dset, U.itree, out, pos);
        } else {
            throw new Error(`unknown BTYPE ${BTYPE}`);
        }

        let off = o0;
        for (let si = 0; si < li; si += 2) {
            // low 23 bits: literals before the match; high bits: match length (0 = no match)
            const qb = lits[si], len = (qb >>> 23), end = off + (qb & ((1 << 23) - 1));
            while (off < end) pos = _writeLit(data[off++], ltree, out, pos);

            if (len !== 0) {
                const qc = lits[si + 1], dst = (qc >> 16), lgi = (qc >> 8) & 255, dgi = (qc & 255);
                pos = _writeLit(257 + lgi, ltree, out, pos);
                _putsE(out, pos, len - U.of0[lgi]); pos += U.exb[lgi];

                pos = _writeLit(dgi, dtree, out, pos);
                _putsF(out, pos, dst - U.df0[dgi]); pos += U.dxb[dgi]; off += len;
            }
        }
        pos = _writeLit(256, ltree, out, pos);
    }
    return pos;
}

/**
 * Writes the body of a stored block at the byte containing bit `pos`: the
 * 16-bit length, its one's complement, then `len` bytes of `data` starting at
 * `off`. Returns the bit position after the copied bytes.
 */
function _copyExact(data: Uint8Array, off: number, len: number, out: Uint8Array, pos: number) {
    let p8 = (pos >>> 3);
    out[p8] = (len);
    out[p8 + 1] = (len >>> 8);
    out[p8 + 2] = 255 - out[p8];
    out[p8 + 3] = 255 - out[p8 + 1];
    p8 += 4;
    // `subarray` is relative to the view, so this is also correct when `data`
    // does not start at offset 0 of its underlying ArrayBuffer
    out.set(data.subarray(off, off + len), p8);
    return pos + ((len + 4) << 3);
}


/*
    Interesting facts:
    - decompressed block can have bytes, which do not occur in a Huffman tree (copied from the previous block by reference)
*/

/**
 * Builds the literal/length, distance and code-length trees in `U` from the
 * current histograms and returns
 * `[ML, MD, MH, numl, numd, numh, lset, dset]`: the three values returned by
 * `_hufTree`, the number of literal/length, distance and code-length codes to
 * transmit, and the run-length encoded code lengths of the first two trees.
 */
function getTrees() {
    const ML = _hufTree(U.lhst, U.ltree, 15);
    const MD = _hufTree(U.dhst, U.dtree, 15);
    const lset: number[] = [];
    const numl = _lenCodes(U.ltree, lset);
    const dset: number[] = [];
    const numd = _lenCodes(U.dtree, dset);
    for (let i = 0; i < lset.length; i += 2) U.ihst[lset[i]]++;
    for (let i = 0; i < dset.length; i += 2) U.ihst[dset[i]]++;
    const MH = _hufTree(U.ihst, U.itree, 7);
    let numh = 19;
    while (numh > 4 && U.itree[(U.ordr[numh - 1] << 1) + 1] === 0) numh--;
    return [ML, MD, MH, numl, numd, numh, lset, dset] as const;
}

/** Sum over all symbols of `count * code length`; `tree` stores the length of symbol `i` at index `2*i+1`. */
function contSize(tree: number[], hst: NumberArray) {
    let s = 0;
    for (let i = 0; i < hst.length; i++) s += hst[i] * tree[(i << 1) + 1];
    return s;
}

/**
 * Writes the (symbol, extra value) pairs of `set` using the codes in `tree`.
 * Symbols above 15 are followed by their extra value in 2 (symbol 16),
 * 3 (symbol 17) or 7 bits. Returns the new bit position.
 */
function _codeTiny(set: number[], tree: number[], out: Uint8Array, pos: number) {
    for (let i = 0; i < set.length; i += 2) {
        const l = set[i], rst = set[i + 1];
        pos = _writeLit(l, tree, out, pos);
        const rsl = l === 16 ? 2 : (l === 17 ? 3 : 7);
        if (l > 15) {
            _putsE(out, pos, rst);
            pos += rsl;
        }
    }
    return pos;
}


/**
 * Run-length encodes the code lengths stored in `tree` (odd indices) into
 * `set` as (symbol, extra value) pairs: 17/18 for runs of zeros, 16 for
 * repeats of the previous length, otherwise the length itself with 0.
 * Returns the number of codes that were encoded.
 */
function _lenCodes(tree: number[], set: number[]) {
    let len = tree.length;
    while (len !== 2 && tree[len - 1] === 0) len -= 2; // when no distances, keep one code with length 0
    for (let i = 0; i < len; i += 2) {
        const l = tree[i + 1], nxt = (i + 3 < len ? tree[i + 3] : -1), nnxt = (i + 5 < len ? tree[i + 5] : -1), prv = (i === 0 ? -1 : tree[i - 1]);
        if (l === 0 && nxt === l && nnxt === l) {
            let lz = i + 5;
            while (lz + 2 < len && tree[lz + 2] === l) lz += 2;
            const zc = Math.min((lz + 1 - i) >>> 1, 138);
            if (zc < 11) set.push(17, zc - 3);
            else set.push(18, zc - 11);
            i += zc * 2 - 2;
        } else if (l === prv && nxt === l && nnxt === l) {
            let lz = i + 5;
            while (lz + 2 < len && tree[lz + 2] === l) lz += 2;
            const zc = Math.min((lz + 1 - i) >>> 1, 6);
            set.push(16, zc - 3);
            i += zc * 2 - 2;
        } else {
            set.push(l, 0);
        }
    }
    return len >>> 1;
}

/**
 * Builds an index in 0..31 bit by bit, keeping each bit for which
 * `arr[index] <= v` holds. For an ascending 32-entry table this yields the
 * last entry that is not greater than `v`.
 */
function _goodIndex(v: number, arr: number[]) {
    let i = 0;
    if (arr[i | 16] <= v) i |= 16;
    if (arr[i | 8] <= v) i |= 8;
    if (arr[i | 4] <= v) i |= 4;
    if (arr[i | 2] <= v) i |= 2;
    if (arr[i | 1] <= v) i |= 1;
    return i;
}

/** Writes the code of symbol `ch` (code at `2*ch`, length at `2*ch+1`) and returns the new bit position. */
function _writeLit(ch: number, ltree: number[], out: Uint8Array, pos: number) {
    _putsF(out, pos, ltree[ch << 1]);
    return pos + ltree[(ch << 1) + 1];
}

/** ORs `val` into `dt` at bit position `pos`; touches two bytes. */
function _putsE(dt: NumberArray, pos: number, val: number) {
    val = val << (pos & 7);
    const o = (pos >>> 3);
    dt[o] |= val;
    dt[o + 1] |= (val >>> 8);
}

/** ORs `val` into `dt` at bit position `pos`; touches three bytes. */
function _putsF(dt: NumberArray, pos: number, val: number) {
    val = val << (pos & 7);
    const o = (pos >>> 3);
    dt[o] |= val;
    dt[o + 1] |= (val >>> 8);
    dt[o + 2] |= (val >>> 16);
}
// src/mol-util/zip/huffman.ts — construction of length-limited Huffman code lengths.

import { NumberArray } from '../type-helpers';

/**
 * Node of the Huffman tree built by `_hufTree`.
 * Leaves carry the symbol in `lit`; internal nodes have `lit === -1` and the
 * two children `l` and `r`. `f` is the frequency, `d` the depth (code length).
 */
export type HufTree = {
    lit: number,
    f: number,
    l?: HufTree,
    r?: HufTree,
    d: number
}

/**
 * Computes Huffman code lengths for the histogram `hst` and stores the length
 * of symbol `i` at `tree[2*i+1]` (all entries of `tree` are zeroed first).
 *
 * @param hst  symbol frequencies
 * @param tree output array of (code, length) pairs
 * @param MAXL maximum allowed code length; longer codes are shortened by `restrictDepth`
 * @returns the largest code length in use (0 for an empty histogram)
 */
export function _hufTree(hst: NumberArray, tree: number[], MAXL: number) {
    const list: HufTree[] = []
    const hl = hst.length, tl=tree.length
    for(let i=0; i<tl; i+=2) {
        tree[i]=0;
        tree[i+1]=0;
    }
    for(let i=0; i<hl; i++) if(hst[i] !== 0) list.push({ lit: i, f: hst[i], d: undefined as any });
    // l2 keeps the leaves in symbol order; `list` is reused below for the internal nodes
    const end = list.length, l2=list.slice(0);
    if(end === 0) return 0;  // empty histogram (usually for dist)
    if(end === 1) {
        // a single used symbol: give it and one other symbol (0 or 1) a 1-bit code
        const lit = list[0].lit, l2 = lit === 0 ? 1 : 0;
        tree[(lit<<1)+1]=1;
        tree[(l2<<1)+1]=1;
        return 1;
    }
    list.sort(function(a,b){return a.f - b.f;});
    // i0..i1: merged nodes not yet consumed, i1: next write slot, i2: next unconsumed leaf
    let a=list[0], b=list[1], i0=0, i1=1, i2=2;
    list[0] = {
        lit: -1,
        f: a.f + b.f,
        l: a,
        r: b,
        d: 0
    };
    while(i1 !== end-1) {
        // take the two lowest-frequency nodes from the merged-node queue or the sorted leaves
        if(i0 !== i1 && (i2 === end || list[i0].f<list[i2].f)) {
            a=list[i0++];
        } else {
            a=list[i2++];
        }
        if(i0 !== i1 && (i2 === end || list[i0].f<list[i2].f)) {
            b=list[i0++];
        } else {
            b=list[i2++];
        }
        list[i1++] = {
            lit: -1,
            f: a.f + b.f,
            l: a,
            r: b,
            d: undefined as any
        };
    }
    // the node written last is the root
    let maxl = setDepth(list[i1-1], 0);
    if(maxl > MAXL) {
        restrictDepth(l2, MAXL, maxl);
        maxl = MAXL;
    }
    for(let i=0; i<end; i++) tree[(l2[i].lit<<1)+1] = l2[i].d;
    return maxl;
}

/** Assigns each leaf below `t` its depth (starting from `d`) and returns the maximum leaf depth. */
function setDepth(t: HufTree, d: number): number {
    if(t.lit !== -1) {
        t.d = d;
        return d;
    }
    return Math.max(setDepth(t.l!, d+1), setDepth(t.r!, d+1));
}

/**
 * Limits the depths of the leaves in `dps` to `MD`, given the current maximum
 * depth `maxl`. Sorts `dps` in place (deepest first), clamps too-deep leaves
 * to `MD` and then lengthens / shortens other leaves until the accumulated
 * "debt" `dbt` is balanced. Logs 'debt left' when it cannot be balanced.
 */
function restrictDepth(dps: HufTree[], MD: number, maxl: number) {
    let i=0, bCost=1<<(maxl-MD), dbt=0;
    dps.sort(function(a: HufTree, b: HufTree){return b.d === a.d ? a.f - b.f : b.d - a.d;});

    // clamp all leaves deeper than MD and accumulate the resulting debt
    for(i=0; i<dps.length; i++) {
        if(dps[i].d>MD) {
            const od=dps[i].d;
            dps[i].d=MD;
            dbt+=bCost-(1<<(maxl-od));
        } else {
            break;
        }
    }
    dbt = dbt>>>(maxl-MD);
    // pay the debt by making shallower leaves one level deeper
    while(dbt>0) {
        const od=dps[i].d;
        if(od<MD) {
            dps[i].d++;
            dbt-=(1<<(MD-od-1));
        } else {
            i++;
        }
    }
    // if too much was paid, move leaves at depth MD one level up again
    for(; i>=0; i--) {
        if(dps[i].d === MD && dbt<0) {
            dps[i].d--;
            dbt++;
        }
    }
    if(dbt !== 0) console.log('debt left');
}

// src/mol-util/zip/inflate.ts — DEFLATE decompressor.

/**
 * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 *
 * ported from https://github.com/photopea/UZIP.js/blob/master/UZIP.js
 * MIT License, Copyright (c) 2018 Photopea
 */

import { NumberArray } from '../type-helpers';
import { U, makeCodes, codes2map } from './util';

/**
 * Decompresses the raw DEFLATE stream in `data`.
 *
 * @param data compressed bytes
 * @param buf  optional output buffer. When omitted, a buffer is allocated and
 *             grown on demand; when given, it is never grown.
 * @returns `buf` itself when it is filled exactly, otherwise a copy of the
 *          first `off` (number of bytes produced) bytes
 * @throws Error for a block type other than 0, 1 or 2
 */
export function _inflate(data: Uint8Array, buf?: Uint8Array) {
    // shortcut for the two bytes 3, 0: returned as empty output
    if(data[0] === 3 && data[1] === 0) return (buf ? buf : new Uint8Array(0));
    // var F=UZIP.F, bitsF = F._bitsF, bitsE = F._bitsE, decodeTiny = F._decodeTiny, makeCodes = F.makeCodes, codes2map=F.codes2map, get17 = F._get17;
    // var U = F.U;

    const noBuf = buf === undefined;
    if(buf === undefined) buf = new Uint8Array((data.length>>>2)<<3);

    let BFINAL=0, BTYPE=0, HLIT=0, HDIST=0, HCLEN=0, ML=0, MD=0;
    // off: write position in buf (bytes), pos: read position in data (bits)
    let off = 0, pos = 0;
    let lmap, dmap;

    while(BFINAL === 0) {
        BFINAL = _bitsF(data, pos  , 1);
        BTYPE  = _bitsF(data, pos+1, 2);
        pos+=3;

        if(BTYPE === 0) {
            // stored block: skip to the next byte boundary, read the 16-bit length, copy the bytes
            if((pos&7) !== 0) pos+=8-(pos&7);
            const p8 = (pos>>>3)+4, len = data[p8-4]|(data[p8-3]<<8);  // console.log(len);//bitsF(data, pos, 16),
            if(noBuf) buf=_check(buf, off+len);
            buf.set(new Uint8Array(data.buffer, data.byteOffset+p8, len), off);
            // for(var i=0; i<len; i++) buf[off+i] = data[p8+i];
            // for(var i=0; i<len; i++) if(buf[off+i] != data[p8+i]) throw "e";
            pos = ((p8+len)<<3);  off+=len;  continue;
        }
        if(noBuf) buf=_check(buf, off+(1<<17));  // really not enough in many cases (but PNG and ZIP provide buffer in advance)
        if(BTYPE === 1) {
            // fixed Huffman codes: use the precomputed maps
            lmap = U.flmap;
            dmap = U.fdmap;
            ML = (1<<9)-1;
            MD = (1<<5)-1;
        } else if(BTYPE === 2) {
            // dynamic Huffman codes: read the code-length code, then the literal/length and distance code lengths
            HLIT  = _bitsE(data, pos   , 5)+257;
            HDIST = _bitsE(data, pos+ 5, 5)+  1;
            HCLEN = _bitsE(data, pos+10, 4)+  4;  pos+=14;
            // const ppos = pos;
            for(let i=0; i<38; i+=2) {
                U.itree[i]=0;
                U.itree[i+1]=0;
            }
            let tl = 1;
            for(let i=0; i<HCLEN; i++) {
                const l=_bitsE(data, pos+i*3, 3);
                U.itree[(U.ordr[i]<<1)+1] = l;
                if(l>tl) tl = l;
            }
            pos+=3*HCLEN;  // console.log(itree);
            makeCodes(U.itree, tl);
            codes2map(U.itree, tl, U.imap);

            lmap = U.lmap;  dmap = U.dmap;

            pos = _decodeTiny(U.imap, (1<<tl)-1, HLIT+HDIST, data, pos, U.ttree);
            const mx0 = _copyOut(U.ttree,    0, HLIT , U.ltree);  ML = (1<<mx0)-1;
            const mx1 = _copyOut(U.ttree, HLIT, HDIST, U.dtree);  MD = (1<<mx1)-1;

            // var ml = decodeTiny(U.imap, (1<<tl)-1, HLIT , data, pos, U.ltree); ML = (1<<(ml>>>24))-1;  pos+=(ml&0xffffff);
            makeCodes(U.ltree, mx0);
            codes2map(U.ltree, mx0, lmap);

            // var md = decodeTiny(U.imap, (1<<tl)-1, HDIST, data, pos, U.dtree); MD = (1<<(md>>>24))-1;  pos+=(md&0xffffff);
            makeCodes(U.dtree, mx1);
            codes2map(U.dtree, mx1, dmap);
        } else {
            throw new Error(`unknown BTYPE ${BTYPE}`)
        }

        // var ooff=off, opos=pos;
        while(true) {
            // map entry: symbol in the upper bits, code length in the low 4 bits
            const code = lmap[_get17(data, pos) & ML];
            pos += code&15;
            const lit = code >>> 4;  // U.lhst[lit]++;
            if((lit >>> 8) === 0) {
                // literal byte
                buf[off++] = lit;
            } else if(lit === 256) {
                // end of block
                break;
            } else {
                // length / distance pair
                let end = off+lit-254;
                if(lit > 264) {
                    // ldef entry: base length in the upper bits, number of extra bits in the low 3 bits
                    const ebs = U.ldef[lit-257];
                    end = off + (ebs>>>3) + _bitsE(data, pos, ebs&7);
                    pos += ebs&7;
                }
                // UZIP.F.dst[end-off]++;

                const dcode = dmap[_get17(data, pos) & MD];  pos += dcode&15;
                const dlit = dcode>>>4;
                // ddef entry: base distance in the upper bits, number of extra bits in the low 4 bits
                const dbs = U.ddef[dlit], dst = (dbs>>>4) + _bitsF(data, pos, dbs&15);  pos += dbs&15;

                // var o0 = off-dst, stp = Math.min(end-off, dst);
                // if(stp>20) while(off<end) {  buf.copyWithin(off, o0, o0+stp);  off+=stp;  }  else
                // if(end-dst<=off) buf.copyWithin(off, off-dst, end-dst);  else
                // if(dst==1) buf.fill(buf[off-1], off, end);  else
                if(noBuf) buf = _check(buf, off+(1<<17));
                // copy in steps of four; this may write up to three bytes past `end`, `off` is reset below
                while(off<end) {
                    buf[off]=buf[off++-dst];
                    buf[off]=buf[off++-dst];
                    buf[off]=buf[off++-dst];
                    buf[off]=buf[off++-dst];
                }
                off=end;
                // while(off!=end) {  buf[off]=buf[off++-dst];  }
            }
        }
        // console.log(off-ooff, (pos-opos)>>>3);
    }
    // console.log(UZIP.F.dst);
    // console.log(tlen, dlen, off-tlen+tcnt);
    return buf.length === off ? buf : buf.slice(0, off);
}

/**
 * Returns `buf` when it holds at least `len` bytes, otherwise a new buffer of
 * at least double the size (or `len`, if larger) with the old content copied.
 */
function _check(buf: Uint8Array, len: number) {
    const bl = buf.length;
    if(len <= bl) return buf;
    const nbuf = new Uint8Array(Math.max(bl << 1, len));
    nbuf.set(buf, 0);
    // for(var i=0; i<bl; i+=4) {  nbuf[i]=buf[i];  nbuf[i+1]=buf[i+1];  nbuf[i+2]=buf[i+2];  nbuf[i+3]=buf[i+3];  }
    return nbuf;
}

/**
 * Decodes `len` run-length encoded code lengths from `data` at bit `pos` into
 * `tree[0..len)`, using the lookup map `lmap` with index mask `LL`.
 * Symbols 0-15 are lengths; 16 repeats the previous length, 17 and 18 write
 * runs of zeros. Returns the new bit position.
 */
function _decodeTiny(lmap: NumberArray, LL: number, len: number, data: Uint8Array, pos: number, tree: number[]) {
    let i = 0;
    while(i<len) {
        const code = lmap[_get17(data, pos)&LL];
        pos += code&15;
        const lit = code>>>4;
        if(lit<=15) {
            tree[i]=lit;
            i++;
        } else {
            // ll: value to repeat, n: repeat count
            let ll = 0, n = 0;
            if(lit === 16) {
                n = (3  + _bitsE(data, pos, 2));  pos += 2;  ll = tree[i-1];
            } else if(lit === 17) {
                n = (3  + _bitsE(data, pos, 3));  pos += 3;
            } else if(lit === 18) {
                n = (11 + _bitsE(data, pos, 7));  pos += 7;
            }
            const ni = i+n;
            while(i<ni) {
                tree[i]=ll;
                i++;
            }
        }
    }
    return pos;
}

/**
 * Copies `len` code lengths from `src[off..]` into `tree` as (0, length)
 * pairs, zeroes the remaining pairs of `tree` and returns the largest length.
 */
function _copyOut(src: number[], off: number, len: number, tree: number[]) {
    let mx=0, i=0, tl=tree.length>>>1;
    while(i<len) {
        let v=src[i+off];
        tree[(i<<1)]=0;
        tree[(i<<1)+1]=v;
        if(v>mx)mx=v;
        i++;
    }
    while(i<tl ) {
        tree[(i<<1)]=0;
        tree[(i<<1)+1]=0;
        i++;
    }
    return mx;
}

/** Reads `length` bits at bit position `pos`; looks at two bytes of `dt`. */
function _bitsE(dt: NumberArray, pos: number, length: number) {
    return ((dt[pos>>>3] | (dt[(pos>>>3)+1]<<8))>>>(pos&7))&((1<<length)-1);
}

/** Reads `length` bits at bit position `pos`; looks at three bytes of `dt`. */
function _bitsF(dt: NumberArray, pos: number, length: number) {
    return ((dt[pos>>>3] | (dt[(pos>>>3)+1]<<8) | (dt[(pos>>>3)+2]<<16))>>>(pos&7))&((1<<length)-1);
}

function _get17(dt: NumberArray, pos: number) {  // return at least 17 meaningful bits (three bytes, shifted by up to 7 bits)
    return (dt[pos>>>3] | (dt[(pos>>>3)+1]<<8) | (dt[(pos>>>3)+2]<<16) )>>>(pos&7);
}
@@ +/** + * Copyright (c) 2020 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author Alexander Rose <alexander.rose@weirdbyte.de> + * + * ported from https://github.com/photopea/UZIP.js/blob/master/UZIP.js + * MIT License, Copyright (c) 2018 Photopea + */ + +export const U = (function(){ + const u16=Uint16Array, u32=Uint32Array; + return { + next_code : new u16(16), + bl_count : new u16(16), + ordr : [ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 ], + of0 : [3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258,999,999,999], + exb : [0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0, 0], + ldef : new u16(32), + df0 : [1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 65535, 65535], + dxb : [0,0,0,0,1,1,2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 0, 0], + ddef : new u32(32), + flmap: new u16( 512), fltree: [] as number[], + fdmap: new u16( 32), fdtree: [] as number[], + lmap : new u16(32768), ltree : [] as number[], ttree:[] as number[], + dmap : new u16(32768), dtree : [] as number[], + imap : new u16( 512), itree : [] as number[], + // rev9 : new u16( 512) + rev15: new u16(1<<15), + lhst : new u32(286), dhst : new u32( 30), ihst : new u32(19), + lits : new u32(15000), + strt : new u16(1<<16), + prev : new u16(1<<15) + }; +})(); + +(function(){ + const len = 1<<15; + for(let i=0; i<len; i++) { + let x = i; + x = (((x & 0xaaaaaaaa) >>> 1) | ((x & 0x55555555) << 1)); + x = (((x & 0xcccccccc) >>> 2) | ((x & 0x33333333) << 2)); + x = (((x & 0xf0f0f0f0) >>> 4) | ((x & 0x0f0f0f0f) << 4)); + x = (((x & 0xff00ff00) >>> 8) | ((x & 0x00ff00ff) << 8)); + U.rev15[i] = (((x >>> 16) | (x << 16)))>>>17; + } + + function pushV(tgt: number[], n: number, sv: number) { + while(n-- !== 0) tgt.push(0, sv); + } + + for(let i=0; i<32; i++) { + U.ldef[i] = 
(U.of0[i]<<3)|U.exb[i]; + U.ddef[i] = (U.df0[i]<<4)|U.dxb[i]; + } + + pushV(U.fltree, 144, 8); + pushV(U.fltree, 255-143, 9); + pushV(U.fltree, 279-255, 7); + pushV(U.fltree,287-279,8); + /* + var i = 0; + for(; i<=143; i++) U.fltree.push(0,8); + for(; i<=255; i++) U.fltree.push(0,9); + for(; i<=279; i++) U.fltree.push(0,7); + for(; i<=287; i++) U.fltree.push(0,8); + */ + makeCodes(U.fltree, 9); + codes2map(U.fltree, 9, U.flmap); + revCodes (U.fltree, 9) + + pushV(U.fdtree,32,5); + // for(i=0;i<32; i++) U.fdtree.push(0,5); + makeCodes(U.fdtree, 5); + codes2map(U.fdtree, 5, U.fdmap); + revCodes (U.fdtree, 5) + + pushV(U.itree,19,0); pushV(U.ltree,286,0); pushV(U.dtree,30,0); pushV(U.ttree,320,0); + /* + for(var i=0; i< 19; i++) U.itree.push(0,0); + for(var i=0; i<286; i++) U.ltree.push(0,0); + for(var i=0; i< 30; i++) U.dtree.push(0,0); + for(var i=0; i<320; i++) U.ttree.push(0,0); + */ +})(); + +export function codes2map(tree: number[], MAX_BITS: number, map: Uint16Array) { + const max_code = tree.length; + const r15 = U.rev15; + for(let i=0; i<max_code; i+=2) { + if(tree[i+1] !== 0) { + const lit = i>>1; + const cl = tree[i+1], val = (lit<<4)|cl; // : (0x8000 | (U.of0[lit-257]<<7) | (U.exb[lit-257]<<4) | cl); + const rest = (MAX_BITS-cl) + let i0 = tree[i]<<rest + const i1 = i0 + (1<<rest); + // tree[i]=r15[i0]>>>(15-MAX_BITS); + while(i0 !== i1) { + const p0 = r15[i0]>>>(15-MAX_BITS); + map[p0]=val; i0++; + } + } + } +} + +export function makeCodes(tree: number[], MAX_BITS: number) { // code, length + const max_code = tree.length; + + const bl_count = U.bl_count; + for(let i=0; i<=MAX_BITS; i++) bl_count[i]=0; + for(let i=1; i<max_code; i+=2) bl_count[tree[i]]++; + + const next_code = U.next_code; // smallest code for each length + + let code = 0; + bl_count[0] = 0; + for (let bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = code; + } + + for (let n = 0; n < max_code; n+=2) { + const len = tree[n+1]; + if (len !== 
/**
 * Parses a ZIP archive held in memory.
 *
 * Locates the end-of-central-directory record, walks the central directory
 * and resolves every entry through its local file header.
 *
 * @param buf the complete ZIP archive
 * @param onlyNames when true, entries are not decompressed; each name maps to
 *        `{ size, csize }` (uncompressed / compressed byte counts taken from
 *        the central directory) instead of the file contents
 * @returns a prototype-less object keyed by entry name
 * @throws Error when no end-of-central-directory record can be found, or when
 *         an entry uses a compression method other than 0 (stored) or
 *         8 (deflate)
 */
export function parse(buf: ArrayBuffer, onlyNames = false) {
    const out: { [k: string]: Uint8Array | { size: number, csize: number } } = Object.create(null);
    const data = new Uint8Array(buf);

    // The end-of-central-directory record is at least 22 bytes long, so its
    // signature cannot start later than `length - 22`. Scan backwards (an
    // archive comment may follow the record) and stop at the start of the
    // buffer instead of looping forever on input that is not a ZIP archive.
    let eocd = data.length - 22;
    while (eocd >= 0 && readUint(data, eocd) !== 0x06054b50) eocd--;
    if (eocd < 0) {
        throw new Error('invalid zip data: end of central directory record not found');
    }

    let o = eocd;
    o += 4; // signature = 0x06054b50
    o += 4; // disk numbers
    const cnu = readUshort(data, o); // number of central directory entries
    o += 2;
    o += 2; // total number of entries (unused)
    o += 4; // size of the central directory (unused)
    const coffs = readUint(data, o); // offset of the central directory

    o = coffs;
    for (let i = 0; i < cnu; i++) {
        o += 4; // signature
        o += 4; // versions
        o += 4; // flags + compression method
        o += 4; // time
        o += 4; // crc32 (unused)
        const csize = readUint(data, o);
        o += 4;
        const usize = readUint(data, o);
        o += 4;

        const nl = readUshort(data, o);
        const el = readUshort(data, o + 2);
        const cl = readUshort(data, o + 4);
        o += 6; // name, extra and comment lengths
        o += 8; // disk number, attributes

        const roff = readUint(data, o); // offset of the local file header
        o += 4;
        o += nl + el + cl;

        _readLocal(data, roff, out, csize, usize, onlyNames);
    }
    return out;
}

/**
 * Reads the entry whose local file header starts at `o` and stores it in `out`.
 *
 * The sizes are passed in by the caller (read from the central directory);
 * the size fields of the local header are skipped.
 *
 * @throws Error for compression methods other than 0 (stored) and 8 (deflate)
 */
function _readLocal(data: Uint8Array, o: number, out: { [k: string]: Uint8Array | { size: number, csize: number } }, csize: number, usize: number, onlyNames: boolean) {
    o += 4; // signature
    o += 2; // version
    o += 2; // general purpose flags
    const cmpr = readUshort(data, o);
    o += 2;
    o += 4; // time
    o += 4; // crc32
    o += 8; // csize + usize (values supplied by the caller are used instead)

    const nlen = readUshort(data, o);
    o += 2;
    const elen = readUshort(data, o);
    o += 2;

    const name = readUTF8(data, o, nlen);
    o += nlen;
    o += elen;

    if (onlyNames) {
        out[name] = { size: usize, csize };
        return;
    }

    if (cmpr === 0) {
        // stored: copy the bytes so the result does not alias the archive
        out[name] = data.slice(o, o + csize);
    } else if (cmpr === 8) {
        const buf = new Uint8Array(usize);
        // `subarray` honours `data.byteOffset`, unlike a view built from `data.buffer`
        inflateRaw(data.subarray(o), buf);
        out[name] = buf;
    } else {
        throw new Error(`unknown compression method: ${cmpr}`);
    }
}

/**
 * Inflates a raw deflate stream by delegating to `_inflate`.
 *
 * @param file the deflate-compressed bytes
 * @param buf optional output buffer, forwarded unchanged to `_inflate`
 */
export function inflateRaw(file: Uint8Array, buf?: Uint8Array) {
    return _inflate(file, buf);
}
/**
 * Returns the offset just past the zero-terminated string starting at `o`.
 * The scan is bounded by the buffer length so a missing terminator cannot
 * cause an endless loop; in that case the result lies beyond the buffer.
 */
function _skipZeroTerminated(file: Uint8Array, o: number) {
    let zero = o;
    while (zero < file.length && file[zero] !== 0) ++zero;
    return zero + 1;
}

/**
 * Decompresses a single gzip member.
 *
 * Skips the 10-byte fixed header and the optional FEXTRA, FNAME, FCOMMENT and
 * FHCRC fields, inflates the deflate blocks and compares the CRC-32 stored in
 * the trailer with the one computed over the result. A mismatch is only
 * reported via `console.error`; the inflated data is returned regardless.
 *
 * See https://tools.ietf.org/html/rfc1952
 *
 * @param file the gzip-compressed bytes
 * @param buf optional output buffer; when omitted one of ISIZE bytes (read
 *        from the trailer) is allocated
 * @returns whatever `inflateRaw` returns for the compressed blocks
 * @throws Error when the header fields leave no room for the 8-byte trailer
 */
export function ungzip(file: Uint8Array, buf?: Uint8Array) {
    const flg = file[3];

    let o = 10; // ID1, ID2, CM, FLG, MTIME (4 bytes), XFL, OS
    if (flg & 4) { // FEXTRA
        const xlen = readUshort(file, o);
        // the 2-byte XLEN field itself precedes the `xlen` bytes of extra data
        o += 2 + xlen;
    }
    if (flg & 8) { // FNAME
        o = _skipZeroTerminated(file, o);
    }
    if (flg & 16) { // FCOMMENT
        o = _skipZeroTerminated(file, o);
    }
    if (flg & 1) { // FHCRC
        o += 2;
    }

    // the last 8 bytes are the trailer: CRC32 followed by ISIZE
    const blocksLength = file.length - o - 8;
    if (blocksLength < 0) {
        throw new Error('ungzip: truncated input');
    }

    const crc32 = toInt32(readUint(file, file.length - 8));
    const isize = readUint(file, file.length - 4);
    if (buf === undefined) buf = new Uint8Array(isize);

    const blocks = new Uint8Array(file.buffer, file.byteOffset + o, blocksLength);
    const inflated = inflateRaw(blocks, buf);
    const crcValue = crc(inflated, 0, inflated.length);
    // normalise both sides to int32 so the comparison does not depend on
    // whether `crc` reports a signed or an unsigned value
    if (crc32 !== toInt32(crcValue)) {
        console.error("ungzip: checksums don't match");
    }

    return inflated;
}
/**
 * Compresses `data` into a raw deflate stream (no zlib header or checksum).
 *
 * @param data bytes to compress
 * @param opts compression options; `level` defaults to 6
 * @returns a view over the compressed bytes
 */
function deflateRaw(data: Uint8Array, opts?: { level: number }) {
    if (opts === undefined) opts = { level: 6 };
    // output buffer sized with the same head-room as `deflate`
    const buf = new Uint8Array(50 + Math.floor(data.length * 1.1));
    const off = _deflateRaw(data, buf, 0, opts.level);
    return new Uint8Array(buf.buffer, 0, off);
}

/**
 * Builds a ZIP archive from a map of file names to contents.
 *
 * Layout: all local file headers with their data, then the central
 * directory, then the end-of-central-directory record.
 *
 * @param obj entry name -> uncompressed bytes
 * @param noCmpr when true every entry is stored without compression; entries
 *        whose extension marks an already compressed format are always stored
 * @returns the underlying buffer of the assembled archive
 */
export function encode(obj: { [k: string]: Uint8Array }, noCmpr = false) {
    // prototype-less so that an entry named `__proto__` is an ordinary key
    const zpd: { [k: string]: { cpr: boolean, usize: number, crc: number, file: Uint8Array } } = Object.create(null);
    for (const p in obj) {
        const cpr = !_noNeed(p) && !noCmpr;
        const buf = obj[p];
        const crcValue = crc(buf, 0, buf.length);
        zpd[p] = {
            cpr,
            usize: buf.length,
            crc: crcValue,
            file: (cpr ? deflateRaw(buf) : buf)
        };
    }

    // per entry: data + local header (30) + central header (46) + the name twice;
    // plus the end-of-central-directory record (22)
    let tot = 22;
    for (const p in zpd) tot += zpd[p].file.length + 30 + 46 + 2 * sizeUTF8(p);

    const data = new Uint8Array(tot);
    let o = 0;

    // local headers + data; remember where each local header starts
    const fof: number[] = [];
    for (const p in zpd) {
        fof.push(o);
        o = _writeHeader(data, o, p, zpd[p], 0);
    }

    // central directory, each record pointing back at its local header
    const ioff = o;
    let i = 0;
    for (const p in zpd) {
        o = _writeHeader(data, o, p, zpd[p], 1, fof[i++]);
    }
    const csize = o - ioff;

    // end of central directory record
    writeUint(data, o, 0x06054b50); o += 4;
    o += 4; // disk numbers
    writeUshort(data, o, i); o += 2; // number of records on this disk
    writeUshort(data, o, i); o += 2; // total number of central directory records
    writeUint(data, o, csize); o += 4; // size of the central directory
    writeUint(data, o, ioff); o += 4; // offset of the central directory
    o += 2; // comment length (left at zero)
    return data.buffer;
}

/**
 * Tells whether a file name has an extension of an already compressed format
 * (png, jpg, jpeg, zip), for which deflating again is not worthwhile.
 *
 * The extension is compared as a whole and case-insensitively; names without
 * a dot have no extension and are therefore compressed.
 */
function _noNeed(fn: string) {
    const dot = fn.lastIndexOf('.');
    if (dot === -1) return false;
    const ext = fn.substring(dot + 1).toLowerCase();
    return ext === 'png' || ext === 'jpg' || ext === 'jpeg' || ext === 'zip';
}

/**
 * Writes one header for entry `p` into `data` at offset `o`.
 *
 * @param t 0 writes a local file header followed by the file data,
 *        1 writes a central directory record
 * @param roff offset of the entry's local header (only written when `t` is 1)
 * @returns the offset just past the bytes written
 */
function _writeHeader(data: Uint8Array, o: number, p: string, obj: { cpr: boolean, usize: number, crc: number, file: Uint8Array }, t: number, roff = 0) {
    const file = obj.file;

    writeUint(data, o, t === 0 ? 0x04034b50 : 0x02014b50); o += 4; // signature
    if (t === 1) o += 2; // version made by
    writeUshort(data, o, 20); o += 2; // version needed
    writeUshort(data, o, 0); o += 2; // general purpose flags
    writeUshort(data, o, obj.cpr ? 8 : 0); o += 2; // compression method

    writeUint(data, o, 0); o += 4; // time
    writeUint(data, o, obj.crc); o += 4; // crc32
    writeUint(data, o, file.length); o += 4; // compressed size
    writeUint(data, o, obj.usize); o += 4; // uncompressed size

    writeUshort(data, o, sizeUTF8(p)); o += 2; // name length
    writeUshort(data, o, 0); o += 2; // extra field length

    if (t === 1) {
        o += 2; // comment length
        o += 2; // disk number
        o += 6; // attributes
        writeUint(data, o, roff); o += 4; // offset of the local header
    }
    const nlen = writeUTF8(data, o, p);
    o += nlen;
    if (t === 0) {
        data.set(file, o);
        o += file.length;
    }
    return o;
}