diff --git a/src/mol-util/_spec/zip.spec.ts b/src/mol-util/_spec/zip.spec.ts index 6696274ec0d503bc0c3a14735d31ebdeac321f06..2ef6901d415caa9319dc671e5059db909d55db26 100644 --- a/src/mol-util/_spec/zip.spec.ts +++ b/src/mol-util/_spec/zip.spec.ts @@ -5,22 +5,22 @@ */ import { deflate, inflate, unzip, zip } from '../zip/zip' +import { SyncRuntimeContext } from '../../mol-task/execution/synchronous' describe('zip', () => { - it('roundtrip deflate/inflate', () => { + it('roundtrip deflate/inflate', async () => { const data = new Uint8Array([1, 2, 3, 4, 5, 6, 7]) const deflated = deflate(data) - console.log(deflated) - const inflated = inflate(deflated) - console.log(inflated) + const inflated = await inflate(SyncRuntimeContext, deflated) + expect(inflated).toEqual(data) }) - it('roundtrip zip', () => { - const zipped = zip({ + it('roundtrip zip', async () => { + const data = { 'test.foo': new Uint8Array([1, 2, 3, 4, 5, 6, 7]) - }) - console.log(zipped) - const unzipped = unzip(zipped) - console.log(unzipped) + } + const zipped = zip(data) + const unzipped = await unzip(SyncRuntimeContext, zipped) + expect(unzipped).toEqual(data) }) }) \ No newline at end of file diff --git a/src/mol-util/data-source.ts b/src/mol-util/data-source.ts index 45d7a9dfd83f5f3df93d2cab8b701e8a6a8ada91..e45e3c2678ca3b2268b2117bcd01d11baaddf68b 100644 --- a/src/mol-util/data-source.ts +++ b/src/mol-util/data-source.ts @@ -119,19 +119,19 @@ function getCompression(name: string) { DataCompressionMethod.None } -function decompress(data: Uint8Array, compression: DataCompressionMethod): Uint8Array { +async function decompress(ctx: RuntimeContext, data: Uint8Array, compression: DataCompressionMethod): Promise<Uint8Array> { switch (compression) { case DataCompressionMethod.None: return data - case DataCompressionMethod.Gzip: return ungzip(data) + case DataCompressionMethod.Gzip: return ungzip(ctx, data) case DataCompressionMethod.Zip: - const parsed = unzip(data.buffer) + const parsed = await unzip(ctx, data.buffer) const names = Object.keys(parsed) if (names.length !== 1) throw new Error('can only decompress zip files with a single entry') return parsed[names[0]] as Uint8Array } } -function processFile<T extends DataType>(reader: FileReader, type: T, compression: DataCompressionMethod): DataResponse<T> { +async function processFile<T extends DataType>(ctx: RuntimeContext, reader: FileReader, type: T, compression: DataCompressionMethod): Promise<DataResponse<T>> { const { result } = reader let data = result instanceof ArrayBuffer ? new Uint8Array(result) : result @@ -139,9 +139,10 @@ function processFile<T extends DataType>(reader: FileReader, type: T, compressio if (compression !== DataCompressionMethod.None) { if (!(data instanceof Uint8Array)) throw new Error('need Uint8Array for decompression') - const decompressed = decompress(data, compression); + const decompressed = await decompress(ctx, data, compression); if (type === 'string') { - data = utf8Read(decompressed, 0, decompressed.length); + await ctx.update({ message: 'Decoding text...' }); + data = utf8Read(decompressed, 0, decompressed.length) } else { data = decompressed } @@ -176,8 +177,8 @@ function readFromFileInternal<T extends DataType>(file: File, type: T): Task<Dat await ctx.update({ message: 'Opening file...', canAbort: true }); const fileReader = await readData(ctx, 'Reading...', reader); - await ctx.update({ message: 'Parsing file...', canAbort: false }); - return processFile(fileReader, type, compression); + await ctx.update({ message: 'Processing file...', canAbort: false }); + return await processFile(ctx, fileReader, type, compression); } finally { reader = void 0; } diff --git a/src/mol-util/zip/inflate.ts b/src/mol-util/zip/inflate.ts index dc7132e24e7a707a296603e7226f79de1f0980b5..8b6208810b44fbe29c57ac01c98a122932cd47e3 100644 --- a/src/mol-util/zip/inflate.ts +++ b/src/mol-util/zip/inflate.ts @@ -9,44 +9,65 @@ import { NumberArray } from '../type-helpers'; import { U, makeCodes, codes2map } from './util'; +import { RuntimeContext } from '../../mol-task'; -export function _inflate(data: Uint8Array, buf?: Uint8Array) { - if(data[0] === 3 && data[1] === 0) return (buf ? buf : new Uint8Array(0)); - // var F=UZIP.F, bitsF = F._bitsF, bitsE = F._bitsE, decodeTiny = F._decodeTiny, makeCodes = F.makeCodes, codes2map=F.codes2map, get17 = F._get17; - // var U = F.U; - +function InflateContext(data: Uint8Array, buf?: Uint8Array) { const noBuf = buf === undefined; if(buf === undefined) buf = new Uint8Array((data.length>>>2)<<3); + return { + data, + buf, + noBuf, + BFINAL: 0, + off: 0, + pos: 0 + } +} +type InflateContext = ReturnType<typeof InflateContext> - let BFINAL=0, BTYPE=0, HLIT=0, HDIST=0, HCLEN=0, ML=0, MD=0; - let off = 0, pos = 0; - let lmap, dmap; +function inflateBlocks(ctx: InflateContext, count: number) { + const { data, noBuf } = ctx + let { buf, BFINAL, off, pos } = ctx - while(BFINAL === 0) { - BFINAL = _bitsF(data, pos , 1); - BTYPE = _bitsF(data, pos+1, 2); + let iBlock = 0 + + while(BFINAL === 0 && iBlock < count) { + let lmap, dmap; + let ML = 0, MD = 0; + + BFINAL = _bitsF(data, pos, 1); + iBlock += 1 + const BTYPE = _bitsF(data, pos + 1, 2); pos+=3; if(BTYPE === 0) { - if((pos&7) !== 0) pos+=8-(pos&7); - const p8 = (pos>>>3)+4, len = data[p8-4]|(data[p8-3]<<8); // console.log(len);//bitsF(data, pos, 16), - if(noBuf) buf=_check(buf, off+len); - buf.set(new Uint8Array(data.buffer, data.byteOffset+p8, len), off); - // for(var i=0; i<len; i++) buf[off+i] = data[p8+i]; - // for(var i=0; i<len; i++) if(buf[off+i] != data[p8+i]) throw "e"; - pos = ((p8+len)<<3); off+=len; continue; + // uncompressed block + if((pos&7) !== 0) pos += 8 - (pos&7); + const p8 = (pos>>>3) + 4 + const len = data[p8-4] | (data[p8-3]<<8); + if(noBuf) buf=_check(buf, off + len); + buf.set(new Uint8Array(data.buffer, data.byteOffset + p8, len), off); + pos = ((p8+len)<<3); + off += len; + continue; } - if(noBuf) buf=_check(buf, off+(1<<17)); // really not enough in many cases (but PNG and ZIP provide buffer in advance) + + // grow output buffer if not provided + if(noBuf) buf = _check(buf, off + (1<<17)); + if(BTYPE === 1) { + // block compressed with fixed Huffman codes lmap = U.flmap; dmap = U.fdmap; - ML = (1<<9)-1; - MD = (1<<5)-1; + ML = (1<<9) - 1; + MD = (1<<5) - 1; } else if(BTYPE === 2) { - HLIT = _bitsE(data, pos , 5)+257; - HDIST = _bitsE(data, pos+ 5, 5)+ 1; - HCLEN = _bitsE(data, pos+10, 4)+ 4; pos+=14; - // const ppos = pos; + // block compressed with dynamic Huffman codes + const HLIT = _bitsE(data, pos, 5) + 257; + const HDIST = _bitsE(data, pos + 5, 5) + 1; + const HCLEN = _bitsE(data, pos + 10, 4) + 4; + pos += 14; + for(let i=0; i<38; i+=2) { U.itree[i]=0; U.itree[i+1]=0; @@ -57,32 +78,31 @@ export function _inflate(data: Uint8Array, buf?: Uint8Array) { U.itree[(U.ordr[i]<<1)+1] = l; if(l>tl) tl = l; } - pos+=3*HCLEN; // console.log(itree); + pos += 3 * HCLEN; makeCodes(U.itree, tl); codes2map(U.itree, tl, U.imap); lmap = U.lmap; dmap = U.dmap; pos = _decodeTiny(U.imap, (1<<tl)-1, HLIT+HDIST, data, pos, U.ttree); - const mx0 = _copyOut(U.ttree, 0, HLIT , U.ltree); ML = (1<<mx0)-1; - const mx1 = _copyOut(U.ttree, HLIT, HDIST, U.dtree); MD = (1<<mx1)-1; + const mx0 = _copyOut(U.ttree, 0, HLIT , U.ltree); + ML = (1<<mx0)-1; + const mx1 = _copyOut(U.ttree, HLIT, HDIST, U.dtree); + MD = (1<<mx1)-1; - // var ml = decodeTiny(U.imap, (1<<tl)-1, HLIT , data, pos, U.ltree); ML = (1<<(ml>>>24))-1; pos+=(ml&0xffffff); makeCodes(U.ltree, mx0); codes2map(U.ltree, mx0, lmap); - // var md = decodeTiny(U.imap, (1<<tl)-1, HDIST, data, pos, U.dtree); MD = (1<<(md>>>24))-1; pos+=(md&0xffffff); makeCodes(U.dtree, mx1); codes2map(U.dtree, mx1, dmap); } else { throw new Error(`unknown BTYPE ${BTYPE}`) } - // var ooff=off, opos=pos; while(true) { const code = lmap[_get17(data, pos) & ML]; pos += code&15; - const lit = code >>> 4; // U.lhst[lit]++; + const lit = code >>> 4; if((lit >>> 8) === 0) { buf[off++] = lit; } else if(lit === 256) { @@ -94,32 +114,44 @@ export function _inflate(data: Uint8Array, buf?: Uint8Array) { end = off + (ebs>>>3) + _bitsE(data, pos, ebs&7); pos += ebs&7; } - // UZIP.F.dst[end-off]++; - const dcode = dmap[_get17(data, pos) & MD]; pos += dcode&15; + const dcode = dmap[_get17(data, pos) & MD]; + pos += dcode&15; const dlit = dcode>>>4; - const dbs = U.ddef[dlit], dst = (dbs>>>4) + _bitsF(data, pos, dbs&15); pos += dbs&15; + const dbs = U.ddef[dlit] + const dst = (dbs>>>4) + _bitsF(data, pos, dbs&15); + pos += dbs&15; - // var o0 = off-dst, stp = Math.min(end-off, dst); - // if(stp>20) while(off<end) { buf.copyWithin(off, o0, o0+stp); off+=stp; } else - // if(end-dst<=off) buf.copyWithin(off, off-dst, end-dst); else - // if(dst==1) buf.fill(buf[off-1], off, end); else if(noBuf) buf = _check(buf, off+(1<<17)); while(off<end) { - buf[off]=buf[off++-dst]; - buf[off]=buf[off++-dst]; - buf[off]=buf[off++-dst]; - buf[off]=buf[off++-dst]; + buf[off] = buf[off++-dst]; + buf[off] = buf[off++-dst]; + buf[off] = buf[off++-dst]; + buf[off] = buf[off++-dst]; } - off=end; - // while(off!=end) { buf[off]=buf[off++-dst]; } + off = end; } } - // console.log(off-ooff, (pos-opos)>>>3); } - // console.log(UZIP.F.dst); - // console.log(tlen, dlen, off-tlen+tcnt); - return buf.length === off ? buf : buf.slice(0, off); + + ctx.buf = buf + ctx.BFINAL = BFINAL + ctx.off = off + ctx.pos = pos +} + +// https://tools.ietf.org/html/rfc1951 +export async function _inflate(runtime: RuntimeContext, data: Uint8Array, buf?: Uint8Array) { + if(data[0] === 3 && data[1] === 0) return (buf ? buf : new Uint8Array(0)); + + const ctx = InflateContext(data, buf) + while(ctx.BFINAL === 0) { + if (runtime.shouldUpdate) { + await runtime.update({ message: 'Inflating blocks...', current: ctx.pos, max: data.length }) + } + inflateBlocks(ctx, 100) + } + return ctx.buf.length === ctx.off ? ctx.buf : ctx.buf.slice(0, ctx.off); } function _check(buf: Uint8Array, len: number) { @@ -127,27 +159,30 @@ function _check(buf: Uint8Array, len: number) { if(len <= bl) return buf; const nbuf = new Uint8Array(Math.max(bl << 1, len)); nbuf.set(buf, 0); - // for(var i=0; i<bl; i+=4) { nbuf[i]=buf[i]; nbuf[i+1]=buf[i+1]; nbuf[i+2]=buf[i+2]; nbuf[i+3]=buf[i+3]; } return nbuf; } function _decodeTiny(lmap: NumberArray, LL: number, len: number, data: Uint8Array, pos: number, tree: number[]) { let i = 0; - while(i<len) { + while(i < len) { const code = lmap[_get17(data, pos)&LL]; pos += code&15; const lit = code>>>4; - if(lit<=15) { + if(lit <= 15) { tree[i]=lit; i++; } else { let ll = 0, n = 0; if(lit === 16) { - n = (3 + _bitsE(data, pos, 2)); pos += 2; ll = tree[i-1]; + n = (3 + _bitsE(data, pos, 2)); + pos += 2; + ll = tree[i-1]; } else if(lit === 17) { - n = (3 + _bitsE(data, pos, 3)); pos += 3; + n = (3 + _bitsE(data, pos, 3)); + pos += 3; } else if(lit === 18) { - n = (11 + _bitsE(data, pos, 7)); pos += 7; + n = (11 + _bitsE(data, pos, 7)); + pos += 7; } const ni = i+n; while(i<ni) { @@ -160,15 +195,16 @@ function _decodeTiny(lmap: NumberArray, LL: number, len: number, data: Uint8Arra } function _copyOut(src: number[], off: number, len: number, tree: number[]) { - let mx=0, i=0, tl=tree.length>>>1; - while(i<len) { + let mx=0, i=0 + const tl=tree.length>>>1; + while(i < len) { let v=src[i+off]; tree[(i<<1)]=0; tree[(i<<1)+1]=v; if(v>mx)mx=v; i++; } - while(i<tl ) { + while(i < tl) { tree[(i<<1)]=0; tree[(i<<1)+1]=0; i++; diff --git a/src/mol-util/zip/zip.ts b/src/mol-util/zip/zip.ts index f5f5e4aaf9ba5b652958f741d1e6c43562cad3e3..8d43d47719feab6fe21ab4bd50a7440f279225c0 100644 --- a/src/mol-util/zip/zip.ts +++ b/src/mol-util/zip/zip.ts @@ -13,8 +13,9 @@ import { writeUint, writeUshort, sizeUTF8, writeUTF8, readUshort, readUint, read import { crc, adler } from './checksum'; import { _inflate } from './inflate'; import { _deflateRaw } from './deflate'; +import { RuntimeContext } from '../../mol-task'; -export function unzip(buf: ArrayBuffer, onlyNames = false) { +export async function unzip(runtime: RuntimeContext, buf: ArrayBuffer, onlyNames = false) { const out: { [k: string]: Uint8Array | { size: number, csize: number } } = Object.create(null); const data = new Uint8Array(buf); let eocd = data.length-4; @@ -57,13 +58,13 @@ export function unzip(buf: ArrayBuffer, onlyNames = false) { const roff = readUint(data, o); o+=4; o += nl + el + cl; - _readLocal(data, roff, out, csize, usize, onlyNames); + await _readLocal(runtime, data, roff, out, csize, usize, onlyNames); } // console.log(out); return out; } -function _readLocal(data: Uint8Array, o: number, out: { [k: string]: Uint8Array | { size: number, csize: number } }, csize: number, usize: number, onlyNames: boolean) { +async function _readLocal(runtime: RuntimeContext, data: Uint8Array, o: number, out: { [k: string]: Uint8Array | { size: number, csize: number } }, csize: number, usize: number, onlyNames: boolean) { // const sign = readUint(data, o); o+=4; // const ver = readUshort(data, o); @@ -83,16 +84,15 @@ function _readLocal(data: Uint8Array, o: number, out: { [k: string]: Uint8Array // var usize = rUi(data, o); o+=4; o+=8; - const nlen = readUshort(data, o); + const nlen = readUshort(data, o); o+=2; - const elen = readUshort(data, o); + const elen = readUshort(data, o); o+=2; - const name = readUTF8(data, o, nlen); + const name = readUTF8(data, o, nlen); o += nlen; // console.log(name); o += elen; - // console.log(sign.toString(16), ver, gpflg, cmpr, crc32.toString(16), "csize, usize", csize, usize, nlen, elen, name, o); if(onlyNames) { out[name] = { size: usize, csize }; return; @@ -103,33 +103,27 @@ function _readLocal(data: Uint8Array, o: number, out: { [k: string]: Uint8Array out[name] = new Uint8Array(file.buffer.slice(o, o+csize)); } else if(cmpr === 8) { const buf = new Uint8Array(usize); - inflateRaw(file, buf); - // var nbuf = pako["inflateRaw"](file); - // if(usize>8514000) { - // //console.log(PUtils.readASCII(buf , 8514500, 500)); - // //console.log(PUtils.readASCII(nbuf, 8514500, 500)); - // } - // for(var i=0; i<buf.length; i++) if(buf[i]!=nbuf[i]) { console.log(buf.length, nbuf.length, usize, i); throw "e"; } + await inflateRaw(runtime, file, buf); out[name] = buf; } else throw `unknown compression method: ${cmpr}`; } -export function inflateRaw(file: Uint8Array, buf?: Uint8Array) { - return _inflate(file, buf); +export async function inflateRaw(runtime: RuntimeContext, file: Uint8Array, buf?: Uint8Array) { + return _inflate(runtime, file, buf); } -export function inflate(file: Uint8Array, buf?: Uint8Array) { +export function inflate(runtime: RuntimeContext, file: Uint8Array, buf?: Uint8Array) { // const CMF = file[0] // const FLG = file[1] // const CM = (CMF&15) // const CINFO = (CMF>>>4); // console.log(CM, CINFO,CMF,FLG); - return inflateRaw(new Uint8Array(file.buffer, file.byteOffset+2, file.length-6), buf); + return inflateRaw(runtime, new Uint8Array(file.buffer, file.byteOffset+2, file.length-6), buf); } // https://tools.ietf.org/html/rfc1952 -export function ungzip(file: Uint8Array, buf?: Uint8Array) { +export async function ungzip(runtime: RuntimeContext, file: Uint8Array, buf?: Uint8Array) { // const id1 = file[0] // const id2 = file[1] // const cm = file[2] @@ -170,7 +164,7 @@ export function ungzip(file: Uint8Array, buf?: Uint8Array) { if (buf === undefined) buf = new Uint8Array(isize) const blocks = new Uint8Array(file.buffer, file.byteOffset + o, file.length - o - 8) - const inflated = inflateRaw(blocks, buf); + const inflated = await inflateRaw(runtime, blocks, buf); const crcValue = crc(inflated, 0, inflated.length) if (crc32 !== crcValue) { console.error("ungzip: checksums don't match")