1 // SPDX-License-Identifier: GPL-2.0-or-later
3 /* P9 gunzip sample code for demonstrating the P9 NX hardware
4 * interface. Not intended for production use or for performance or
5 * compression ratio measurements. Note also that /dev/crypto/gzip,
6 * VAS and skiboot support are required
8 * Copyright 2020 IBM Corp.
10 * Author: Bulent Abali <abali@us.ibm.com>
12 * https://github.com/libnxz/power-gzip for zlib api and other utils
13 * Definitions of acronyms used here. See
14 * P9 NX Gzip Accelerator User's Manual for details:
15 * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
17 * adler/crc: 32 bit checksums appended to stream tail
18 * ce: completion extension
19 * cpb: coprocessor parameter block (metadata)
20 * crb: coprocessor request block (command)
21 * csb: coprocessor status block (status)
22 * dht: dynamic huffman table
23 * dde: data descriptor element (address, length)
25 * dh/fh: dynamic and fixed huffman types
26 * fc: coprocessor function code
27 * histlen: history/dictionary length
28 * history: sliding window of up to 32KB of data
29 * lzcount: Deflate LZ symbol counts
30 * rembytecnt: remaining byte count
31 * sfbt: source final block type; last block's type during decomp
32 * spbc: source processed byte count
33 * subc: source unprocessed bit count
34 * tebc: target ending bit count; valid bits in the last byte
35 * tpbc: target processed byte count
36 * vas: virtual accelerator switch; the user mode interface
39 #define _ISOC11_SOURCE // For aligned_alloc()
40 #define _DEFAULT_SOURCE // For endian.h
47 #include <sys/types.h>
50 #include <sys/fcntl.h>
53 #include <bits/endian.h>
54 #include <sys/ioctl.h>
65 #define NX_MIN(X, Y) (((X) < (Y))?(X):(Y))
66 #define NX_MAX(X, Y) (((X) > (Y))?(X):(Y))
68 #define GETINPC(X) fgetc(X)
69 #define FNAME_MAX 1024
71 /* fifo queue management
 *
 * These macros describe a circular byte buffer of `len` bytes in which
 * `cur` is the offset of the oldest used byte and `used` is the number
 * of used bytes. The used region starts at `cur` and wraps around to
 * offset 0 when (cur + used) > len. Both the used and the free space
 * can therefore consist of two parts: the "first" part runs from its
 * start offset toward the end of the buffer, the "last" part lies at
 * the low end of the buffer, below `cur`.
 *
 * Most of these macros evaluate their arguments more than once, so
 * only pass expressions without side effects.
 */
72 #define fifo_used_bytes(used) (used)
73 #define fifo_free_bytes(used, len) ((len)-(used))
74 /* amount of free bytes in the first and last parts; when the used
 * data wraps around, the first part is 0 and all free space
 * (len - used) is reported as the last part
 */
75 #define fifo_free_first_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
76 ? (len)-((cur)+(used)) : 0)
77 #define fifo_free_last_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
78 ? (cur) : (len)-(used))
79 /* amount of used bytes in the first and last parts; the last part is
 * 0 unless the used data wraps around the end of the buffer
 */
80 #define fifo_used_first_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
81 ? (used) : (len)-(cur))
82 #define fifo_used_last_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
83 ? 0 : ((used)+(cur))-(len))
84 /* first and last free parts start here; the first offset is not
 * reduced modulo len, so it is >= len whenever the first free part is
 * empty. The last free part starts right after the wrapped used data,
 * i.e. at offset 0 when the used data does not wrap.
 */
85 #define fifo_free_first_offset(cur, used) ((cur)+(used))
86 #define fifo_free_last_offset(cur, used, len) \
87 fifo_used_last_bytes(cur, used, len)
88 /* first and last used parts start here; the wrapped (last) used part
 * always begins at offset 0
 */
89 #define fifo_used_first_offset(cur) (cur)
90 #define fifo_used_last_offset(cur) (0)
/* Buffer geometry, all values in bytes. */
92 const int fifo_in_len = 1<<24; /* 16 MiB: capacity of the compressed-input fifo (fifo_in) */
93 const int fifo_out_len = 1<<24; /* 16 MiB: capacity of the decompressed-output fifo (fifo_out) */
94 const int page_sz = 1<<16; /* 64 KiB: page size passed to nxu_touch_pages(); also added as slack to both fifo allocations */
95 const int line_sz = 1<<7; /* 128: alignment requested from aligned_alloc() and the offset applied to fifo_out */
96 const int window_max = 1<<15; /* 32 KiB: upper bound applied to history_len on resume */
99 * Adds an (address, len) pair to the list of ddes (ddl) and updates
100 * the base dde. ddl[0] is the only dde in a direct dde which
101 * contains a single (addr,len) pair. For more pairs, ddl[0] becomes
102 * the indirect (base) dde that points to a list of direct ddes.
103 * See Section 6.4 of the NX-gzip user manual for DDE description.
104 * Addr=NULL, len=0 clears the ddl[0]. Returns the total number of
105 * bytes in ddl. Caller is responsible for allocating the array of
106 * nx_dde_t *ddl. If N addresses are required in the scatter-gather
107 * list, the ddl array must have N+1 entries minimum.
109 static inline uint32_t nx_append_dde(struct nx_dde_t *ddl, void *addr,
115 if (addr == NULL && len == 0) {
120 NXPRT(fprintf(stderr, "%d: %s addr %p len %x\n", __LINE__, addr,
123 /* Number of ddes in the dde list ; == 0 when it is a direct dde */
124 ddecnt = getpnn(ddl, dde_count);
125 bytes = getp32(ddl, ddebc);
127 if (ddecnt == 0 && bytes == 0) {
128 /* First dde is unused; make it a direct dde */
130 putp32(ddl, ddebc, bytes);
131 putp64(ddl, ddead, (uint64_t) addr);
132 } else if (ddecnt == 0) {
133 /* Converting direct to indirect dde
134 * ddl[0] becomes head dde of ddl
135 * copy direct to indirect first.
139 /* Add the new dde next */
141 put32(ddl[2], ddebc, len);
142 put64(ddl[2], ddead, (uint64_t) addr);
144 /* Ddl head points to 2 direct ddes */
146 putpnn(ddl, dde_count, ddecnt);
148 putp32(ddl, ddebc, bytes);
149 /* Pointer to the first direct dde */
150 putp64(ddl, ddead, (uint64_t) &ddl[1]);
152 /* Append a dde to an existing indirect ddl */
154 clear_dde(ddl[ddecnt]);
155 put64(ddl[ddecnt], ddead, (uint64_t) addr);
156 put32(ddl[ddecnt], ddebc, len);
158 putpnn(ddl, dde_count, ddecnt);
160 putp32(ddl, ddebc, bytes); /* byte sum of all dde */
166 * Touch the specified number of pages, expressed as a number of bytes,
167 * beginning from the first buffer in a dde list.
168 * Do not touch the pages past buf_sz-th byte's page.
170 * Set buf_sz = 0 to touch all pages described by the ddep.
172 static int nx_touch_pages_dde(struct nx_dde_t *ddep, long buf_sz, long page_sz,
175 uint32_t indirect_count;
179 struct nx_dde_t *dde_list;
184 indirect_count = getpnn(ddep, dde_count);
186 NXPRT(fprintf(stderr, "%s dde_count %d request len ", __func__,
188 NXPRT(fprintf(stderr, "0x%lx\n", buf_sz));
190 if (indirect_count == 0) {
192 buf_len = getp32(ddep, ddebc);
193 buf_addr = getp64(ddep, ddead);
195 NXPRT(fprintf(stderr, "touch direct ddebc 0x%x ddead %p\n",
196 buf_len, (void *)buf_addr));
199 nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
201 nxu_touch_pages((void *)buf_addr, NX_MIN(buf_len,
202 buf_sz), page_sz, wr);
208 if (indirect_count > MAX_DDE_COUNT)
209 return ERR_NX_EXCESSIVE_DDE;
211 /* First address of the list */
212 dde_list = (struct nx_dde_t *) getp64(ddep, ddead);
215 buf_sz = getp32(ddep, ddebc);
218 for (i = 0; i < indirect_count; i++) {
219 buf_len = get32(dde_list[i], ddebc);
220 buf_addr = get64(dde_list[i], ddead);
223 NXPRT(fprintf(stderr, "touch loop len 0x%x ddead %p total ",
224 buf_len, (void *)buf_addr));
225 NXPRT(fprintf(stderr, "0x%lx\n", total));
227 /* Touching fewer pages than encoded in the ddebc */
228 if (total > buf_sz) {
229 buf_len = NX_MIN(buf_len, total - buf_sz);
230 nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
231 NXPRT(fprintf(stderr, "touch loop break len 0x%x ",
233 NXPRT(fprintf(stderr, "ddead %p\n", (void *)buf_addr));
236 nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
242 * Src and dst buffers are supplied in scatter gather lists.
243 * NX function code and other parameters supplied in cmdp.
245 static int nx_submit_job(struct nx_dde_t *src, struct nx_dde_t *dst,
246 struct nx_gzip_crb_cpb_t *cmdp, void *handle)
250 memset((void *)&cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
252 cmdp->crb.source_dde = *src;
253 cmdp->crb.target_dde = *dst;
255 /* Status, output byte count in tpbc */
256 csbaddr = ((uint64_t) &cmdp->crb.csb) & csb_address_mask;
257 put64(cmdp->crb, csb_address, csbaddr);
259 /* NX reports input bytes in spbc; cleared */
260 cmdp->cpb.out_spbc_comp_wrap = 0;
261 cmdp->cpb.out_spbc_comp_with_count = 0;
262 cmdp->cpb.out_spbc_decomp = 0;
265 put32(cmdp->cpb, out_crc, INIT_CRC);
266 put32(cmdp->cpb, out_adler, INIT_ADLER);
268 /* Submit the crb, the job descriptor, to the accelerator. */
269 return nxu_submit_job(cmdp, handle);
272 int decompress_file(int argc, char **argv, void *devhandle)
277 int c, expect, i, cc, rc = 0;
278 char gzfname[FNAME_MAX];
280 /* Queuing, file ops, byte counting */
281 char *fifo_in, *fifo_out;
282 int used_in, cur_in, used_out, cur_out, read_sz, n;
283 int first_free, last_free, first_used, last_used;
284 int first_offset, last_offset;
285 int write_sz, free_space, source_sz;
286 int source_sz_estimate, target_sz_estimate;
287 uint64_t last_comp_ratio = 0; /* 1000 max */
288 uint64_t total_out = 0;
289 int is_final, is_eof;
292 int sfbt, subc, spbc, tpbc, nx_ce, fc, resuming = 0;
294 struct nx_gzip_crb_cpb_t cmd, *cmdp;
295 struct nx_dde_t *ddl_in;
296 struct nx_dde_t dde_in[6] __aligned(128);
297 struct nx_dde_t *ddl_out;
298 struct nx_dde_t dde_out[6] __aligned(128);
301 /* when using mmap'ed files */
302 off_t input_file_offset;
305 fprintf(stderr, "usage: %s <fname> or stdin\n", argv[0]);
306 fprintf(stderr, " writes to stdout or <fname>.nx.gunzip\n");
313 } else if (argc == 2) {
317 inpf = fopen(argv[1], "r");
323 /* Make a new file name to write to. Ignoring '.gz' */
324 wp = (NULL != (wp = strrchr(argv[1], '/'))) ? (wp+1) : argv[1];
326 strcat(w, ".nx.gunzip");
328 outf = fopen(w, "w");
335 /* Decode the gzip header */
336 c = GETINPC(inpf); expect = 0x1f; /* ID1 */
340 c = GETINPC(inpf); expect = 0x8b; /* ID2 */
344 c = GETINPC(inpf); expect = 0x08; /* CM */
348 int flg = GETINPC(inpf); /* FLG */
350 if (flg & 0xE0 || flg & 0x4 || flg == EOF)
353 fprintf(stderr, "gzHeader FLG %x\n", flg);
355 /* Read 6 bytes; ignoring the MTIME, XFL, OS fields in this
358 for (i = 0; i < 6; i++) {
361 tmp[i] = GETINPC(inpf);
364 fprintf(stderr, "%02x ", tmp[i]);
366 fprintf(stderr, "\n");
368 fprintf(stderr, "gzHeader MTIME, XFL, OS ignored\n");
376 if (c == EOF || k >= FNAME_MAX)
380 fprintf(stderr, "gzHeader FNAME: %s\n", gzfname);
391 fprintf(stderr, "gzHeader FHCRC: ignored\n");
394 used_in = cur_in = used_out = cur_out = 0;
395 is_final = is_eof = 0;
397 /* Allocate one page larger to prevent page faults due to NX
399 * Either do this (char*)(uintptr_t)aligned_alloc or use
400 * -std=c11 flag to make the int-to-pointer warning go away.
402 assert((fifo_in = (char *)(uintptr_t)aligned_alloc(line_sz,
403 fifo_in_len + page_sz)) != NULL);
404 assert((fifo_out = (char *)(uintptr_t)aligned_alloc(line_sz,
405 fifo_out_len + page_sz + line_sz)) != NULL);
406 /* Leave unused space due to history rounding rules */
407 fifo_out = fifo_out + line_sz;
408 nxu_touch_pages(fifo_out, fifo_out_len, page_sz, 1);
411 ddl_out = &dde_out[0];
413 memset(&cmdp->crb, 0, sizeof(cmdp->crb));
417 /* Read from .gz file */
419 NXPRT(fprintf(stderr, "read_state:\n"));
424 /* We read in to fifo_in in two steps: first: read in to the free
425 * space from cur_in + used_in to the end of the buffer. last: if free
426 * space wrapped around, read from fifo_in offset 0 to offset cur_in.
429 /* Reset fifo head to reduce unnecessary wrap arounds */
430 cur_in = (used_in == 0) ? 0 : cur_in;
432 /* Free space total is reduced by a gap */
433 free_space = NX_MAX(0, fifo_free_bytes(used_in, fifo_in_len)
436 /* Free space may wrap around as first and last */
437 first_free = fifo_free_first_bytes(cur_in, used_in, fifo_in_len);
438 last_free = fifo_free_last_bytes(cur_in, used_in, fifo_in_len);
440 /* Start offsets of the free memory */
441 first_offset = fifo_free_first_offset(cur_in, used_in);
442 last_offset = fifo_free_last_offset(cur_in, used_in, fifo_in_len);
444 /* Reduce read_sz because of the line_sz gap */
445 read_sz = NX_MIN(free_space, first_free);
448 /* Read in to offset cur_in + used_in */
449 n = fread(fifo_in + first_offset, 1, read_sz, inpf);
450 used_in = used_in + n;
451 free_space = free_space - n;
452 assert(n <= read_sz);
454 /* Either EOF or error; exit the read loop */
460 /* If free space wrapped around */
462 /* Reduce read_sz because of the line_sz gap */
463 read_sz = NX_MIN(free_space, last_free);
466 n = fread(fifo_in + last_offset, 1, read_sz, inpf);
467 used_in = used_in + n; /* Increase used space */
468 free_space = free_space - n; /* Decrease free space */
469 assert(n <= read_sz);
471 /* Either EOF or error; exit the read loop */
478 /* At this point we have used_in bytes in fifo_in with the
479 * data head starting at cur_in and possibly wrapping around.
484 /* Write decompressed data to output file */
486 NXPRT(fprintf(stderr, "write_state:\n"));
491 /* If fifo_out has data waiting, write it out to the file to
492 * make free target space for the accelerator used bytes in
493 * the first and last parts of fifo_out.
496 first_used = fifo_used_first_bytes(cur_out, used_out, fifo_out_len);
497 last_used = fifo_used_last_bytes(cur_out, used_out, fifo_out_len);
499 write_sz = first_used;
503 n = fwrite(fifo_out + cur_out, 1, write_sz, outf);
504 used_out = used_out - n;
505 /* Move head of the fifo */
506 cur_out = (cur_out + n) % fifo_out_len;
507 assert(n <= write_sz);
509 fprintf(stderr, "error: write\n");
515 if (last_used > 0) { /* If more data available in the last part */
516 write_sz = last_used; /* Keep it here for later */
519 n = fwrite(fifo_out, 1, write_sz, outf);
520 used_out = used_out - n;
521 cur_out = (cur_out + n) % fifo_out_len;
522 assert(n <= write_sz);
524 fprintf(stderr, "error: write\n");
533 /* NX decompresses input data */
535 NXPRT(fprintf(stderr, "decomp_state:\n"));
540 /* Address/len lists */
544 /* FC, CRC, HistLen, Table 6-6 */
546 /* Resuming a partially decompressed input.
547 * The key to resume is supplying the 32KB
548 * dictionary (history) to NX, which is basically
549 * the last 32KB of output produced.
551 fc = GZIP_FC_DECOMPRESS_RESUME;
553 cmdp->cpb.in_crc = cmdp->cpb.out_crc;
554 cmdp->cpb.in_adler = cmdp->cpb.out_adler;
556 /* Round up the history size to quadword. Section 2.10 */
557 history_len = (history_len + 15) / 16;
558 putnn(cmdp->cpb, in_histlen, history_len);
559 history_len = history_len * 16; /* bytes */
561 if (history_len > 0) {
562 /* Chain in the history buffer to the DDE list */
563 if (cur_out >= history_len) {
564 nx_append_dde(ddl_in, fifo_out
565 + (cur_out - history_len),
568 nx_append_dde(ddl_in, fifo_out
569 + ((fifo_out_len + cur_out)
571 history_len - cur_out);
572 /* Up to 32KB history wraps around fifo_out */
573 nx_append_dde(ddl_in, fifo_out, cur_out);
578 /* First decompress job */
579 fc = GZIP_FC_DECOMPRESS;
582 /* Writing 0 clears out subc as well */
583 cmdp->cpb.in_histlen = 0;
586 put32(cmdp->cpb, in_crc, INIT_CRC);
587 put32(cmdp->cpb, in_adler, INIT_ADLER);
588 put32(cmdp->cpb, out_crc, INIT_CRC);
589 put32(cmdp->cpb, out_adler, INIT_ADLER);
591 /* Assuming 10% compression ratio initially; use the
592 * most recently measured compression ratio as a
593 * heuristic to estimate the input and output
594 * sizes. If we give too much input, the target buffer
595 * overflows and NX cycles are wasted, and then we
596 * must retry with smaller input size. 1000 is 100%.
598 last_comp_ratio = 100UL;
600 cmdp->crb.gzip_fc = 0;
601 putnn(cmdp->crb, gzip_fc, fc);
606 first_used = fifo_used_first_bytes(cur_in, used_in, fifo_in_len);
607 last_used = fifo_used_last_bytes(cur_in, used_in, fifo_in_len);
610 nx_append_dde(ddl_in, fifo_in + cur_in, first_used);
613 nx_append_dde(ddl_in, fifo_in, last_used);
618 first_free = fifo_free_first_bytes(cur_out, used_out, fifo_out_len);
619 last_free = fifo_free_last_bytes(cur_out, used_out, fifo_out_len);
621 /* Reduce output free space amount not to overwrite the history */
622 int target_max = NX_MAX(0, fifo_free_bytes(used_out, fifo_out_len)
625 NXPRT(fprintf(stderr, "target_max %d (0x%x)\n", target_max,
628 first_free = NX_MIN(target_max, first_free);
629 if (first_free > 0) {
630 first_offset = fifo_free_first_offset(cur_out, used_out);
631 nx_append_dde(ddl_out, fifo_out + first_offset, first_free);
635 last_free = NX_MIN(target_max - first_free, last_free);
637 last_offset = fifo_free_last_offset(cur_out, used_out,
639 nx_append_dde(ddl_out, fifo_out + last_offset,
644 /* Target buffer size is used to limit the source data size
645 * based on previous measurements of compression ratio.
648 /* source_sz includes history */
649 source_sz = getp32(ddl_in, ddebc);
650 assert(source_sz > history_len);
651 source_sz = source_sz - history_len;
653 /* Estimating how much source is needed to 3/4 fill a
654 * target_max size target buffer. If we overshoot, then NX
655 * must repeat the job with smaller input and we waste
656 * bandwidth. If we undershoot then we use more NX calls than
660 source_sz_estimate = ((uint64_t)target_max * last_comp_ratio * 3UL)
663 if (source_sz_estimate < source_sz) {
664 /* Target might be small, therefore limiting the
667 source_sz = source_sz_estimate;
668 target_sz_estimate = target_max;
670 /* Source file might be small, therefore limiting target
671 * touch pages to a smaller value to save processor cycles.
673 target_sz_estimate = ((uint64_t)source_sz * 1000UL)
674 / (last_comp_ratio + 1);
675 target_sz_estimate = NX_MIN(2 * target_sz_estimate,
679 source_sz = source_sz + history_len;
681 /* Some NX condition codes require submitting the NX job again.
682 * Kernel doesn't handle NX page faults. Expects user code to
685 pgfault_retries = NX_MAX_FAULTS;
689 putp32(ddl_in, ddebc, source_sz);
692 nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), page_sz, 1);
693 nx_touch_pages_dde(ddl_in, 0, page_sz, 0);
694 nx_touch_pages_dde(ddl_out, target_sz_estimate, page_sz, 1);
697 cc = nx_submit_job(ddl_in, ddl_out, cmdp, devhandle);
701 case ERR_NX_TRANSLATION:
703 /* We touched the pages ahead of time. In the most common case
704 * we shouldn't be here. But maybe some pages were paged out.
705 * Kernel should have placed the faulting address to fsaddr.
707 NXPRT(fprintf(stderr, "ERR_NX_TRANSLATION %p\n",
708 (void *)cmdp->crb.csb.fsaddr));
710 if (pgfault_retries == NX_MAX_FAULTS) {
711 /* Try once with exact number of pages */
714 } else if (pgfault_retries > 0) {
715 /* If still faulting try fewer input pages
716 * assuming memory outage
718 if (source_sz > page_sz)
719 source_sz = NX_MAX(source_sz / 2, page_sz);
723 fprintf(stderr, "cannot make progress; too many ");
724 fprintf(stderr, "page fault retries cc= %d\n", cc);
729 case ERR_NX_DATA_LENGTH:
731 NXPRT(fprintf(stderr, "ERR_NX_DATA_LENGTH; "));
732 NXPRT(fprintf(stderr, "stream may have trailing data\n"));
734 /* Not an error in the most common case; it just says
735 * there is trailing data that we must examine.
737 * CC=3 CE(1)=0 CE(0)=1 indicates partial completion
738 * Fig.6-7 and Table 6-8.
740 nx_ce = get_csb_ce_ms3b(cmdp->crb.csb);
742 if (!csb_ce_termination(nx_ce) &&
743 csb_ce_partial_completion(nx_ce)) {
744 /* Check CPB for more information
745 * spbc and tpbc are valid
747 sfbt = getnn(cmdp->cpb, out_sfbt); /* Table 6-4 */
748 subc = getnn(cmdp->cpb, out_subc); /* Table 6-4 */
749 spbc = get32(cmdp->cpb, out_spbc_decomp);
750 tpbc = get32(cmdp->crb.csb, tpbc);
751 assert(target_max >= tpbc);
753 goto ok_cc3; /* not an error */
755 /* History length error when CE(1)=1 CE(0)=0. */
757 fprintf(stderr, "history length error cc= %d\n", cc);
761 case ERR_NX_TARGET_SPACE:
763 /* Target buffer not large enough; retry smaller input
764 * data; give at least 1 byte. SPBC/TPBC are not valid.
766 assert(source_sz > history_len);
767 source_sz = ((source_sz - history_len + 2) / 2) + history_len;
768 NXPRT(fprintf(stderr, "ERR_NX_TARGET_SPACE; retry with "));
769 NXPRT(fprintf(stderr, "smaller input data src %d hist %d\n",
770 source_sz, history_len));
775 /* This should not happen for gzip formatted data;
776 * we need trailing crc and isize
778 fprintf(stderr, "ERR_NX_OK\n");
779 spbc = get32(cmdp->cpb, out_spbc_decomp);
780 tpbc = get32(cmdp->crb.csb, tpbc);
781 assert(target_max >= tpbc);
782 assert(spbc >= history_len);
783 source_sz = spbc - history_len;
787 fprintf(stderr, "error: cc= %d\n", cc);
794 NXPRT(fprintf(stderr, "cc3: sfbt: %x\n", sfbt));
796 assert(spbc > history_len);
797 source_sz = spbc - history_len;
799 /* Table 6-4: Source Final Block Type (SFBT) describes the
800 * last processed deflate block and clues the software how to
801 * resume the next job. SUBC indicates how many input bits NX
802 * consumed but did not process. SPBC indicates how many
803 * bytes of source were given to the accelerator including
810 case 0x0: /* Deflate final EOB received */
812 /* Calculating the checksum start position. */
814 source_sz = source_sz - subc / 8;
818 /* Resume decompression cases are below. Basically
819 * indicates where NX has suspended and how to resume
823 case 0x8: /* Within a literal block; use rembytecount */
824 case 0x9: /* Within a literal block; use rembytecount; bfinal=1 */
826 /* Supply the partially processed source byte again */
827 source_sz = source_sz - ((subc + 7) / 8);
829 /* SUBC LS 3bits: number of bits in the first source byte need
831 * 000 means all 8 bits; Table 6-3
832 * Clear subc, histlen, sfbt, rembytecnt, dhtlen
834 cmdp->cpb.in_subc = 0;
835 cmdp->cpb.in_sfbt = 0;
836 putnn(cmdp->cpb, in_subc, subc % 8);
837 putnn(cmdp->cpb, in_sfbt, sfbt);
838 putnn(cmdp->cpb, in_rembytecnt, getnn(cmdp->cpb,
842 case 0xA: /* Within a FH block; */
843 case 0xB: /* Within a FH block; bfinal=1 */
845 source_sz = source_sz - ((subc + 7) / 8);
847 /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
848 cmdp->cpb.in_subc = 0;
849 cmdp->cpb.in_sfbt = 0;
850 putnn(cmdp->cpb, in_subc, subc % 8);
851 putnn(cmdp->cpb, in_sfbt, sfbt);
854 case 0xC: /* Within a DH block; */
855 case 0xD: /* Within a DH block; bfinal=1 */
857 source_sz = source_sz - ((subc + 7) / 8);
859 /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
860 cmdp->cpb.in_subc = 0;
861 cmdp->cpb.in_sfbt = 0;
862 putnn(cmdp->cpb, in_subc, subc % 8);
863 putnn(cmdp->cpb, in_sfbt, sfbt);
865 dhtlen = getnn(cmdp->cpb, out_dhtlen);
866 putnn(cmdp->cpb, in_dhtlen, dhtlen);
867 assert(dhtlen >= 42);
869 /* Round up to a qword */
870 dhtlen = (dhtlen + 127) / 128;
872 while (dhtlen > 0) { /* Copy dht from cpb.out to cpb.in */
874 cmdp->cpb.in_dht[dhtlen] = cmdp->cpb.out_dht[dhtlen];
878 case 0xE: /* Within a block header; bfinal=0; */
879 /* Also given if source data exactly ends (SUBC=0) with
880 * EOB code with BFINAL=0. Means the next byte will
881 * contain a block header.
883 case 0xF: /* within a block header with BFINAL=1. */
885 source_sz = source_sz - ((subc + 7) / 8);
887 /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
888 cmdp->cpb.in_subc = 0;
889 cmdp->cpb.in_sfbt = 0;
890 putnn(cmdp->cpb, in_subc, subc % 8);
891 putnn(cmdp->cpb, in_sfbt, sfbt);
893 /* Engine did not process any data */
894 if (is_eof && (source_sz == 0))
900 /* Adjust the source and target buffer offsets and lengths */
902 NXPRT(fprintf(stderr, "offsets_state:\n"));
904 /* Delete input data from fifo_in */
905 used_in = used_in - source_sz;
906 cur_in = (cur_in + source_sz) % fifo_in_len;
907 input_file_offset = input_file_offset + source_sz;
909 /* Add output data to fifo_out */
910 used_out = used_out + tpbc;
912 assert(used_out <= fifo_out_len);
914 total_out = total_out + tpbc;
916 /* Deflate history is 32KB max. No need to supply more
917 * than 32KB on a resume.
919 history_len = (total_out > window_max) ? window_max : total_out;
921 /* To estimate expected expansion in the next NX job; 500 means 50%.
922 * Deflate best case is around 1 to 1000.
924 last_comp_ratio = (1000UL * ((uint64_t)source_sz + 1))
925 / ((uint64_t)tpbc + 1);
926 last_comp_ratio = NX_MAX(NX_MIN(1000UL, last_comp_ratio), 1);
927 NXPRT(fprintf(stderr, "comp_ratio %ld source_sz %d spbc %d tpbc %d\n",
928 last_comp_ratio, source_sz, spbc, tpbc));
934 NXPRT(fprintf(stderr, "finish_state:\n"));
938 goto write_state; /* More data to write out */
939 else if (used_in < 8) {
940 /* Need at least 8 more bytes containing gzip crc
946 /* Compare checksums and exit */
948 unsigned char tail[8];
949 uint32_t cksum, isize;
951 for (i = 0; i < 8; i++)
952 tail[i] = fifo_in[(cur_in + i) % fifo_in_len];
953 fprintf(stderr, "computed checksum %08x isize %08x\n",
954 cmdp->cpb.out_crc, (uint32_t) (total_out
956 cksum = ((uint32_t) tail[0] | (uint32_t) tail[1]<<8
957 | (uint32_t) tail[2]<<16
958 | (uint32_t) tail[3]<<24);
959 isize = ((uint32_t) tail[4] | (uint32_t) tail[5]<<8
960 | (uint32_t) tail[6]<<16
961 | (uint32_t) tail[7]<<24);
962 fprintf(stderr, "stored checksum %08x isize %08x\n",
965 if (cksum == cmdp->cpb.out_crc && isize == (uint32_t)
966 (total_out % (1ULL<<32))) {
978 fprintf(stderr, "error: not a gzip file, expect %x, read %x\n",
983 fprintf(stderr, "error: the FLG byte is wrong or not being handled\n");
987 fprintf(stderr, "error: gzip header\n");
991 fprintf(stderr, "error: checksum missing or mismatch\n");
995 fprintf(stderr, "decomp is complete: fclose\n");
1002 int main(int argc, char **argv)
1005 struct sigaction act;
1011 act.sa_sigaction = nxu_sigsegv_handler;
1012 act.sa_flags = SA_SIGINFO;
1013 act.sa_restorer = 0;
1014 sigemptyset(&act.sa_mask);
1015 sigaction(SIGSEGV, &act, NULL);
1017 handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
1019 fprintf(stderr, "Unable to init NX, errno %d\n", errno);
1023 rc = decompress_file(argc, argv, handle);
1025 nx_function_end(handle);