Skip to content

Commit bd9b097

Browse files
author
tansy
committed
use man in doc/
1 parent 3bf20fc commit bd9b097

4 files changed

Lines changed: 247 additions & 7 deletions

File tree

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,8 @@ install(TARGETS kanzi
218218
)
219219

220220
# Install the man page (prefer pre-compressed source file).
221-
set(KANZI_MANPAGE_GZ "${CMAKE_CURRENT_SOURCE_DIR}/kanzi.1.gz")
222-
set(KANZI_MANPAGE "${CMAKE_CURRENT_SOURCE_DIR}/kanzi.1")
221+
set(KANZI_MANPAGE_GZ "${CMAKE_CURRENT_SOURCE_DIR}/doc/kanzi.1.gz")
222+
set(KANZI_MANPAGE "${CMAKE_CURRENT_SOURCE_DIR}/doc/kanzi.1")
223223
set(KANZI_MANPAGE_TO_INSTALL "")
224224

225225
if(EXISTS "${KANZI_MANPAGE_GZ}")

doc/kanzi.1

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
.TH "KANZI" "1" "Feb 2026" "kanzi 2.5" "User Commands"
2+
.SH "NAME"
3+
\fBkanzi\fR \- Compress and decompress \.knz files
4+
.SH "SYNOPSIS"
5+
\fBkanzi\fR [\fIOPTIONS\fR] [\-i \fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR]
6+
.SH "DESCRIPTION"
7+
\fBKanzi\fR is a modern, modular, portable and efficient lossless data compressor\.
8+
9+
Modern algorithms are implemented, and the built-in multi-threading takes advantage of multi-core CPUs.
10+
An entropy codec and a combination of transforms can be provided at runtime to best match the kind of data to compress.
11+
The code is optimized for efficiency (trade-off between compression ratio and speed)\.
12+
13+
Unlike most common lossless data compressors, \fBKanzi\fR uses a variety of different compression algorithms and supports a wider range of compression ratios as a result\.
14+
15+
\fBKanzi\fR is multithreaded by design and uses several threads by default to compress or decompress blocks concurrently\. It is not compatible with standard compression formats such as zip, gz, zstd, br, lz4, xz\. \fBKanzi\fR is a lossless data compressor, not an archiver\. It uses checksums (optional but recommended) to validate data integrity but does not have a mechanism for data recovery. It also lacks data deduplication across files\.
16+
17+
\fBKanzi\fR generates a bitstream that is seekable (one or several consecutive blocks can be decompressed without the need for the whole bitstream to be decompressed)\.
18+
19+
.SH "OPTIONS"
20+
.SS "Operation Mode"
21+
Help\.
22+
23+
\fB-h, --help\fR
24+
display this message
25+
26+
Compression mode\.
27+
28+
\fB-i, --input=<inputName>\fR
29+
Mandatory name of the input file or directory or 'stdin'
30+
When the source is a directory, all files in it will be processed.
31+
Provide /. at the end of the directory name to avoid recursion
32+
(e.g., myDir/. => no recursion)
33+
34+
\fB-o, --output=<outputName>\fR
35+
Optional name of the output file or directory (defaults to
36+
<inputName.knz>) or 'none' or 'stdout'. 'stdout' is not valid
37+
when the number of jobs is greater than 1
38+
39+
\fB-b, --block=<size>\fR
40+
Size of blocks (default 4|8|16|32 MB based on level, max 1 GB, min 1 KB)
41+
'auto' means that the compressor derives the best value
42+
based on input size (when available) and number of jobs
43+
44+
\fB-l, --level=<compression>\fR
45+
Set the compression level [0..9]
46+
Providing this option forces the entropy codec and transform.
47+
See the definitions of the transforms and entropy codecs in the last section.
48+
0 = NONE&NONE (store)
49+
1 = LZX&NONE
50+
2 = DNA+LZ&HUFFMAN
51+
3 = TEXT+UTF+PACK+MM+LZX&HUFFMAN
52+
4 = TEXT+UTF+EXE+PACK+MM+ROLZ&NONE
53+
5 = TEXT+UTF+BWT+RANK+ZRLT&ANS0
54+
6 = TEXT+UTF+BWT+SRT+ZRLT&FPAQ
55+
7 = LZP+TEXT+UTF+BWT+LZP&CM
56+
8 = EXE+RLT+TEXT+UTF+DNA&TPAQ
57+
9 = EXE+RLT+TEXT+UTF+DNA&TPAQX
58+
59+
\fB-e, --entropy=<codec>\fR
60+
entropy codec [None|Huffman|ANS0|ANS1|Range|FPAQ|TPAQ|TPAQX|CM]
61+
62+
\fB-t, --transform=<codec>\fR
63+
transform [None|BWT|BWTS|LZ|LZX|LZP|ROLZ|ROLZX|RLT|ZRLT]
64+
[MTFT|RANK|SRT|TEXT|MM|EXE|UTF|PACK]
65+
e.g., BWT+RANK or BWTS+MTFT (default is BWT+RANK+ZRLT)
66+
67+
\fB-x, -x32, -x64, --checksum=<size>\fR
68+
Enable block checksum (32 or 64 bits). During decompression data is verified against the checksum in each block.
69+
-x is equivalent to -x32.
70+
71+
\fB-s, --skip\fR
72+
copy blocks with high entropy instead of compressing them
73+
74+
\fB--rm\fR
75+
Remove the input file after successful (de)compression.
76+
If the input is a directory, all processed files under the directory are removed.
77+
78+
79+
Decompression mode\.
80+
81+
\fB-i, --input=<inputName>\fR
82+
Mandatory name of the input file or directory or 'stdin'
83+
When the source is a directory, all files in it will be processed.
84+
Provide /. at the end of the directory name to avoid recursion
85+
(e.g., myDir/. => no recursion)
86+
87+
\fB-o, --output=<outputName>\fR
88+
Optional name of the output file or directory (defaults to
89+
<inputName.bak>) or 'none' or 'stdout'. 'stdout' is not valid
90+
when the number of jobs is greater than 1.
91+
92+
\fB--from=blockId\fR
93+
Decompress starting from the provided block (inclusive).
94+
The first block ID is 1.
95+
96+
\fB--to=blockId\fR
97+
Decompress ending at the provided block (exclusive).
98+
99+
\fB--rm\fR
100+
Remove the input file after successful (de)compression.
101+
If the input is a directory, all processed files under the directory are removed.
102+
103+
Info mode\.
104+
105+
\fB-i, --input=<inputName>\fR
106+
Mandatory name of the compressed input file.
107+
When the source is a directory, all files in it will be processed.
108+
Provide /. at the end of the directory name to avoid recursion
109+
(e.g., myDir/. => no recursion)
110+
111+
112+
Operation modifiers\.
113+
114+
\fB-j, --jobs=<jobs>\fR
115+
Maximum number of jobs the program may start concurrently
116+
(default is half the available cores, maximum is 64)
117+
118+
\fB-v, --verbose=<level>\fR
119+
0=silent, 1=default, 2=display details, 3=display configuration,
120+
4=display block size and timings, 5=display extra information
121+
Verbosity is reduced to 1 when files are processed concurrently
122+
Verbosity is reduced to 0 when the output is 'stdout'
123+
124+
\fB-f, --force\fR
125+
Overwrite the output file if it already exists
126+
127+
\fB--skip-links\fR
128+
Skip symbolic links
129+
130+
\fB--skip-dot-files\fR
131+
Skip dotfiles
132+
133+
134+
.SS "Examples"
135+
136+
Recursively compress all files under 'dir' in test mode (no output file), using a 4 MB block size, compression level 4, and extra verbosity.
137+
kanzi -c -i dir -o none -b 4m -l 4 -v 3
138+
139+
Compress foo.txt to foo.txt.knz (overwrite it if it already exists) using the BWT, MTFT, and ZRLT transforms, the FPAQ entropy codec, and 4 threads; generate a checksum for each 4 MB block.
140+
kanzi -c -i foo.txt -f -t BWT+MTFT+ZRLT -b 4m -e FPAQ -j 4 -x
141+
142+
Compress from stdin (--input option is omitted) to foo.knz using compression level 2, 64 KB blocks, and the default number of threads.
143+
cat foo.txt | kanzi -c -o foo.knz -l 2 -b 64k
144+
145+
Decompress foo.txt.knz to foo.txt.knz.bak using 2 threads.
146+
kanzi -d -i foo.txt.knz -j 2
147+
148+
Decompress foo.txt.knz to stdout and delete the compressed file.
149+
kanzi -d -i foo.txt.knz -o stdout --rm
150+
151+
Decompress foo.txt.knz to foo.txt (overwrite it if it already exists) from block 5 to block 11, using 8 threads and extra verbosity.
152+
kanzi -d -i foo.txt.knz -o foo.txt -f -j 8 --from=5 --to=11 -v 4
153+
154+
155+
.SS "Transforms"
156+
157+
BWT: Burrows-Wheeler Transform is a transform that reorders symbols
158+
in a reversible way that is more amenable to entropy coding.
159+
This implementation uses a linear time forward transform and parallel
160+
inverse transform.
161+
162+
BWTS: Burrows-Wheeler Transform by Scott is a bijective variant of the BWT.
163+
164+
LZ: Lempel-Ziv implementation of the dictionary-based LZ77 transform that
165+
removes redundancy in the data.
166+
167+
LZX: Lempel-Ziv Extra. Same as above with a bigger hash table and more
168+
match searches.
169+
170+
LZP: Lempel-Ziv Prediction can be described as an LZ implementation with only
171+
one possible match (no offset is emitted).
172+
173+
RLT: Run-Length Transform is a simple transform that replaces runs of similar
174+
symbols with a compact representation.
175+
176+
ZRLT: Zero Run-Length Transform. Similar to RLT but only processes runs of 0.
177+
Usually used post-BWT.
178+
179+
MTFT: Move-To-Front Transform is a transform that reduces entropy by assigning
180+
shorter symbols to recent data (like an LRU cache). Usually used post-BWT.
181+
182+
RANK: Rank Transform is a transform that reduces entropy by assigning shorter
183+
symbols based on symbol frequency ranks. Usually used post-BWT.
184+
185+
EXE: A transform that reduces the entropy of executable files (X86 & ARM64)
186+
by replacing relative jump addresses with absolute ones.
187+
188+
TEXT: A text transform that uses a dictionary to replace common words with
189+
their dictionary index.
190+
191+
ROLZ: Reduced Offset Lempel-Ziv is an implementation of LZ that replaces match offsets
192+
with indexes, creating a more compact output with slower decoding speeds.
193+
194+
ROLZX: Extended ROLZ with more match searches and a more compact encoding.
195+
196+
SRT: Sorted Rank Transform is a transform that reduces entropy by assigning
197+
shorter symbols based on symbol frequency ranks. Usually used post-BWT.
198+
199+
MM: Multimedia transform is a fast transform that removes redundancy in correlated
200+
channels in some multimedia files (e.g., wav, pnm).
201+
202+
UTF: A fast transform replacing UTF-8 codewords with aliases based on frequencies.
203+
204+
PACK: A fast transform replacing unused symbols with aliases based on frequencies.
205+
206+
DNA: Same as PACK but triggered only when DNA data is detected.
207+
208+
209+
.SS "Entropy codecs"
210+
211+
Huffman: A fast implementation of canonical Huffman. Both encoder and decoder
212+
use code tables and multiple streams to improve performance.
213+
214+
Range: A fast implementation of a static range codec.
215+
216+
ANS: Based on Range Asymmetric Numeral Systems by Jarek Duda (specifically
217+
an implementation by Fabian Giesen). Works in a similar fashion to the Range
218+
codec but uses only one state instead of two, and encodes in reverse byte order.
219+
220+
FPAQ: A binary arithmetic codec based on FPAQ1 by Matt Mahoney. Uses a simple
221+
adaptive order 0 predictor based on frequencies.
222+
223+
CM: A binary arithmetic codec derived from BCM by Ilya Muravyov. Uses context
224+
mixing of counters to generate a prediction of the next bit value.
225+
226+
TPAQ: A binary arithmetic codec based initially on Tangelo 2.4 (itself derived
227+
from FPAQ8). Uses context mixing of predictions produced by one-layer
228+
neural networks. The initial code has been heavily tuned to improve
229+
compression ratio and speed. Slow but usually excellent compression ratio.
230+
231+
TPAQX: Extended TPAQ with more predictions and more memory usage. Slowest but
232+
usually the best compression ratio.
233+
234+
235+
.SH BUGS
236+
Report bugs at: https://github.com/flanglet/kanzi-cpp/issues
237+
.SH AUTHOR
238+
Frederic Langlet
239+
.SH REPORTING BUGS
240+
https://github.com/flanglet/kanzi-cpp

kanzi.1.gz

-3.65 KB
Binary file not shown.

src/Makefile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,13 +270,13 @@ else
270270
install -d $(INSTALL_DIR)/bin
271271
install -m577 ../bin/$(APP)$(PROG_SUFFIX) $(INSTALL_DIR)/bin
272272
install -d $(MAN_DIR)
273-
if [ -f ../kanzi.1.gz ]; then \
274-
install -m 644 ../kanzi.1.gz $(MAN_DIR)/$(APP).1.gz; \
275-
elif [ -f ../kanzi.1 ]; then \
276-
gzip -n -c ../kanzi.1 > $(MAN_DIR)/$(APP).1.gz; \
273+
if [ -f ../doc/kanzi.1.gz ]; then \
274+
install -m 644 ../doc/kanzi.1.gz $(MAN_DIR)/$(APP).1.gz; \
275+
elif [ -f ../doc/kanzi.1 ]; then \
276+
gzip -n -c ../doc/kanzi.1 > $(MAN_DIR)/$(APP).1.gz; \
277277
chmod 644 $(MAN_DIR)/$(APP).1.gz; \
278278
else \
279-
echo "Error: missing ../kanzi.1.gz or ../kanzi.1"; \
279+
echo "Error: missing ../doc/kanzi.1.gz or ../doc/kanzi.1"; \
280280
exit 1; \
281281
fi
282282
endif

0 commit comments

Comments
 (0)