10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR --
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES --
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN --
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF --
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. --
------------------------------------------------------------------------------
------------------------------------------------------------------------------
-- Natools.Smaz is a re-implementation of the short string compression --
-- algorithm "Smaz" by Salvatore Sanfilippo --
-- (see https://github.com/antirez/smaz). --
-- Its main selling point is its simplicity and CPU performance. However --
-- the implementation here emphasizes correctness (which greatly benefits --
-- from simplicity) over performance (so no benchmarks have been made). --
-- --
-- The basic idea behind the algorithm is that bytes in the encoded (and --
-- hopefully compressed) message are indexes in a static compiled-in --
-- dictionary, and two special byte values to mark verbatim data. --
-- --
-- For example, using original Smaz dictionary, the string "Athe33" is --
-- encoded as (254, 65, 1, 255, 1, 51, 51), which can be broken down as: --
-- * 254 to mark the following byte as verbatim --
-- * 65 which is verbatim byte for 'A' --
-- * 1 to mark the second word in the dictionary: "the" --
-- * 255 to mark variable-length verbatim escape --
-- * 1 to encoding the length of the verbatim fragment: 2 bytes --
-- * 51, 51 the verbatim bytes for "33". --
-- --
-- Note that the encoder has been improved over the original Smaz encoder, --
-- in that it merges adjacent verbatim fragments when it makes the output --
-- smaller. For example, with the input 5-byte string "33 33", the original --
-- naive encoder would produce the 9-byte output --
-- (255, 1, 51, 51, 0, 255, 1, 51, 51), while encoder here would encode the --
-- whole string in a single verbatim fragment, leading to the 7-byte output --
-- (255, 4, 51, 51, 32, 51, 51). --
------------------------------------------------------------------------------
with Ada.Streams;
package Natools.Smaz is
pragma Pure (Natools.Smaz);
use type Ada.Streams.Stream_Element;
|
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
+
+
+
+
+
+
|
in 1 .. Dictionary.Max_Word_Length));
function Compressed_Upper_Bound
(Dict : in Dictionary;
Input : in String)
return Ada.Streams.Stream_Element_Count;
-- Return the maximum number of bytes needed to encode Input
procedure Compress
(Dict : in Dictionary;
Input : in String;
Output_Buffer : out Ada.Streams.Stream_Element_Array;
Output_Last : out Ada.Streams.Stream_Element_Offset);
-- Encode Input into Output_Buffer
function Compress (Dict : in Dictionary; Input : in String)
return Ada.Streams.Stream_Element_Array;
-- Return an encoded buffer for Input
function Decompressed_Length
(Dict : in Dictionary;
Input : in Ada.Streams.Stream_Element_Array)
return Natural;
-- Return the exact length when Input is decoded
procedure Decompress
(Dict : in Dictionary;
Input : in Ada.Streams.Stream_Element_Array;
Output_Buffer : out String;
Output_Last : out Natural);
-- Decode Input into Output_Buffer
function Decompress
(Dict : in Dictionary; Input : in Ada.Streams.Stream_Element_Array)
return String;
-- Return a decoded buffer for Input
end Natools.Smaz;
|