Natools

Check-in [d98a8c25da]
Login
Overview
Comment: smaz: new package providing a simple compressor for small strings
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d98a8c25daddaeb899e0cdba038c409e1f709ef6
User & Date: nat on 2016-09-02 20:48:31
Other Links: manifest | tags
Context
2016-09-03
21:36
s_expressions: new primitive to update a descriptor until end of list check-in: 14fcb09484 user: nat tags: trunk
2016-09-02
20:48
smaz: new package providing a simple compressor for small strings check-in: d98a8c25da user: nat tags: trunk
2016-01-09
20:43
time_keys-tests: add test for leap second to reach full coverage check-in: a193bf89b9 user: nat tags: trunk
Changes

Added src/natools-smaz.adb version [84a6bd2551].


























































































































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
------------------------------------------------------------------------------
-- Copyright (c) 2016, Natacha Porté                                        --
--                                                                          --
-- Permission to use, copy, modify, and distribute this software for any    --
-- purpose with or without fee is hereby granted, provided that the above   --
-- copyright notice and this permission notice appear in all copies.        --
--                                                                          --
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES --
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF         --
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  --
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   --
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN    --
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  --
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.           --
------------------------------------------------------------------------------

package body Natools.Smaz is

   use type Ada.Streams.Stream_Element_Offset;

   --  Return the text of the dictionary entry designated by Index, taken as
   --  a slice of Dict.Values located through Dict.Offsets.
   function Dict_Entry
     (Dict : in Dictionary;
      Index : in Ada.Streams.Stream_Element)
     return String
     with Pre => Index <= Dict.Dict_Last;

   --  Look for the longest prefix of Template that is a dictionary entry,
   --  trying prefixes from the whole of Template down to its first character
   --  and using Dict.Hash to obtain the candidate entry index of each.
   --  On success Index is the entry index and Length the length of the
   --  matched prefix; otherwise Length is 0 and Index is
   --  Ada.Streams.Stream_Element'Last.
   procedure Find_Entry
     (Dict : in Dictionary;
      Template : in String;
      Index : out Ada.Streams.Stream_Element;
      Length : out Natural);

   --  Return a string with lower bound 1 and as many characters as Data has
   --  elements, each character having the position number of the
   --  corresponding element.
   function To_String (Data : in Ada.Streams.Stream_Element_Array)
     return String;


   ------------------------------
   -- Local Helper Subprograms --
   ------------------------------

   function Dict_Entry
     (Dict : in Dictionary;
      Index : in Ada.Streams.Stream_Element)
     return String
   is
      --  Return the dictionary word designated by Index: the slice of
      --  Dict.Values that starts at Dict.Offsets (Index) and stops right
      --  before the start of the following entry, or at the end of
      --  Dict.Values when Index designates the last entry.

      First : constant Positive := Dict.Offsets (Index);
      Last : Natural := Dict.Values'Last;
   begin
      --  Compare Index with the last valid index instead of testing whether
      --  Index + 1 is in range: Stream_Element is a modular type, so when
      --  Dict.Dict_Last is Stream_Element'Last, Index + 1 wraps around to 0
      --  for the last entry, which is a valid index, and the end of the
      --  entry would then wrongly be computed from the first offset.
      if Index < Dict.Offsets'Last then
         Last := Dict.Offsets (Index + 1) - 1;
      end if;

      return Dict.Values (First .. Last);
   end Dict_Entry;


   procedure Find_Entry
     (Dict : in Dictionary;
      Template : in String;
      Index : out Ada.Streams.Stream_Element;
      Length : out Natural) is
   begin
      --  Values reported when no prefix of Template is in the dictionary
      Index := Ada.Streams.Stream_Element'Last;
      Length := 0;

      --  Try every prefix of Template, longest first, and stop on the first
      --  one whose hash designates an entry with exactly the same text.
      for Stop in reverse Template'Range loop
         declare
            Prefix : String renames Template (Template'First .. Stop);
            Code : constant Natural := Dict.Hash (Prefix);
         begin
            --  Hash values beyond Dict_Last do not designate any entry
            if Code <= Natural (Dict.Dict_Last)
              and then Dict_Entry
                         (Dict, Ada.Streams.Stream_Element (Code)) = Prefix
            then
               Index := Ada.Streams.Stream_Element (Code);
               Length := Prefix'Length;
               return;
            end if;
         end;
      end loop;
   end Find_Entry;


   function To_String (Data : in Ada.Streams.Stream_Element_Array)
     return String
   is
      --  Convert Data into a 1-based string of the same length, mapping each
      --  stream element to the character with the same position number.

      Text : String (1 .. Data'Length);
      Position : Natural := 0;
   begin
      for Element of Data loop
         Position := Position + 1;
         Text (Position) := Character'Val (Element);
      end loop;

      return Text;
   end To_String;



   ----------------------
   -- Public Interface --
   ----------------------

   function Compressed_Upper_Bound
     (Dict : in Dictionary;
      Input : in String)
     return Ada.Streams.Stream_Element_Count
   is
      --  Return a number of stream elements that Compress never exceeds
      --  when compressing Input with Dict.
      --
      --  Compress emits one element per dictionary word (each covering at
      --  least one input character), and for each maximal run of verbatim
      --  characters the characters themselves plus header elements.
      --  The output is therefore at most Input'Length plus the total header
      --  size.  Headers are paid per verbatim run, not only per maximal-size
      --  block, so the worst case is usually an input alternating between
      --  a single verbatim character and a one-character dictionary word,
      --  rather than a purely verbatim input.

      Verbatim1_Max_Size : constant Natural
        := Natural (Ada.Streams.Stream_Element'Last - Dict.Dict_Last)
         - Boolean'Pos (Dict.Variable_Length_Verbatim);
      --  Longest verbatim block that fits behind a one-element header

      Run_Count_Max : constant Natural := (Input'Length + 1) / 2;
      --  Largest number of verbatim runs in Input, since consecutive runs
      --  are separated by at least one character taken from the dictionary
   begin
      if Dict.Variable_Length_Verbatim then
         if Verbatim1_Max_Size >= 2 then
            --  A run of L characters needs at most (L + 1) / 2 header
            --  elements: one for L <= Verbatim1_Max_Size, two for each
            --  block of a longer run, which has at least 3 characters.
            return Ada.Streams.Stream_Element_Count
              (Input'Length + Run_Count_Max);
         else
            --  Very short one-element blocks: runs of one or two characters
            --  may already need a two-element header.
            return Ada.Streams.Stream_Element_Count
              (Input'Length + 2 * Run_Count_Max);
         end if;
      else
         --  One header element per block of at most Verbatim1_Max_Size
         --  characters.  When blocks can hold two characters or more, the
         --  alternating input described above is the worst case; when they
         --  hold only one, every verbatim character needs its own header.
         --  As before, a dictionary leaving no verbatim code at all
         --  (Verbatim1_Max_Size = 0) ends in Constraint_Error through the
         --  division by zero.
         declare
            Step : constant Natural := Natural'Min (Verbatim1_Max_Size, 2);
         begin
            return Ada.Streams.Stream_Element_Count
              (Input'Length + (Input'Length + Step - 1) / Step);
         end;
      end if;
   end Compressed_Upper_Bound;


   procedure Compress
     (Dict : in Dictionary;
      Input : in String;
      Output_Buffer : out Ada.Streams.Stream_Element_Array;
      Output_Last : out Ada.Streams.Stream_Element_Offset)
   is
      --  Compress Input into Output_Buffer, starting at Output_Buffer'First,
      --  and set Output_Last to the index of the last element written
      --  (Output_Buffer'First - 1 when nothing is written).
      --  The input is processed from left to right, alternating between
      --  dictionary references (one element each) and verbatim blocks
      --  (header followed by the raw characters).

      procedure Find_Entry;
      --  Look up the longest dictionary entry starting at Input_Index,
      --  considering at most Dict.Max_Word_Length characters, and store
      --  the result in Word and Length (Length = 0 when nothing matches).

      Verbatim1_Max_Size : constant Natural
        := Natural (Ada.Streams.Stream_Element'Last - Dict.Dict_Last)
         - Boolean'Pos (Dict.Variable_Length_Verbatim);
      --  Longest verbatim block emitted behind a one-element header

      Verbatim2_Max_Size : constant Natural
        := Natural (Ada.Streams.Stream_Element'Last)
         + Verbatim1_Max_Size;
      --  Longest verbatim block emitted behind a two-element header

      Input_Index : Positive := Input'First;
      Length : Natural;
      Word : Ada.Streams.Stream_Element;

      procedure Find_Entry is
      begin
         Find_Entry
           (Dict,
            Input (Input_Index
                   .. Natural'Min (Input_Index + Dict.Max_Word_Length - 1,
                                   Input'Last)),
            Word,
            Length);
      end Find_Entry;
   begin
      Output_Last := Output_Buffer'First - 1;
      Find_Entry;

      Main_Loop :
      while Input_Index in Input'Range loop
         --  Emit dictionary references for as long as the text at
         --  Input_Index starts with a dictionary entry.
         Data_In_Dict :
         while Length > 0 loop
            Output_Last := Output_Last + 1;
            Output_Buffer (Output_Last) := Word;
            Input_Index := Input_Index + Length;
            exit Main_Loop when Input_Index not in Input'Range;
            Find_Entry;
         end loop Data_In_Dict;

         Verbatim_Block :
         declare
            Beginning : Positive := Input_Index;
            Verbatim_Length, Block_Length : Natural;
         begin
            --  Skip characters until a dictionary entry is found or the
            --  end of input is reached; on exit Word and Length describe
            --  the entry at the new Input_Index, if any.
            Verbatim_Scan :
            while Length = 0 and Input_Index in Input'Range loop
               Input_Index := Input_Index + 1;
               Find_Entry;
            end loop Verbatim_Scan;

            Verbatim_Length := Input_Index - Beginning;

            --  Emit the skipped characters, split into as many blocks as
            --  needed to respect the maximum block sizes.
            Verbatim_Encode :
            while Verbatim_Length > 0 loop
               if Dict.Variable_Length_Verbatim
                 and then Verbatim_Length > Verbatim1_Max_Size
               then
                  --  Two-element header: the largest element value, then
                  --  the block length in excess of Verbatim1_Max_Size
                  Block_Length := Natural'Min
                    (Verbatim_Length, Verbatim2_Max_Size);
                  Output_Buffer (Output_Last + 1)
                    := Ada.Streams.Stream_Element'Last;
                  Output_Buffer (Output_Last + 2) := Ada.Streams.Stream_Element
                    (Block_Length - Verbatim1_Max_Size);
                  Output_Last := Output_Last + 2;
               else
                  --  One-element header counting down from the largest
                  --  element value: length 1 is encoded as that value,
                  --  or as that value minus 1 when it is reserved for the
                  --  two-element header (variable-length verbatim).
                  Block_Length := Natural'Min
                    (Verbatim_Length, Verbatim1_Max_Size);
                  Output_Last := Output_Last + 1;
                  Output_Buffer (Output_Last)
                    := Ada.Streams.Stream_Element'Last
                     - Ada.Streams.Stream_Element
                        (Block_Length - 1
                          + Boolean'Pos (Dict.Variable_Length_Verbatim));
               end if;

               --  Raw characters of the block, one element per character
               Verbatim_Copy :
               for I in Beginning .. Beginning + Block_Length - 1 loop
                  Output_Last := Output_Last + 1;
                  Output_Buffer (Output_Last) := Character'Pos (Input (I));
               end loop Verbatim_Copy;

               Verbatim_Length := Verbatim_Length - Block_Length;
               Beginning := Beginning + Block_Length;
            end loop Verbatim_Encode;
         end Verbatim_Block;
      end loop Main_Loop;
   end Compress;


   function Decompressed_Length
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array)
     return Natural
   is
      --  Walk through the compressed data in Input and add up the length of
      --  the text each dictionary reference or verbatim block stands for,
      --  without producing the text itself.

      Escape : constant Ada.Streams.Stream_Element
        := Ada.Streams.Stream_Element'Last;
      Code_Count : constant Ada.Streams.Stream_Element_Offset
        := Ada.Streams.Stream_Element_Offset (Escape - Dict.Dict_Last);
      --  Number of element values that are not dictionary references

      Total : Natural := 0;
      Cursor : Ada.Streams.Stream_Element_Offset := Input'First;
      Code : Ada.Streams.Stream_Element;
      Run : Ada.Streams.Stream_Element_Offset;
   begin
      loop
         exit when Cursor not in Input'Range;
         Code := Input (Cursor);

         if Code <= Dict.Dict_Last then
            --  Dictionary reference: a single element for a whole word
            Total := Total + Dict_Entry (Dict, Code)'Length;
            Cursor := Cursor + 1;
         else
            --  Verbatim block: decode its length from the header
            if Dict.Variable_Length_Verbatim and then Code = Escape then
               --  Two-element header, length stored in the second element
               Cursor := Cursor + 1;
               Run := Ada.Streams.Stream_Element_Offset (Input (Cursor))
                 + Code_Count - 1;
            elsif Dict.Variable_Length_Verbatim then
               Run := Ada.Streams.Stream_Element_Offset (Escape - Code);
            else
               Run := Ada.Streams.Stream_Element_Offset (Escape - Code) + 1;
            end if;

            Total := Total + Positive (Run);

            --  Skip the header element at Cursor and the raw data after it
            Cursor := Cursor + Run + 1;
         end if;
      end loop;

      return Total;
   end Decompressed_Length;


   procedure Decompress
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array;
      Output_Buffer : out String;
      Output_Last : out Natural)
   is
      --  Expand the compressed data in Input into Output_Buffer, starting
      --  at Output_Buffer'First, and set Output_Last to the index of the
      --  last character written.

      procedure Emit (Text : in String);
      --  Copy Text right after the characters already written

      procedure Emit (Text : in String) is
         Next_Last : constant Natural := Output_Last + Text'Length;
      begin
         Output_Buffer (Output_Last + 1 .. Next_Last) := Text;
         Output_Last := Next_Last;
      end Emit;

      Escape : constant Ada.Streams.Stream_Element
        := Ada.Streams.Stream_Element'Last;
      Code_Count : constant Ada.Streams.Stream_Element_Offset
        := Ada.Streams.Stream_Element_Offset (Escape - Dict.Dict_Last);
      --  Number of element values that are not dictionary references

      Cursor : Ada.Streams.Stream_Element_Offset := Input'First;
      Code : Ada.Streams.Stream_Element;
      Run : Ada.Streams.Stream_Element_Offset;
   begin
      Output_Last := Output_Buffer'First - 1;

      loop
         exit when Cursor not in Input'Range;
         Code := Input (Cursor);

         if Code <= Dict.Dict_Last then
            --  Dictionary reference: a single element for a whole word
            Emit (Dict_Entry (Dict, Code));
            Cursor := Cursor + 1;
         else
            --  Verbatim block: decode its length from the header
            if Dict.Variable_Length_Verbatim and then Code = Escape then
               --  Two-element header, length stored in the second element
               Cursor := Cursor + 1;
               Run := Ada.Streams.Stream_Element_Offset (Input (Cursor))
                 + Code_Count - 1;
            elsif Dict.Variable_Length_Verbatim then
               Run := Ada.Streams.Stream_Element_Offset (Escape - Code);
            else
               Run := Ada.Streams.Stream_Element_Offset (Escape - Code) + 1;
            end if;

            --  Raw characters follow the header element at Cursor
            Emit (To_String (Input (Cursor + 1 .. Cursor + Run)));
            Cursor := Cursor + Run + 1;
         end if;
      end loop;
   end Decompress;

end Natools.Smaz;

Added src/natools-smaz.ads version [13c2fce4a1].






































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
------------------------------------------------------------------------------
-- Copyright (c) 2016, Natacha Porté                                        --
--                                                                          --
-- Permission to use, copy, modify, and distribute this software for any    --
-- purpose with or without fee is hereby granted, provided that the above   --
-- copyright notice and this permission notice appear in all copies.        --
--                                                                          --
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES --
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF         --
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  --
-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   --
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN    --
-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  --
-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.           --
------------------------------------------------------------------------------

with Ada.Streams;

--  Natools.Smaz provides a dictionary-based compressor for small strings:
--  compressed data is a sequence of stream elements that are either
--  references to dictionary entries or headers of verbatim blocks.

package Natools.Smaz is
   pragma Pure (Natools.Smaz);

   use type Ada.Streams.Stream_Element;

   type Offset_Array is
     array (Ada.Streams.Stream_Element range <>) of Positive;
      --  Start index in Dictionary.Values of each dictionary entry

   type Dictionary
     (Dict_Last : Ada.Streams.Stream_Element;
      String_Size : Natural)
   is record
      Variable_Length_Verbatim : Boolean;
      --  Whether the largest element value introduces a two-element
      --  verbatim header instead of being a one-element header itself

      Max_Word_Length : Positive;
      --  Upper bound on the length of any entry, enforced by the predicate

      Offsets : Offset_Array (0 .. Dict_Last);
      --  Offsets (I) is the index in Values of the first character of
      --  entry I, which extends up to the start of entry I + 1, or up to
      --  the end of Values for the last entry

      Values : String (1 .. String_Size);
      --  Concatenation of the text of all entries

      Hash : not null access function (Value : String) return Natural;
      --  Used when compressing to get the index of the entry that might be
      --  equal to Value; the candidate entry is then compared with Value,
      --  and results above Dict_Last are treated as "not in dictionary"
   end record with
      Dynamic_Predicate => (for all I in Dictionary.Offsets'Range
         => Dictionary.Offsets (I) in Dictionary.Values'Range
            and then ((if I = Dictionary.Offsets'Last
                        then Dictionary.Values'Last + 1
                        else Dictionary.Offsets (I + 1))
                      - Dictionary.Offsets (I)
                  in 1 .. Dictionary.Max_Word_Length));
      --  Every entry starts inside Values and has a length in
      --  1 .. Max_Word_Length


   function Compressed_Upper_Bound
     (Dict : in Dictionary;
      Input : in String)
     return Ada.Streams.Stream_Element_Count;
      --  Return an estimate of the maximum number of stream elements
      --  Compress may write for Input, computed from Input'Length and
      --  the verbatim block limits of Dict

   procedure Compress
     (Dict : in Dictionary;
      Input : in String;
      Output_Buffer : out Ada.Streams.Stream_Element_Array;
      Output_Last : out Ada.Streams.Stream_Element_Offset);
      --  Compress Input into Output_Buffer, starting at Output_Buffer'First,
      --  and set Output_Last to the index of the last element written


   function Decompressed_Length
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array)
     return Natural;
      --  Return the number of characters encoded by the compressed
      --  data in Input, without building the decompressed text

   procedure Decompress
     (Dict : in Dictionary;
      Input : in Ada.Streams.Stream_Element_Array;
      Output_Buffer : out String;
      Output_Last : out Natural);
      --  Decompress Input into Output_Buffer, starting at
      --  Output_Buffer'First, and set Output_Last to the index of the last
      --  character written

end Natools.Smaz;