ADDED src/natools-smaz.adb Index: src/natools-smaz.adb ================================================================== --- src/natools-smaz.adb +++ src/natools-smaz.adb @@ -0,0 +1,313 @@ +------------------------------------------------------------------------------ +-- Copyright (c) 2016, Natacha Porté -- +-- -- +-- Permission to use, copy, modify, and distribute this software for any -- +-- purpose with or without fee is hereby granted, provided that the above -- +-- copyright notice and this permission notice appear in all copies. -- +-- -- +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -- +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -- +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -- +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -- +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -- +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -- +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -- +------------------------------------------------------------------------------ + +package body Natools.Smaz is + + use type Ada.Streams.Stream_Element_Offset; + + function Dict_Entry + (Dict : in Dictionary; + Index : in Ada.Streams.Stream_Element) + return String + with Pre => Index <= Dict.Dict_Last; + + procedure Find_Entry + (Dict : in Dictionary; + Template : in String; + Index : out Ada.Streams.Stream_Element; + Length : out Natural); + + function To_String (Data : in Ada.Streams.Stream_Element_Array) + return String; + + + ------------------------------ + -- Local Helper Subprograms -- + ------------------------------ + + function Dict_Entry + (Dict : in Dictionary; + Index : in Ada.Streams.Stream_Element) + return String + is + First : constant Positive := Dict.Offsets (Index); + Last : Natural := Dict.Values'Last; + begin + if Index + 1 in Dict.Offsets'Range then + Last := Dict.Offsets (Index + 1) - 1; + end if; + + return Dict.Values (First .. Last); + end Dict_Entry; + + + procedure Find_Entry + (Dict : in Dictionary; + Template : in String; + Index : out Ada.Streams.Stream_Element; + Length : out Natural) + is + I : Ada.Streams.Stream_Element; + N : Natural; + begin + Index := Ada.Streams.Stream_Element'Last; + Length := 0; + + for Last in reverse Template'Range loop + N := Dict.Hash (Template (Template'First .. Last)); + + if N <= Natural (Dict.Dict_Last) then + I := Ada.Streams.Stream_Element (N); + if Dict_Entry (Dict, I) = Template (Template'First .. Last) then + Index := I; + Length := 1 + Last - Template'First; + return; + end if; + end if; + end loop; + end Find_Entry; + + + function To_String (Data : in Ada.Streams.Stream_Element_Array) + return String is + begin + return Result : String (1 .. Data'Length) do + for I in Result'Range loop + Result (I) := Character'Val (Data + (Data'First + Ada.Streams.Stream_Element_Offset (I - 1))); + end loop; + end return; + end To_String; + + + + ---------------------- + -- Public Interface -- + ---------------------- + + function Compressed_Upper_Bound + (Dict : in Dictionary; + Input : in String) + return Ada.Streams.Stream_Element_Count + is + Verbatim1_Max_Size : constant Natural + := Natural (Ada.Streams.Stream_Element'Last - Dict.Dict_Last) + - Boolean'Pos (Dict.Variable_Length_Verbatim); + Verbatim2_Max_Size : constant Natural + := Natural (Ada.Streams.Stream_Element'Last) + + Verbatim1_Max_Size; + begin + if Dict.Variable_Length_Verbatim then + return Ada.Streams.Stream_Element_Count (Input'Length + + 2 * (Input'Length + Verbatim2_Max_Size - 1) / Verbatim2_Max_Size); + else + return Ada.Streams.Stream_Element_Count (Input'Length + + (Input'Length + Verbatim1_Max_Size - 1) / Verbatim1_Max_Size); + end if; + end Compressed_Upper_Bound; + + + procedure Compress + (Dict : in Dictionary; + Input : in String; + Output_Buffer : out Ada.Streams.Stream_Element_Array; + Output_Last : out Ada.Streams.Stream_Element_Offset) + is + procedure Find_Entry; + + Verbatim1_Max_Size : constant Natural + := Natural (Ada.Streams.Stream_Element'Last - Dict.Dict_Last) + - Boolean'Pos (Dict.Variable_Length_Verbatim); + Verbatim2_Max_Size : constant Natural + := Natural (Ada.Streams.Stream_Element'Last) + + Verbatim1_Max_Size; + + Input_Index : Positive := Input'First; + Length : Natural; + Word : Ada.Streams.Stream_Element; + + procedure Find_Entry is + begin + Find_Entry + (Dict, + Input (Input_Index + .. Natural'Min (Input_Index + Dict.Max_Word_Length - 1, + Input'Last)), + Word, + Length); + end Find_Entry; + begin + Output_Last := Output_Buffer'First - 1; + Find_Entry; + + Main_Loop : + while Input_Index in Input'Range loop + Data_In_Dict : + while Length > 0 loop + Output_Last := Output_Last + 1; + Output_Buffer (Output_Last) := Word; + Input_Index := Input_Index + Length; + exit Main_Loop when Input_Index not in Input'Range; + Find_Entry; + end loop Data_In_Dict; + + Verbatim_Block : + declare + Beginning : Positive := Input_Index; + Verbatim_Length, Block_Length : Natural; + begin + Verbatim_Scan : + while Length = 0 and Input_Index in Input'Range loop + Input_Index := Input_Index + 1; + Find_Entry; + end loop Verbatim_Scan; + + Verbatim_Length := Input_Index - Beginning; + + Verbatim_Encode : + while Verbatim_Length > 0 loop + if Dict.Variable_Length_Verbatim + and then Verbatim_Length > Verbatim1_Max_Size + then + Block_Length := Natural'Min + (Verbatim_Length, Verbatim2_Max_Size); + Output_Buffer (Output_Last + 1) + := Ada.Streams.Stream_Element'Last; + Output_Buffer (Output_Last + 2) := Ada.Streams.Stream_Element + (Block_Length - Verbatim1_Max_Size); + Output_Last := Output_Last + 2; + else + Block_Length := Natural'Min + (Verbatim_Length, Verbatim1_Max_Size); + Output_Last := Output_Last + 1; + Output_Buffer (Output_Last) + := Ada.Streams.Stream_Element'Last + - Ada.Streams.Stream_Element + (Block_Length - 1 + + Boolean'Pos (Dict.Variable_Length_Verbatim)); + end if; + + Verbatim_Copy : + for I in Beginning .. Beginning + Block_Length - 1 loop + Output_Last := Output_Last + 1; + Output_Buffer (Output_Last) := Character'Pos (Input (I)); + end loop Verbatim_Copy; + + Verbatim_Length := Verbatim_Length - Block_Length; + Beginning := Beginning + Block_Length; + end loop Verbatim_Encode; + end Verbatim_Block; + end loop Main_Loop; + end Compress; + + + function Decompressed_Length + (Dict : in Dictionary; + Input : in Ada.Streams.Stream_Element_Array) + return Natural + is + Result : Natural := 0; + Verbatim_Code_Count : constant Ada.Streams.Stream_Element_Offset + := Ada.Streams.Stream_Element_Offset + (Ada.Streams.Stream_Element'Last - Dict.Dict_Last); + Input_Index : Ada.Streams.Stream_Element_Offset := Input'First; + Input_Byte : Ada.Streams.Stream_Element; + Verbatim_Length : Ada.Streams.Stream_Element_Offset; + begin + while Input_Index in Input'Range loop + Input_Byte := Input (Input_Index); + + if Input_Byte in Dict.Offsets'Range then + Result := Result + Dict_Entry (Dict, Input_Byte)'Length; + Input_Index := Input_Index + 1; + else + if not Dict.Variable_Length_Verbatim then + Verbatim_Length := Ada.Streams.Stream_Element_Offset + (Ada.Streams.Stream_Element'Last - Input_Byte) + 1; + elsif Input_Byte < Ada.Streams.Stream_Element'Last then + Verbatim_Length := Ada.Streams.Stream_Element_Offset + (Ada.Streams.Stream_Element'Last - Input_Byte); + else + Input_Index := Input_Index + 1; + Verbatim_Length := Ada.Streams.Stream_Element_Offset + (Input (Input_Index)) + Verbatim_Code_Count - 1; + end if; + + Result := Result + Positive (Verbatim_Length); + Input_Index := Input_Index + Verbatim_Length + 1; + end if; + end loop; + + return Result; + end Decompressed_Length; + + + procedure Decompress + (Dict : in Dictionary; + Input : in Ada.Streams.Stream_Element_Array; + Output_Buffer : out String; + Output_Last : out Natural) + is + procedure Append (S : in String); + procedure Append (S : in Ada.Streams.Stream_Element_Array); + + procedure Append (S : in String) is + begin + Output_Buffer (Output_Last + 1 .. Output_Last + S'Length) := S; + Output_Last := Output_Last + S'Length; + end Append; + + procedure Append (S : in Ada.Streams.Stream_Element_Array) is + begin + Append (To_String (S)); + end Append; + + Verbatim_Code_Count : constant Ada.Streams.Stream_Element_Offset + := Ada.Streams.Stream_Element_Offset + (Ada.Streams.Stream_Element'Last - Dict.Dict_Last); + + Input_Index : Ada.Streams.Stream_Element_Offset := Input'First; + Input_Byte : Ada.Streams.Stream_Element; + Verbatim_Length : Ada.Streams.Stream_Element_Offset; + begin + Output_Last := Output_Buffer'First - 1; + + while Input_Index in Input'Range loop + Input_Byte := Input (Input_Index); + + if Input_Byte in Dict.Offsets'Range then + Append (Dict_Entry (Dict, Input_Byte)); + Input_Index := Input_Index + 1; + else + if not Dict.Variable_Length_Verbatim then + Verbatim_Length := Ada.Streams.Stream_Element_Offset + (Ada.Streams.Stream_Element'Last - Input_Byte) + 1; + elsif Input_Byte < Ada.Streams.Stream_Element'Last then + Verbatim_Length := Ada.Streams.Stream_Element_Offset + (Ada.Streams.Stream_Element'Last - Input_Byte); + else + Input_Index := Input_Index + 1; + Verbatim_Length := Ada.Streams.Stream_Element_Offset + (Input (Input_Index)) + Verbatim_Code_Count - 1; + end if; + + Append (Input (Input_Index + 1 .. Input_Index + Verbatim_Length)); + Input_Index := Input_Index + Verbatim_Length + 1; + end if; + end loop; + end Decompress; + +end Natools.Smaz; ADDED src/natools-smaz.ads Index: src/natools-smaz.ads ================================================================== --- src/natools-smaz.ads +++ src/natools-smaz.ads @@ -0,0 +1,69 @@ +------------------------------------------------------------------------------ +-- Copyright (c) 2016, Natacha Porté -- +-- -- +-- Permission to use, copy, modify, and distribute this software for any -- +-- purpose with or without fee is hereby granted, provided that the above -- +-- copyright notice and this permission notice appear in all copies. -- +-- -- +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -- +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -- +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -- +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -- +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -- +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -- +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -- +------------------------------------------------------------------------------ + +with Ada.Streams; + +package Natools.Smaz is + pragma Pure (Natools.Smaz); + + use type Ada.Streams.Stream_Element; + + type Offset_Array is + array (Ada.Streams.Stream_Element range <>) of Positive; + + type Dictionary + (Dict_Last : Ada.Streams.Stream_Element; + String_Size : Natural) + is record + Variable_Length_Verbatim : Boolean; + Max_Word_Length : Positive; + Offsets : Offset_Array (0 .. Dict_Last); + Values : String (1 .. String_Size); + Hash : not null access function (Value : String) return Natural; + end record with + Dynamic_Predicate => (for all I in Dictionary.Offsets'Range + => Dictionary.Offsets (I) in Dictionary.Values'Range + and then ((if I = Dictionary.Offsets'Last + then Dictionary.Values'Last + 1 + else Dictionary.Offsets (I + 1)) + - Dictionary.Offsets (I) + in 1 .. Dictionary.Max_Word_Length)); + + + function Compressed_Upper_Bound + (Dict : in Dictionary; + Input : in String) + return Ada.Streams.Stream_Element_Count; + + procedure Compress + (Dict : in Dictionary; + Input : in String; + Output_Buffer : out Ada.Streams.Stream_Element_Array; + Output_Last : out Ada.Streams.Stream_Element_Offset); + + + function Decompressed_Length + (Dict : in Dictionary; + Input : in Ada.Streams.Stream_Element_Array) + return Natural; + + procedure Decompress + (Dict : in Dictionary; + Input : in Ada.Streams.Stream_Element_Array; + Output_Buffer : out String; + Output_Last : out Natural); + +end Natools.Smaz;