Natools

Check-in [c3f4498cb8]
Login
Overview
Comment: tools/smaz: allow Optimization_Round to reduce dictionary size
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: c3f4498cb891998c22e61fba7a9919253a3a1d8a
User & Date: nat on 2017-05-23 20:15:05
Other Links: manifest | tags
Context
2017-05-24
21:29
tools/smaz: add support for not adding worst word to pending list check-in: d41ee1b20d user: nat tags: trunk
2017-05-23
20:15
tools/smaz: allow Optimization_Round to reduce dictionary size check-in: c3f4498cb8 user: nat tags: trunk
2017-05-22
19:45
tools/smaz: add a dictionary Length accessor check-in: 46f6fa15e8 user: nat tags: trunk
Changes

Modified tools/smaz.adb from [00e77a90e5] to [3c3e05eba8].

440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
440
441
442
443
444
445
446

447
448
449
450
451
452
453







-







         --  Convert the input into a dictionary given the option in Handler

   end Dictionary_Subprograms;



   package body Dictionary_Subprograms is
      pragma Unreferenced (Length);

      function Adjust_Dictionary
        (Handler : in Callback'Class;
         Dict : in Dictionary;
         Corpus : in String_Lists.List;
         Method : in Methods)
        return Dictionary is
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
583
584
585
586
587
588
589

590
591
592
593
594
595
596







-







         Input_Texts : in String_Lists.List;
         Job_Count : in Natural;
         Method : in Methods;
         Min_Dict_Size : in Positive;
         Max_Dict_Size : in Positive;
         Updated : out Boolean)
      is
         pragma Unreferenced (Min_Dict_Size);
         pragma Unreferenced (Max_Dict_Size);
         use type Ada.Streams.Stream_Element_Offset;

         No_Longer_Pending : String_Lists.Cursor;
         Log_Message : Ada.Strings.Unbounded.Unbounded_String;
         Original : constant Dictionary := Dict.Element;
         Worst_Index : constant Dictionary_Entry
635
636
637
638
639
640
641




























642
643
644
645
646
647
648
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







                     & " ("
                     & Ada.Streams.Stream_Element_Offset'Image
                        (Score - Old_Score)
                     & ')');
               end if;
            end;
         end loop;

         if Length (Base) >= Min_Dict_Size then
            declare
               New_Score : Ada.Streams.Stream_Element_Count;
               New_Counts : Dictionary_Counts;
            begin
               Evaluate_Dictionary
                 (Job_Count, Base, Input_Texts, New_Score, New_Counts);

               if New_Score <= Score then
                  Dict := Holders.To_Holder (Base);
                  Score := New_Score;
                  Counts := New_Counts;
                  No_Longer_Pending := String_Lists.No_Element;
                  Updated := True;
                  Log_Message := Ada.Strings.Unbounded.To_Unbounded_String
                    ("Removing"
                     & Worst_Count'Img & "x "
                     & Natools.String_Escapes.C_Escape_Hex (Worst_Value, True)
                     & ", size"
                     & Score'Img
                     & " ("
                     & Ada.Streams.Stream_Element_Offset'Image
                        (Score - Old_Score)
                     & ')');
               end if;
            end;
         end if;

         if Updated then
            if String_Lists.Has_Element (No_Longer_Pending) then
               Pending_Words.Delete (No_Longer_Pending);
            end if;

            Pending_Words.Append (Worst_Value);