Natools

Check-in [bc5e6e89ab]
Login
Overview
Comment: tools/smaz: new command line option to set built dictionary size
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: bc5e6e89ab9584b1c428459bc47925e54acd9024
User & Date: nat on 2016-11-05 20:02:52
Other Links: manifest | tags
Context
2016-11-06
20:42
tools/smaz: add options to select variable-length verbatim codes check-in: 2365190245 user: nat tags: trunk
2016-11-05
20:02
tools/smaz: new command line option to set built dictionary size check-in: bc5e6e89ab user: nat tags: trunk
2016-11-04
22:42
tools/smaz: fix the letter of latest command line option check-in: 03959c30d5 user: nat tags: trunk
Changes

Modified tools/smaz.adb from [ddd4d7f220] to [c48a6ca8b4].

71
72
73
74
75
76
77

78
79
80
81
82
83
84
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85







+







         Filter_Threshold,
         Output_Hash,
         Job_Count,
         Help,
         Sx_Dict_Output,
         Min_Sub_Size,
         Max_Sub_Size,
         Dict_Size,
         Max_Pending,
         Stat_Output,
         No_Stat_Output,
         Text_List_Input,
         Fast_Text_Input,
         Max_Word_Size,
         Sx_Output,
93
94
95
96
97
98
99

100
101
102
103
104
105
106
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108







+







      Need_Dictionary : Boolean := False;
      Stat_Output : Boolean := False;
      Sx_Output : Boolean := False;
      Sx_Dict_Output : Boolean := False;
      Min_Sub_Size : Positive := 1;
      Max_Sub_Size : Positive := 3;
      Max_Word_Size : Positive := 10;
      Dict_Size : Positive := 254;
      Max_Pending : Ada.Containers.Count_Type
        := Ada.Containers.Count_Type'Last;
      Job_Count : Natural := 0;
      Filter_Threshold : Natools.Smaz.Tools.String_Count := 0;
      Score_Method : Methods.Enum := Methods.Encoded;
      Action : Actions.Enum := Actions.Nothing;
      Ada_Dictionary : Ada.Strings.Unbounded.Unbounded_String;
313
314
315
316
317
318
319



320
321
322
323
324
325
326
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331







+
+
+







              := Natools.Smaz.Tools.String_Count'Value (Argument);

         when Options.Score_Method =>
            Handler.Score_Method := Methods.Enum'Value (Argument);

         when Options.Max_Pending =>
            Handler.Max_Pending := Ada.Containers.Count_Type'Value (Argument);

         when Options.Dict_Size =>
            Handler.Dict_Size := Positive'Value (Argument);
      end case;
   end Option;


   procedure Evaluate_Dictionary
     (Job_Count : in Natural;
      Dict : in Natools.Smaz.Dictionary;
370
371
372
373
374
375
376

377
378
379
380
381
382
383
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389







+







      R.Add_Option ("filter",        'F', Required_Argument, Filter_Threshold);
      R.Add_Option ("help",          'h', No_Argument,       Help);
      R.Add_Option ("hash-pkg",      'H', Required_Argument, Output_Hash);
      R.Add_Option ("jobs",          'j', Required_Argument, Job_Count);
      R.Add_Option ("sx-dict",       'L', No_Argument,       Sx_Dict_Output);
      R.Add_Option ("min-substring", 'm', Required_Argument, Min_Sub_Size);
      R.Add_Option ("max-substring", 'M', Required_Argument, Max_Sub_Size);
      R.Add_Option ("dict-size",     'n', Required_Argument, Dict_Size);
      R.Add_Option ("max-pending",   'N', Required_Argument, Max_Pending);
      R.Add_Option ("stats",         's', No_Argument,       Stat_Output);
      R.Add_Option ("no-stats",      'S', No_Argument,       No_Stat_Output);
      R.Add_Option ("text-list",     't', No_Argument,       Text_List_Input);
      R.Add_Option ("fast-text-list", 'T', No_Argument,       Fast_Text_Input);
      R.Add_Option ("max-word-len",  'W', Required_Argument, Max_Word_Size);
      R.Add_Option ("s-expr",        'x', No_Argument,       Sx_Output);
749
750
751
752
753
754
755





756
757
758
759
760
761
762
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773







+
+
+
+
+







                 & " during optimization");

            when Options.Max_Pending =>
               Put_Line (Output, " <count>");
               Put_Line (Output, Indent & Indent
                 & "Maximum size of candidate list"
                 & " when building a dictionary");

            when Options.Dict_Size =>
               Put_Line (Output, " <count>");
               Put_Line (Output, Indent & Indent
                 & "Number of words in the dictionary to build");
         end case;
      end loop;
   end Print_Help;


   function To_Dictionary
     (Handler : in Callback'Class;
791
792
793
794
795
796
797




798

799
800
801
802
803
804
805
802
803
804
805
806
807
808
809
810
811
812

813
814
815
816
817
818
819
820







+
+
+
+
-
+







               end if;

               if Handler.Dict_Source = Dict_Sources.Text_List then
                  declare
                     Selected, Pending : Natools.Smaz.Tools.String_Lists.List;
                  begin
                     Natools.Smaz.Tools.Simple_Dictionary_And_Pending
+                      (Counter,
+                       Handler.Dict_Size,
+                       Selected,
+                       Pending,
-                      (Counter, 254, Selected, Pending, Handler.Max_Pending);
+                       Handler.Max_Pending);

                     return Optimize_Dictionary
                       (Natools.Smaz.Tools.To_Dictionary (Selected, True),
                        Pending,
                        Input,
                        Handler.Job_Count,
                        Handler.Score_Method);