Overview
Comment: | tools/smaz: add command-line options for min and max dictionary size |
---|---|
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | ecae1e85f47d03ea91382dbd78af2fd7 |
User & Date: | nat on 2017-05-19 21:04:57 |
Other Links: | manifest | tags |
Context
2017-05-20
| ||
19:25 | tools/smaz: move log-message construction of Optimization_Round check-in: 032d847343 user: nat tags: trunk | |
2017-05-19
| ||
21:04 | tools/smaz: add command-line options for min and max dictionary size check-in: ecae1e85f4 user: nat tags: trunk | |
2017-05-18
| ||
21:13 | tools/smaz: implement forced words for optimized dictionary generation check-in: 014ca1d01b user: nat tags: trunk | |
Changes
Modified tools/smaz.adb from [4b4875a3ad] to [309a261406].
︙ | ︙ | |||
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | No_Stat_Output, Text_List_Input, Fast_Text_Input, Max_Word_Size, Sx_Output, No_Sx_Output, Force_Word, No_Vlen_Verbatim, Score_Method, Vlen_Verbatim); end Options; package Getopt is new Natools.Getopt_Long (Options.Id); type Callback is new Getopt.Handlers.Callback with record Algorithm : Algorithms.Enum := Algorithms.Base_256; Display_Help : Boolean := False; Need_Dictionary : Boolean := False; Stat_Output : Boolean := False; Sx_Output : Boolean := False; Sx_Dict_Output : Boolean := False; Min_Sub_Size : Positive := 1; Max_Sub_Size : Positive := 3; Max_Word_Size : Positive := 10; | > > | > | 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | No_Stat_Output, Text_List_Input, Fast_Text_Input, Max_Word_Size, Sx_Output, No_Sx_Output, Force_Word, Max_Dict_Size, Min_Dict_Size, No_Vlen_Verbatim, Score_Method, Vlen_Verbatim); end Options; package Getopt is new Natools.Getopt_Long (Options.Id); type Callback is new Getopt.Handlers.Callback with record Algorithm : Algorithms.Enum := Algorithms.Base_256; Display_Help : Boolean := False; Need_Dictionary : Boolean := False; Stat_Output : Boolean := False; Sx_Output : Boolean := False; Sx_Dict_Output : Boolean := False; Min_Sub_Size : Positive := 1; Max_Sub_Size : Positive := 3; Max_Word_Size : Positive := 10; Max_Dict_Size : Positive := 254; Min_Dict_Size : Positive := 254; Vlen_Verbatim : Boolean := True; Max_Pending : Ada.Containers.Count_Type := Ada.Containers.Count_Type'Last; Job_Count : Natural := 0; Filter_Threshold : Natools.Smaz_Tools.String_Count := 0; Score_Method : Methods.Enum := Methods.Encoded; Action : Actions.Enum := Actions.Nothing; |
︙ | ︙ | |||
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 | Score : in out Ada.Streams.Stream_Element_Count; Counts : in out Dictionary_Counts; First : in Dictionary_Entry; Pending_Words : in out String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; Method : in Methods; Updated : out Boolean); -- Try to improve on Dict by replacing a single entry from it with -- one of the substring in Pending_Words. function Optimize_Dictionary (Base : in Dictionary; First : in Dictionary_Entry; Pending_Words : in String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; | > > | > > | 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 | Score : in out Ada.Streams.Stream_Element_Count; Counts : in out Dictionary_Counts; First : in Dictionary_Entry; Pending_Words : in out String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; Method : in Methods; Min_Dict_Size : in Positive; Max_Dict_Size : in Positive; Updated : out Boolean); -- Try to improve on Dict by replacing a single entry from it with -- one of the substring in Pending_Words. function Optimize_Dictionary (Base : in Dictionary; First : in Dictionary_Entry; Pending_Words : in String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; Method : in Methods; Min_Dict_Size : in Positive; Max_Dict_Size : in Positive) return Dictionary; -- Optimize the dictionary on Input_Texts, starting with Base and -- adding substrings from Pending_Words. Operates only on words -- at First and beyond. procedure Parallel_Evaluate_Dictionary (Job_Count : in Positive; |
︙ | ︙ | |||
560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 | Score : in out Ada.Streams.Stream_Element_Count; Counts : in out Dictionary_Counts; First : in Dictionary_Entry; Pending_Words : in out String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; Method : in Methods; Updated : out Boolean) is use type Ada.Streams.Stream_Element_Offset; New_Value : Ada.Strings.Unbounded.Unbounded_String; New_Position : String_Lists.Cursor; Worst_Index : constant Dictionary_Entry := Worst_Element (Dict.Element, Counts, Method, First, Last_Code (Dict.Element)); | > > > > | 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 | Score : in out Ada.Streams.Stream_Element_Count; Counts : in out Dictionary_Counts; First : in Dictionary_Entry; Pending_Words : in out String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; Method : in Methods; Min_Dict_Size : in Positive; Max_Dict_Size : in Positive; Updated : out Boolean) is pragma Unreferenced (Min_Dict_Size); pragma Unreferenced (Max_Dict_Size); use type Ada.Streams.Stream_Element_Offset; New_Value : Ada.Strings.Unbounded.Unbounded_String; New_Position : String_Lists.Cursor; Worst_Index : constant Dictionary_Entry := Worst_Element (Dict.Element, Counts, Method, First, Last_Code (Dict.Element)); |
︙ | ︙ | |||
628 629 630 631 632 633 634 | function Optimize_Dictionary (Base : in Dictionary; First : in Dictionary_Entry; Pending_Words : in String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; | | > > | 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 | function Optimize_Dictionary (Base : in Dictionary; First : in Dictionary_Entry; Pending_Words : in String_Lists.List; Input_Texts : in String_Lists.List; Job_Count : in Natural; Method : in Methods; Min_Dict_Size : in Positive; Max_Dict_Size : in Positive) return Dictionary is Holder : Holders.Holder := Holders.To_Holder (Base); Pending : String_Lists.List := Pending_Words; Score : Ada.Streams.Stream_Element_Count; Counts : Dictionary_Counts; Running : Boolean := True; |
︙ | ︙ | |||
650 651 652 653 654 655 656 657 658 659 660 661 662 663 | Score, Counts, First, Pending, Input_Texts, Job_Count, Method, Running); end loop; return Holder.Element; end Optimize_Dictionary; | > > | 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 | Score, Counts, First, Pending, Input_Texts, Job_Count, Method, Min_Dict_Size, Max_Dict_Size, Running); end loop; return Holder.Element; end Optimize_Dictionary; |
︙ | ︙ | |||
1095 1096 1097 1098 1099 1100 1101 | To_Dictionary (Input, Handler.Vlen_Verbatim), Data_List, Method); when Dict_Sources.Text_List => declare Needed : constant Integer | | | < | < > | 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 | To_Dictionary (Input, Handler.Vlen_Verbatim), Data_List, Method); when Dict_Sources.Text_List => declare Needed : constant Integer := Handler.Max_Dict_Size - Natural (Handler.Forced_Words.Length); Selected, Pending : String_Lists.List; First : Dictionary_Entry := Dictionary_Entry'First; begin if Needed <= 0 then for Word of reverse Handler.Forced_Words loop Selected.Prepend (Word); exit when Positive (Selected.Length) = Handler.Max_Dict_Size; end loop; return To_Dictionary (Selected, Handler.Vlen_Verbatim); end if; Simple_Dictionary_And_Pending (Make_Word_Counter (Handler, Input), Needed, Selected, Pending, |
︙ | ︙ | |||
1129 1130 1131 1132 1133 1134 1135 | return Optimize_Dictionary (To_Dictionary (Selected, Handler.Vlen_Verbatim), First, Pending, Input, Handler.Job_Count, | | > > | | | 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 | return Optimize_Dictionary (To_Dictionary (Selected, Handler.Vlen_Verbatim), First, Pending, Input, Handler.Job_Count, Method, Handler.Min_Dict_Size, Handler.Max_Dict_Size); end; when Dict_Sources.Unoptimized_Text_List => declare Needed : constant Integer := Handler.Max_Dict_Size - Natural (Handler.Forced_Words.Length); All_Words : String_Lists.List; begin if Needed > 0 then All_Words := Simple_Dictionary (Make_Word_Counter (Handler, Input), Needed, Method); for Word of reverse Handler.Forced_Words loop All_Words.Prepend (Word); end loop; else for Word of reverse Handler.Forced_Words loop All_Words.Prepend (Word); exit when Positive (All_Words.Length) >= Handler.Max_Dict_Size; end loop; end if; return To_Dictionary (All_Words, Handler.Vlen_Verbatim); end; end case; end To_Dictionary; |
︙ | ︙ | |||
1377 1378 1379 1380 1381 1382 1383 | when Options.Score_Method => Handler.Score_Method := Methods.Enum'Value (Argument); when Options.Max_Pending => Handler.Max_Pending := Ada.Containers.Count_Type'Value (Argument); when Options.Dict_Size => | | > | 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 | when Options.Score_Method => Handler.Score_Method := Methods.Enum'Value (Argument); when Options.Max_Pending => Handler.Max_Pending := Ada.Containers.Count_Type'Value (Argument); when Options.Dict_Size => Handler.Min_Dict_Size := Positive'Value (Argument); Handler.Max_Dict_Size := Positive'Value (Argument); when Options.Vlen_Verbatim => Handler.Vlen_Verbatim := True; when Options.No_Vlen_Verbatim => Handler.Vlen_Verbatim := False; |
︙ | ︙ | |||
1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 | Handler.Need_Dictionary := True; Handler.Forced_Words.Append (Argument); if Handler.Action in Actions.Nothing then Handler.Action := Actions.Adjust_Dictionary; end if; end if; end case; end Option; function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary) return Natools.Smaz_256.Dictionary is | > > > > > > | 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 | Handler.Need_Dictionary := True; Handler.Forced_Words.Append (Argument); if Handler.Action in Actions.Nothing then Handler.Action := Actions.Adjust_Dictionary; end if; end if; when Options.Max_Dict_Size => Handler.Max_Dict_Size := Positive'Value (Argument); when Options.Min_Dict_Size => Handler.Min_Dict_Size := Positive'Value (Argument); end case; end Option; function Activate_Dictionary (Dict : in Natools.Smaz_256.Dictionary) return Natools.Smaz_256.Dictionary is |
︙ | ︙ | |||
1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 | R.Add_Option ("no-stats", 'S', No_Argument, No_Stat_Output); R.Add_Option ("text-list", 't', No_Argument, Text_List_Input); R.Add_Option ("fast-text-list", 'T', No_Argument, Fast_Text_Input); R.Add_Option ("max-word-len", 'W', Required_Argument, Max_Word_Size); R.Add_Option ("s-expr", 'x', No_Argument, Sx_Output); R.Add_Option ("no-s-expr", 'X', No_Argument, No_Sx_Output); R.Add_Option ("force-word", Required_Argument, Force_Word); R.Add_Option ("no-vlen-verbatim", No_Argument, No_Vlen_Verbatim); R.Add_Option ("score-method", Required_Argument, Score_Method); R.Add_Option ("vlen-verbatim", No_Argument, Vlen_Verbatim); return R; end Getopt_Config; | > > | 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 | R.Add_Option ("no-stats", 'S', No_Argument, No_Stat_Output); R.Add_Option ("text-list", 't', No_Argument, Text_List_Input); R.Add_Option ("fast-text-list", 'T', No_Argument, Fast_Text_Input); R.Add_Option ("max-word-len", 'W', Required_Argument, Max_Word_Size); R.Add_Option ("s-expr", 'x', No_Argument, Sx_Output); R.Add_Option ("no-s-expr", 'X', No_Argument, No_Sx_Output); R.Add_Option ("force-word", Required_Argument, Force_Word); R.Add_Option ("max-dict-size", Required_Argument, Max_Dict_Size); R.Add_Option ("min-dict-size", Required_Argument, Min_Dict_Size); R.Add_Option ("no-vlen-verbatim", No_Argument, No_Vlen_Verbatim); R.Add_Option ("score-method", Required_Argument, Score_Method); R.Add_Option ("vlen-verbatim", No_Argument, Vlen_Verbatim); return R; end Getopt_Config; |
︙ | ︙ | |||
1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 | when Options.Force_Word => Put_Line (Output, " <word>"); Put_Line (Output, Indent & Indent & "Force <word> into the dictionary," & " replacing the worst entry"); Put_Line (Output, Indent & Indent & "Can be specified multiple times to force many words."); end case; end loop; end Print_Help; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; | > > > > > > > > > > | 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 | when Options.Force_Word => Put_Line (Output, " <word>"); Put_Line (Output, Indent & Indent & "Force <word> into the dictionary," & " replacing the worst entry"); Put_Line (Output, Indent & Indent & "Can be specified multiple times to force many words."); when Options.Max_Dict_Size => Put_Line (Output, " <count>"); Put_Line (Output, Indent & Indent & "Maximum number of words in the dictionary to build"); when Options.Min_Dict_Size => Put_Line (Output, " <count>"); Put_Line (Output, Indent & Indent & "Minimum number of words in the dictionary to build"); end case; end loop; end Print_Help; Opt_Config : constant Getopt.Configuration := Getopt_Config; Handler : Callback; |
︙ | ︙ |