Natools

Check-in [f651755fea]
Login
Overview
Comment:smaz-tools: new facility for faster hashing of dynamic dictionaries
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: f651755fea4f52039b03835e44edb649cab56180
User & Date: nat on 2016-10-15 18:50:00
Other Links: manifest | tags
Context
2016-10-16
17:21
tools/smaz.adb: use the new map-based dictionary hash in evaluation check-in: 562e1cf9fc user: nat tags: trunk
2016-10-15
18:50
smaz-tools: new facility for faster hashing of dynamic dictionaries check-in: f651755fea user: nat tags: trunk
2016-10-14
21:01
tools/smaz: use the new parallelization framework check-in: 1cdd0709b0 user: nat tags: trunk
Changes

Modified src/natools-smaz-tools.adb from [c548ec8fb3] to [537cf082aa].

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
47
48
49
50
51
52
53












54
55
56
57
58
59
60







-
-
-
-
-
-
-
-
-
-
-
-







   end Image;



   ----------------------
   -- Public Interface --
   ----------------------

   function Linear_Search (Value : String) return Natural is
      --  Zero-based position of the element currently examined.
      --  It is a Stream_Element, hence modular arithmetic on increment.
      Position : Ada.Streams.Stream_Element := 0;
   begin
      for Candidate of List_For_Linear_Search loop
         if Candidate = Value then
            --  First match wins
            return Natural (Position);
         end if;

         Position := Position + 1;
      end loop;

      --  No match: Position holds the number of elements visited
      return Natural (Position);
   end Linear_Search;


   procedure Print_Dictionary_In_Ada
     (Dict : in Dictionary;
      Hash_Image : in String := "TODO";
      Max_Width : in Positive := 70;
      First_Prefix : in String := "     := (";
      Prefix : in String := "         ";
344
345
346
347
348
349
350







































351
352
353
354
355
356
357
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







            Max_Word_Length => Max_Word_Length,
            Offsets => Offsets,
            Values => Values,
            Hash => Dummy_Hash'Access);
      end;
   end To_Dictionary;



   ---------------------------------
   -- Dynamic Dictionary Searches --
   ---------------------------------

   function Linear_Search (Value : String) return Natural is
      --  Zero-based position of the element currently examined.
      --  It is a Stream_Element, hence modular arithmetic on increment.
      Position : Ada.Streams.Stream_Element := 0;
   begin
      for Candidate of List_For_Linear_Search loop
         if Candidate = Value then
            --  First match wins
            return Natural (Position);
         end if;

         Position := Position + 1;
      end loop;

      --  No match: Position holds the number of elements visited
      return Natural (Position);
   end Linear_Search;


   function Map_Search (Value : String) return Natural is
      --  Look Value up in the map most recently filled by
      --  Set_Dictionary_For_Map_Search.
      Position : constant Dictionary_Maps.Cursor
        := Search_Map.Find (Value);
   begin
      if not Dictionary_Maps.Has_Element (Position) then
         --  Unknown word: fall back on the largest Stream_Element.
         --  NOTE(review): this value can also be a stored index, so
         --  callers presumably check the entry they get -- confirm.
         return Natural (Ada.Streams.Stream_Element'Last);
      end if;

      return Natural (Dictionary_Maps.Element (Position));
   end Map_Search;


   procedure Set_Dictionary_For_Map_Search (Dict : in Dictionary) is
      --  Replace the contents of Search_Map with one association per
      --  index of Dict.Offsets, from the entry text to its index.
   begin
      Search_Map.Clear;

      for Index in Dict.Offsets'Range loop
         declare
            Word : constant String := Dict_Entry (Dict, Index);
         begin
            --  Insert propagates Constraint_Error on a key already
            --  present, so entries are expected to be distinct.
            Search_Map.Insert (Word, Index);
         end;
      end loop;
   end Set_Dictionary_For_Map_Search;



   -------------------
   -- Word Counting --
   -------------------

   procedure Add_Substrings

Modified src/natools-smaz-tools.ads from [33e77934d4] to [09ce0f055f].

65
66
67
68
69
70
71





72
73
74
75
76
77
78
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83







+
+
+
+
+







      --  in Natools.Smaz.Original.

   List_For_Linear_Search : String_Lists.List;
   function Linear_Search (Value : String) return Natural;
      --  Dynamic search function that can be used with Dictionary.Hash.
      --  It walks List_For_Linear_Search from its first element and
      --  returns the zero-based position of the first element equal to
      --  Value, or the number of elements visited when there is none.
      --  The position is counted in an Ada.Streams.Stream_Element, so it
      --  wraps around past Stream_Element'Last.
      --  Inefficient (one string comparison per element visited), but
      --  the only preparation needed is filling the list.

   procedure Set_Dictionary_For_Map_Search (Dict : in Dictionary);
   function Map_Search (Value : String) return Natural;
      --  Search function backed by a standard ordered map (logarithmic
      --  lookup), that can be used with Dictionary.Hash.
      --  Set_Dictionary_For_Map_Search replaces the contents of the map
      --  with the entries of Dict, and Map_Search then returns the index
      --  recorded for Value, or Ada.Streams.Stream_Element'Last when
      --  Value is not in the map.

   type String_Count is range 0 .. 2 ** 31 - 1;
      --  Type for a number of substring occurrences

   type Word_Counter is private;
      --  Accumulate frequency/occurrence counts for a set of strings

   procedure Add_Word
144
145
146
147
148
149
150
151





152
149
150
151
152
153
154
155
156
157
158
159
160
161
162








+
+
+
+
+

         or else (Left.Score = Right.Score and then Left.Word < Right.Word));

   function To_Scored_Word (Cursor : in Word_Maps.Cursor)
     return Scored_Word;

   package Scored_Word_Sets is new Ada.Containers.Indefinite_Ordered_Sets
     (Scored_Word);

   package Dictionary_Maps is new Ada.Containers.Indefinite_Ordered_Maps
     (String, Ada.Streams.Stream_Element);
      --  Ordered map from the text of a dictionary entry to its index

   Search_Map : Dictionary_Maps.Map;
      --  Map queried by Map_Search, cleared and refilled by
      --  Set_Dictionary_For_Map_Search

end Natools.Smaz.Tools;