bm64.pl
1 #!/usr/local/bin/perl
2 # from
3 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1062.html
4
5 use strict;
6 use warnings;
7 use Benchmark qw( :all);
8
9 # use Text::Table;
10 my $DEBUG = $ENV{DEBUG};
11
12 my %Funcs = (
13 direct => 'direct( int rand 2**64)',
14 decr => 'decr( int rand 2**64)',
15 ilya => 'ilya( int rand 2**64)',
16 table => 'table( int rand 2**64)',
17 unroll => 'unroll( int rand 2**64)',
18 unrmsk => 'unrmsk( int rand 2**64)',
19 tabunr => 'tabunr( int rand 2**64)',
20 katz => 'katz( int rand 2**64)',
21 packtut => 'packtut( int rand 2**64)',
22 ls1b => 'ls1b( int rand 2**64)',
23 parmod => 'parmod( int rand 2**64)',
24 rec_n => 'rec_n( int rand 2**64)',
25 sprtr => 'sprtr( int rand 2**64)',
26
27 # hakmem_169 => 'hakmem_169( int rand 2**64)',
28 hakmem3 => 'hakmem3( int rand 2**64)',
29 hakmem4 => 'hakmem4( int rand 2**64)',
30
31 # via_bitvec => 'via_bitvec( int rand 2**64)',
32 );
33
34 my @Funcs =
35 sort keys %Funcs;
36
37 # qw/table tabunr/
38 # qw/ilya unroll unrmsk/
39 # qw/direct hakmem3 hakmem4/
40 # qw/direct packtut katz /
41 # qw/direct ilya unroll /
42 # qw/parmod /
43 # qw/tabunr /
44 #
45
46 if ( $ENV{bench} ) {
47 cmpthese( -10, { ( map { ( $_ => $Funcs{$_} ) } @Funcs ) } );
48 exit;
49 }
50
51 use Test::More qw/no_plan/;
52 my %Tests = (
53 1 => 1,
54 7 => 3,
55 ( ( 1 << 28 ) + ( 1 << 4 ) ) => 2,
56 ( ( 1 << 44 ) + ( 1 << 28 ) + ( 1 << 4 ) ) => 3,
57
58 # from http://wiki.cs.pdx.edu/gitweb?p=popcount.git;a=tree
59 0x00000080, 1,
60 0x000000f0, 4,
61 0x00008000, 1,
62 0x0000f000, 4,
63 0x00800000, 1,
64 0x00f00000, 4,
65 0x80000000, 1,
66 0xf0000000, 4,
67 0xff000000, 8,
68 0x000000ff, 8,
69 0x01fe0000, 8,
70 0xea9031e8, 14,
71 0x2e8eb2b2, 16,
72 0x9b8be5b7, 20,
73
74 # ~0, 32, # if 32 bit
75 ~0, 64, # if 64 bit
76 0, 0,
77
78 #
79 0xf000000f => 8,
80
81 );
82
83 while ( my ( $i, $ci ) = each %Tests ) {
84
85 # is(direct($i),$ci,qq{Test-direct-$i-$ci})
86 for my $f (@Funcs) {
87 no strict "refs";
88 is( &$f($i), $ci, qq{Test-$f-$i-$ci} . sprintf( qq{=x%x}, $i ) );
89 }
90 }
91
92 if (1) {
93 for my $n ( 1 .. 100 ) {
94 my $cn = direct($n); ## assumed to work
95 my $i = int rand 2**32;
96 my $ci = direct($i); ## assumed to work
97 for my $f (@Funcs) {
98 no strict "refs";
99 is( &$f($n), $cn, qq{$f-n-$n} . sprintf( qq{=x%x}, $n ) );
100 is( &$f($i), $ci, qq{$f-ri-$i} . sprintf( qq{=x%x}, $i ) );
101 is( &$f( 1 << $n ),
102 1, qq{$f-2**n-$n} . sprintf( qq{=x%x}, 1 << $n ) )
103 if $n < 64;
104
105 }
106 }
107 }
108
109 ###################################################################
110
111 # bit counting
112 # see also
113 # http://infolab.stanford.edu/~manku/bitcount/bitcount.html
114 # http://www.setbb.com/phpbb/viewtopic.php?mforum=sudoku&p=7629
115 # http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
116
117 sub hakmem3 {
118
119 # octal 11111111111 = decimal 1227133513 = hex 0x49249249 = (as time_t) Wed Nov 19 22:25:13 UTC 2008
120 # This number is sacred to HAKMEM 169, bitcount.
121 # http://blogs.msdn.com/jeuge/archive/2005/06/08/HAKMEM-Bit-Count.aspx
122 # http://www.inwap.com/pdp10/hbaker/hakmem/hakmem.html
123 # http://www.inwap.com/pdp10/hbaker/hakmem/hacks.html#item169
124 # For 64-bit numbers, we would have to add triples of octal digits and use modulus 1023
125 ### WRONG, it's 511 !
126 ## does that make it loglog N ?
127 # printf("\n") if $DEBUG;
128 my $u = shift;
129
130 # print sprintf(qq{0%022o\n},$u) if $DEBUG;
131 #### a+b+c in each octet via cute hack
132 my $uCount =
133 $u -
134 ( ( $u >> 1 ) & 0333333333333333333333 ) -
135 ( ( $u >> 2 ) & 0111111111111111111111 );
136
137 # print sprintf(qq{0%022o\n},$uCount) if $DEBUG;
138
139 $uCount = (
140 ( ($uCount) & 01007007007007007007007 ) ## could be 3's
141 + ( ( $uCount >> 3 ) & 00007007007007007007007 ) +
142 ( ( $uCount >> 6 ) & 00007007007007007007007 )
143 ) & 01017017017017017017017; # max 9=3+3+3
144
145 # print sprintf(qq{0%022o\n},$uCount) if $DEBUG;
146 my $mod = 511;
147
148 # print sprintf(qq{0%022o=%d (mod $mod)\n},$uCount % $mod ,$uCount % $mod ) if $DEBUG;
149 return $uCount % $mod;
150
151 # note that can not replace % with & trivially ... % is not always cheap
152
153 }
154
155 sub hakmem4 {
156
157 # octal 11111111111 = decimal 1227133513 = hex 0x49249249 = (as time_t) Wed Nov 19 22:25:13 UTC 2008
158 # This number is sacred to HAKMEM 169, bitcount.
159 # http://blogs.msdn.com/jeuge/archive/2005/06/08/HAKMEM-Bit-Count.aspx
160 # http://www.inwap.com/pdp10/hbaker/hakmem/hakmem.html
161 # http://www.inwap.com/pdp10/hbaker/hakmem/hacks.html#item169
162 # they said
163 # For 64-bit numbers, we would have to add triples of octal digits and use modulus 1023
164 ### That's wrong btw ...
165 # but this does quads of octals modulus 4095 instead ...
166 ## does that make it loglog N ?
167 # my $DEBUG= 0 || $ENV{DEBUG};
168 my $u = shift;
169
170 # print sprintf(qq{0%022o\n},$u) if $DEBUG;
171 my $uCount =
172 $u -
173 ( ( $u >> 1 ) & 0333333333333333333333 ) -
174 ( ( $u >> 2 ) & 0111111111111111111111 );
175
176 # print sprintf(qq{0%022o\n},$uCount) if $DEBUG;
177 $uCount = ( ( $uCount + ( $uCount >> 3 ) ) & 0707070707070707070707 );
178
179 # print sprintf(qq{0%022o\n},$uCount) if $DEBUG;
180 $uCount = ( ( $uCount + ( $uCount >> 6 ) ) & 0700770077007700770077 );
181
182 # return $uCount % 0x3f; # 63
183 # print sprintf(qq{0%022o\n},$uCount) if $DEBUG;
184 # print sprintf(qq{0%022o (mod 4095)\n},$uCount % 4095 ) if $DEBUG;
185 return $uCount % 4095;
186
187 # note that can not replace % with & trivially ... % is not always cheap
188 # why modulus? Casting out nines sum of digits.
189 #### modulus base N-1 = sum digits base N
190 }
191
192 sub rec_n {
193
194 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0642.html
195 # Recursive approach, terse and elegant IMHO.
196 # An iterative one is OK as well.
197 my $n = shift;
198 return 0 unless $n;
199 return ( $n & 1 ) + rec_n( $n >> 1 );
200 }
201
202 sub sprtr {
203
204 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0785.html
205 qq.@{[map sprintf('%b',$_),@_]}. =~ tr/1//;
206 }
207
208 sub direct {
209 my $x = shift;
210 my $count = 0;
211 while ($x) {
212 $count += $x & 1;
213 $x >>= 1;
214 }
215 $count;
216 }
217
218 sub decr {
219 my $x = shift;
220 my $count = 0;
221 while ($x) {
222 $count++;
223 $x &= $x - 1;
224 }
225 $count;
226 }
227
228 sub ilya {
229
230 # claims log N time
231 # classic parallel bit counting
232 my $x = shift;
233 my $shift = 1;
234
235 # for my $mask (0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff) {
236 for my $mask (
237 0x5555555555555555,
238 0x3333333333333333,
239 0x0f0f0f0f0f0f0f0f,
240 0x00ff00ff00ff00ff,
241 0x0000ffff0000ffff,
242 0x00000000ffffffff, ### ADDED for 64
243 )
244 {
245 $x = ( $x & $mask ) + ( ( $x >> $shift ) & $mask );
246 $shift *= 2;
247 }
248 $x;
249 }
250
251 sub unroll # ilya unrolled
252 {
253
254 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1067.html
255 # aka #4 on http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
256 my $n = shift;
257
258 # can use same mask twice if shift $n then mask
259 # n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101) ;
260 $n = ( $n & 0x5555555555555555 ) + ( ( $n & 0xaaaaaaaaaaaaaaaa ) >> 1 );
261 $n = ( $n & 0x3333333333333333 ) + ( ( $n & 0xcccccccccccccccc ) >> 2 );
262 $n = ( $n & 0x0f0f0f0f0f0f0f0f ) + ( ( $n & 0xf0f0f0f0f0f0f0f0 ) >> 4 );
263 $n = ( $n & 0x00ff00ff00ff00ff ) + ( ( $n & 0xff00ff00ff00ff00 ) >> 8 );
264 $n = ( $n & 0x0000ffff0000ffff ) + ( ( $n & 0xffff0000ffff0000 ) >> 16 );
265 $n = ( $n & 0x00000000ffffffff ) + ( ( $n & 0xffffffff00000000 ) >> 32 );
266 ### Last couple can be combined as
267 ## += >> without the masks and a final mask taken since only 7 bits of 64 are needed
268
269 $n;
270 }
271
272 sub unrmsk {
273
274 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1067.html
275 # aka #4 on http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
276 # (for 64bit) as modified per the 32bit at
277 # http://aggregate.ee.engr.uky.edu/MAGIC/#Population%20Count%20(Ones%20Count)
278 my $n = shift;
279
280 # can use same mask twice if shift $n then mask
281 # n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101) ;
282 $n =
283 ( $n & 0x5555555555555555 ) +
284 ( ( $n & 0xaaaaaaaaaaaaaaaa ) >> 1 ); # 1+1=2 in 2
285 $n =
286 ( $n & 0x3333333333333333 ) +
287 ( ( $n & 0xcccccccccccccccc ) >> 2 ); # max 4:3 of 4
288 $n =
289 ( $n & 0x0f0f0f0f0f0f0f0f ) +
290 ( ( $n & 0xf0f0f0f0f0f0f0f0 ) >> 4 ); # max 8:4 of 8
291 ### f's could be 7's since max value 4, but could carry to 0x8
292 $n += ( $n >> 8 ); # max 16:5of16 -- no mask needed
293 $n += ( $n >> 16 ); # max 32:6:of32
294 $n += ( $n >> 32 ); # max 64:7of64
295 # this optimization saves ~ 8% off the literal unrolling
296 ## += >> without the masks and a final mask taken
297 ## since only 7 bits of 64 are needed
298 ## note that this could be accumulated to result of a many-word scan here
299 ## just have to mask to the point of maxresult.
300
301 $n & 0x7f;
302 }
303
304 sub parmod # ilya unrolled plus modulus
305 {
306
307 # #5 on http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
308 my $n = shift;
309
310 # can use same mask twice if shift $n then mask
311 # n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101) ;
312 $n = ( $n & 0x5555555555555555 ) + ( ( $n & 0xaaaaaaaaaaaaaaaa ) >> 1 );
313 $n = ( $n & 0x3333333333333333 ) + ( ( $n & 0xcccccccccccccccc ) >> 2 );
314 $n = ( $n & 0x0f0f0f0f0f0f0f0f ) + ( ( $n & 0xf0f0f0f0f0f0f0f0 ) >> 4 );
315
316 # $n = ($n & 0x00ff00ff) + (($n & 0xff00ff00) >> 8);
317 # $n = ($n & 0x0000ffff) + (($n & 0xffff0000) >> 16);
318 return $n % 255;
319 ### same casting-out-9's trick to sum digits base 256
320 }
321
322 sub katz {
323
324 # from ftp.port80.se/pub/CPAN/authors/id/Y/YA/YARBER/kat.pl-1.03
325 #my $list = unpack('B*', pack('I*',shift) );
326 my $list = unpack( 'B*', pack( 'Q*', shift ) );
327 $list =~ tr/0//d;
328
329 # my $len = length($data);
330 my $total = length($list);
331
332 #my $avg = $len * 4;
333 #my $dif = $total - $avg;
334 #print("$len bytes, $total 1-bits ($avg + $dif)\n\n");
335 return $total;
336 }
337
338 sub packtut {
339
340 # http://perldoc.perl.org/perlpacktut.html#Doing-Sums
341 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0656.html
342 # my $bitcount = unpack( '%32b*', pack('I*',shift ));
343 my $bitcount = unpack( '%64b*', pack( 'Q*', shift ) );
344
345 # ^^ ^
346
347 }
348
349 sub ls1b {
350
351 # size2 http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0656.html
352 # Wegner-Kernighan
353 my $n = shift;
354 my $size = $n ? 1 : 0;
355 $size++ while $n ^= ( ( $n - 1 ) ^ $n ) & $n;
356 $size;
357 }
358
359 ###############
360 # trade space for time ...
361 # and still is proprotional to wordsize N linear
362 #
363 my @table;
364
365 BEGIN {
366 $table[$_] = direct($_)
367 for 0 .. 255; # not the most efficient pre-computation either ...
368 }
369
370 sub table {
371 my $x = shift;
372 my $count = 0;
373 while ($x) {
374 $count += $table[ $x & 255 ];
375 $x >>= 8;
376 }
377 $count;
378 }
379
380 sub tabunr {
381
382 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1685.html
383 my $x = shift;
384 $table[ $x & 0xff ] +
385 $table[ ( $x >> 8 ) & 0xff ] +
386 $table[ ( $x >> 16 ) & 0xff ] +
387 $table[ ( $x >> 24 ) & 0xff ] +
388 $table[ ( $x >> 32 ) & 0xff ] +
389 $table[ ( $x >> 40 ) & 0xff ] +
390 $table[ ( $x >> 48 ) & 0xff ] +
391 $table[ ( $x >> 56 ) & 0xff ];
392 }
393
394 ################################
395 ## module bitvec
396 __END__
397 use Bit::Vector;
398
399 my $vec = Bit::Vector->new(Bit::Vector->Word_Bits());
400
401
402
403 sub via_bitvec {
404 $vec->Word_Store(0,$_[0]);
405 $vec->Norm();
406 }
407
408
409