bm32.pl
1 #!/usr/local/bin/perl
2 # from
3 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1062.html
4
5 use strict;
6 use warnings;
7 use Benchmark qw( :all);
8
9 # use Text::Table;
10 my $DEBUG = $ENV{DEBUG};
11
12 my %Funcs = (
13 direct => 'direct( int rand 2**32)',
14 decr => 'decr( int rand 2**32)',
15 ilya => 'ilya( int rand 2**32)',
16 table => 'table( int rand 2**32)',
17 unroll => 'unroll( int rand 2**32)',
18 unrmsk => 'unrmsk( int rand 2**32)',
19 tabunr => 'tabunr( int rand 2**32)',
20 katz => 'katz( int rand 2**32)',
21 packtut => 'packtut( int rand 2**32)',
22 ls1b => 'ls1b( int rand 2**32)',
23 parmod => 'parmod( int rand 2**32)',
24 rec_n => 'rec_n( int rand 2**32)',
25 sprtr => 'sprtr( int rand 2**32)',
26 hakmem_169 => 'hakmem_169( int rand 2**32)',
27 ones32 => 'ones32( int rand 2**32)',
28
29 # hakmem3 => 'hakmem3( int rand 2**32)', ## 64
30 # hakmem4 => 'hakmem4( int rand 2**32)', ## 64
31 # via_bitvec => 'via_bitvec( int rand 2**32)',
32 );
33
34 my @Funcs =
35 sort keys %Funcs;
36
37 # qw/ilya unroll unrmsk/
38 # qw/direct hakmem3 hakmem4/
39 # qw/direct packtut katz /
40 # qw/direct ilya unroll /
41 # qw/parmod /
42 # qw/tabunr /
43 #
44
45 if ( $ENV{bench} ) {
46 cmpthese( -10, { ( map { ( $_ => $Funcs{$_} ) } @Funcs ) } );
47 exit;
48 }
49
50 use Test::More qw/no_plan/;
51 my %Tests = (
52 1 => 1,
53 7 => 3,
54 ( ( 1 << 28 ) + ( 1 << 4 ) ) => 2,
55 ( ( 1 << 44 ) + ( 1 << 28 ) + ( 1 << 4 ) ) => 3,
56
57 # from http://wiki.cs.pdx.edu/gitweb?p=popcount.git;a=tree
58 0x00000080, 1,
59 0x000000f0, 4,
60 0x00008000, 1,
61 0x0000f000, 4,
62 0x00800000, 1,
63 0x00f00000, 4,
64 0x80000000, 1,
65 0xf0000000, 4,
66 0xff000000, 8,
67 0x000000ff, 8,
68 0x01fe0000, 8,
69 0xea9031e8, 14,
70 0x2e8eb2b2, 16,
71 0x9b8be5b7, 20,
72 ~0, 32, # if 32 bit
73
74 # ~0, 64, # if 64 bit
75 0, 0,
76
77 #
78 0xf000000f => 8,
79
80 );
81
82 while ( my ( $i, $ci ) = each %Tests ) {
83
84 # is(direct($i),$ci,qq{Test-direct-$i-$ci})
85 for my $f (@Funcs) {
86 no strict "refs";
87 is( &$f($i), $ci, qq{Test-$f-$i-$ci} . sprintf( qq{=x%x}, $i ) );
88 }
89 }
90
91 if (1) {
92 for my $n ( 1 .. 100 ) {
93 my $cn = direct($n); ## assumed to work
94 my $i = int rand 2**32;
95 my $ci = direct($i); ## assumed to work
96 for my $f (@Funcs) {
97 no strict "refs";
98 is( &$f($n), $cn, qq{$f-n-$n} . sprintf( qq{=x%x}, $n ) );
99 is( &$f($i), $ci, qq{$f-ri-$i} . sprintf( qq{=x%x}, $i ) );
100 is( &$f( 1 << $n ),
101 1, qq{$f-2**n-$n} . sprintf( qq{=x%x}, 1 << $n ) )
102 if $n < 64;
103
104 }
105 }
106 }
107
108 ###################################################################
109
110 # bit counting
111 # see also
112 # http://infolab.stanford.edu/~manku/bitcount/bitcount.html
113 # http://www.setbb.com/phpbb/viewtopic.php?mforum=sudoku&p=7629
114 # http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
115
116 sub hakmem_169 {
117
118 # octal 11111111111 = decimal 1227133513 = hex 0x49249249 = (as time_t) Wed Nov 19 22:25:13 UTC 2008
119 # This number is sacred to HAKMEM 169, bitcount.
120 # http://blogs.msdn.com/jeuge/archive/2005/06/08/HAKMEM-Bit-Count.aspx
121 # http://www.inwap.com/pdp10/hbaker/hakmem/hakmem.html
122 # http://www.inwap.com/pdp10/hbaker/hakmem/hacks.html#item169
123 # For 64-bit numbers, we would have to add triples of octal digits and use modulus 1023
124 ### WRONG, it's 511 !
125 ## does that make it loglog N ?
126 # printf("\n") if $DEBUG;
127 my $u = shift;
128 my $uCount =
129 $u - ( ( $u >> 1 ) & 033333333333 ) - ( ( $u >> 2 ) & 011111111111 );
130 return ( ( $uCount + ( $uCount >> 3 ) ) & 030707070707 ) % 63;
131
132 # note that can not replace % with & trivially ... % is not always cheap
133
134 }
135
136 sub rec_n {
137
138 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0642.html
139 # Recursive approach, terse and elegant IMHO.
140 # An iterative one is OK as well.
141 my $n = shift;
142 return 0 unless $n;
143 return ( $n & 1 ) + rec_n( $n >> 1 );
144 }
145
146 sub sprtr {
147
148 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0785.html
149 qq.@{[map sprintf('%b',$_),@_]}. =~ tr/1//;
150 }
151
152 sub direct {
153 my $x = shift;
154 my $count = 0;
155 while ($x) {
156 $count += $x & 1;
157 $x >>= 1;
158 }
159 $count;
160 }
161
162 sub decr {
163 my $x = shift;
164 my $count = 0;
165 while ($x) {
166 $count++;
167 $x &= $x - 1;
168 }
169 $count;
170 }
171
172 sub ilya {
173
174 # claims log N time
175 # classic parallel bit counting
176 my $x = shift;
177 my $shift = 1;
178 for my $mask ( 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff )
179 {
180 $x = ( $x & $mask ) + ( ( $x >> $shift ) & $mask );
181 $shift *= 2;
182 }
183 $x;
184 }
185
186 sub unroll # ilya unrolled
187 {
188
189 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1067.html
190 # aka #4 on http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
191 my $n = shift;
192
193 # can use same mask twice if shift $n then mask
194 # n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101) ;
195 $n = ( $n & 0x55555555 ) + ( ( $n & 0xaaaaaaaa ) >> 1 );
196 $n = ( $n & 0x33333333 ) + ( ( $n & 0xcccccccc ) >> 2 );
197 $n = ( $n & 0x0f0f0f0f ) + ( ( $n & 0xf0f0f0f0 ) >> 4 );
198 $n = ( $n & 0x00ff00ff ) + ( ( $n & 0xff00ff00 ) >> 8 );
199 $n = ( $n & 0x0000ffff ) + ( ( $n & 0xffff0000 ) >> 16 );
200 ### Last couple can be combined as
201 ## += >> without the masks and a final mask taken since only 7 bits of 64 are needed
202
203 $n;
204 }
205
206 sub unrmsk {
207
208 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1067.html
209 # aka #4 on http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
210 # (for 64bit) as modified per the 32bit at
211 # http://aggregate.ee.engr.uky.edu/MAGIC/#Population%20Count%20(Ones%20Count)
212 my $n = shift;
213
214 # can use same mask twice if shift $n then mask
215 # n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101) ;
216 $n = ( $n & 0x55555555 ) + ( ( $n & 0xaaaaaaaa ) >> 1 ); # 1+1=2 in 2
217 $n = ( $n & 0x33333333 ) + ( ( $n & 0xcccccccc ) >> 2 ); # max 4:3 of 4
218 $n = ( $n & 0x0f0f0f0f ) + ( ( $n & 0xf0f0f0f0 ) >> 4 ); # max 8:4 of 8
219 ### f's could be 7's since max value 4, but could carry to 0x8
220 $n += ( $n >> 8 ); # max 16:5of16 -- no mask needed
221 $n += ( $n >> 16 ); # max 32:6:of32
222 ## $n += ($n >> 32);# max 64:7of64 ## 64
223 # this optimization saves ~ 8% off the literal unrolling
224 ## += >> without the masks and a final mask taken
225 ## since only 7 bits of 64 are needed
226 ## note that this could be accumulated to result of a many-word scan here
227 ## just have to mask to the point of maxresult.
228
229 $n & 0x7f;
230 }
231
232 sub parmod # ilya unrolled plus modulus
233 {
234
235 # #5 on http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
236 my $n = shift;
237
238 # can use same mask twice if shift $n then mask
239 # n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101) ;
240 $n = ( $n & 0x55555555 ) + ( ( $n & 0xaaaaaaaa ) >> 1 );
241 $n = ( $n & 0x33333333 ) + ( ( $n & 0xcccccccc ) >> 2 );
242 $n = ( $n & 0x0f0f0f0f ) + ( ( $n & 0xf0f0f0f0 ) >> 4 );
243
244 # # $n = ($n & 0x00ff00ff) + (($n & 0xff00ff00) >> 8);
245 # # $n = ($n & 0x0000ffff) + (($n & 0xffff0000) >> 16);
246 return $n % 255;
247 ### casting-out-9's trick to sum digits base 256
248 }
249
250 # Population Count (Ones Count)
251
252 # http://aggregate.ee.engr.uky.edu/MAGIC/#Next%20Largest%20Power%20of%202
253 # The population count of a binary integer value x is the number
254 # of one bits in the value. Although many machines have single
255 # instructions for this, the single instructions are usually
256 # microcoded loops that test a bit per cycle; a log-time
257 # algorithm coded in C is often faster. The following code uses a
258 # variable-precision SWAR algorithm to perform a tree reduction
259 # adding the bits in a 32-bit value:
260 #
261 sub ones32 #(register unsigned int x)
262 {
263
264 # 32-bit recursive reduction using SWAR...
265 # but first step is mapping 2-bit values
266 # into sum of 2 1-bit values in sneaky way
267
268 my $x = shift;
269 $x -= ( ( $x >> 1 ) & 0x55555555 );
270 $x = ( ( ( $x >> 2 ) & 0x33333333 ) + ( $x & 0x33333333 ) );
271 $x = ( ( ( $x >> 4 ) + $x ) & 0x0f0f0f0f );
272 $x += ( $x >> 8 );
273 $x += ( $x >> 16 );
274 return ( $x & 0x0000003f );
275 }
276
277 sub katz {
278
279 # from ftp.port80.se/pub/CPAN/authors/id/Y/YA/YARBER/kat.pl-1.03
280 my $list = unpack( 'B*', pack( 'I*', shift ) ); ## 32
281 # my $list = unpack('B*', pack('Q*',shift) ); ## 64
282 $list =~ tr/0//d;
283
284 # my $len = length($data);
285 my $total = length($list);
286
287 #my $avg = $len * 4;
288 #my $dif = $total - $avg;
289 #print("$len bytes, $total 1-bits ($avg + $dif)\n\n");
290 return $total;
291 }
292
293 sub packtut {
294
295 # http://perldoc.perl.org/perlpacktut.html#Doing-Sums
296 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0656.html
297 my $bitcount = unpack( '%32b*', pack( 'I*', shift ) ); ## 32
298
299 # my $bitcount = unpack( '%64b*', pack('Q*',shift )); ## 64
300 # ^^ ^
301
302 }
303
304 sub ls1b {
305
306 # size2 http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0656.html
307 # Wegner-Kernighan
308 my $n = shift;
309 my $size = $n ? 1 : 0;
310 $size++ while $n ^= ( ( $n - 1 ) ^ $n ) & $n;
311 $size;
312 }
313
314 ###############
315 # trade space for time ...
316 # and still is proprotional to wordsize N linear
317 #
318 my @table;
319
320 BEGIN {
321 $table[$_] = direct($_)
322 for 0 .. 255; # not the most efficient pre-computation either ...
323 }
324
325 sub table {
326 my $x = shift;
327 my $count = 0;
328 while ($x) {
329 $count += $table[ $x & 255 ];
330 $x >>= 8;
331 }
332 $count;
333 }
334
335 sub tabunr {
336
337 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1685.html
338 my $x = shift;
339 $table[ $x & 0xff ] +
340 $table[ ( $x >> 8 ) & 0xff ] +
341 $table[ ( $x >> 16 ) & 0xff ] +
342 $table[ ( $x >> 24 ) & 0xff ]
343
344 # + $table [ ($x >> 32) & 0xff ] + $table [ ($x >> 40) & 0xff ] ## 64
345 # + $table [ ($x >> 48) & 0xff ] + $table [ ($x >> 56) & 0xff ] ## 64
346 ;
347 }
348
349 ################################
350 ## module bitvec
351 __END__
352 use Bit::Vector;
353
354 my $vec = Bit::Vector->new(Bit::Vector->Word_Bits());
355
356
357
358 sub via_bitvec {
359 $vec->Word_Store(0,$_[0]);
360 $vec->Norm();
361 }
362
363
364