bm32.pl


   1 #!/usr/local/bin/perl
   2 # from
   3 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1062.html
   4 
   5 use strict;
   6 use warnings;
   7 use Benchmark qw( :all);
   8 
   9 # use Text::Table;
  10 my $DEBUG = $ENV{DEBUG};
  11 
  12 my %Funcs = (
  13     direct     => 'direct( int rand 2**32)',
  14     decr       => 'decr( int rand 2**32)',
  15     ilya       => 'ilya( int rand 2**32)',
  16     table      => 'table( int rand 2**32)',
  17     unroll     => 'unroll( int rand 2**32)',
  18     unrmsk     => 'unrmsk( int rand 2**32)',
  19     tabunr     => 'tabunr( int rand 2**32)',
  20     katz       => 'katz( int rand 2**32)',
  21     packtut    => 'packtut( int rand 2**32)',
  22     ls1b       => 'ls1b( int rand 2**32)',
  23     parmod     => 'parmod( int rand 2**32)',
  24     rec_n      => 'rec_n( int rand 2**32)',
  25     sprtr      => 'sprtr( int rand 2**32)',
  26     hakmem_169 => 'hakmem_169( int rand 2**32)',
  27     ones32     => 'ones32( int rand 2**32)',
  28 
  29     # hakmem3 => 'hakmem3( int rand 2**32)', ## 64
  30     # hakmem4 => 'hakmem4( int rand 2**32)', ## 64
  31     #    via_bitvec => 'via_bitvec( int rand 2**32)',
  32 );
  33 
  34 my @Funcs =
  35   sort keys %Funcs;
  36 
  37 #	qw/ilya unroll unrmsk/
  38 #	qw/direct hakmem3 hakmem4/
  39 #	qw/direct packtut katz /
  40 #	qw/direct ilya unroll /
  41 #	qw/parmod /
  42 #	qw/tabunr /
  43 #
  44 
  45 if ( $ENV{bench} ) {
  46     cmpthese( -10, { ( map { ( $_ => $Funcs{$_} ) } @Funcs ) } );
  47     exit;
  48 }
  49 
  50 use Test::More qw/no_plan/;
  51 my %Tests = (
  52     1 => 1,
  53     7 => 3,
  54     ( ( 1 << 28 ) + ( 1 << 4 ) ) => 2,
  55     ( ( 1 << 44 ) + ( 1 << 28 ) + ( 1 << 4 ) ) => 3,
  56 
  57     # from  http://wiki.cs.pdx.edu/gitweb?p=popcount.git;a=tree
  58     0x00000080, 1,
  59     0x000000f0, 4,
  60     0x00008000, 1,
  61     0x0000f000, 4,
  62     0x00800000, 1,
  63     0x00f00000, 4,
  64     0x80000000, 1,
  65     0xf0000000, 4,
  66     0xff000000, 8,
  67     0x000000ff, 8,
  68     0x01fe0000, 8,
  69     0xea9031e8, 14,
  70     0x2e8eb2b2, 16,
  71     0x9b8be5b7, 20,
  72     ~0,         32,    # if 32 bit
  73 
  74     #   ~0, 64, # if 64 bit
  75     0, 0,
  76 
  77     #
  78     0xf000000f => 8,
  79 
  80 );
  81 
  82 while ( my ( $i, $ci ) = each %Tests ) {
  83 
  84     # is(direct($i),$ci,qq{Test-direct-$i-$ci})
  85     for my $f (@Funcs) {
  86         no strict "refs";
  87         is( &$f($i), $ci, qq{Test-$f-$i-$ci} . sprintf( qq{=x%x}, $i ) );
  88     }
  89 }
  90 
  91 if (1) {
  92     for my $n ( 1 .. 100 ) {
  93         my $cn = direct($n);       ## assumed to work
  94         my $i  = int rand 2**32;
  95         my $ci = direct($i);       ## assumed to work
  96         for my $f (@Funcs) {
  97             no strict "refs";
  98             is( &$f($n), $cn, qq{$f-n-$n} . sprintf( qq{=x%x},  $n ) );
  99             is( &$f($i), $ci, qq{$f-ri-$i} . sprintf( qq{=x%x}, $i ) );
 100             is( &$f( 1 << $n ),
 101                 1, qq{$f-2**n-$n} . sprintf( qq{=x%x}, 1 << $n ) )
 102               if $n < 64;
 103 
 104         }
 105     }
 106 }
 107 
 108 ###################################################################
 109 
 110 # bit counting
 111 # see also
 112 # http://infolab.stanford.edu/~manku/bitcount/bitcount.html
 113 # http://www.setbb.com/phpbb/viewtopic.php?mforum=sudoku&p=7629
 114 # http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
 115 
 116 sub hakmem_169 {
 117 
 118 # octal 11111111111 = decimal 1227133513 =  hex 0x49249249 = (as time_t) Wed Nov 19 22:25:13 UTC 2008
 119 # This number is sacred to HAKMEM 169, bitcount.
 120 # http://blogs.msdn.com/jeuge/archive/2005/06/08/HAKMEM-Bit-Count.aspx
 121 # http://www.inwap.com/pdp10/hbaker/hakmem/hakmem.html
 122 # http://www.inwap.com/pdp10/hbaker/hakmem/hacks.html#item169
 123 # For 64-bit numbers, we would have to add triples of octal digits and use modulus 1023
 124 ### WRONG, it's 511 !
 125 ## does that make it loglog N ?
 126     #		printf("\n") if $DEBUG;
 127     my $u = shift;
 128     my $uCount =
 129       $u - ( ( $u >> 1 ) & 033333333333 ) - ( ( $u >> 2 ) & 011111111111 );
 130     return ( ( $uCount + ( $uCount >> 3 ) ) & 030707070707 ) % 63;
 131 
 132     # note that can not replace % with & trivially ... % is not always cheap
 133 
 134 }
 135 
 136 sub rec_n {
 137 
 138 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0642.html
 139 # Recursive approach, terse and elegant IMHO.
 140 # An iterative one is OK as well.
 141     my $n = shift;
 142     return 0 unless $n;
 143     return ( $n & 1 ) + rec_n( $n >> 1 );
 144 }
 145 
 146 sub sprtr {
 147 
 148 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0785.html
 149     qq.@{[map sprintf('%b',$_),@_]}. =~ tr/1//;
 150 }
 151 
 152 sub direct {
 153     my $x     = shift;
 154     my $count = 0;
 155     while ($x) {
 156         $count += $x & 1;
 157         $x >>= 1;
 158     }
 159     $count;
 160 }
 161 
 162 sub decr {
 163     my $x     = shift;
 164     my $count = 0;
 165     while ($x) {
 166         $count++;
 167         $x &= $x - 1;
 168     }
 169     $count;
 170 }
 171 
 172 sub ilya {
 173 
 174     # claims log N time
 175     # classic parallel bit counting
 176     my $x     = shift;
 177     my $shift = 1;
 178     for my $mask ( 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff )
 179     {
 180         $x = ( $x & $mask ) + ( ( $x >> $shift ) & $mask );
 181         $shift *= 2;
 182     }
 183     $x;
 184 }
 185 
 186 sub unroll    # ilya unrolled
 187 {
 188 
 189 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1067.html
 190 # aka #4 on http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
 191     my $n = shift;
 192 
 193     # can use same mask twice if shift $n then mask
 194     # n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101) ;
 195     $n = ( $n & 0x55555555 ) + ( ( $n & 0xaaaaaaaa ) >> 1 );
 196     $n = ( $n & 0x33333333 ) + ( ( $n & 0xcccccccc ) >> 2 );
 197     $n = ( $n & 0x0f0f0f0f ) + ( ( $n & 0xf0f0f0f0 ) >> 4 );
 198     $n = ( $n & 0x00ff00ff ) + ( ( $n & 0xff00ff00 ) >> 8 );
 199     $n = ( $n & 0x0000ffff ) + ( ( $n & 0xffff0000 ) >> 16 );
 200 ### Last couple can be combined as
 201 ## += >> without the masks and a final mask taken since only 7 bits of 64 are needed
 202 
 203     $n;
 204 }
 205 
 206 sub unrmsk {
 207 
 208 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1067.html
 209 # aka #4 on http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
 210 # (for 64bit) as modified per the 32bit at
 211 # http://aggregate.ee.engr.uky.edu/MAGIC/#Population%20Count%20(Ones%20Count)
 212     my $n = shift;
 213 
 214     # can use same mask twice if shift $n then mask
 215     # n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101) ;
 216     $n = ( $n & 0x55555555 ) + ( ( $n & 0xaaaaaaaa ) >> 1 );    # 1+1=2 in 2
 217     $n = ( $n & 0x33333333 ) + ( ( $n & 0xcccccccc ) >> 2 );    # max 4:3 of 4
 218     $n = ( $n & 0x0f0f0f0f ) + ( ( $n & 0xf0f0f0f0 ) >> 4 );    # max 8:4 of 8
 219     ### f's could be 7's since max value 4, but could carry to 0x8
 220     $n += ( $n >> 8 );     # max 16:5of16 -- no mask needed
 221     $n += ( $n >> 16 );    # max 32:6:of32
 222     ## $n +=  ($n  >> 32);# max 64:7of64 ## 64
 223     # this optimization saves ~ 8% off the literal unrolling
 224     ## += >> without the masks and a final mask taken
 225     ## since only 7 bits of 64 are needed
 226     ## note that this could be accumulated to result of a many-word scan here
 227     ## just have to mask to the point of maxresult.
 228 
 229     $n & 0x7f;
 230 }
 231 
 232 sub parmod                 # ilya unrolled plus modulus
 233 {
 234 
 235 # #5 on http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
 236     my $n = shift;
 237 
 238     # can use same mask twice if shift $n then mask
 239     # n = (n & MASK_01010101) + ((n >> 1) & MASK_01010101) ;
 240     $n = ( $n & 0x55555555 ) + ( ( $n & 0xaaaaaaaa ) >> 1 );
 241     $n = ( $n & 0x33333333 ) + ( ( $n & 0xcccccccc ) >> 2 );
 242     $n = ( $n & 0x0f0f0f0f ) + ( ( $n & 0xf0f0f0f0 ) >> 4 );
 243 
 244     #       # $n = ($n & 0x00ff00ff) + (($n & 0xff00ff00) >> 8);
 245     #       # $n = ($n & 0x0000ffff) + (($n & 0xffff0000) >> 16);
 246     return $n % 255;
 247     ### casting-out-9's trick to sum digits base 256
 248 }
 249 
 250 # Population Count (Ones Count)
 251 
 252 # http://aggregate.ee.engr.uky.edu/MAGIC/#Next%20Largest%20Power%20of%202
 253 # The population count of a binary integer value x is the number
 254 # of one bits in the value. Although many machines have single
 255 # instructions for this, the single instructions are usually
 256 # microcoded loops that test a bit per cycle; a log-time
 257 # algorithm coded in C is often faster. The following code uses a
 258 # variable-precision SWAR algorithm to perform a tree reduction
 259 # adding the bits in a 32-bit value:
 260 #
 261 sub ones32    #(register unsigned int x)
 262 {
 263 
 264     # 32-bit recursive reduction using SWAR...
 265     #   but first step is mapping 2-bit values
 266     #   into sum of 2 1-bit values in sneaky way
 267 
 268     my $x = shift;
 269     $x -= ( ( $x >> 1 ) & 0x55555555 );
 270     $x = ( ( ( $x >> 2 ) & 0x33333333 ) + ( $x & 0x33333333 ) );
 271     $x = ( ( ( $x >> 4 ) + $x ) & 0x0f0f0f0f );
 272     $x += ( $x >> 8 );
 273     $x += ( $x >> 16 );
 274     return ( $x & 0x0000003f );
 275 }
 276 
 277 sub katz {
 278 
 279     # from ftp.port80.se/pub/CPAN/authors/id/Y/YA/YARBER/kat.pl-1.03
 280     my $list = unpack( 'B*', pack( 'I*', shift ) );    ## 32
 281            # my $list = unpack('B*', pack('Q*',shift) ); ## 64
 282     $list =~ tr/0//d;
 283 
 284     # my $len = length($data);
 285     my $total = length($list);
 286 
 287     #my $avg = $len * 4;
 288     #my $dif = $total - $avg;
 289     #print("$len bytes, $total 1-bits ($avg + $dif)\n\n");
 290     return $total;
 291 }
 292 
 293 sub packtut {
 294 
 295 # http://perldoc.perl.org/perlpacktut.html#Doing-Sums
 296 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0656.html
 297     my $bitcount = unpack( '%32b*', pack( 'I*', shift ) );    ## 32
 298 
 299     #   my $bitcount = unpack( '%64b*', pack('Q*',shift ));  ## 64
 300     #                            ^^           ^
 301 
 302 }
 303 
 304 sub ls1b {
 305 
 306 # size2 http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/0656.html
 307 # Wegner-Kernighan
 308     my $n = shift;
 309     my $size = $n ? 1 : 0;
 310     $size++ while $n ^= ( ( $n - 1 ) ^ $n ) & $n;
 311     $size;
 312 }
 313 
 314 ###############
 315 # trade space for time ...
 316 # and still is proprotional to wordsize N linear
 317 #
 318 my @table;
 319 
 320 BEGIN {
 321     $table[$_] = direct($_)
 322       for 0 .. 255;    # not the most efficient pre-computation either ...
 323 }
 324 
 325 sub table {
 326     my $x     = shift;
 327     my $count = 0;
 328     while ($x) {
 329         $count += $table[ $x & 255 ];
 330         $x >>= 8;
 331     }
 332     $count;
 333 }
 334 
 335 sub tabunr {
 336 
 337 # http://coding.derkeiler.com/Archive/Perl/comp.lang.perl.misc/2003-11/1685.html
 338     my $x = shift;
 339     $table[ $x & 0xff ] +
 340       $table[ ( $x >> 8 ) & 0xff ] +
 341       $table[ ( $x >> 16 ) & 0xff ] +
 342       $table[ ( $x >> 24 ) & 0xff ]
 343 
 344      #       + $table [ ($x >> 32) & 0xff ] + $table [ ($x >> 40) & 0xff ] ## 64
 345      #       + $table [ ($x >> 48) & 0xff ] + $table [ ($x >> 56) & 0xff ] ## 64
 346       ;
 347 }
 348 
 349 ################################
 350 ## module bitvec
 351 __END__
 352   use Bit::Vector;
 353 
 354   my $vec = Bit::Vector->new(Bit::Vector->Word_Bits());
 355 
 356 
 357 
 358   sub via_bitvec {
 359       $vec->Word_Store(0,$_[0]);
 360       $vec->Norm();
 361   }
 362 
 363 
 364