博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
computing the number of leading zeros in a word
阅读量:6037 次
发布时间:2019-06-20

本文共 11002 字,大约阅读时间需要 36 分钟。

// This has the programs for computing the number of leading zeros// in a word.// Max line length is 57, to fit in hacker.book.// Compile with g++, not gcc.#include 
#include
// To define "exit", req'd by XLC.#define LE 1 // 1 for little-endian, 0 for big-endian.int pop(unsigned x) { x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; x = x + (x << 8); x = x + (x << 16); return x >> 24;}int nlz1(unsigned x) { int n; if (x == 0) return(32); n = 0; if (x <= 0x0000FFFF) {n = n +16; x = x <<16;} if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;} if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;} if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;} if (x <= 0x7FFFFFFF) {n = n + 1;} return n;}int nlz1a(unsigned x) { int n;/* if (x == 0) return(32); */ if ((int)x <= 0) return (~x >> 26) & 32; n = 1; if ((x >> 16) == 0) {n = n +16; x = x <<16;} if ((x >> 24) == 0) {n = n + 8; x = x << 8;} if ((x >> 28) == 0) {n = n + 4; x = x << 4;} if ((x >> 30) == 0) {n = n + 2; x = x << 2;} n = n - (x >> 31); return n;}// On basic Risc, 12 to 20 instructions.int nlz2(unsigned x) { unsigned y; int n; n = 32; y = x >>16; if (y != 0) {n = n -16; x = y;} y = x >> 8; if (y != 0) {n = n - 8; x = y;} y = x >> 4; if (y != 0) {n = n - 4; x = y;} y = x >> 2; if (y != 0) {n = n - 2; x = y;} y = x >> 1; if (y != 0) return n - 2; return n - x;}// As above but coded as a loop for compactness:// 23 to 33 basic Risc instructions.int nlz2a(unsigned x) { unsigned y; int n, c; n = 32; c = 16; do { y = x >> c; if (y != 0) {n = n - c; x = y;} c = c >> 1; } while (c != 0); return n - x;}int nlz3(int x) { int y, n; n = 0; y = x;L: if (x < 0) return n; if (y == 0) return 32 - n; n = n + 1; x = x << 1; y = y >> 1; goto L;}int nlz4(unsigned x) { int y, m, n; y = -(x >> 16); // If left half of x is 0, m = (y >> 16) & 16; // set n = 16. If left half n = 16 - m; // is nonzero, set n = 0 and x = x >> m; // shift x right 16. // Now x is of the form 0000xxxx. y = x - 0x100; // If positions 8-15 are 0, m = (y >> 16) & 8; // add 8 to n and shift x left 8. n = n + m; x = x << m; y = x - 0x1000; // If positions 12-15 are 0, m = (y >> 16) & 4; // add 4 to n and shift x left 4. n = n + m; x = x << m; y = x - 0x4000; // If positions 14-15 are 0, m = (y >> 16) & 2; // add 2 to n and shift x left 2. n = n + m; x = x << m; y = x >> 14; // Set y = 0, 1, 2, or 3. m = y & ~(y >> 1); // Set m = 0, 1, 2, or 2 resp. return n + 2 - m;}int nlz5(unsigned x) { int pop(unsigned x); x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >>16); return pop(~x);}/* The four programs below are not valid ANSI C programs. This isbecause they refer to the same storage locations as two different types.However, they work with xlc/AIX, gcc/AIX, and gcc/NT. If you try tocode them more compactly by declaring a variable xx to be "double," andthen using n = 1054 - (*((unsigned *)&xx + LE) >> 20);then you are violating not only the rule above, but also the ANSI Crule that pointer arithmetic can be performed only on pointers toarray elements. When coded with the above statement, the program fails with xlc,gcc/AIX, and gcc/NT, at some optimization levels. BTW, these programs use the "anonymous union" feature of C++, notavailable in C. */int nlz6(unsigned k) { union { unsigned asInt[2]; double asDouble; }; int n; asDouble = (double)k + 0.5; n = 1054 - (asInt[LE] >> 20); return n;}int nlz7(unsigned k) { union { unsigned asInt[2]; double asDouble; }; int n; asDouble = (double)k; n = 1054 - (asInt[LE] >> 20); n = (n & 31) + (n >> 9); return n;} /* In single precision, round-to-nearest mode, the basic method fails for: k = 0, k = 01FFFFFF, 03FFFFFE <= k <= 03FFFFFF, 07FFFFFC <= k <= 07FFFFFF, 0FFFFFF8 <= k <= 0FFFFFFF, ... 7FFFFFC0 <= k <= 7FFFFFFF. FFFFFF80 <= k <= FFFFFFFF. For k = 0 it gives 158, and for the other values it is too low by 1. */int nlz8(unsigned k) { union { unsigned asInt; float asFloat; }; int n; k = k & ~(k >> 1); /* Fix problem with rounding. */ asFloat = (float)k + 0.5f; n = 158 - (asInt >> 23); return n;}/* The example below shows how to make a macro for nlz. It uses anextension to the C and C++ languages that is provided by the GNU C/C++compiler, namely, that of allowing statements and declarations inexpressions (see "Using and Porting GNU CC", by Richard M. Stallman(1998). The underscores are necessary to protect against thepossibility that the macro argument will conflict with one of its localvariables, e.g., NLZ(k). */#define NLZ(kp) \ ({union {unsigned _asInt; float _asFloat;}; \ unsigned _k = (kp), _kk = _k & ~(_k >> 1); \ _asFloat = (float)_kk + 0.5f; \ 158 - (_asInt >> 23);})int nlz8a(unsigned k) { return NLZ(k) + NLZ(-5 + 1);}int nlz9(unsigned k) { union { unsigned asInt; float asFloat; }; int n; k = k & ~(k >> 1); /* Fix problem with rounding. */ asFloat = (float)k; n = 158 - (asInt >> 23); n = (n & 31) + (n >> 6); /* Fix problem with k = 0. */ return n;}/* Below are three nearly equivalent programs for computing the numberof leading zeros in a word. This material is not in HD, but may be in afuture edition. Immediately below is Robert Harley's algorithm, found at thecomp.arch newsgroup entry dated 7/12/96, pointed out to me by NorbertJuffa. Table entries marked "u" are unused. 14 ops including a multiply,plus an indexed load. The smallest multiplier that works is 0x045BCED1 = 17*65*129*513 (allof form 2**k + 1). There are no multipliers of three terms of the form2**k +- 1 that work, with a table size of 64 or 128. There are some,with a table size of 64, if you precede the multiplication with x = x -(x >> 1), but that seems less elegant. There are also some if you use atable size of 256, the smallest is 0x01033CBF = 65*255*1025 (this wouldsave two instructions in the form of this algorithm with themultiplication expanded into shifts and adds, but the table size isgetting a bit large). */#define u 99int nlz10(unsigned x) { static char table[64] = {32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u, u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u, 17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18, 5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u}; x = x | (x >> 1); // Propagate leftmost x = x | (x >> 2); // 1-bit to the right. x = x | (x >> 4); x = x | (x >> 8); x = x | (x >>16); x = x*0x06EB14F9; // Multiplier is 7*255**3. return table[x >> 26];}/* Harley's algorithm with multiply expanded.19 elementary ops plus an indexed load. */int nlz10a(unsigned x) { static char table[64] = {32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u, u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u, 17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18, 5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u}; x = x | (x >> 1); // Propagate leftmost x = x | (x >> 2); // 1-bit to the right. x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); x = (x << 3) - x; // Multiply by 7. x = (x << 8) - x; // Multiply by 255. x = (x << 8) - x; // Again. x = (x << 8) - x; // Again. return table[x >> 26];}/* Julius Goryavsky's version of Harley's algorithm.17 elementary ops plus an indexed load, if the machinehas "and not." */int nlz10b(unsigned x) { static char table[64] = {32,20,19, u, u,18, u, 7, 10,17, u, u,14, u, 6, u, u, 9, u,16, u, u, 1,26, u,13, u, u,24, 5, u, u, u,21, u, 8,11, u,15, u, u, u, u, 2,27, 0,25, u, 22, u,12, u, u, 3,28, u, 23, u, 4,29, u, u,30,31}; x = x | (x >> 1); // Propagate leftmost x = x | (x >> 2); // 1-bit to the right. x = x | (x >> 4); x = x | (x >> 8); x = x & ~(x >> 16); x = x*0xFD7049FF; // Activate this line or the following 3.// x = (x << 9) - x; // Multiply by 511.// x = (x << 11) - x; // Multiply by 2047.// x = (x << 14) - x; // Multiply by 16383. return table[x >> 26];}int errors;void error(int x, int y) { errors = errors + 1; printf("Error for x = %08x, got %d\n", x, y);}int main() { int i, n; static unsigned test[] = {0,32, 1,31, 2,30, 3,30, 4,29, 5,29, 6,29, 7,29, 8,28, 9,28, 16,27, 32,26, 64,25, 128,24, 255,24, 256,23, 512,22, 1024,21, 2048,20, 4096,19, 8192,18, 16384,17, 32768,16, 65536,15, 0x20000,14, 0x40000,13, 0x80000,12, 0x100000,11, 0x200000,10, 0x400000,9, 0x800000,8, 0x1000000,7, 0x2000000,6, 0x4000000,5, 0x8000000,4, 0x0FFFFFFF,4, 0x10000000,3, 0x3000FFFF,2, 0x50003333,1, 0x7FFFFFFF,1, 0x80000000,0, 0xFFFFFFFF,0}; n = sizeof(test)/4; printf("nlz1:\n"); for (i = 0; i < n; i += 2) { if (nlz1(test[i]) != test[i+1]) error(test[i], nlz1(test[i]));} printf("nlz1a:\n"); for (i = 0; i < n; i += 2) { if (nlz1a(test[i]) != test[i+1]) error(test[i], nlz1a(test[i]));} printf("nlz2:\n"); for (i = 0; i < n; i += 2) { if (nlz2(test[i]) != test[i+1]) error(test[i], nlz2(test[i]));} printf("nlz2a:\n"); for (i = 0; i < n; i += 2) { if (nlz2a(test[i]) != test[i+1]) error(test[i], nlz2a(test[i]));} printf("nlz3:\n"); for (i = 0; i < n; i += 2) { if (nlz3(test[i]) != test[i+1]) error(test[i], nlz3(test[i]));} printf("nlz4:\n"); for (i = 0; i < n; i += 2) { if (nlz4(test[i]) != test[i+1]) error(test[i], nlz4(test[i]));} printf("nlz5:\n"); for (i = 0; i < n; i += 2) { if (nlz5(test[i]) != test[i+1]) error(test[i], nlz5(test[i]));} printf("nlz6:\n"); for (i = 0; i < n; i += 2) { if (nlz6(test[i]) != test[i+1]) error(test[i], nlz6(test[i]));} printf("nlz7:\n"); for (i = 0; i < n; i += 2) { if (nlz7(test[i]) != test[i+1]) error(test[i], nlz7(test[i]));} printf("nlz8:\n"); for (i = 0; i < n; i += 2) { if (nlz8(test[i]) != test[i+1]) error(test[i], nlz8(test[i]));} printf("nlz8a:\n"); for (i = 0; i < n; i += 2) { if (nlz8a(test[i]) != test[i+1]) error(test[i], nlz8a(test[i]));} printf("nlz9:\n"); for (i = 0; i < n; i += 2) { if (nlz9(test[i]) != test[i+1]) error(test[i], nlz9(test[i]));} printf("nlz10:\n"); for (i = 0; i < n; i += 2) { if (nlz10(test[i]) != test[i+1]) error(test[i], nlz10(test[i]));} printf("nlz10a:\n"); for (i = 0; i < n; i += 2) { if (nlz10a(test[i]) != test[i+1]) error(test[i], nlz10a(test[i]));} printf("nlz10b:\n"); for (i = 0; i < n; i += 2) { if (nlz10b(test[i]) != test[i+1]) error(test[i], nlz10b(test[i]));} if (errors == 0) printf("Passed all %d cases.\n", sizeof(test)/8);}

转载地址:http://fcohx.baihongyu.com/

你可能感兴趣的文章
ProGuard详解
查看>>
学习如何看懂SQL Server执行计划——基本知识篇
查看>>
哈希查找之链地址法解决冲突(代码封装实现)
查看>>
素数筛选(模板)
查看>>
【转】安卓下微信内置浏览器视频出现解析错误
查看>>
[转载]---HiveQL详解
查看>>
剖析IE浏览器子系统的性能权重,互联网营销
查看>>
MPAA组织遭遇尴尬 网页存在XSS攻击漏洞
查看>>
MySQL表数据迁移自动化
查看>>
腰间盘突出方(刘力红)
查看>>
C#加密解密方法(转)
查看>>
code only 和 code first的关系 !! code only 就是 code first !!
查看>>
WCF入门(九)——未处理异常
查看>>
集合划分问题
查看>>
程序执行vhdl中延时器的编写
查看>>
导致flash屏幕重绘的几种方式及避免重绘的方法
查看>>
解读思维导图(一)误区
查看>>
[2014AMC]Navier-Stokes equations with regularity in two entries of the velocity gradient tensor
查看>>
java多线程:ReentrantReadWriteLock读写锁使用
查看>>
salesforce 零基础开发入门学习(三)sObject简单介绍以及简单DML操作(SOQL)
查看>>