#include "stdafx.h"
#include <nmmintrin.h>
/*
 *通过移位计算1的个数
 *每一位都要判断和处理
 */
int BitCount(unsigned int n)
{
	unsigned int c = 0;
	while (n >0)
	{
		if ((n & 1) == 1)++c;
		n >>= 1;
	}
	return c;
}
/*
 *通过减法及位运算,保证每次至少消除一个1
 *只处理1的位,0的位不处理
 */
int BitCountWithMinus(unsigned int n)
{
	unsigned int c = 0;
	for (c = 0; n; ++c)
	{
		n &= (n - 1);
	}
	return c;
}
/*
*将32位数字,截为4个8位数
*通过查表,得到每个8位数中1的个数,然后求和
*/
int BitCountLUT8(unsigned int n)
{
	unsigned char BitsSetTable256[256] = { 0 };
	for (int i = 0; i <256; i++)
	{
		BitsSetTable256[i] = (i & 1) + BitsSetTable256[i / 2];
	}
	unsigned int c = 0;
	unsigned char* p = (unsigned char*)&n;
	c = BitsSetTable256[p[0]] +
		BitsSetTable256[p[1]] +
		BitsSetTable256[p[2]] +
		BitsSetTable256[p[3]];
	return c;
}
/*
*将32位数字,截为4个8位数
*通过查表,得到每个8位数中1的个数,然后求和
*LUT表已经计算好
*/
int BitCountLUT8Static(unsigned int n)
{
	unsigned int table[256] =
	{
		0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
		1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
		1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
		2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
		1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
		2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
		2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
		3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
		1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
		2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
		2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
		3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
		2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
		3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
		3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
		4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
	};
	return table[n & 0xff] +
		table[(n >> 8) & 0xff] +
		table[(n >> 16) & 0xff] +
		table[(n >> 24) & 0xff];
}
/*
*将32位数字,截为8个4位数
*通过查表,得到每个4位数中1的个数,然后求和
*LUT表已经计算好
*/
int BitCountLUT4Static(unsigned int n)
{
	unsigned int table[16] =
	{
		0, 1, 1, 2,
		1, 2, 2, 3,
		1, 2, 2, 3,
		2, 3, 3, 4
	};
	unsigned int count = 0;
	while (n)
	{
		count += table[n & 0xf];
		n >>= 4;
	}
	return count;
}
/*
*将32位数字,相邻的2位求和,然后相邻的4位求和
*然后相邻的8位、16位、32位求和
*/
int BitCountParallel(unsigned int n)
{
	n = (n & 0x55555555) + ((n >> 1) & 0x55555555);
	n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
	n = (n & 0x0f0f0f0f) + ((n >> 4) & 0x0f0f0f0f);
	n = (n & 0x00ff00ff) + ((n >> 8) & 0x00ff00ff);
	n = (n & 0x0000ffff) + ((n >> 16) & 0x0000ffff);
	return n;
}
/*
*第一行,计算每三位的1的个数(其实11*3是33位,但最高一位可以假设为0,所以没问题)
*第二行,实际上是先计算了相邻6位中1的个数(不会产生进位,结果最多占到3位),并将前三位置为0
*然后通过取模,相当于将每6位数字做了加法
*/
int BitCountMagic(unsigned int n)
{
	unsigned int tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
	return ((tmp + (tmp >> 3)) & 030707070707) % 63;
}
unsigned int n = 127;
unsigned int bitCount = _mm_popcnt_u32(n);
/*
*将8位数字,转换为MY_UNSIGHED_CHAR结构体,然后求和
*/
struct MY_UNSIGHED_CHAR
{
	unsigned a : 1;
	unsigned b : 1;
	unsigned c : 1;
	unsigned d : 1;
	unsigned e : 1;
	unsigned f : 1;
	unsigned g : 1;
	unsigned h : 1;
};
long BitCountStuct(unsigned char b)
{
	struct MY_UNSIGHED_CHAR *by = (struct MY_UNSIGHED_CHAR*)&b;
	return (by->a + by->b + by->c + by->d + by->e + by->f + by->g + by->h);
}
int _tmain(int argc, _TCHAR* argv[])
{
	printf("There are %d 1 in 133\n", BitCount(133));
	printf("There are %d 1 in 133\n", BitCountWithMinus(133));
	printf("There are %d 1 in 133\n", BitCountLUT8(133));
	printf("There are %d 1 in 133\n", BitCountLUT8Static(133));
	printf("There are %d 1 in 133\n", BitCountLUT4Static(133));
	printf("There are %d 1 in 133\n", BitCountParallel(133));
	printf("There are %d 1 in 133\n", BitCountMagic(133));
	printf("There are %d 1 in 133\n", BitCountStuct(133));
	return 0;
}