首页 > 编程语言 >搜索引擎的那些事(32位MD5算法)

搜索引擎的那些事(32位MD5算法)

时间:2022-11-23 13:07:16浏览次数:56  
标签:F1 MD5STEP 32 ctx unsigned 搜索引擎 char buf MD5



    对于学过密码学的同学来说,md5算法肯定不会很陌生。但是,对于我来说,md5是一个新的命题。那什么是md5呢?md5就是对已有的数据进行加密处理。当然,它还有别的用处,什么呢?比如说,可以验证下载的软件是否完整,可以验证保存的字符串是否发生重名等等。我在这里提到这个算法,主要是为了后面一个目的,防止url重名使用的。整个算法的内容其实比较复杂的,我们自己只要学会使用就可以了。这里使用的就两个文件,一个是md5.h,另外一个是md5.c,头文件内容如下所示,

/* See md5.c for explanation and copyright information.  */

#ifndef MD5_H
#define MD5_H

/* Unlike previous versions of this code, uint32 need not be exactly
32 bits, merely 32 bits or more. Choosing a data type which is 32
bits instead of 64 is not important; speed is considerably more
important. ANSI guarantees that "unsigned long" will be big enough,
and always using it seems to have few disadvantages. */
typedef unsigned long uint32;

struct MD5Context {
uint32 buf[4];
uint32 bits[2];
unsigned char in[64];
};

void MD5Init(struct MD5Context *context);
void MD5Update(struct MD5Context *context, unsigned char const *buf, unsigned len);
void MD5Final(unsigned char digest[16], struct MD5Context *context);
void MD5Transform(uint32 buf[4], const unsigned char in[64]);

/*
* This is needed to make RSAREF happy on some MS-DOS compilers.
*/
typedef struct MD5Context MD5_CTX;

#endif /* !MD5_H */

    源文件内容如下所示,


/*
* This code implements the MD5 message-digest algorithm.
* The algorithm is due to Ron Rivest. This code was
* written by Colin Plumb in 1993, no copyright is claimed.
* This code is in the public domain; do with it what you wish.
*
* Equivalent code is available from RSA Data Security, Inc.
* This code has been tested against that, and is equivalent,
* except that you don't need to include two pages of legalese
* with every copy.
*
* To compute the message digest of a chunk of bytes, declare an
* MD5Context structure, pass it to MD5Init, call MD5Update as
* needed on buffers full of bytes, and then call MD5Final, which
* will fill a supplied 16-byte array with the digest.
*/

/* This code was modified in 1997 by Jim Kingdon of Cyclic Software to
not require an integer type which is exactly 32 bits. This work
draws on the changes for the same purpose by Tatu Ylonen
<[email protected]> as part of SSH, but since I didn't actually use
that code, there is no copyright issue. I hereby disclaim
copyright in any changes I have made; this code remains in the
public domain. */

#include <stdio.h>
#include <memory.h>
#include <string.h>
#include <sys/types.h>

#include "md5.h"

/* Little-endian byte-swapping routines. Note that these do not
depend on the size of datatypes such as uint32, nor do they require
us to detect the endianness of the machine we are running on. It
is possible they should be macros for speed, but I would be
surprised if they were a performance bottleneck for MD5. */

static uint32
getu32 (addr)
const unsigned char *addr;
{
return (((((unsigned long)addr[3] << 8) | addr[2]) << 8)
| addr[1]) << 8 | addr[0];
}

static void
putu32 (data, addr)
uint32 data;
unsigned char *addr;
{
addr[0] = (unsigned char)data;
addr[1] = (unsigned char)(data >> 8);
addr[2] = (unsigned char)(data >> 16);
addr[3] = (unsigned char)(data >> 24);
}

/*
* Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
* initialization constants.
*/
void
MD5Init(ctx)
struct MD5Context *ctx;
{
ctx->buf[0] = 0x67452301;
ctx->buf[1] = 0xefcdab89;
ctx->buf[2] = 0x98badcfe;
ctx->buf[3] = 0x10325476;

ctx->bits[0] = 0;
ctx->bits[1] = 0;
}

/*
* Update context to reflect the concatenation of another buffer full
* of bytes.
*/
void
MD5Update(ctx, buf, len)
struct MD5Context *ctx;
unsigned char const *buf;
unsigned len;
{
uint32 t;

/* Update bitcount */

t = ctx->bits[0];
if ((ctx->bits[0] = (t + ((uint32)len << 3)) & 0xffffffff) < t)
ctx->bits[1]++; /* Carry from low to high */
ctx->bits[1] += len >> 29;

t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */

/* Handle any leading odd-sized chunks */

if ( t ) {
unsigned char *p = ctx->in + t;

t = 64-t;
if (len < t) {
memcpy(p, buf, len);
return;
}
memcpy(p, buf, t);
MD5Transform(ctx->buf, ctx->in);
buf += t;
len -= t;
}

/* Process data in 64-byte chunks */

while (len >= 64) {
memcpy(ctx->in, buf, 64);
MD5Transform(ctx->buf, ctx->in);
buf += 64;
len -= 64;
}

/* Handle any remaining bytes of data. */

memcpy(ctx->in, buf, len);
}

/*
* Final wrapup - pad to 64-byte boundary with the bit pattern
* 1 0* (64-bit count of bits processed, MSB-first)
*/
void
MD5Final(digest, ctx)
unsigned char digest[16];
struct MD5Context *ctx;
{
unsigned count;
unsigned char *p;

/* Compute number of bytes mod 64 */
count = (ctx->bits[0] >> 3) & 0x3F;

/* Set the first char of padding to 0x80. This is safe since there is
always at least one byte free */
p = ctx->in + count;
*p++ = 0x80;

/* Bytes of padding needed to make 64 bytes */
count = 64 - 1 - count;

/* Pad out to 56 mod 64 */
if (count < 8) {
/* Two lots of padding: Pad the first block to 64 bytes */
memset(p, 0, count);
MD5Transform(ctx->buf, ctx->in);

/* Now fill the next block with 56 bytes */
memset(ctx->in, 0, 56);
} else {
/* Pad block to 56 bytes */
memset(p, 0, count-8);
}

/* Append length in bits and transform */
putu32(ctx->bits[0], ctx->in + 56);
putu32(ctx->bits[1], ctx->in + 60);

MD5Transform(ctx->buf, ctx->in);
putu32(ctx->buf[0], digest);
putu32(ctx->buf[1], digest + 4);
putu32(ctx->buf[2], digest + 8);
putu32(ctx->buf[3], digest + 12);
memset(ctx, 0, sizeof(ctx)); /* In case it's sensitive */
}

#ifndef ASM_MD5

/* The four core functions - F1 is optimized somewhat */

/* #define F1(x, y, z) (x & y | ~x & z) */
#define F1(x, y, z) (z ^ (x & (y ^ z)))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) (x ^ y ^ z)
#define F4(x, y, z) (y ^ (x | ~z))

/* This is the central step in the MD5 algorithm. */
#define MD5STEP(f, w, x, y, z, data, s) \
( w += f(x, y, z) + data, w &= 0xffffffff, w = w<<s | w>>(32-s), w += x )

/*
* The core of the MD5 algorithm, this alters an existing MD5 hash to
* reflect the addition of 16 longwords of new data. MD5Update blocks
* the data and converts bytes into longwords for this routine.
*/
void
MD5Transform(buf, inraw)
uint32 buf[4];
const unsigned char inraw[64];
{
register uint32 a, b, c, d;
uint32 in[16];
int i;

for (i = 0; i < 16; ++i)
in[i] = getu32 (inraw + 4 * i);

a = buf[0];
b = buf[1];
c = buf[2];
d = buf[3];

MD5STEP(F1, a, b, c, d, in[ 0]+0xd76aa478, 7);
MD5STEP(F1, d, a, b, c, in[ 1]+0xe8c7b756, 12);
MD5STEP(F1, c, d, a, b, in[ 2]+0x242070db, 17);
MD5STEP(F1, b, c, d, a, in[ 3]+0xc1bdceee, 22);
MD5STEP(F1, a, b, c, d, in[ 4]+0xf57c0faf, 7);
MD5STEP(F1, d, a, b, c, in[ 5]+0x4787c62a, 12);
MD5STEP(F1, c, d, a, b, in[ 6]+0xa8304613, 17);
MD5STEP(F1, b, c, d, a, in[ 7]+0xfd469501, 22);
MD5STEP(F1, a, b, c, d, in[ 8]+0x698098d8, 7);
MD5STEP(F1, d, a, b, c, in[ 9]+0x8b44f7af, 12);
MD5STEP(F1, c, d, a, b, in[10]+0xffff5bb1, 17);
MD5STEP(F1, b, c, d, a, in[11]+0x895cd7be, 22);
MD5STEP(F1, a, b, c, d, in[12]+0x6b901122, 7);
MD5STEP(F1, d, a, b, c, in[13]+0xfd987193, 12);
MD5STEP(F1, c, d, a, b, in[14]+0xa679438e, 17);
MD5STEP(F1, b, c, d, a, in[15]+0x49b40821, 22);

MD5STEP(F2, a, b, c, d, in[ 1]+0xf61e2562, 5);
MD5STEP(F2, d, a, b, c, in[ 6]+0xc040b340, 9);
MD5STEP(F2, c, d, a, b, in[11]+0x265e5a51, 14);
MD5STEP(F2, b, c, d, a, in[ 0]+0xe9b6c7aa, 20);
MD5STEP(F2, a, b, c, d, in[ 5]+0xd62f105d, 5);
MD5STEP(F2, d, a, b, c, in[10]+0x02441453, 9);
MD5STEP(F2, c, d, a, b, in[15]+0xd8a1e681, 14);
MD5STEP(F2, b, c, d, a, in[ 4]+0xe7d3fbc8, 20);
MD5STEP(F2, a, b, c, d, in[ 9]+0x21e1cde6, 5);
MD5STEP(F2, d, a, b, c, in[14]+0xc33707d6, 9);
MD5STEP(F2, c, d, a, b, in[ 3]+0xf4d50d87, 14);
MD5STEP(F2, b, c, d, a, in[ 8]+0x455a14ed, 20);
MD5STEP(F2, a, b, c, d, in[13]+0xa9e3e905, 5);
MD5STEP(F2, d, a, b, c, in[ 2]+0xfcefa3f8, 9);
MD5STEP(F2, c, d, a, b, in[ 7]+0x676f02d9, 14);
MD5STEP(F2, b, c, d, a, in[12]+0x8d2a4c8a, 20);

MD5STEP(F3, a, b, c, d, in[ 5]+0xfffa3942, 4);
MD5STEP(F3, d, a, b, c, in[ 8]+0x8771f681, 11);
MD5STEP(F3, c, d, a, b, in[11]+0x6d9d6122, 16);
MD5STEP(F3, b, c, d, a, in[14]+0xfde5380c, 23);
MD5STEP(F3, a, b, c, d, in[ 1]+0xa4beea44, 4);
MD5STEP(F3, d, a, b, c, in[ 4]+0x4bdecfa9, 11);
MD5STEP(F3, c, d, a, b, in[ 7]+0xf6bb4b60, 16);
MD5STEP(F3, b, c, d, a, in[10]+0xbebfbc70, 23);
MD5STEP(F3, a, b, c, d, in[13]+0x289b7ec6, 4);
MD5STEP(F3, d, a, b, c, in[ 0]+0xeaa127fa, 11);
MD5STEP(F3, c, d, a, b, in[ 3]+0xd4ef3085, 16);
MD5STEP(F3, b, c, d, a, in[ 6]+0x04881d05, 23);
MD5STEP(F3, a, b, c, d, in[ 9]+0xd9d4d039, 4);
MD5STEP(F3, d, a, b, c, in[12]+0xe6db99e5, 11);
MD5STEP(F3, c, d, a, b, in[15]+0x1fa27cf8, 16);
MD5STEP(F3, b, c, d, a, in[ 2]+0xc4ac5665, 23);

MD5STEP(F4, a, b, c, d, in[ 0]+0xf4292244, 6);
MD5STEP(F4, d, a, b, c, in[ 7]+0x432aff97, 10);
MD5STEP(F4, c, d, a, b, in[14]+0xab9423a7, 15);
MD5STEP(F4, b, c, d, a, in[ 5]+0xfc93a039, 21);
MD5STEP(F4, a, b, c, d, in[12]+0x655b59c3, 6);
MD5STEP(F4, d, a, b, c, in[ 3]+0x8f0ccc92, 10);
MD5STEP(F4, c, d, a, b, in[10]+0xffeff47d, 15);
MD5STEP(F4, b, c, d, a, in[ 1]+0x85845dd1, 21);
MD5STEP(F4, a, b, c, d, in[ 8]+0x6fa87e4f, 6);
MD5STEP(F4, d, a, b, c, in[15]+0xfe2ce6e0, 10);
MD5STEP(F4, c, d, a, b, in[ 6]+0xa3014314, 15);
MD5STEP(F4, b, c, d, a, in[13]+0x4e0811a1, 21);
MD5STEP(F4, a, b, c, d, in[ 4]+0xf7537e82, 6);
MD5STEP(F4, d, a, b, c, in[11]+0xbd3af235, 10);
MD5STEP(F4, c, d, a, b, in[ 2]+0x2ad7d2bb, 15);
MD5STEP(F4, b, c, d, a, in[ 9]+0xeb86d391, 21);

buf[0] += a;
buf[1] += b;
buf[2] += c;
buf[3] += d;
}
#endif

/* Simple test program. Can use it to manually run the tests from
RFC1321 for example. */

int
main (int argc, char **argv)
{
struct MD5Context context;
unsigned char checksum[16];
int i;

if (argc < 2)
{
fprintf (stderr, "usage: %s string-to-hash\n", argv[0]);
exit (1);
}

printf ("MD5 (\"%s\") = ", argv[1]);
MD5Init (&context);
MD5Update (&context, argv[1], strlen (argv[1]));
MD5Final (checksum, &context);
for (i = 0; i < 16; i++)
{
printf ("%02x", (unsigned int) checksum[i]);
}
printf ("\n");

return 0;
}

    整个程序使用也非常简单,就是md5 + 加密字符串。比如说md5  

​www.163.com​​,那么加密得到的数字就是cc4e427b15d6ddd711b304a48f80bdbe。为了验证我们的算法是否正确,可以寻找一个md5在线加密网站,输入加密的字符串数据,看看生成的字符数字是不是这个。






标签:F1,MD5STEP,32,ctx,unsigned,搜索引擎,char,buf,MD5
From: https://blog.51cto.com/feixiaoxing/5881000

相关文章

  • 搜索引擎的那些事(web遍历)
      写搜索引擎对我来说是一件有趣的事情,做的多好谈不上,但是至少可以一步一步做出来。当然做的怎么样,还得大家来判断了。在开始今天的话题之前,我们可以扯一些别的东西。......
  • 搜索引擎的那些事(中文分词)
      前面,我们在介绍搜索引擎的时候也谈到过中文分词。和英文不一样,中文上所有的汉字都是连在一起的,所以我们的一项工作就是把这些词语拆分成一个一个词组。因为只有这样才......
  • 搜索引擎的那些事(多线程web遍历)
       上面一篇博客当中,我们可以利用单一的线程完成网页的下载。今天,我们打算在此基础上完成多线程的访问和加载操作。使用多线程,倒不是因为这项技术有多牛,主要是因为我们......
  • 搜索引擎的那些事(title信息提取)
        前面我们在谈到搜索引擎的时候,说到了网页下载、说到了分词、说到了多线程。但是,我们要清楚这一切的目的都是为了在网页中获得重要的信息。如何从网页或者从链接中......
  • 132令非终结符B表示布尔表达式,为B设置两个继承属性 true和 false。B.true是一个地址,地
      关键:true和false为继承属性,语义动作必须出现在非终结符的右边   ......
  • CodeForces - 320E Kalila and Dimna in the Logging Industry
    题意:你有要拿一把锯子砍树。锯子有有电和没电两个状态,只有在有电的时候才能工作,每次工作都可以砍1单位高度的树,然后就会没电。没电后要充电才能工作。充电有代价,代价为,当前......
  • STM32CubeMX之串口配置
    STM32CubeMX之串口配置  串口通信是一种设备间非常常用的​​串行通信​​方式,以比特位的形式发送或接收数据,电子工程师经常使用这种方式来调试数据。本章节我们将学习串......
  • [Leetcode Weekly Contest]320
    链接:LeetCode[Leetcode]2475.数组中不等三元组的数目给你一个下标从0开始的正整数数组nums。请你找出并统计满足下述条件的三元组(i,j,k)的数目:0<=i<j<......
  • BZOJ1036-[ZJOI2008]树的统计Count&&POJ3237-Tree
    为什么把这两题放在一起呢,因为这两题其实是一道题,只是输入顺序不一样。这里主要以BZOJ1036为例。1036:[ZJOI2008]树的统计CountTimeLimit: 10Sec  MemoryLimit: ......
  • BZOJ3223-Tyvj 1729 文艺平衡树
    Description您需要写一种数据结构(可参考题目标题),来维护一个有序数列,其中需要提供以下操作:翻转一个区间,例如原有序序列是5 4 3 2 1,翻转区间是[2,4]的话,结果是5 2 3 4......