/*
Copyright (C) 2015 John Tse

This file is part of Libknit.

Libknit is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Libknit is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Libknit.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/*
 * MD5 auxiliary functions (RFC 1321, section 3.4).  Each combines three
 * 32-bit words bit by bit into one 32-bit word.
 *
 * Every argument is parenthesised so that expression arguments such as
 * `a ^ b` keep their grouping after expansion.  Arguments are evaluated more
 * than once, so they must not have side effects.
 */
#define F(X,Y,Z) (((X) & (Y)) | ((~(X)) & (Z)))
#define G(X,Y,Z) (((X) & (Z)) | ((Y) & (~(Z))))
#define H(X,Y,Z) ((X) ^ (Y) ^ (Z))
#define I(X,Y,Z) ((Y) ^ ((X) | (~(Z))))

/*
 * Rotate the 32-bit value X left by Y bits.
 *
 * X is converted to uint32_t so the rotation is always 32 bits wide, and both
 * shift counts are reduced modulo 32 so that a count of 0 (or 32) yields X
 * unchanged instead of shifting by the full width, which is undefined.
 * Both arguments are evaluated more than once; avoid side effects.
 */
#define rol32(X,Y) (((uint32_t) (X) << ((Y) & 31)) | ((uint32_t) (X) >> ((32 - (Y)) & 31)))

/*
 * Additive step constants used by md5().  Index 0 holds a zero placeholder so
 * that the 64 constants are addressed as T[1]..T[64]; the values are those of
 * the table defined in RFC 1321, section 3.4.
 */
uint32_t T[] = {
	0,                                              /* [0]  placeholder */

	/* Round 1 */
	0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, /* [1..4]   */
	0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, /* [5..8]   */
	0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, /* [9..12]  */
	0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, /* [13..16] */

	/* Round 2 */
	0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, /* [17..20] */
	0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, /* [21..24] */
	0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, /* [25..28] */
	0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, /* [29..32] */

	/* Round 3 */
	0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, /* [33..36] */
	0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, /* [37..40] */
	0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, /* [41..44] */
	0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, /* [45..48] */

	/* Round 4 */
	0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, /* [49..52] */
	0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, /* [53..56] */
	0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, /* [57..60] */
	0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, /* [61..64] */
};

/*
 * Build the padded copy of a message for MD5 processing.
 *
 * The result holds the b input bytes from t, a single 0x80 byte, zero bytes
 * up to 8 short of a multiple of 64, and finally the message length in bits
 * as a 64-bit little-endian integer.
 *
 * t  input bytes; may be NULL only when b is 0
 * b  number of input bytes
 * N  receives the padded size in 32-bit words (always a multiple of 16),
 *    or 0 on failure
 *
 * Returns a calloc()-allocated buffer that the caller must free(), or NULL
 * when the padded size would not fit in 32 bits or the allocation fails.
 * Callers must check for NULL before using the buffer.
 */
static uint8_t * Append(uint8_t *t, uint64_t b, uint32_t *N) {
	uint32_t n;
	uint64_t bits;
	uint8_t *m;

	*N = 0;

	/*
	 * Padding adds at most 72 bytes (64 of fill plus the 8-byte length).
	 * Refuse anything whose padded size would overflow the 32-bit size
	 * instead of silently truncating it and under-allocating the buffer.
	 */
	if (b > (uint64_t) UINT32_MAX - 72)
		return NULL;

	/* Unsigned wrap-around of (b - 56) is harmless: 2^64 is a multiple of 64. */
	n = (uint32_t) (b + (64 - ((b - 56) % 64)) + 8);

	m = calloc(n, 1);
	if (m == NULL)
		return NULL;

	/* memcpy() must not be handed a null pointer, even for a zero length. */
	if (b > 0)
		memcpy(m, t, (size_t) b);

	m[b] = 0x80;

	/* Trailing 8 bytes: message length in bits, least significant byte first. */
	bits = b * 8;
	for (unsigned i = 0; i < 8; i++)
		m[n - 8 + i] = (uint8_t) (bits >> (8 * i));

	*N = n / 4;

	return m;
}

/*
 * Compute the MD5 digest of the b bytes starting at m.
 *
 * The message is padded by Append(), consumed in 64-byte blocks that are each
 * decoded as sixteen little-endian 32-bit words, and the four state words
 * A, B, C, D are written out least significant byte first.
 *
 * m  message bytes (not modified)
 * b  message length in bytes
 *
 * Returns a calloc()-allocated 16-byte digest that the caller must free(),
 * or NULL if the padded message or the digest buffer could not be obtained.
 */
uint8_t * md5(uint8_t *m, uint64_t b) {
	uint32_t N;
	uint8_t *padded;
	uint8_t *h;
	uint32_t X[16];
	uint32_t A, B, C, D;
	uint32_t AA, BB, CC, DD;

	/* Keep the padded copy in its own pointer so it can be released below. */
	padded = Append(m, b, &N);
	if (padded == NULL)
		return NULL;

	h = calloc(16, 1);
	if (h == NULL) {
		free(padded);
		return NULL;
	}

	A = 0x67452301;
	B = 0xefcdab89;
	C = 0x98badcfe;
	D = 0x10325476;

	/* N counts 32-bit words, so N / 16 is the number of 64-byte blocks. */
	for (uint32_t i = 0; i < N / 16; i++) {
		const uint8_t *p = padded + (size_t) i * 64;

		/*
		 * Decode the block straight from the padded bytes.  The bytes are
		 * widened to uint32_t before shifting so that a byte >= 0x80 is never
		 * shifted into the sign bit of a signed int.
		 */
		for (uint8_t j = 0; j < 16; j++, p += 4)
			X[j] = ((uint32_t) p[3] << 24) | ((uint32_t) p[2] << 16) | ((uint32_t) p[1] << 8) | (uint32_t) p[0];

		/* Remember the incoming state; it is added back after the rounds. */
		AA = A;
		BB = B;
		CC = C;
		DD = D;

		// Round 1
		A = B + rol32((A + F(B,C,D) + X[0] + T[1]), 7);
		D = A + rol32((D + F(A,B,C) + X[1] + T[2]), 12);
		C = D + rol32((C + F(D,A,B) + X[2] + T[3]), 17);
		B = C + rol32((B + F(C,D,A) + X[3] + T[4]), 22);

		A = B + rol32((A + F(B,C,D) + X[4] + T[5]), 7);
		D = A + rol32((D + F(A,B,C) + X[5] + T[6]), 12);
		C = D + rol32((C + F(D,A,B) + X[6] + T[7]), 17);
		B = C + rol32((B + F(C,D,A) + X[7] + T[8]), 22);

		A = B + rol32((A + F(B,C,D) + X[8]  + T[9]),  7);
		D = A + rol32((D + F(A,B,C) + X[9]  + T[10]), 12);
		C = D + rol32((C + F(D,A,B) + X[10] + T[11]), 17);
		B = C + rol32((B + F(C,D,A) + X[11] + T[12]), 22);

		A = B + rol32((A + F(B,C,D) + X[12] + T[13]), 7);
		D = A + rol32((D + F(A,B,C) + X[13] + T[14]), 12);
		C = D + rol32((C + F(D,A,B) + X[14] + T[15]), 17);
		B = C + rol32((B + F(C,D,A) + X[15] + T[16]), 22);

		// Round 2
		A = B + rol32((A + G(B,C,D) + X[1]  + T[17]), 5);
		D = A + rol32((D + G(A,B,C) + X[6]  + T[18]), 9);
		C = D + rol32((C + G(D,A,B) + X[11] + T[19]), 14);
		B = C + rol32((B + G(C,D,A) + X[0]  + T[20]), 20);

		A = B + rol32((A + G(B,C,D) + X[5]  + T[21]), 5);
		D = A + rol32((D + G(A,B,C) + X[10] + T[22]), 9);
		C = D + rol32((C + G(D,A,B) + X[15] + T[23]), 14);
		B = C + rol32((B + G(C,D,A) + X[4]  + T[24]), 20);

		A = B + rol32((A + G(B,C,D) + X[9]  + T[25]), 5);
		D = A + rol32((D + G(A,B,C) + X[14] + T[26]), 9);
		C = D + rol32((C + G(D,A,B) + X[3]  + T[27]), 14);
		B = C + rol32((B + G(C,D,A) + X[8]  + T[28]), 20);

		A = B + rol32((A + G(B,C,D) + X[13] + T[29]), 5);
		D = A + rol32((D + G(A,B,C) + X[2]  + T[30]), 9);
		C = D + rol32((C + G(D,A,B) + X[7]  + T[31]), 14);
		B = C + rol32((B + G(C,D,A) + X[12] + T[32]), 20);

		// Round 3
		A = B + rol32((A + H(B,C,D) + X[5]  + T[33]), 4);
		D = A + rol32((D + H(A,B,C) + X[8]  + T[34]), 11);
		C = D + rol32((C + H(D,A,B) + X[11] + T[35]), 16);
		B = C + rol32((B + H(C,D,A) + X[14] + T[36]), 23);

		A = B + rol32((A + H(B,C,D) + X[1]  + T[37]), 4);
		D = A + rol32((D + H(A,B,C) + X[4]  + T[38]), 11);
		C = D + rol32((C + H(D,A,B) + X[7]  + T[39]), 16);
		B = C + rol32((B + H(C,D,A) + X[10] + T[40]), 23);

		A = B + rol32((A + H(B,C,D) + X[13] + T[41]), 4);
		D = A + rol32((D + H(A,B,C) + X[0]  + T[42]), 11);
		C = D + rol32((C + H(D,A,B) + X[3]  + T[43]), 16);
		B = C + rol32((B + H(C,D,A) + X[6]  + T[44]), 23);

		A = B + rol32((A + H(B,C,D) + X[9]  + T[45]), 4);
		D = A + rol32((D + H(A,B,C) + X[12] + T[46]), 11);
		C = D + rol32((C + H(D,A,B) + X[15] + T[47]), 16);
		B = C + rol32((B + H(C,D,A) + X[2]  + T[48]), 23);

		// Round 4
		A = B + rol32((A + I(B,C,D) + X[0]  + T[49]), 6);
		D = A + rol32((D + I(A,B,C) + X[7]  + T[50]), 10);
		C = D + rol32((C + I(D,A,B) + X[14] + T[51]), 15);
		B = C + rol32((B + I(C,D,A) + X[5]  + T[52]), 21);

		A = B + rol32((A + I(B,C,D) + X[12] + T[53]), 6);
		D = A + rol32((D + I(A,B,C) + X[3]  + T[54]), 10);
		C = D + rol32((C + I(D,A,B) + X[10] + T[55]), 15);
		B = C + rol32((B + I(C,D,A) + X[1]  + T[56]), 21);

		A = B + rol32((A + I(B,C,D) + X[8]  + T[57]), 6);
		D = A + rol32((D + I(A,B,C) + X[15] + T[58]), 10);
		C = D + rol32((C + I(D,A,B) + X[6]  + T[59]), 15);
		B = C + rol32((B + I(C,D,A) + X[13] + T[60]), 21);

		A = B + rol32((A + I(B,C,D) + X[4]  + T[61]), 6);
		D = A + rol32((D + I(A,B,C) + X[11] + T[62]), 10);
		C = D + rol32((C + I(D,A,B) + X[2]  + T[63]), 15);
		B = C + rol32((B + I(C,D,A) + X[9]  + T[64]), 21);

		A = A + AA;
		B = B + BB;
		C = C + CC;
		D = D + DD;
	}

	/* The padded copy is owned by this function; release it before returning. */
	free(padded);

	/* Serialise A, B, C, D in order, each least significant byte first. */
	const uint32_t state[4] = { A, B, C, D };

	for (uint8_t i = 0; i < 4; i++)
		for (uint8_t j = 0; j < 4; j++)
			h[i * 4 + j] = (uint8_t) (state[i] >> (j * 8));

	return h;
}
