/*
--             This file is part of the New World OS project
--                 Copyright (C) 2006-2008  QRW Software
--           J. Scott Edwards - j.scott.edwards.nwos@gmail.com 
--                      http://www.qrwsoftware.com
--                      http://nwos.sourceforge.com
--
--   This program is free software: you can redistribute it and/or modify
--   it under the terms of the GNU General Public License as published by
--   the Free Software Foundation, either version 3 of the License, or
--   (at your option) any later version.
--
--   This program is distributed in the hope that it will be useful,
--   but WITHOUT ANY WARRANTY; without even the implied warranty of
--   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--   GNU General Public License for more details.
--
--   You should have received a copy of the GNU General Public License
--   along with this program, in the file LICENSE.  If not, see 
--   <http://www.gnu.org/licenses/>.
--
--   You can also contact me via paper mail at:
--
--      QRW Software
--      P.O. Box 27511
--      Salt Lake City, UT 84127-0511, USA.
--
--
-- $Log: compress_sparse.c,v $
-- Revision 1.21  2008/02/03 01:15:40  jsedwards
-- Change to use nwos_get_private_objects_path function instead of DEFAULT_FILE.
--
-- Revision 1.20  2007/08/10 00:03:35  jsedwards
-- Removed definition of _LARGEFILE64_SOURCE, now using _FILE_OFFSET_BITS=64.
-- Also removed using O_LARGEFILE from open call.
--
-- Revision 1.19  2007/08/02 18:51:42  jsedwards
-- Change to use the new index in the chunk_info table to compute the address
-- of the chunk in disk storage.
--
-- Revision 1.18  2007/07/15 18:32:34  jsedwards
-- Fix screwup on previous checkin (left function bswap_uint32 in).
--
-- Revision 1.17  2007/07/15 17:36:25  jsedwards
-- Changed to use WORDS_BIGENDIAN instead of __BYTE_ORDER == __LITTLE_ENDIAN
-- to determine endianness and byteswap_uint16 and 32 functions in objectify.h
-- instead of bswap_16 and 32 to make more platform independent.
--
-- Revision 1.16  2007/07/01 19:44:11  jsedwards
-- Upgrade to GPLv3.
--
-- Revision 1.15  2007/06/28 18:50:25  jsedwards
-- Modified for 0023 disk layout.
--
-- Revision 1.14  2007/06/21 16:25:24  jsedwards
-- Changed so that if you don't specify an output file it just computes the
-- MD5 and SHA1 checksums and outputs them.
--
-- Revision 1.13  2007/06/21 15:12:51  jsedwards
-- Added calculation of MD5 and SHA1 checksums of output file.
--
-- Revision 1.12  2007/06/20 00:29:24  jsedwards
-- Include block_offset_to_chunks in calculations so all chunks get stored.
--
-- Revision 1.11  2007/06/19 18:58:53  jsedwards
-- Remove stuff for public blocks because they are now stored separately from
-- the private objects.
--
-- Revision 1.10  2007/03/03 13:46:25  jsedwards
-- Added code to keep a count of distribution of disk blocks over the 32-bit
-- range and print them at the end.  This shows how the randomness is
-- distributed.
--
-- Revision 1.9  2007/02/11 15:15:20  jsedwards
-- Change 'sprintf' calls to 'snprintf' calls so the OpenBSD linker will stop
-- whining.
--
-- Revision 1.8  2007/02/11 14:41:26  jsedwards
-- Change all 'off64_t' and 'lseek64' references to 'off_t' and 'lseek',
-- because BSD doesn't dig the whole brain damaged 64 bit thing.
--
-- Revision 1.7  2007/01/09 13:17:02  jsedwards
-- Fix indexes when printing version string.
--
-- Revision 1.6  2007/01/09 13:07:13  jsedwards
-- Change to use Disk_Header structure.
--
-- Revision 1.5  2006/11/11 12:01:01  jsedwards
-- Update e-mail address to something that works.
--
-- Revision 1.4  2006/11/06 13:52:54  jsedwards
-- Changed to skip over public blocks for now.
--
-- Revision 1.3  2006/11/02 11:49:28  jsedwards
-- Fixed all cases where 'z' was used as a format for 'off64_t' values because
-- the older compiler complains.
--
-- Revision 1.2  2006/10/26 01:51:23  jsedwards
-- Merged alpha_05_branch back into main trunk.
--
-- Revision 1.1.2.8  2006/10/22 12:45:21  jsedwards
-- Change to use the number of blocks stored on disk instead of
-- BLOCKS_ON_DISK #define.
--
-- Revision 1.1.2.7  2006/10/22 12:40:47  jsedwards
-- Corrected version string error message.
--
-- Revision 1.1.2.6  2006/10/19 01:42:47  jsedwards
-- Fixed format specifiers for uint32, which is now an int instead of a long,
-- and off64_t.
--
-- Revision 1.1.2.5  2006/10/15 16:31:42  jsedwards
-- Changed to use block maps to find blocks to write instead of scanning
-- the entire drive.  Time reduced from 87 to 63 minutes.
--
-- Revision 1.1.2.4  2006/10/15 12:10:08  jsedwards
-- Change to read an entire chunk at a time and to skip over the block maps.
--
-- Revision 1.1.2.3  2006/09/19 14:11:57  jsedwards
-- Added printing of number of blocks.
--
-- Revision 1.1.2.2  2006/09/17 13:52:11  jsedwards
-- Fix argument count bug and open file read only instead of read-write.
--
-- Revision 1.1.2.1  2006/09/17 13:21:39  jsedwards
-- Program to compress the sparse objectify file into a non-sparse file.
--
*/


#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>

#include "md5/md5.h"
#include "objectify_private.h"
#include "sha1/sha1.h"


/* Print the command-line usage summary to stderr.
 *
 *   program - name shown in the usage line (main passes argv[0]).
 */
static void print_usage(char *program)
{
    fprintf(stderr,
            "usage: %s [output-file]\n"
            " if no output file is specified it just outputs the checksums.\n",
            program);
}


/* Number of buckets in the block id histogram: one bucket per value of the
   top four bits of a 32-bit id (main indexes it with ref >> 28). */
#define SIZE_COUNTS 16


/* Print a diagnostic for a read() that did not return the requested size.
 *
 *   what   - prefix for the message (what was being read).
 *   got    - value returned by read(): negative on error, otherwise the
 *            number of bytes actually read.
 *   wanted - number of bytes that were requested.
 *
 * A negative result is reported through perror() so errno is shown; a short
 * read is reported separately because errno is not meaningful in that case.
 */
static void report_read_failure(const char *what, ssize_t got, size_t wanted)
{
    if (got < 0)
    {
        perror(what);
    }
    else
    {
        fprintf(stderr, "%s: short read (%ld of %lu bytes)\n",
                what, (long)got, (unsigned long)wanted);
    }
}


/* Compress the sparse private objects file into a dense stream of blocks.
 *
 * Usage: program [output-file]
 *
 * The header block of the private objects file is verified (magic number and
 * version string), then the chunk info table that immediately follows it is
 * read.  For every used chunk the block bit map is read and each block whose
 * bit is set (skipping the blocks that hold the bit map itself) is copied to
 * the output file, if one was given.  The header block and every copied
 * block are fed into MD5 and SHA1 checksums, which are printed at the end
 * together with the number of blocks and a histogram of block ids by their
 * top four bits.  The chunk info table itself is neither written to the
 * output nor included in the checksums.
 *
 * Returns 0 on success; any error prints a message to stderr and exits with
 * status 1.
 */
int main(int argc, char* argv[])
{
    int obj_file_desc;
    const char* obj_file_path;
    off_t chunk;
    uint8 block_map[BIT_MAP_BYTES];
    uint8 block[FILE_BLOCK_SIZE];
    int i;
    ssize_t bytes_read;
    FILE* ofp = NULL;
    int num_blocks;
    uint32 total_private_blocks;
    uint32 chunk_block_offset;
    uint32 blocks_on_disk;
    char msg[128];
    Disk_Header disk_header;
    uint32 counts[SIZE_COUNTS];
    uint32 ref;
    MD5_CTX md5_context;    /* MD5 checksum context */
    struct sha1_ctx sha1_context;
    uint8 md5_digest[16];
    uint8 sha1_digest[20];
    uint32 chunk_num;
    uint32 used_chunks;
    size_t chunk_info_bytes;
    Chunk_Info* chunk_info;


    for (i = 0; i < SIZE_COUNTS; i++) counts[i] = 0;

    if (argc > 2)
    {
        print_usage(argv[0]);
        exit(1);
    }

    if (argc == 2 && *argv[1] == '-')
    {
        fprintf(stderr, "error: this program doesn't have any options\n");
        print_usage(argv[0]);
        exit(1);
    }


    /* Open the storage drive and verify the header info */

    nwos_log_arguments(argc, argv);

    obj_file_path = nwos_get_private_objects_path();

    obj_file_desc = open(obj_file_path, O_RDONLY);

    if (obj_file_desc < 0)
    {
        perror(obj_file_path);
        exit(1);
    }

    bytes_read = read(obj_file_desc, block, sizeof(block));

    if (bytes_read < 0 || (size_t)bytes_read != sizeof(block))
    {
        report_read_failure("reading first block", bytes_read, sizeof(block));
        exit(1);
    }

    if (memcmp(&block[0], MAGIC_NUMBER, 4) != 0)
    {
        fprintf(stderr, "Missing magic number in disk header\n");
        exit(1);
    }

    if (memcmp(&block[4], VERSION_STRING, 4) != 0)
    {
        fprintf(stderr, "Incorrect version string in disk header\n");
        exit(1);
    }

    memcpy(&disk_header, block, sizeof(disk_header));

    nwos_4_uint8_to_uint32(disk_header.total_blocks, &total_private_blocks);
    nwos_4_uint8_to_uint32(disk_header.block_offset_to_chunks, &chunk_block_offset);
    nwos_4_uint8_to_uint32(disk_header.used_chunks, &used_chunks);

    /* These values come from the file, so check them at run time rather
       than with assert(), which is compiled out when NDEBUG is defined. */
    if (used_chunks == 0)
    {
        fprintf(stderr, "No used chunks recorded in disk header\n");
        exit(1);
    }

    /* guard the size computation against wrapping around size_t */
    if (used_chunks > (size_t)-1 / sizeof(Chunk_Info))
    {
        fprintf(stderr, "Chunk count in disk header is too large: %u\n", used_chunks);
        exit(1);
    }

    chunk_info_bytes = used_chunks * sizeof(Chunk_Info);

    chunk_info = malloc(chunk_info_bytes);

    if (chunk_info == NULL)
    {
        fprintf(stderr, "Unable to allocate memory for chunk info table\n");
        exit(1);
    }

    /* the chunk info table is read from the bytes directly after the header block */
    bytes_read = read(obj_file_desc, chunk_info, chunk_info_bytes);

    if (bytes_read < 0 || (size_t)bytes_read != chunk_info_bytes)
    {
        report_read_failure("reading chunk info", bytes_read, chunk_info_bytes);
        exit(1);
    }

    /* fix the byte order on little endian machines */
#ifndef WORDS_BIGENDIAN
    for (chunk_num = 0; chunk_num < used_chunks; chunk_num++)
    {
        chunk_info[chunk_num].ref = byteswap_uint32(chunk_info[chunk_num].ref);
        chunk_info[chunk_num].used = byteswap_uint16(chunk_info[chunk_num].used);
        chunk_info[chunk_num].index = byteswap_uint16(chunk_info[chunk_num].index);
    }
#endif

    blocks_on_disk = total_private_blocks;

    if (argc == 2)
    {
        /* the output is raw block data, so open it in binary mode */
        ofp = fopen(argv[1], "wb");

        if (ofp == NULL)
        {
            perror(argv[1]);
            exit(1);
        }
    }

    MD5Init(&md5_context);   /* initialize the MD5 checksum context */
    sha1_init_ctx(&sha1_context);

    printf("header: %c%c%c%c %c%c%c%c\n",
           disk_header.magic_number[0], disk_header.magic_number[1], disk_header.magic_number[2], disk_header.magic_number[3],
           disk_header.version_string[0], disk_header.version_string[1], disk_header.version_string[2], disk_header.version_string[3]);

    printf("total blocks on disk: %08u  chunks_used: %u\n", blocks_on_disk, used_chunks);
    fflush(stdout);

    /* always write the header block first */
    if (ofp != NULL && fwrite(block, 1, sizeof(block), ofp) != sizeof(block))
    {
        perror(argv[1]);
        close(obj_file_desc);
        exit(1);
    }

    MD5Update(&md5_context, block, (unsigned)sizeof(block));    /* include this data in the md5 checksum */
    sha1_process_bytes(block, sizeof(block), &sha1_context);    /* include this data in the sha1 checksum */

    num_blocks = 0;

    /* walk every used chunk listed in the chunk info table */
    for (chunk_num = 0; chunk_num < used_chunks; chunk_num++)
    {
        /* Block number of the start of this chunk.  Widen to off_t before
           multiplying so the product cannot overflow a narrower type
           (presumably index is 16 bits and BLOCKS_IN_CHUNK an int - their
           definitions are not in this file). */
        chunk = (off_t)chunk_block_offset + (off_t)chunk_info[chunk_num].index * BLOCKS_IN_CHUNK;

        /* block number -> byte offset; the shift by 8 presumes 256 byte
           blocks - TODO confirm against FILE_BLOCK_SIZE */
        if (lseek(obj_file_desc, chunk << 8, SEEK_SET) < 0)
        {
            snprintf(msg, sizeof(msg), "lseek chunk:%08x", (uint32)chunk);
            perror(msg);
            exit(1);
        }

        bytes_read = read(obj_file_desc, block_map, sizeof(block_map));

        if (bytes_read < 0 || (size_t)bytes_read != sizeof(block_map))
        {
            snprintf(msg, sizeof(msg), "reading block map: %u", (uint32)chunk);
            report_read_failure(msg, bytes_read, sizeof(block_map));
            exit(1);
        }

        /* scan block map (skip over the blocks for the block map itself) */
        for (i = BIT_MAP_BLOCKS; i < BLOCKS_IN_CHUNK; i++)
        {
            /* bits are numbered from the most significant bit of each byte */
            if ((block_map[i/8] & (0x80 >> (i%8))) == 0) continue;

            if (lseek(obj_file_desc, (chunk + i) << 8, SEEK_SET) < 0)
            {
                snprintf(msg, sizeof(msg), "lseek block:%08x", (uint32)(chunk + i));
                perror(msg);
                exit(1);
            }

            bytes_read = read(obj_file_desc, block, sizeof(block));

            if (bytes_read < 0 || (size_t)bytes_read != sizeof(block))
            {
                snprintf(msg, sizeof(msg), "reading block: %u", (uint32)(chunk + i));
                report_read_failure(msg, bytes_read, sizeof(block));
                exit(1);
            }

            /* bytes 4..7 of the block, assembled most significant byte first */
            ref = (uint32)block[4] << 24 | (uint32)block[5] << 16 | (uint32)block[6] << 8 | (uint32)block[7];

            printf("id: %08x  block: %08x\n", ref, (uint32)(chunk + i));
            fflush(stdout);

            /* histogram by the top four bits of the id */
            counts[ref >> 28]++;

            if (ofp != NULL && fwrite(block, 1, sizeof(block), ofp) != sizeof(block))
            {
                perror(argv[1]);
                close(obj_file_desc);
                exit(1);
            }

            MD5Update(&md5_context, block, (unsigned)sizeof(block));    /* include this data in the md5 checksum */
            sha1_process_bytes(block, sizeof(block), &sha1_context);    /* include this data in the sha1 checksum */
            num_blocks++;
        }
    }

    printf("Number of blocks: %d\n", num_blocks);

    MD5Final(md5_digest, &md5_context);   /* finish computing the md5 sum */
    sha1_finish_ctx(&sha1_context, sha1_digest);

    printf("MD5: ");
    for (i = 0; i < (int)sizeof(md5_digest); i++) printf("%02x", md5_digest[i]);
    printf("\n");

    printf("SHA1: ");
    for (i = 0; i < (int)sizeof(sha1_digest); i++) printf("%02x", sha1_digest[i]);
    printf("\n");

    for (i = 0; i < SIZE_COUNTS; i++)
    {
        if (counts[i] > 0) printf("  %d: %u\n", i, counts[i]);
    }

    /* fclose flushes buffered output, so a failure here means data was lost */
    if (ofp != NULL && fclose(ofp) != 0)
    {
        perror(argv[1]);
        exit(1);
    }

    free(chunk_info);
    close(obj_file_desc);

    return 0;
}

