/*             This file is part of the New World OS project
--                   Copyright (C) 2006  QRW Software
--           J. Scott Edwards - j.scott.edwards.nwos@gmail.com 
--                      http://www.qrwsoftware.com
--                      http://nwos.sourceforge.com
--
-- NWOS is free software;  you can redistribute it and/or modify it under the
-- terms of the GNU General Public License  as published by the Free Software
-- Foundation; either version 2, or (at your option) any later version.  This
-- software is distributed with the hope that it will be useful,  but WITHOUT
-- ANY WARRANTY;  without  even the  implied warranty  of MERCHANTABILITY  or
-- FITNESS FOR A PARTICULAR PURPOSE.   See the GNU General Public License for
-- more  details.  You should have received a copy  of the GNU General Public
-- License along with this package;  see the file LICENSE.  If not, write to:
--
--      Free Software Foundation, Inc.
--      59 Temple Place - Suite 330
--      Boston, MA 02111-1307, USA.
--
-- $Log: compress_sparse.c,v $
-- Revision 1.10  2007/03/03 13:46:25  jsedwards
-- Added code to keep a count of distribution of disk blocks over the 32-bit
-- range and print them at the end.  This shows how the randomness is
-- distributed.
--
-- Revision 1.9  2007/02/11 15:15:20  jsedwards
-- Change 'sprintf' calls to 'snprintf' calls so the OpenBSD linker will stop
-- whining.
--
-- Revision 1.8  2007/02/11 14:41:26  jsedwards
-- Change all 'off64_t' and 'lseek64' references to 'off_t' and 'lseek',
-- because BSD doesn't dig the whole brain damaged 64 bit thing.
--
-- Revision 1.7  2007/01/09 13:17:02  jsedwards
-- Fix indexes when printing version string.
--
-- Revision 1.6  2007/01/09 13:07:13  jsedwards
-- Change to use Disk_Header structure.
--
-- Revision 1.5  2006/11/11 12:01:01  jsedwards
-- Update e-mail address to something that works.
--
-- Revision 1.4  2006/11/06 13:52:54  jsedwards
-- Changed to skip over public blocks for now.
--
-- Revision 1.3  2006/11/02 11:49:28  jsedwards
-- Fixed all cases where 'z' was used as a format for 'off64_t' values because
-- the older compiler complains.
--
-- Revision 1.2  2006/10/26 01:51:23  jsedwards
-- Merged alpha_05_branch back into main trunk.
--
-- Revision 1.1.2.8  2006/10/22 12:45:21  jsedwards
-- Change to use the number of blocks stored on disk instead of
-- BLOCKS_ON_DISK #define.
--
-- Revision 1.1.2.7  2006/10/22 12:40:47  jsedwards
-- Corrected version string error message.
--
-- Revision 1.1.2.6  2006/10/19 01:42:47  jsedwards
-- Fixed format specifiers for uint32, which is now an int instead of a long,
-- and off64_t.
--
-- Revision 1.1.2.5  2006/10/15 16:31:42  jsedwards
-- Changed to use block maps to find blocks to write instead of scanning
-- the entire drive.  Time reduced from 87 to 63 minutes.
--
-- Revision 1.1.2.4  2006/10/15 12:10:08  jsedwards
-- Change to read an entire chunk at a time and to skip over the block maps.
--
-- Revision 1.1.2.3  2006/09/19 14:11:57  jsedwards
-- Added printing of number of blocks.
--
-- Revision 1.1.2.2  2006/09/17 13:52:11  jsedwards
-- Fix argument count bug and open file read only instead of read-write.
--
-- Revision 1.1.2.1  2006/09/17 13:21:39  jsedwards
-- Program to compress the sparse objectify file into a non-sparse file.
--
*/


#define _LARGEFILE64_SOURCE

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>

#include "objectify_private.h"

/* Write the one-line usage summary for this program to stderr. */
static void print_usage(char *program)
{
    static const char usage_format[] = "usage: %s output-file\n";

    fprintf(stderr, usage_format, program);
}


int main(int argc, char* argv[])
{
    int obj_file_desc;
    off_t chunk;
    uint8 block_map[BIT_MAP_BYTES];
    uint8 block[FILE_BLOCK_SIZE];
    int i;
    size_t bytes_read;
    FILE* ofp;
    int num_blocks;
    uint32 total_public_blocks;
    uint32 total_private_blocks;
    uint32 used_public_blocks;
    uint32 used_private_blocks;
    uint32 blocks_on_disk;
    char msg[128];
    Disk_Header disk_header;
    uint32 counts[1024];
    uint32 ref;

    for (i = 0; i < 1024; i++) counts[i] = 0;

    if (argc != 2)
    {
	print_usage(argv[0]);
	exit(1);
    }

    if (*argv[1] == '-')
    {
	fprintf(stderr, "error: this program doesn't have any options\n");
	print_usage(argv[0]);
	exit(1);
    }


    /* Open the storage drive and verify the header info */

    obj_file_desc = open(DEFAULT_FILE, O_RDONLY | O_LARGEFILE);

    if (obj_file_desc < 0)
    {
	perror(DEFAULT_FILE);
	exit(1);
    }

    bytes_read = read(obj_file_desc, block, sizeof(block));

    if (bytes_read != sizeof(block))
    {
	perror("reading first block");
	exit(1);
    }

    if (memcmp(&block[0], MAGIC_NUMBER, 4) != 0)
    {
	fprintf(stderr, "Missing magic number in disk header\n");
	exit(1);
    }

    if (memcmp(&block[4], VERSION_STRING, 4) != 0)
    {
	fprintf(stderr, "Incorrect version string in disk header\n");
	exit(1);
    }

    memcpy(&disk_header, block, sizeof(disk_header));

    nwos_4_uint8_to_uint32(disk_header.total_public_blocks, &total_public_blocks);
    nwos_4_uint8_to_uint32(disk_header.total_private_blocks, &total_private_blocks);

    nwos_4_uint8_to_uint32(disk_header.used_public_blocks, &used_public_blocks);
    nwos_4_uint8_to_uint32(disk_header.used_private_blocks, &used_private_blocks);

    blocks_on_disk = total_public_blocks + total_private_blocks; 

    ofp = fopen(argv[1], "w");

    if (ofp == NULL)
    {
	perror(argv[1]);
	exit(1);
    }

    printf("header: %c%c%c%c %c%c%c%c\n",
	   disk_header.magic_number[0], disk_header.magic_number[1], disk_header.magic_number[2], disk_header.magic_number[3], 
	   disk_header.version_string[0], disk_header.version_string[1], disk_header.version_string[2], disk_header.version_string[3]);

    printf("total blocks on disk: %08u  public blocks: %08u\n",
	   blocks_on_disk, total_public_blocks);
    fflush(stdout);

    fwrite(block, 1, FILE_BLOCK_SIZE, ofp);   /* write the first 256 bytes always */

    num_blocks = 0;

    // for now we can skip over public blocks because they should always stay the same
    for (chunk = total_public_blocks; chunk < blocks_on_disk; chunk += BLOCKS_IN_CHUNK)
    {
	if (lseek(obj_file_desc, (off_t)chunk << 8, SEEK_SET) < 0)
	{
	    snprintf(msg, sizeof(msg), "lseek chunk:%08x", (uint32)chunk);
	    perror(msg);
	    exit(1);
	}

	bytes_read = read(obj_file_desc, block_map, sizeof(block_map));

	if (bytes_read != sizeof(block_map))
	{
	    snprintf(msg, sizeof(msg), "reading block map: %u", (uint32)chunk);
	    perror(msg);
	    exit(1);
	}

	/* scan block map (skip over the blocks for the block map itself) */
	for (i = BIT_MAP_BLOCKS; i < BLOCKS_IN_CHUNK; i++)
	{
	    if ((block_map[i/8] & (0x80 >> (i%8))) != 0)
	    {
		if (lseek(obj_file_desc, (chunk + i) << 8, SEEK_SET) < 0)
		{
		    snprintf(msg, sizeof(msg), "lseek block:%08x", (uint32)(chunk + i));
		    perror(msg);
		    exit(1);
		}

		bytes_read = read(obj_file_desc, block, sizeof(block));

		if (bytes_read != sizeof(block))
		{
		    snprintf(msg, sizeof(msg), "reading block: %u", (uint32)(chunk + i));
		    perror(msg);
		    exit(1);
		}
		
		ref = (uint32)block[4] << 24 | (uint32)block[5] << 16 | (uint32)block[6] << 8 | (uint32)block[7];

		printf("id: %08x\n", ref);
		fflush(stdout);

		ref = (ref - RESERVED_PUBLIC_BLOCKS) / total_private_blocks;
		if (ref > 1023) ref = 1023;
		counts[ref]++;

		if (fwrite(block, 1, sizeof(block), ofp) != sizeof(block))
		{
		    perror(argv[1]);
		    close(obj_file_desc);
		    exit(1);
		}

		num_blocks++;
	    }
	}
    }

    printf("Number of blocks: %d\n", num_blocks);

    for (i = 0; i < 1024; i++)
    {
	if (counts[i] > 0) printf("  %d: %u\n", i, counts[i]);
    }

    if (fclose(ofp) != 0)
    {
	perror(argv[1]);
	exit(1);
    }

    close(obj_file_desc);

    return 0;
}

