
#include "filereader.h"

#include <unistd.h>
#include "fileio.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <errno.h>

#include <limits>

#include "md5.h"
#include "sha1.h"


// Bit values stored in filereader::flags.
// Content of the mapping is consumed as-is, no decompression.
#define FLAG_PLAIN 1
// Name ends in ".gz" and the zlib inflate stream was initialised.
#define FLAG_GZ 2
// Name ends in ".bz2" and the bzip2 decompressor was initialised.
#define FLAG_BZ2 4
// End of a plain mapping was reached; also set together with FLAG_ERROR on failures.
#define FLAG_EOF 8
// Opening or decompressing failed.
#define FLAG_ERROR 16
// The decompressor reported the end of its stream (or failed); nothing more can be uncompressed.
#define FLAG_COMPEOF 32

//#include <fstream>
#include <iostream>

// Size handed to m_UncompBuf.init() when a decompressor is set up (presumably a byte count).
#define BUFSIZE 20000

using namespace MYSTD;

//! Creates a reader with no file attached: no flags, no mapping, no descriptor.
filereader::filereader() 
:
	flags(0),
	m_szFileBuf((char*)MAP_FAILED), 
	m_nCurLine(0),
	m_fd(-1)
{
	// The size/position pair is read by _UncompressMoreData(), which is reachable
	// before any OpenFile() call; give it defined values. Assigned in the body so
	// that this does not depend on the member declaration order.
	m_nBufSize = 0;
	m_nBufPos = 0;
}
	

//! Returns the name given to OpenFile() without its directory part and,
//! for files opened as gzip/bzip2, without the ".gz"/".bz2" suffix.
string filereader::GetPureFilename()
{
	string sName(m_sOrigName);

	// drop everything up to and including the last slash
	const tStrPos nSlash = sName.rfind('/');
	if(nSlash != stmiss)
		sName.erase(0, nSlash+1);

	// the compression flag is only set for names carrying the matching suffix
	if(flags&FLAG_GZ)
	{
		sName.erase(sName.length()-3);
	}
	else if(flags&FLAG_BZ2)
	{
		sName.erase(sName.length()-4);
	}

	return sName;
}

//! Opens sFilename read-only and maps it into memory.
//!
//! Unless bNoMagic is set, a ".bz2" or ".gz" suffix selects the matching
//! decompressor (failing when support for it was not compiled in); any other
//! name is treated as plain data.
//!
//! @param sFilename path of the file to open
//! @param bNoMagic  true to treat the content as plain data regardless of the name
//! @return true on success; on any failure FLAG_ERROR and FLAG_EOF are set and
//!         false is returned
bool filereader::OpenFile(const string & sFilename, bool bNoMagic)
{
	// start from a clean state, releasing whatever was open before
	Close();

	m_sOrigName = sFilename;
	m_fd = open(sFilename.c_str(), O_RDONLY);

	// every following stage runs only while all previous ones succeeded
	bool bGood = (m_fd >= 0);

	if(bGood)
	{
		if(bNoMagic)
		{
			flags |= FLAG_PLAIN;
		}
		else if(endsWithSzAr(sFilename, ".bz2"))
		{
#ifdef HAVE_LIBBZ2
			m_bzStream.bzalloc = NULL;
			m_bzStream.bzfree = NULL;
			m_bzStream.opaque = NULL;
			bGood = (BZ_OK == BZ2_bzDecompressInit ( & m_bzStream, 1, EXTREME_MEMORY_SAVING));
			if(bGood)
			{
				// only now mark for processing and cleanup
				flags |= FLAG_BZ2;
				m_UncompBuf.init(BUFSIZE);
			}
#else
			bGood = false;
#endif
		}
		else if(endsWithSzAr(sFilename, ".gz"))
		{
#ifdef HAVE_ZLIB
			memset(&m_zStream, 0, sizeof(m_zStream));
			bGood = (Z_OK == inflateInit2(&m_zStream, 47));
			if(bGood)
			{
				flags |= FLAG_GZ;
				m_UncompBuf.init(BUFSIZE);
			}
#else
			bGood = false;
#endif
		}
		else
		{
			flags |= FLAG_PLAIN;
		}
	}

	struct stat statbuf;
	if(bGood)
		bGood = (0 == fstat(m_fd, &statbuf));

	// LFS on 32bit? That's not good for mmap. Don't risk incorrect behaviour.
	if(bGood && uint64_t(statbuf.st_size) > MYSTD::numeric_limits<size_t>::max())
	{
		errno = EFBIG;
		bGood = false;
	}

	if(bGood)
	{
		m_nBufSize = statbuf.st_size;
		m_szFileBuf = (char*) mmap(0, m_nBufSize, PROT_READ, MAP_SHARED, m_fd, 0);
		bGood = (m_szFileBuf != MAP_FAILED);
	}

	if(!bGood)
	{
		flags |= (FLAG_ERROR|FLAG_EOF);
		return false;
	}

#ifdef HAVE_MADVISE
	// if possible, announce the sequential access pattern
	posix_madvise(m_szFileBuf, statbuf.st_size, POSIX_MADV_SEQUENTIAL);
#endif

	m_nBufPos = 0;
	m_nCurLine = 0;
	return true;
}

//! Reports whether the reader is free of errors.
//!
//! @param bErrorsConsiderFatal when true and FLAG_ERROR is set, a message is
//!        printed to cerr and the process exits with EXIT_FAILURE
//! @return true when FLAG_ERROR is not set, false otherwise
bool filereader::CheckGoodState(bool bErrorsConsiderFatal)
{
	const bool bFailed = (flags&FLAG_ERROR) != 0;
	if(!bFailed)
		return true;

	if(bErrorsConsiderFatal)
	{
		cerr << "Error opening file: "<< m_sOrigName << ", terminating."<<endl;
		exit(EXIT_FAILURE);
	}

	return false;
}

//! Returns "<original file name>:<current line number>".
string filereader::GetPositionDescription() {
	char buf[22];
	// bounded formatting: never write past buf, whatever width the counter has
	snprintf(buf, sizeof(buf), ":%u", m_nCurLine);
	return m_sOrigName+buf;
}

//! Releases everything OpenFile() acquired (mapping, descriptor, decompressor
//! state) and resets the line counter and all flags. Safe to call repeatedly.
void filereader::Close()
{
	m_nCurLine=0;
	
	if(m_szFileBuf!=MAP_FAILED)
	{
		munmap(m_szFileBuf, m_nBufSize);
		m_szFileBuf=(char*)MAP_FAILED;
	}
			
	if(m_fd>=0)
	{
		fsync(m_fd);
		forceclose(m_fd);
		// forceclose() is declared elsewhere; resetting here as well guarantees
		// that the descriptor is never closed twice, whether or not it resets
		// its argument.
		m_fd=-1;
	}
	
#ifdef HAVE_LIBBZ2
	if(flags&FLAG_BZ2)
		BZ2_bzDecompressEnd (&m_bzStream);
#endif

#ifdef HAVE_ZLIB
	// The stream is set up with inflateInit2() in OpenFile(), so it has to be
	// released with inflateEnd(); deflateEnd() does not accept an inflate state
	// and would leave its memory allocated.
	if(flags&FLAG_GZ)
		inflateEnd(&m_zStream);
#endif
	flags=0;
}

//! Hands all cleanup to Close().
filereader::~filereader()
{
	this->Close();
}

//! Extracts the next line, without its terminator, into sOut.
//!
//! A line ends at '\r' or '\n'; a directly following terminator of the other
//! kind ("\r\n" or "\n\r") is swallowed with it. Without any terminator, the
//! remaining data is returned as the line. In plain mode the call that first
//! detects the end of the mapping still returns true with an empty line; only
//! later calls return false.
//!
//! @param sOut receives the line; always cleared first
//! @return false when EOF or an error was flagged earlier, or when no more
//!         decompressed data is available; true otherwise
// TODO: can split up a line when it doesn't fit into one buffer
bool filereader::GetOneLine(string & sOut) {

	sOut.clear();

	// stop flags set in a previous run
	if(flags & (FLAG_EOF|FLAG_ERROR))
		return false;

	char *pData = NULL;
	char *pBreak = NULL;
	size_t nAvail = 0;

	// At most two passes: when a compressed source has no terminator in the
	// current window, more data is uncompressed once and the search repeated.
	for(bool bMayRefill = true; ; )
	{
		if(flags&FLAG_PLAIN)
		{
			// nothing to refill, the whole file is mapped
			bMayRefill = false;

			// detect eof and remember that, for now or later calls
			if(m_nBufPos >= m_nBufSize)
				flags |= FLAG_EOF;

			nAvail = (flags&FLAG_EOF) ? 0 : m_nBufSize-m_nBufPos;
			pData = m_szFileBuf+m_nBufPos;
		}
		else
		{
			nAvail = m_UncompBuf.size();

			if(nAvail==0 && (flags&(FLAG_COMPEOF|FLAG_ERROR|FLAG_EOF)))
				return false;

			pData = m_UncompBuf.rptr();
		}

		// the search range may be empty, then the lookup simply misses
		pBreak = mempbrk(pData, "\r\n", nAvail);

		if(pBreak || !bMayRefill)
			break;

		bMayRefill = false;
		_UncompressMoreData();
	}

	tStrPos nLineLen, nDropLen;

	if(pBreak)
	{
		nLineLen = pBreak-pData;
		nDropLen = nLineLen+1;
		// swallow a second terminator, but only when it is of the other kind
		if(nAvail > nDropLen && pBreak[0]+pBreak[1] == '\r'+'\n')
			nDropLen++;
	}
	else
	{
		// no terminator at all: hand out whatever is left
		nDropLen = nLineLen = nAvail;
	}

	sOut.assign(pData, nLineLen);

	if(flags&FLAG_PLAIN)
		m_nBufPos += nDropLen;
	else
		m_UncompBuf.drop(nDropLen);

	m_nCurLine++;
	return true;
}

//! @return: new text buffer size
inline void filereader::_UncompressMoreData() {

	// work with uncompressed buffer/window...
	m_UncompBuf.move(); // get unused space if possible
		
	if(	flags&FLAG_COMPEOF // cannot uncompress more
		|| m_UncompBuf.freecapa()==0 )
		return;
	
	if(m_nBufPos>=m_nBufSize )
	{
		// shouldn't be here. Decompressor's errors or eof must have been handled before. Undiscovered somehow?
		flags|=(FLAG_ERROR|FLAG_EOF);
		return;
	}
	
	unsigned int nFeedLen=m_nBufSize-m_nBufPos;
	
#ifdef HAVE_LIBBZ2
	if(flags&FLAG_BZ2)
	{
		m_bzStream.next_in=m_szFileBuf+m_nBufPos;
		m_bzStream.avail_in=nFeedLen;
		m_bzStream.next_out=m_UncompBuf.wptr();
		m_bzStream.avail_out=m_UncompBuf.freecapa();

		int ret=BZ2_bzDecompress(&m_bzStream);
		if(ret==BZ_STREAM_END)
		{
			// remember this later
			flags|=FLAG_COMPEOF;
			ret=BZ_OK;
		}
		if(ret==BZ_OK)
		{
			m_nBufPos += (nFeedLen-m_bzStream.avail_in);
			unsigned int nGotBytes= m_UncompBuf.freecapa() - m_bzStream.avail_out;
			m_UncompBuf.got(nGotBytes);
		}
		// or corrupted data?
		else flags|=(FLAG_COMPEOF|FLAG_ERROR);
	}
#endif
#ifdef HAVE_ZLIB
	if(flags&FLAG_GZ)
	{
		m_zStream.next_in=(Bytef*) m_szFileBuf+m_nBufPos;
		m_zStream.avail_in=nFeedLen;
		m_zStream.next_out=(Bytef*) m_UncompBuf.wptr();
		m_zStream.avail_out=m_UncompBuf.freecapa();

		int ret=inflate(&m_zStream, Z_NO_FLUSH);
		if(ret==Z_STREAM_END)
		{
			flags|=FLAG_COMPEOF;
			ret=Z_OK;
		}
		if(ret==Z_OK)
		{ //ok, accept the data
			m_nBufPos += (nFeedLen-m_zStream.avail_in);
			unsigned int nGotBytes= m_UncompBuf.freecapa() - m_zStream.avail_out;
			m_UncompBuf.got(nGotBytes);
		}
		// or corrupted data?
		else flags|=(FLAG_COMPEOF|FLAG_ERROR);
	}
#endif
}

/*
// TODO: make this use a string?
bool filereader::GetMd5String(const MYSTD::string & sFileName, char out[])
{
	uint8_t buf[16];
	if(!GetMd5Sum(sFileName, buf))
		return false;
	
	for(UINT i=0;i<16;i++)
		sprintf(&out[2*i], "%02x", buf[i]);
	
	return true;
}
*/

//! Computes the MD5 digest of a file's content.
//!
//! @param sFileName   file to read
//! @param out         receives the digest as written by md5_finish()
//! @param bTryUnpack  when true, ".gz"/".bz2" files are decompressed and the
//!                    digest covers the decompressed data
//! @param scannedSize receives the number of hashed bytes; 0 when opening fails
//! @return false when the file cannot be opened or decompression fails,
//!         otherwise the result of CheckGoodState(false)
bool filereader::GetMd5Sum(const MYSTD::string & sFileName, uint8_t out[], bool bTryUnpack, off_t &scannedSize)
{
	md5_state_s ctx;
	md5_init(&ctx);
	filereader f;
	scannedSize=0;
	if (!f.OpenFile(sFileName, !bTryUnpack))
		return false;
	if (f.flags&FLAG_PLAIN)
	{
		// Hash the mapping in bounded pieces so that each length stays small
		// enough for any integer type used as the length parameter.
		// NOTE(review): md5_append presumably takes an int length - confirm in md5.h
		const size_t nMaxPiece = 1 << 30;
		for(size_t nDone=0; nDone < f.m_nBufSize; )
		{
			size_t nPiece = f.m_nBufSize - nDone;
			if(nPiece > nMaxPiece)
				nPiece = nMaxPiece;
			md5_append(&ctx, (md5_byte_t*) (f.m_szFileBuf + nDone), nPiece);
			nDone += nPiece;
		}
		scannedSize=f.m_nBufSize;
	}
	else
	{
		while(true)
		{
			f._UncompressMoreData();
			if(f.flags&FLAG_ERROR)
				return false;
			UINT nRest=f.m_UncompBuf.size();
			if(nRest==0)
				break; // nothing more came out of the decompressor
			md5_append(&ctx, (md5_byte_t*) f.m_UncompBuf.rptr(), nRest);
			scannedSize+=nRest;
			// everything was hashed, make room for the next round
			f.m_UncompBuf.clear();
		}
		
	}
	
	md5_finish(&ctx, out);
	return f.CheckGoodState(false);
}

//! Convenience wrapper: opens sFileName with a temporary reader and runs the
//! member GetSha1Sum() on it.
//!
//! @param bTryUnpack when true, ".gz"/".bz2" files are decompressed first
//! @return false when the file cannot be opened, otherwise the result of the
//!         member GetSha1Sum()
bool filereader::GetSha1Sum(const MYSTD::string & sFileName, uint8_t out[], 
		bool bTryUnpack, off_t &scannedSize, const char *szUnpStoreFile)
{
	filereader reader;
	if(!reader.OpenFile(sFileName, !bTryUnpack))
		return false;
	return reader.GetSha1Sum(out, scannedSize, szUnpStoreFile);
}


//! Computes the SHA1 digest of the opened file's (decompressed) content and
//! optionally stores that content in another file.
//!
//! @param out            receives the digest as written by sha_final(); only
//!                       written when hashing and dumping succeeded
//! @param scannedSize    receives the number of hashed bytes
//! @param szUnpStoreFile when not NULL, path of a file that receives a copy of
//!                       the hashed data
//! @return false when the dump file cannot be created, written or closed, or
//!         when decompression fails; otherwise the result of CheckGoodState(false)
bool filereader::GetSha1Sum(uint8_t out[], 
		off_t &scannedSize, const char *szUnpStoreFile)
{
	SHA_INFO ctx;
	sha_init(&ctx);
	scannedSize=0;
	
	FILE *fDump = NULL;
	if(szUnpStoreFile)
	{
		fDump = fopen(szUnpStoreFile, "w");
		if(!fDump)
			return false;
	}

	// Collects failures so that the dump file is closed on every path.
	bool bOk = true;

	if (flags&FLAG_PLAIN)
	{
		// Process the mapping in bounded pieces so that each length stays small
		// enough for any integer type used as the length parameter.
		// NOTE(review): sha_update presumably takes an int length - confirm in sha1.h
		const size_t nMaxPiece = 1 << 30;
		for(size_t nDone=0; bOk && nDone < m_nBufSize; )
		{
			size_t nPiece = m_nBufSize - nDone;
			if(nPiece > nMaxPiece)
				nPiece = nMaxPiece;
			sha_update(&ctx, (SHA_BYTE*) (m_szFileBuf + nDone), nPiece);
			if(fDump && nPiece != fwrite(m_szFileBuf + nDone, sizeof(char), nPiece, fDump))
				bOk = false; // incomplete copy, e.g. disk full
			nDone += nPiece;
		}
		scannedSize=m_nBufSize;
	}
	else
	{
		while (bOk)
		{
			_UncompressMoreData();
			if (flags&FLAG_ERROR)
			{
				bOk = false;
				break;
			}
			UINT nRest=m_UncompBuf.size();
			if (nRest==0)
				break; // nothing more came out of the decompressor
			sha_update(&ctx, (SHA_BYTE*) m_UncompBuf.rptr(), nRest);
			if(fDump && nRest != fwrite(m_UncompBuf.rptr(), sizeof(char), nRest, fDump))
				bOk = false; // incomplete copy, e.g. disk full
			scannedSize+=nRest;
			// everything was processed, make room for the next round
			m_UncompBuf.clear();
		}

	}

	if(bOk)
		sha_final(out, &ctx);

	if(fDump)
	{
		// give a temporarily failing flush a few more chances
		for(int i=0; 0 != ::fflush(fDump) && i<10; i++) ;

		// fclose() releases the stream and its descriptor even when it reports
		// an error, so the descriptor must not be closed again afterwards - the
		// number may already belong to some other file. A failure here means
		// that buffered data may not have reached the file.
		if(0!=::fclose(fDump))
			bOk = false;
	}
	
	return bOk && CheckGoodState(false);
}

void check_algos()
{
	const char testvec[]="abc";
	uint8_t out[20];
	 
	{
		SHA_INFO ctx;
		sha_init(&ctx);
		sha_update(&ctx, (SHA_BYTE*) testvec, sizeof(testvec)-1);
		sha_final(out, &ctx);
		if (CsBinToString(out, 20)!="a9993e364706816aba3e25717850c26c9cd0d89d")
		{
			cerr
					<< "Incorrect SHA1 implementation detected, check compilation settings!\n";
			exit(EXIT_FAILURE);
		}
	}
	{
		md5_state_s ctx;
		md5_init(&ctx);
		md5_append(&ctx, (md5_byte_t*) testvec, sizeof(testvec)-1);
		md5_finish(&ctx, out);
		if (CsBinToString(out, 16)!="900150983cd24fb0d6963f7d28e17f72")
		{
			cerr
					<< "Incorrect MD5 implementation detected, check compilation settings!\n";
			exit(EXIT_FAILURE);
		}
	}
}


/*
foreach $b (0..255) {
   print "\n" if($b%16==0);
   if( $b>=48 && $b<58 ) { $b-=48;}
   elsif($b>=97 && $b<103) { $b-=87;}
   elsif($b>=65 && $b<71) { $b-=55;}
   else {$b= --$dummy}
   print "$b,";
}
print "\n";
*/

namespace Meta
{

// Lookup table indexed by character code: the value (0..15) of the hex digit
// for '0'-'9', 'A'-'F' and 'a'-'f', and _inv for every other character.
#define _inv (uint_fast16_t)-1
uint_fast16_t hexmap[] = {
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                0,1,2,3,4,5,6,7,8,9,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,10,11,12,13,14,15,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,10,11,12,13,14,15,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,
                _inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv,_inv
                };

}

//! Converts a string of hex digits into bytes.
//!
//! @param a         text with at least 2*binLength hex digits (either case)
//! @param b         receives binLength bytes; bytes decoded before an invalid
//!                  digit was met remain written
//! @param binLength number of bytes to produce
//! @return true when all 2*binLength characters were valid hex digits, false
//!         at the first other character (including a premature end of string)
bool CsAsciiToBin(const char *a, uint8_t b[], unsigned short binLength)
{
	using namespace Meta;
	const unsigned char *uA = (const unsigned char*) a;
	for(int i=0; i<binLength;i++)
	{
		// Validate each digit on its own. Checking only the combined value is
		// not enough: where uint_fast16_t is wider than 16 bits, hi*16 + _inv
		// wraps around to hi*16-1 and would pass as a valid byte.
		const uint_fast16_t hi=hexmap[uA[i*2]];
		if(hi>15) return false; // also stops at the terminator before reading beyond it
		const uint_fast16_t lo=hexmap[uA[i*2+1]];
		if(lo>15) return false;
		b[i]=(uint8_t) (hi*16 + lo);
	}
	return true;
}

//! Renders lengthBin bytes of sum as lower-case hex text, two digits per byte.
string CsBinToString(const uint8_t sum[], unsigned short lengthBin)
{
	static const char szDigits[] = "0123456789abcdef";

	string sHex;
	const uint8_t *pEnd = sum + lengthBin;
	for (const uint8_t *pByte = sum; pByte != pEnd; ++pByte)
	{
		sHex += szDigits[*pByte >> 4];   // high nibble first
		sHex += szDigits[*pByte & 0xf];
	}
	return sHex;
}

/*
bool filereader::CheckMd5Sum(const string & sFileName, const string & sReference)
{
	uint8_t sum[16];

	if(sFileName.length()!=16)
		return false;
	
	if(!GetMd5Sum(sFileName, sum))
		return false;
	
	return CsEqual( (unsigned char*) sReference.data(), sum, 16);
}

*/
