
//#define LOCAL_DEBUG
#include "debug.h"

#include "expiration.h"
#include "lockable.h"
#include "acfg.h"
#include "meta.h"
#include "filereader.h"
#include "fileitem.h"
#include "dlcon.h"
#include "dirwalk.h"
#include "header.h"
#include "job.h"
#include "dljob.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <glob.h>

#include <map>
#include <string>
#include <iostream>

using namespace MYSTD;

#define ENABLED
#ifndef ENABLED
#warning defused
#endif

/**
 * Creates the expiration task.
 *
 * @param fd  descriptor handed on to bgtask
 *
 * The scan start time and the length of the cache directory prefix
 * (cachedir plus one separator character) are captured once here; the
 * prefix length is later used to turn absolute paths into cache-relative ones.
 */
expiration::expiration(int fd) :
	bgtask(fd),
	m_nTimeNow(time(NULL)),
	m_nDropPrefLen(acfg::cachedir.size()+1),
	m_bErrAbort(false),
	m_bVerbose(false),
	m_bForceDownload(false),
	m_bScanInternals(false),
	m_nErrorCount(0),
	m_nProgIdx(0),
	m_nProgTold(1)
{
	// page template used for the output of this task
	m_szDecoFile="maint.html";
}

// No explicit cleanup is performed by this destructor.
expiration::~expiration() {}
/**
 * Callback for file system entries that are not regular files.
 * Nothing is done with them.
 *
 * @return always true
 */
bool expiration::ProcessOthers(const string &, const struct stat &)
{
	return true;
}

bool expiration::ProcessRegular(const string & sPath, const struct stat &stinfo)
{

	if(CheckAbortCondition())
		return false;

	if (m_nProgIdx++>m_nProgTold)
	{
		m_nProgTold*=2;
		char buf[50];
		int n=snprintf(buf, sizeof(buf), "Scanning, found %u files...<BR>\n",
				m_nProgTold+1);
		maintenance::SendMsg(buf, n);
	}
	 
	if(0==sPath.compare(m_nDropPrefLen, 1, "_") && !m_bScanInternals)
		return true; // not for us

	tStrPos nSlashPos=sPath.rfind(sPathSep[0]);
	string sBasename(sPath, nSlashPos+1);
	
	// handle the head files separately
    if (endsWithSzAr(sPath, ".head"))
    {
        m_trashCandHeadSet.insert(sPath.substr(m_nDropPrefLen));
        return true;
    }

	if (rechecks::FILE_INDEX == m_rex.getFiletype(sPath.c_str()))
    {
        //cerr << "ifile: " <<sPath <<endl;
		m_volatileFiles.insert(sPath);
    }
    string sDirnameAndSlash(sPath, m_nDropPrefLen, nSlashPos-m_nDropPrefLen+1);
    pair<string,tDiskFileInfo> fileDesc(sBasename,
        		tDiskFileInfo(sDirnameAndSlash, m_nTimeNow));
    fileDesc.second.fpr.size = stinfo.st_size;
    
    m_trashCandSet.insert(fileDesc);
    return true;
}

void expiration::_LoadTrashMapFromFile(bool bForceInsert)
{

	filereader reader;
	reader.OpenFile(acfg::cachedir+sPathSep+"_expending_dat");
	
	string sLine;

#if 0
	// stuff for user info
	time_t now=time(NULL);
	time_t oldest(now), newest(0);
#endif
	
	while(reader.GetOneLine(sLine))
	{
		tStrVec elems;
		if(Tokenize(sLine, "\t", elems)==3)
		{
			time_t timestamp=atoll(elems[0].c_str());
			if(timestamp>m_nTimeNow)
				continue; // where is the DeLorean?
			
			if(elems[1].empty() || elems[2].empty())
				continue;
			
			if(elems[1].at(elems[1].size()-1)!=SZPATHSEP[0])
				elems[1].append(SZPATHSEP);
			
			if(bForceInsert)
			{
				m_trashCandSet.insert(
						pair<string, tDiskFileInfo>(elems[2], 
								tDiskFileInfo(elems[1], 0)));
				continue;
			}
			
			static pair<tS2DAT::iterator, tS2DAT::iterator> fromTo;
			fromTo=m_trashCandSet.equal_range(elems[2]);
			// file already gone?
			if(fromTo.first==fromTo.second)
				continue; // file already gone?
			
			// TODO: make global iterators for the other function too?
			static tS2DAT::iterator itmp, it;
			for(it=fromTo.first; it!=fromTo.second; it++)
				if(it->second.sDirname == elems[1]) // hit
					it->second.nLostAt=timestamp;										

#if 0
			if(timestamp < oldest)
				oldest=timestamp;
			if(timestamp > newest)
				newest=timestamp;
#endif
		}
	}
	
#if 0
	/*
	cout << "Unreferenced: ";
	for(trashmap_t::iterator it=m_trashCandidates.begin(); it!=m_trashCandidates.end(); it++)
		fprintf(stdout, "%lu\t%s\t%s\n",  it->second.first, it->second.second.c_str(), it->first.c_str());
	*/
	
	if(m_trashCandidates.size()>0)
	{
		// pretty printing for impatient users
		char buf[200];
		
		// map to to wait
		int nMax=acfg::extreshhold-((now-oldest)/86400);
		int nMin=acfg::extreshhold-((now-newest)/86400);
				
		snprintf(buf, _countof(buf), "Previously detected: %lu rotten package file(s), "
				"to be deleted in about %d-%d day(s)<br>\n", 
				(unsigned long) m_trashCandidates.size(),
				nMin, nMax==nMin?nMax+1:nMax); // cheat a bit for the sake of code simplicity
		SendMsg(buf);
	}
#endif

}



/**
 * Checks/refetches a single cached file through the given downloader.
 *
 * The download source is derived from the cache-relative path (its first
 * component is looked up as backend/repository name) and, if that yields no
 * backend, from the X-Original-Source header of the cached item. If neither
 * resolves to a backend, the URL is used directly.
 *
 * @param dl         downloader to run the job on
 * @param sFilePath  absolute path of the file inside the cache directory
 * @param sErr       receives/gets appended error details on failure
 * @return true on success or when the failure may be ignored according to
 *         the ignore list, false otherwise
 */
bool expiration::_DownloadOne(dlcon &dl, const MYSTD::string & sFilePath, string &sErr)
{
	tFileItemPtr fi;
	acfg::tHostiVec *pBackends(NULL);

	// path relative to the cache directory
	string sKeyPath=sFilePath.substr(acfg::cachedir.size()+1, stmiss);

	// keypath must represent a previously resolved URL, including
	// either repname or real host which works
	tHttpUrl url;
#ifdef WIN32
#error rewrite for the backslashes or change reliably in the walker to slashes
#endif
	if (!url.SetHttpUrl(sKeyPath))
	{
		sErr=sKeyPath+" does not contain a valid repository or host name.";
		goto rep_update_prob;
	}

	fi=fileitem::GetFileItem(sKeyPath);
	if (fi)
	{
		fi->Setup(true, m_bForceDownload);
	}
	else
	{
		sErr=" could not create file item handler.";
		goto rep_update_prob;
	}
	
	SendMsg(string("Checking/Updating ")+sKeyPath+"...\n");
	pBackends = acfg::GetBackendVec(&url.sHost);
	if (pBackends)
		url.sPath.erase(0, 1); // get only the path string residual
	else
	{ 
		// try something different, do we have the original URL? It's not reliable, though
		lockguard g(fi.get());
		const header *pHead=fi->GetHeaderUnlocked();
		// must be well formatted to be valid
		if (pHead
		&& pHead->h[header::XORIG]
		&& 0==strncmp(pHead->h[header::XORIG], "http://", 7)
		&& url.SetHttpUrl(pHead->h[header::XORIG]))
		{
			pBackends = acfg::GetBackendVec(&url.sHost);
			// backend jobs get the path without the leading slash, same as
			// in the direct lookup above
			if (pBackends)
				url.sPath.erase(0, 1);
		}
	}
	if (pBackends)
		dl.AddJob(fi, pBackends, url.sPath);
	else
		// use as hostname, and pray
		dl.AddJob(fi, url);

	dl.WorkLoop();

	if (fi->GetStatus() == FIST_ERROR)
		goto rep_update_prob;

	SendMsg(string("<i>(")+ltos(fi->GetTransferCount()/1024)+("KiB)</i><br>\n"));
	
	// release our reference on the item
	fileitem::DelUser(sKeyPath);
	fi.reset();
	
	return true;

	rep_update_prob:

	if (fi)
	{
		{
			// add the status line of the item header as failure reason
			lockguard g(*fi);
			const header *pErHead=fi->GetHeaderUnlocked();
			if (pErHead)
				sErr+=string("Reason: ")+pErHead->frontLine+"<br>\n";
		}
		fileitem::DelUser(sKeyPath);
		fi.reset();
	}
	SendMsg("<br>\n");
	
	if(_CanBeIgnored(sKeyPath))
	{
		SendMsg("<font color=\"orange\">Ignoring download failure as advised.</font><br>\n");
		return true;
	}
	return false;
}

class tDlJobHints
{	
public:
	const MYSTD::string *psTargetBasePath, *psSrcTmpPath;
	const tStrSet *pGoodChecklist;
};

/**
 * Download job which sends a HEAD request and, instead of taking the body
 * from the remote side, tries to recreate the .bz2 file locally from the
 * uncompressed file named in the hints.
 */
class tDlJobEx : public tDlJob 
{
public:
	tDlJobHints *m_pHints;
	tDlJobEx(dlcon *m, tFileItemPtr pFi, const tHttpUrl &url, tDlJobHints *pHints)
	: tDlJob(m, pFi, url.sHost, url.sPath), m_pHints(pHints)
	{
		m_sRequestCmd="HEAD ";
	}
	tDlJobEx(dlcon *m, tFileItemPtr pFi, acfg::tHostiVec * pBackends,
			const MYSTD::string & sPath, tDlJobHints *pHints)
	: tDlJob(m, pFi, pBackends, sPath), m_pHints(pHints)
	{
		m_sRequestCmd="HEAD ";
	}
protected:
	/**
	 * Compresses the source file with several bzip2 variants until the
	 * result (size and SHA1 sum) is found in the checklist from the hints,
	 * then stores that data in the file item.
	 *
	 * @param inBuf      not used here
	 * @param sErrorRet  receives the error message on failure
	 * @return R_NEXTSTATE after storing a matching file, R_ERROR_LOCAL otherwise
	 */
	virtual tDlResult NewDataHandler(acbuf & inBuf, string &sErrorRet)
	{
		string sBzFn; // compressor output, a temporary file next to the source
		
		if (!m_pHints || ! m_pHints ->pGoodChecklist || !m_pHints->psSrcTmpPath || !m_pHints->psTargetBasePath)
			goto repErrLocal;
		sBzFn = *(m_pHints->psSrcTmpPath) + ".bz2";
		
		{
			const header *ph = m_pStorage->GetHeaderUnlocked();
			
			// no header or no length information -> nothing to compare with
			if (!ph || !ph->h[header::CONTENT_LENGTH])
				goto repErrLocal;

			// .bz2 larger than 30mb? unlikely
			if (atoll(ph->h[header::CONTENT_LENGTH]) > 30000000)
				goto repErrLocal;
			
			// some mirrors don't use -9, and some might use the mt version
			const char *szCmds[] = { 
					"bzip2 -k -9 '",
					"bzip2 -k '",
					"pbzip2 -k -9 '",
					"pbzip2 -k '"
			};
			for (UINT i=0; i<_countof(szCmds); i++)
			{
				// remove the result of the previous attempt
				::unlink(sBzFn.c_str());
				
				string cmd(szCmds[i]);
				cmd+=*m_pHints->psSrcTmpPath+"'";
				if ( 0!=::system(cmd.c_str()) )
					continue;
				
				uint8_t cs[20];
				off_t nSize(0);
				filereader reader;

				if(reader.OpenFile(sBzFn, true) && reader.GetSha1Sum(cs, nSize))
				{
					// XXX lltos? For LFS?
					if(m_pHints->pGoodChecklist->find(ltos(nSize)
							+"_"+CsBinToString(cs, 20))
					!= m_pHints->pGoodChecklist->end())
					{
						// hit! Looks okayish, try to pass data through

						if (!m_pStorage->StoreFileData(reader.GetBuffer(),
								reader.GetSize()))
						{
							goto repErrLocal;
						}

						// the data is stored, the temporary file is no
						// longer needed
						::unlink(sBzFn.c_str());

						m_DlState=STATE_FINISHJOB;
						return R_NEXTSTATE;
					}
				}
			}
		}
		
		repErrLocal:
		sErrorRet = "567 Failed to reconstruct bzip'ed file";
		if(!sBzFn.empty()) ::unlink(sBzFn.c_str());
		
		return R_ERROR_LOCAL;
	}
};


/**
 * Tries to put a locally compressed .bz2 variant of a freshly patched file
 * into the cache. Does nothing unless acfg::recompbz2 is set.
 *
 * @param sTargetBasePath  path of the uncompressed target; ".bz2" is appended
 * @param sSrcTmpPath      path of the uncompressed source data
 * @param goodChecklist    acceptable "<size>_<checksum>" strings
 */
void expiration::_InjectBz2ed(const MYSTD::string & sTargetBasePath, 
		const MYSTD::string & sSrcTmpPath, const tStrSet & goodChecklist)
{
	if(acfg::recompbz2)
	{
		tDlJobHints jobHints;
		jobHints.pGoodChecklist=&goodChecklist;
		jobHints.psSrcTmpPath=&sSrcTmpPath;
		jobHints.psTargetBasePath=&sTargetBasePath;
		string sUnusedErr;

		// get a new connection, work around timeouts
		dlcon separateDler(true);
		SendMsg("Attempting to inject bz2ed version...");

		const bool bInjected=_UpdateOne(separateDler, sTargetBasePath+".bz2",
				sUnusedErr, &jobHints);
		if(!bInjected)
			SendMsg("skipped<br>\n");
		else
			SendMsg("succeeded<br>\n");
	}
}


/**
 * Checks/refetches a single cached file unless it was already handled
 * during this run (tracked in m_uptodateTags).
 *
 * @param dl             downloader to run the job on
 * @param sFilePath      absolute path of the file inside the cache directory
 * @param sErr           receives/gets appended error details on failure
 * @param pSpecialHints  if set, a tDlJobEx job with these hints is queued
 *                       instead of a regular download and the
 *                       "Checking/Updating" message is suppressed
 * @return true if the file is considered up to date afterwards or if the
 *         failure may be ignored according to the ignore list
 */
bool expiration::_UpdateOne(dlcon &dl, const MYSTD::string & sFilePath, string &sErr, tDlJobHints *pSpecialHints)
{
	if(m_uptodateTags.find(sFilePath) != m_uptodateTags.end())
		return true; // this is recent enough
	
	tFileItemPtr fi;
	tHttpUrl url;
	acfg::tHostiVec *pBackends(NULL);

	// path relative to the cache directory
	string sKeyPath=sFilePath.substr(acfg::cachedir.size()+1, stmiss);
	
	if(!pSpecialHints) // running wrapped, STFU
		SendMsg(string("Checking/Updating ")+sKeyPath+"...\n");
	
	fi=fileitem::GetFileItem(sKeyPath);
	if (fi)
	{
		fi->Setup(true, m_bForceDownload);
	}
	else
	{
		sErr=" could not create file item handler.";
		goto rep_update_prob;
	}
	

#ifdef WIN32
#error rewrite for the backslashes or change reliably in the walker to slashes
#endif

	/* 
	 * Three ways to find the source to download from:
	 * 1. Interpret the base directory as repository name
	 * 2. Interpret the whole subpath as URL (host/path)
	 * 3. Use X-Original-Source (which might be broken)
	 */
	
	// abusing url class to extract base directory

	if (!url.SetHttpUrl(sKeyPath))
	{
		// should never happen, though
		sErr=sKeyPath+" does not seem to contain valid repository or host name.";
		goto rep_update_prob;
	}
	
	pBackends = acfg::GetBackendVec(&url.sHost);
	
	if (pBackends)
	{
		// HIT, control by backend scheme, strip leading slash from path
		
		if(pSpecialHints)
			dl.EnqJob(new tDlJobEx(&dl, fi, pBackends, url.sPath.substr(1), pSpecialHints));
		else
			dl.AddJob(fi, pBackends, url.sPath.substr(1));
	}
	else
	{
		tHttpUrl urlOrig;
		string sOrig;
		{
			lockguard g(fi.get());
			const header *pHead=fi->GetHeaderUnlocked();
			if (pHead && pHead->h[header::XORIG])
				sOrig=pHead->h[header::XORIG];
		}
		if(startsWithSz(sOrig, "http://") && urlOrig.SetHttpUrl(sOrig))
		{
			// ok, looks valid, is it better than the one from the path?
			if(url != urlOrig)
			{
				const bool bDirHasDots = (url.sHost.find(".") != stmiss);

				if(urlOrig.sHost.find(".") != stmiss)
				{
					// Both have dots: prefer directory as host.
					// Dir has no dots, orig-url host has: use orig url.
					if(!bDirHasDots)
						url=urlOrig;
				}
				else // no dots in urlOrig host, most likely broken
				{
					/*
					 * if the directory has dots, use it and be quiet (unless verbosity is enabled).
					 * Otherwise, warn the user.
					 * */
					if (m_bVerbose || !bDirHasDots)
					{
						SendMsg(string("<font color=\"orange\">Code 520824! "
							"Read the manual about known bugs! Attempting to use ")
								+ url.sHost + " as hostname</font>");
					}
				}
			}
		}
		if(pSpecialHints)
			dl.EnqJob(new tDlJobEx(&dl, fi, url, pSpecialHints));
		else
			dl.AddJob(fi, url);
	}	

	dl.WorkLoop();

	if (fi->GetStatus() == FIST_ERROR)
		goto rep_update_prob;

	SendMsg(string("<i>(")+ltos(fi->GetTransferCount()/1024)+("KiB)</i><br>\n"));
	
	// release our reference on the item
	fileitem::DelUser(sKeyPath);
	fi.reset();
	
	// remember the success, later calls for this path return immediately
	m_uptodateTags.insert(sFilePath);
	return true;

	rep_update_prob:

	if (fi)
	{
		{
			// add the status line of the item header as failure reason
			lockguard g(*fi);
			const header *pErHead=fi->GetHeaderUnlocked();
			if (pErHead)
				sErr+=string("Reason: ")+pErHead->frontLine+"<br>\n";
		}
		fileitem::DelUser(sKeyPath);
		fi.reset();
	}
	SendMsg("<br>\n");
	
	if(_CanBeIgnored(sKeyPath))
	{
		SendMsg("<font color=\"orange\">Ignoring download failure as advised.</font><br>\n");
		return true;
	}
	return false;
}

void expiration::_LoadIgnoreList()
{
	filereader reader;
	string sTmp;
	if(reader.OpenFile(acfg::confdir+sPathSep+"ignore_list"))
	{
		while(reader.GetOneLine(sTmp))
		{
			trimLine(sTmp);
			if(!sTmp.empty() && sTmp[0]!='#')
				m_ignList.insert(sTmp);
		}
	}
}

/**
 * Tells whether the given string is an entry of the ignore list.
 *
 * @param sCand  string to look up, compared as-is
 * @return true if m_ignList contains sCand
 */
bool expiration::_CanBeIgnored(const MYSTD::string & sCand)
{
	const bool bListed = (m_ignList.end() != m_ignList.find(sCand));
	return bListed;
}

/**
 * Checks that a file can be stat'ed and has exactly the given size.
 *
 * @param name  path of the file
 * @param size  expected size in bytes
 * @return true only if stat() succeeds and st_size equals size
 */
inline bool file_exists(const char *name, off_t size)
{
	struct stat info;
	const bool bStatOk = (0 == stat(name, &info));
	return bStatOk && (size == info.st_size);
}

/**
 * Tries to bring index files up to date by applying pdiff patches instead
 * of downloading them completely.
 *
 * First, all "Release" files among the volatile files are updated and the
 * size/checksum pairs listed there are collected. Then, for each
 * ".diff/Index" file, the cached base file (plain, .bz2 or .gz) is compared
 * with the current state and the history from the index. If it already is
 * current, it is marked as up to date. If it matches an older state, the
 * needed patches are fetched, verified and applied with red(1); the result
 * is accepted only if its size/checksum pair was listed in a Release file.
 *
 * @param dl                         downloader used for all transfers
 * @param listGuessedLocationsToTry  receives the base path of every
 *                                   .diff/Index seen, as fallback
 *                                   locations for the caller
 */
void expiration::_UpdateWithDiffs(dlcon &dl, tStrSet & listGuessedLocationsToTry)
{
	
	string sErr;
	filereader reader;
	
	// quick-and-dirty, extracting a bunch of csums/sizes for simple validity checks later
	tStrSet validCsums; // <size>_<sum>
	
	for (tStrSet::const_iterator it=m_volatileFiles.begin(); it
			!=m_volatileFiles.end(); it++)
	{
		if ( endsWithSzAr(*it, "Release")
		&& _UpdateOne(dl, *it, sErr)
		&& reader.OpenFile(*it))
		{
			tStrVec tmp;
			string sLine;
			while (reader.GetOneLine(sLine))
			{
				
// XXX maybe also pick up .diff/Index automatically and put it into m_volatileFiles when needed?
				
				// lines with three tokens: checksum, size, name
				if (3==Tokenize(sLine, SPACECHARS, tmp) && tmp[0].size()>=32)
				{
					validCsums.insert(tmp[1]+"_"+tmp[0]);
				}
			}
		}
	}
	
	for(tStrSet::const_iterator it=m_volatileFiles.begin(); it!=m_volatileFiles.end(); it++)
	{

		if( !endsWithSzAr(*it, ".diff/Index") )
			continue;
		// strip ".diff/Index" (11 characters)
		string sBaseFn=it->substr(0, it->length()-11);
		
		// just to be sure... the name is put into single-quoted shell arguments below
		if(sBaseFn.find('\'') != stmiss)
			continue;
		
		// as last resort later, try to download blindly from there later (comp.versions)
		listGuessedLocationsToTry.insert(sBaseFn);
		
		if(!_UpdateOne(dl, *it, sErr)
			|| !reader.OpenFile(*it) )
		{
			continue;
		}
		// both lists hold flat triples: checksum, size, name
		tStrVec stateList, patchSums;
		string sLine;
		string sCurrentCs;
		off_t nCurrentSize(-1);
		bool bErrNext=false;
		enum { eCurLine, eHistory, ePatches} eSection;
		eSection=eCurLine;
		
		while(reader.GetOneLine(sLine))
		{
			if(startsWithSz(sLine, "SHA1-Current: "))
			{
				// expected layout: 14 chars of key, 40 chars of checksum,
				// a space at offset 54, then the size. Shorter lines must
				// not be indexed at all.
				if(sLine.size() <= 54 || sLine[54] != ' ')
				{
					bErrNext=true;
					break;
				}
				nCurrentSize=atoll(sLine.c_str()+54);
				// detect obvious errors
				if(! nCurrentSize)
				{
					bErrNext=true;
					break;
				}
				sCurrentCs=sLine.substr(14, 40);
			}
			else if(startsWithSz(sLine, "SHA1-History:"))
				eSection=eHistory;
			else if(startsWithSz(sLine, "SHA1-Patches:"))
				eSection=ePatches;
			else if(eHistory == eSection || ePatches == eSection)
			{
				tStrVec & outList = (eHistory==eSection) ? stateList : patchSums;
				
				UINT slcount=outList.size();
				
				// tokens are appended to the list
				if(0==Tokenize(sLine, SPACECHARS, outList, true))
					continue;
				
				// each line must contribute exactly one triple
				if(outList.size() != slcount+3)
				{
					bErrNext=true;
					break;
				}
			}
		}
		reader.Close();
		if(bErrNext || stateList.empty()) // XXX: report a recovery error?
			continue;
		// okay, got a list of states
		
		const char *suxe[] = { "", ".bz2", ".gz"};
		for(UINT si=0; si<_countof(suxe); si++)
		{
			uint8_t srcHexCs[20];
			off_t srcSize(0), tmpSize(0);
			uint8_t tmpHexCs[20];
			string sCmd=" ( cat ";
			
			// the names are generated locally with good chars only, sBaseFn part checked before
			string sTmpDir=sBaseFn+".diff/_actmp";
			string sTmpFile=sTmpDir+"/tmp";
			string sMergedFn=sTmpFile+".merged";
			mkbasedir(sTmpFile);

			// temporary files to remove when done with this variant
			tStrVec cleanSet;
			cleanSet.push_back(sTmpFile);
			string sGuessSrc = sBaseFn + suxe[si];
			UINT hit=0;
			if( ! filereader::GetSha1Sum(sGuessSrc, srcHexCs, suxe[si][0], // true when having suffix
					srcSize, sTmpFile.c_str()))
			{
				// file missing or unpack/checksum error
				hit=UINT_MAX; // to be never found
			}
			
			// check current contents, does it hit the Release entry as-is?
			if(srcSize == nCurrentSize 
					&& Meta::CsEqual( (CUCHAR*) sCurrentCs.c_str(), srcHexCs, 20))
			{
				if(m_bVerbose)
					SendMsg(sGuessSrc+": content match, using this version<br>\n"); // Ignore all other files!
				::unlink(sTmpFile.c_str());
				
				// drop any other (older) version from the list, use only the new one
				m_volatileFiles.erase(sBaseFn);
				m_volatileFiles.erase(sBaseFn+".bz2");
				m_volatileFiles.erase(sBaseFn+".gz");
				m_volatileFiles.insert(sGuessSrc);
				
				// content is to be considered ok even if header is wrong/missing
				m_uptodateTags.insert(sGuessSrc);
				
				// feed the data set for the dupe filter
				string fpr=sCurrentCs+"_"+ltos(nCurrentSize);
				m_quickDupeHints[sBaseFn]=fpr;
				m_quickDupeHints[sBaseFn+".bz2"]=fpr;
				m_quickDupeHints[sBaseFn+".gz"]=fpr;
				
				goto clean_stuff_and_break;
			}
			
			// ok, then: does it hit some previous state we can patch from?
			for(; hit<stateList.size(); hit+=3)
			{
				if(Meta::CsEqual((CUCHAR*) stateList[hit].c_str(), 
						srcHexCs, 20)
					&& srcSize==atoll(stateList[hit+1].c_str()) )
				{
					break; // GOTCHA! The index state to start patching
				}
			}
			if(hit>=stateList.size())
				goto clean_stuff_and_continue; // no hit, try other base file
			
			// Go fetch all patch files, starting with the one named in the
			// matching state entry
			
			for(UINT pIdx=hit+2; pIdx<stateList.size(); pIdx+=3)
			{
				string sPatchFn=sBaseFn+".diff/"+stateList[pIdx]+".gz";

				string sPatchUncFn = sTmpFile+"."+ltos(pIdx);
				cleanSet.push_back(sPatchUncFn);

				// XXX: also check size/cs of the patches? needing to scan them above?
				if( ! _UpdateOne(dl, sPatchFn, sErr)
						|| ! filereader::GetSha1Sum(sPatchFn, tmpHexCs, true,
								tmpSize, sPatchUncFn.c_str()) )
				{
					aclog::err(sPatchFn+": failed to fetch or unpack the patch file");
					goto clean_stuff_and_continue;
					// XXX To consider: don't break here, keep pre-caching them all for later use. Pro: fetch earlier; Contra: maybe useless, client will never need them when it contacts us only once a week
				}
				// double check it, just to be safe
				UINT i=0;
				for(; i<patchSums.size(); i+=3)
				{
					if(stateList[pIdx] == patchSums[i+2])
					{
						if(! Meta::CsEqual((CUCHAR*) patchSums[i].c_str(), tmpHexCs, 20)
								|| atoll(patchSums[i+1].c_str()) != tmpSize)
						{
							aclog::err(sPatchFn+": bad checksum or filesize");
							goto clean_stuff_and_continue;
						}
						break;
					}
				}
				if(i>=patchSums.size())
				{
					aclog::err(sPatchFn+": patch not listed in the file history");
					goto clean_stuff_and_continue;
				}

				// double-check, catch any non-sense therein, should not happen though
				if(sPatchUncFn.find('\'') != stmiss)
					goto clean_stuff_and_continue;

				sCmd+= (string(" '") + sPatchUncFn + "'");
			}
			// feed the patches plus a final write command to red, which
			// works on the unpacked base file in the temporary directory
			sCmd += string(" ; echo w tmp.merged ) | ( cd '")
				+sBaseFn+".diff/_actmp/" + "' ; red tmp ) ";
			cleanSet.push_back(sMergedFn);
			
			if(0 == system(sCmd.c_str()))
			{
				if(filereader::GetSha1Sum(sMergedFn, tmpHexCs, true, tmpSize)
				&& validCsums.find( ltos(tmpSize)+"_" 
						+ CsBinToString(tmpHexCs, 20)) != validCsums.end())
				{
					// GOOD! Looks very valid
					SendMsg(string("Patching... OK, updating ")+sBaseFn+"<br>");
					::unlink(sBaseFn.c_str());
					::unlink((sBaseFn+".head").c_str());
					
					_InjectBz2ed(sBaseFn, sMergedFn, validCsums);
					
					if(0==::rename(sMergedFn.c_str(), sBaseFn.c_str()))
					{
						// fine, making the new stuff public and ignore the compressed versions instead
						m_volatileFiles.erase(sBaseFn+".bz2");
						m_volatileFiles.erase(sBaseFn+".gz");
						m_volatileFiles.insert(sBaseFn);
						m_uptodateTags.insert(sBaseFn); // we have no .head but that's ok, don't refetch
						goto clean_stuff_and_break; // stop iteration over suffixes
					}
				}
				else
				{
					SendMsg(string("Patching... finished with errors, ")+sBaseFn+" will be redownloaded<br>\n");
				}
			}
			else
			{
				SendMsg("Patching... failed, is ed installed?<br>");
				if(acfg::debug>1)
					SendMsg(sCmd);
			}
			clean_stuff_and_continue:
			for(tStrVecIter it = cleanSet.begin(); it!=cleanSet.end(); it++)
				::unlink(it->c_str());
			::rmdir(sTmpDir.c_str());
			continue;
			
			clean_stuff_and_break:
			// might have no .head but that's ok, don't refetch
			for(tStrVecIter it = cleanSet.begin(); it!=cleanSet.end(); it++)
				::unlink(it->c_str());
			::rmdir(sTmpDir.c_str());
			break;
						
		}
	}

}

void expiration::_UpdateVolatileFiles()
{
	_LoadIgnoreList();

	// all possible versions based on data of Release files, might be downloadable as .gz or .bz2 variant
	tStrSet listGuessedLocationsToTry;

	dlcon dl(true);
	string sErr; // for downloader output

	_UpdateWithDiffs(dl, listGuessedLocationsToTry);

	/**
	 * Ok, what's still in the list?
	 * 1. ifiles without pdiffs (as found on the FS)
	 * 2. ifiles with pdiffs where reconstruction failed
	 * 3. other kinds of files, maybe having different variants, .bz2 and .gz or even uncompressed
	 *    
	 * Trying to find a compromise between user demands and transfer size. I.e.
	 * if only .gz version exists, refetch the .gz versions, if multiple version exists,
	 * try to get .bz2 and then .gz (on failure, or forget .gz on success).
	 */
	
	// Cleanup the list, remove semi-dupes
	// drop the current version if there is a good replacement somewhere
	// else in the queue
	
	m_volatileFiles.insert(""); // make sure there is always a predecessor
	for(tStrSet::const_iterator it=m_volatileFiles.begin();
	it!=m_volatileFiles.end(); it++)
	{
		string sErr;
		string sFn=*it;
		
		// if the other version was downloaded before, skip the current one
		if(endsWithSzAr(sFn, ".bz2"))
		{
			string sBaseFn=sFn.substr(0, sFn.size()-4);
			if(m_uptodateTags.find(sBaseFn)!=m_uptodateTags.end()
					|| m_uptodateTags.find(sBaseFn+".gz")!=m_uptodateTags.end())
				goto drop_file;
		}
		else if(endsWithSzAr(sFn, ".gz"))
		{
			// if the other version was downloaded before, skip the current one
			string sBaseFn=sFn.substr(0, sFn.size()-3);
			if(m_uptodateTags.find(sBaseFn)!=m_uptodateTags.end()
					|| m_uptodateTags.find(sBaseFn+".bz2")!=m_uptodateTags.end())					
				goto drop_file;
			
			// try to get .bz2 version if seen in the cache
			// XXX: there is a small chance that .gz is better even though .bz2 is around
			// 		it might be checksummed and tested against checksum heap (above)
			if(m_volatileFiles.find(sBaseFn+".bz2")!=m_volatileFiles.end())
			{
				if(_UpdateOne(dl, sBaseFn+".bz2", sErr))
					goto drop_file;
			}
		}
		else
		{
			if(m_uptodateTags.find(sFn)!=m_uptodateTags.end())
				continue; // this version == good, was downloaded/generated here before
			
			if(m_volatileFiles.find(sFn+".bz2")!=m_volatileFiles.end())
			{
				if(_UpdateOne(dl, sFn+".bz2", sErr))
				goto drop_file;
			}
			if(m_volatileFiles.find(sFn+".gz")!=m_volatileFiles.end())
			{
				if(_UpdateOne(dl, sFn+".gz", sErr))
				goto drop_file;
			}
		}
		continue;
		
		drop_file:
		tStrSet::const_iterator itmp = it;
		it--;
		m_volatileFiles.erase(itmp);
	}
	m_volatileFiles.erase("");


	for(tStrSet::const_iterator it=m_volatileFiles.begin(); it!=m_volatileFiles.end(); it++)
	{	
		if (!_UpdateOne(dl, *it, sErr))
		{
			m_nErrorCount++;
			SendMsg("Error while redownloading this file!<br>\n ");
			if (!sErr.empty())
				SendMsg(string("Reason: ")+sErr+"<br>\n");
		}
	}
	
	// download the guessed stuff unless fetched somehow before. Add to
	// vfiles on success, otherwise forget them
	for(tStrSet::const_iterator it=listGuessedLocationsToTry.begin();
	it!=listGuessedLocationsToTry.end(); it++)
	{
		string bVer((*it)+".bz2"), gVer((*it)+".gz");
		if(m_uptodateTags.find(*it) != m_uptodateTags.end()
				|| m_uptodateTags.find( bVer ) != m_uptodateTags.end()
				|| m_uptodateTags.find( gVer ) != m_uptodateTags.end() )
			continue;		
		if(_UpdateOne(dl, bVer, sErr))
			m_volatileFiles.insert(bVer);
		else if(_UpdateOne(dl, gVer, sErr))
			m_volatileFiles.insert(gVer);
		// otherwise just ignore it
	}
}

/**
 * Parses every volatile (index) file and passes each file entry found
 * there (name, directory, size, checksum) to _HandlePkgEntry.
 *
 * Understood formats, chosen by the pure file name of the index:
 * "Packages" (stanzas with Filename/Size/MD5sum), and the list-style
 * formats "Sources", "Release" and pdiff "Index", where entries are
 * indented lines of "checksum size name" after a start marker.
 * Other files are skipped.
 *
 * @param pDupeCatcher  optional set of content signatures; index files
 *                      whose signature (from m_quickDupeHints) is already
 *                      in the set are skipped, and signatures of cleanly
 *                      parsed files are added. Not consulted when
 *                      m_bByPath is set.
 *
 * Returns early when the abort condition is detected.
 */
inline void expiration::_ParseVolatileFilesAndHandleEntries(tStrSet *pDupeCatcher)
{
	for(tStrSet::const_iterator it=m_volatileFiles.begin(); it!=m_volatileFiles.end(); it++)
	{
		// only absolute paths are processed
		if(it->empty() || it->at(0)!=cPathSepUnix)
			continue;
		
		string sSrcSig; // only set if pdupecatcher is set
		if(pDupeCatcher && ! m_bByPath)
		{
			map<string,string>::iterator spair = m_quickDupeHints.find(*it);
			if(spair != m_quickDupeHints.end())
			{
				sSrcSig=spair->second;
				if( ! sSrcSig.empty() 
						&& pDupeCatcher->find(sSrcSig) != pDupeCatcher->end())
				{
					SendMsg(string("Ignoring ")
							+it->substr(m_nDropPrefLen)
							+" (equivalent file seen)<br>\n");
					continue;
				}
			}
		}
		
		SendMsg(string("Parsing metadata in ")+it->substr(m_nDropPrefLen)+"<br>\n");
		filereader reader;
		if(!reader.OpenFile(*it))
		{
			SendMsg("<font color=red>ERROR: unable to open or read this file</font><br>\n");
			continue;
		}
		string sLine, key, val;
		const string sFile=reader.GetPureFilename();

		// pre calc relative base folders for later
		string sDirname("/");
		string sDebBaseDir("/"); // may differ from sDirname if the path looks like a Debian mirror path 
		tStrPos pos=it->rfind(sPathSep);
		if(stmiss!=pos)
		{
			sDirname.assign(*it, 0, pos+1);
			// everything up to (not including) "dists/" is the mirror base
			pos=sDirname.rfind("/dists/");
			if(stmiss!=pos)
				sDebBaseDir.assign(sDirname, 0, pos+1);
			else
				sDebBaseDir=sDirname;
		}
		
		tRemoteFileInfo info;
		info.SetInvalid();
		
		if (sFile=="Packages")
		{
			string csNeeded("MD5sum"), fnNeeded("Filename"), szNeeded("Size");
			
			for (bool bNoEof=true; bNoEof;)
			{
				bNoEof=reader.GetOneLine(sLine);
				trimBack(sLine);
				// an empty line or the end of file terminates a stanza
				if (sLine.empty() || !bNoEof)
				{
					if(info.IsUsable())
						_HandlePkgEntry(info);
					info.SetInvalid();
					
					if(CheckAbortCondition())
						return;

					continue;
				}
				else if (_ParseLine(sLine, key, val))
				{
					if(32==val.length() && key==csNeeded)
					{
						info.fpr.csType=CSTYPE_MD5;
						info.fpr.ReadCsFromString(val);
					}
					else if(key==szNeeded)
						info.fpr.size=atoll(val.c_str()); // sizes may exceed the range of long
					else if(key==fnNeeded)
					{
						// Filename is relative to the mirror base directory
						info.sDirectory=sDebBaseDir;
						tStrPos pos=val.rfind(sPathSepUnix);
						if(pos==stmiss)
							info.sFileName=val;
						else
						{
							info.sFileName=val.substr(pos+1);
							info.sDirectory.append(val, 0, pos+1);
						}
					}
				}
			}
		}
		else // sources-style formats
		{
			/* pickup all common information, then parse the remaining lines and
			 * assign the tokens to fields in each call of the callback function
			 */
			tStrVec vSumSizeName;
			string sStartMark;
			bool bUse(false);
			
			if (sFile=="Index")
			{
				info.fpr.csType = CSTYPE_SHA1;
				info.fpr.bUnpack = true;
				info.sDirectory=sDirname; // same for all
				sStartMark="SHA1-Patches:";
			}
			else if (sFile=="Sources")
			{
				info.fpr.csType = CSTYPE_MD5;
				info.fpr.bUnpack = false;
				sStartMark="Files:";
			}
			else if (sFile=="Release")
			{
				info.fpr.csType = CSTYPE_MD5;
				info.fpr.bUnpack = false;
				sStartMark="MD5Sum:";
			}
			else
				continue;
			// first letter of the file name selects the entry handling below
			const char typehint=sFile[0];
			
			for (bool bNoEof=true; bNoEof;)
			{
				bNoEof=reader.GetOneLine(sLine);
				trimBack(sLine);
				if (sLine.empty() || !bNoEof)
				{
					// optional, but better be sure
					info.sDirectory.clear();
					continue;
				}
				else if(startsWith(sLine, sStartMark))
					bUse=true; // entries after this marker are taken
				else if(startsWithSz(sLine, "Directory:"))
				{
					trimBack(sLine);
					// value starts after the key (10 characters) and whitespace
					tStrPos pos=sLine.find_first_not_of(SPACECHARS, 10);
					if(pos!=stmiss)
						info.sDirectory=sDebBaseDir+sLine.substr(pos)+sPathSepUnix;
				}
				else if (bUse && sLine[0]==' ' && 3==Tokenize(sLine,
						"\t ", vSumSizeName) && vSumSizeName[0].length()>30
						)
						
				{
				
					info.fpr.size = atoll(vSumSizeName[1].c_str()); // sizes may exceed the range of long
					if(info.fpr.size && info.fpr.ReadCsFromString(vSumSizeName[0]))
					{

						if (typehint=='S')//ources
							info.sFileName=vSumSizeName[2];
						else if (typehint=='I')//ndex
						{
							// We cheat! Files there are compressed and thus the checksum is wrong, deal with that later
							info.sFileName=vSumSizeName[2]+".gz";
						}
						else if (typehint=='R')//elease
						{
							// usually has subfolders
							pos=vSumSizeName[2].rfind(sPathSepUnix);
							if (stmiss!=pos)
							{
								info.sFileName=vSumSizeName[2].substr(pos+1);
								info.sDirectory=sDirname
										+vSumSizeName[2].substr(0, pos+1);
							}
							else // something new in main folder? unlikely...
							{
								info.sFileName=vSumSizeName[2];
								info.sDirectory=sDirname;
							}
						}
						// no else, caught above

						_HandlePkgEntry(info);

						if(CheckAbortCondition())
							return;

					}
				}
			}
		}

		if (reader.CheckGoodState(false))
		{
			// remember the signature, equivalent files can be skipped later
			if( ! sSrcSig.empty() )
				pDupeCatcher->insert(sSrcSig);
		}
		else
		{
			SendMsg("<font color=red>An error occured while reading this file, some contents may have been ignored.</font><br>\n");
			m_nErrorCount++;
			continue;
		}
	}
}

void expiration::_HandlePkgEntry(const tRemoteFileInfo &entry)
{
	//aclog::err(string("wtf:")+entry.sDirectory+entry.sFileName);
	
	// singleton -> keep some things static == optimization
	static pair<tS2DAT::iterator, tS2DAT::iterator> fromTo;
	// range of files having at least the same file names
	fromTo=m_trashCandSet.equal_range(entry.sFileName);
	
	if(fromTo.first==fromTo.second)
		return; // just no hits, make it quick
			
	static tS2DAT::iterator itmp, it;
	
	for(it=fromTo.first;
	it!=fromTo.second;
	/* erases inside -> step forward there */ )
	{
		
		tFingerprint & fprFile=it->second.fpr;
		string sShortPath(it->second.sDirname+it->first);
		string sFullPath(acfg::cachedir+sPathSep+sShortPath);
		header h;
		
		// needs to match the exact file location
		if( (m_bByPath || m_bByChecksum)
				&& sFullPath != (entry.sDirectory+entry.sFileName))
		{
			goto keep_in_trash;
		}
		
		// no header check if the file was forcibly updated/reconstructed before
		if ( ! m_bSkipHeaderChecks && m_uptodateTags.find(sFullPath) == m_uptodateTags.end()
				&& ! it->second.bHeaderTestDone	) // do only once for each file
		{
			it->second.bHeaderTestDone = true;
			
			if (0<h.LoadFromFile(sFullPath+".head"))
			{
				if (h.h[header::CONTENT_LENGTH])
				{
					off_t len=atoll(h.h[header::CONTENT_LENGTH]);
					struct stat stinfo;
					off_t lenInfo=0;
					
					// avoid duplicate stat call if the data from the includer is still there
					if(fprFile.bUnpack == false 
							&& fprFile.csType == CSTYPE_INVALID
							&& fprFile.size>0)
					{
						lenInfo=fprFile.size;
					}
					else if (0==stat(sFullPath.c_str(), &stinfo))
						lenInfo = stinfo.st_size;
					else
					{
						SendMsg(sShortPath+ ": error reading attributes, ignoring");
						goto keep_in_trash;
					}
					
					if(len<lenInfo)
					{
						SendMsg(string("<font color=red>WARNING, header file of ")
								+ sShortPath
								+ " reported too small file size, invalidating "
								+it->first+"</font><br>\n");
						goto keep_in_trash;
					}
				}
				else
				{
					SendMsg(string("<font color=red>WARNING, header file of ")
							+ sShortPath + " does not contain content length");
					goto keep_in_trash;
				}
			}
			else
				SendMsg(string("<font color=\"orange\">WARNING, header file missing for ")
						+sFullPath+"</font><br>\n");
		}
		
		if(m_bByChecksum)
		{
			bool bSkipDataCheck=false;
			
			// scan file if not done before
			if(CSTYPE_INVALID == fprFile.csType)
			{
				// hints for checksumming to read what we need
				fprFile.csType=entry.fpr.csType;
				fprFile.bUnpack=entry.fpr.bUnpack;
				// finally get the physical file size and checksum of the contents
				if(!fprFile.ReadCsFromFile(sFullPath))
				{
					// IO error? better keep it for now
					aclog::err(string("An error occured while checksumming ")
							+sFullPath+", not touching it.");
					bSkipDataCheck=true;
				}
			}

			if ( !bSkipDataCheck)
			{
				if ( ! (fprFile == entry.fpr))
				{
					SendMsg(string("<font color=red>BAD: ")+sFullPath
					+"</font><br>\n");
					goto keep_in_trash;
				}
			}
		}
		
		// ok, package matched, contents ok if checked, drop it from the removal list
		if (m_bVerbose)
			SendMsg(string("<font color=green>OK: ")+it->second.sDirname
					+it->first+"</font><br>\n");

		itmp=it;
		itmp++;

		m_trashCandHeadSet.erase(it->second.sDirname+it->first+".head");
		m_trashCandSet.erase(it);

		it=itmp;
		continue;

		keep_in_trash:
		it++;
	}
}

inline void expiration::_RemoveAndStoreStatus(bool bPurgeNow)
{
	FILE *f(NULL);
    if(!bPurgeNow) 
    {
        string sDbFile=acfg::cachedir+sPathSep+"_expending_dat";

        f = fopen(sDbFile.c_str(), "w");
        if(!f)
        {
            SendMsg("Unable to open _expending.dat for writing, trying to recreate... ");
            ::unlink(sDbFile.c_str());
            f=::fopen(sDbFile.c_str(), "w");
            if(f)
                SendMsg("OK<br>\n");
            else
            {
                SendMsg("<font color=red>FAILED. ABORTING. Check filesystem and file permissions.");
                return;
            }
        }
    }
	
	int n(0);
	for(tS2DAT::iterator it=m_trashCandSet.begin(); 
            it!=m_trashCandSet.end(); it++)
	{
        if(m_rex.IsInWhitelist(it->first.c_str()))
        	continue;

		//cout << "Unreferenced: " << it->second.sDirname << it->first <<endl;

		string sWhat=acfg::cachedir+sPathSep+it->second.sDirname+it->first;
		
		// never purge headers in the remains cleanup below, only explicitely
	    m_trashCandHeadSet.erase(it->second.sDirname+it->first+".head");
	    //cout << "Took " << sWhatHead << " from the list" <<endl;

		if(bPurgeNow || (it->second.nLostAt < (m_nTimeNow-acfg::extreshhold*86400)))
		{
			SendMsg(string("Tagging ")+it->second.sDirname+it->first+"<br>\n");

#ifdef ENABLED
			::unlink(sWhat.c_str());
			::unlink((sWhat+".head").c_str());
			::rmdir(it->second.sDirname.c_str());
#endif
        }
		else if(f)
		{
			n++;
			fprintf(f, "%lu\t%s\t%s\n",  it->second.nLostAt,
					it->second.sDirname.c_str(), it->first.c_str());
		}
	}
    if(f)
        fclose(f);

    // now just kill dangling header files
	for(set<string>::iterator it=m_trashCandHeadSet.begin();
            it!=m_trashCandHeadSet.end(); it++)
	{
        // keep Release etc.
		if(m_rex.IsInWhitelist(it->c_str()))
            continue;
		
        cout << "Removing orphaned head file: " << *it << endl;
#ifdef ENABLED
        string sFullPath=acfg::cachedir+sPathSep+*it;
        ::unlink(sFullPath.c_str());
        string::size_type pos=sFullPath.find_last_of(sPathSep);
        if(pos!=stmiss)
        	::rmdir(sFullPath.substr(0, pos).c_str());
#endif
    }
    if(n>0)
    {
        char buf[100];
        snprintf(buf, _countof(buf), "<br>\n%d package file(s) marked for removal in few days.<br>\n<br>\n", n);
        SendMsg(buf);
    }
}



void expiration::Action(const string & cmd)
{
	
	if (cmd.find("justRemove")!=stmiss)
	{
		_LoadTrashMapFromFile(true);
		_RemoveAndStoreStatus(true);
		return;
	}
	if (cmd.find("justShow")!=stmiss)
	{
		_LoadTrashMapFromFile(true);
		for (tS2DAT::iterator it=m_trashCandSet.begin(); it
				!=m_trashCandSet.end(); it++)
		{
			SendMsg(it->second.sDirname+it->first+"<br>\n");
			SendMsg(it->second.sDirname+it->first+".head<br>\n");
		}
		return;
	}
	
	SendMsg("Locating potentially expired files in the cache...<br>\n");

	m_bErrAbort=(cmd.find("abortOnErrors=aOe")!=stmiss);
	m_bByPath=(cmd.find("byPath")!=stmiss);
	m_bByChecksum=(cmd.find("byChecksum")!=stmiss);
	m_bVerbose=(cmd.find("beVerbose")!=stmiss);
	m_bForceDownload=(cmd.find("forceRedownload")!=stmiss);
	m_bSkipHeaderChecks=(cmd.find("skipHeadChecks")!=stmiss);

	DirectoryWalk(acfg::cachedir, this);
	if(CheckAbortCondition())
		return;
	//cout << "found package files: " << m_trashCandidates.size()<<endl;

	SendMsg("Redownloading index files...<br>\n");

	_UpdateVolatileFiles();
	
	if(CheckAbortCondition())
		return;


	if (m_bErrAbort && m_nErrorCount>0)
	{
		SendMsg("<font color=\"red\">Found errors during processing, aborting as requested.</font>"
			"<!-- TELL:THE:ADMIN -->");
		return;
	}

	SendMsg("Identifying fresh files...<br>\n");
	tStrSet catchup;

	_ParseVolatileFilesAndHandleEntries(&catchup);

	if(CheckAbortCondition())
			return;

	// update timestamps of pending removals
	_LoadTrashMapFromFile(false);
	
	_RemoveAndStoreStatus(cmd.find("purgeNow")!=stmiss);
	_PurgeMaintLogs();
	SendMsg("Done.");

}


void expiration::_PurgeMaintLogs()
{
	glob_t globbuf;
	memset(&globbuf, 0, sizeof(glob_t));
	string pattern=acfg::logdir+SZPATHSEP"maint_*.log";
	glob(pattern.c_str(), GLOB_DOOFFS | GLOB_NOSORT, NULL, &globbuf);
	if (globbuf.gl_pathc>2)
	{
		SendMsg("Found required cleanup tasks: purging maintanence logs...<br>\n");
	}
	for (unsigned int i=0; i<globbuf.gl_pathc; i++)
	{
		//cerr << "Checking "<<globbuf.gl_pathv[i];
		// skip our log
		time_t id=atol(globbuf.gl_pathv[i]+acfg::logdir.size()+7);
		//cerr << "id ist: "<< id<<endl;
		if (id==GetTaskId())
			continue;
		//cerr << "Remove: "<<globbuf.gl_pathv[i]<<endl;
#ifdef ENABLED
		::unlink(globbuf.gl_pathv[i]);
#endif
	}
	globfree(&globbuf);

}


