/*
 * Diagnostics - a unified framework for code annotation, logging,
 * program monitoring, and unit-testing.
 *
 * Copyright (C) 2009 Christian Schallhart <christian@schallhart.net>,
 *                    Michael Tautschnig <tautschnig@forsyte.de>
 *               2008 model.in.tum.de group, FORSYTE group
 *               2006-2007 model.in.tum.de group
 *               2002-2005 Christian Schallhart
 *  
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */


/**
 * @file diagnostics/util/diff_generator.cpp
 *
 * @brief [LEVEL: alpha] Implementation of @ref
 * diagnostics::internal::Diff_Generator
 *
 * $Id: diff_generator.cpp 861 2009-04-18 20:14:51Z tautschnig $
 * 
 * @author Michael Tautschnig, Piotr Esden-Tempski 
 */

#include <diagnostics/util/diff_generator.hpp>
#include <diagnostics/util/diff_exception.hpp>
#include <limits>
#include <cmath>

/*
 * This is a hack: diagnostics is not allowed to log itself.
 * For debugging, we used the following macros, which worked fine --
 * but this construction is not guaranteed to work again. 
 *
 * DO NOT EVER USE THIS IN A PRODUCTION BUILD!
 */
#if 0
#   include <iostream>
#	include <diagnostics/annotations.hpp>
#	define DIFF_GENERATOR_AUDIT 1
#	define DIFF_GENERATOR_AUDIT_BLOCK_GUARD(TXT) DIAGNOSTICS_AUDIT_BLOCK_GUARD(TXT)
#	define DIFF_GENERATOR_AUDIT_TRACE(TXT) DIAGNOSTICS_AUDIT_TRACE(TXT)
#else
#	define DIFF_GENERATOR_AUDIT 0
#	define DIFF_GENERATOR_AUDIT_BLOCK_GUARD(TXT)
#	define DIFF_GENERATOR_AUDIT_TRACE(TXT)
#endif

DIAGNOSTICS_NAMESPACE_BEGIN;
INTERNAL_NAMESPACE_BEGIN;

/**
 * @brief Default constructor.
 *
 * Only the "diff is up to date" flag is initialised explicitly (to
 * false); all other members are default-constructed.
 */
Diff_Generator::Diff_Generator() : m_diff_done(false)
{
    DIFF_GENERATOR_AUDIT_BLOCK_GUARD("Diff_Generator");
}

/**
 * @brief Destructor.
 *
 * Performs no explicit cleanup; apart from the (normally disabled)
 * audit guard the body is empty.
 */
Diff_Generator::~Diff_Generator()
{
    DIFF_GENERATOR_AUDIT_BLOCK_GUARD("~Diff_Generator");
}

/* Interface methods */
/**
 * @brief Set the first text of the comparison (the "a" side).
 *
 * The text is split into lines and hashed by p_split_hash(). Lines
 * that occur only in this text are reported with a '-' prefix by
 * diff_text().
 *
 * @param texta the complete text, lines separated by '\n'
 */
void Diff_Generator::texta(::std::string const & texta)
{
    DIFF_GENERATOR_AUDIT_BLOCK_GUARD("texta");

    // a previously computed diff no longer matches the input
    m_diff_done = false;

    // discard the old lines and their hashes, then rebuild both
    m_hasha.clear();
    m_vtexta.clear();
    p_split_hash(m_vtexta, m_hasha, texta);
}

/**
 * @brief Set the second text of the comparison (the "b" side).
 *
 * The text is split into lines and hashed by p_split_hash(). Lines
 * that occur only in this text are reported with a '+' prefix by
 * diff_text().
 *
 * @param textb the complete text, lines separated by '\n'
 */
void Diff_Generator::textb(::std::string const & textb)
{
    DIFF_GENERATOR_AUDIT_BLOCK_GUARD("textb");

    // a previously computed diff no longer matches the input
    m_diff_done = false;

    // discard the old lines and their hashes, then rebuild both
    m_hashb.clear();
    m_vtextb.clear();
    p_split_hash(m_vtextb, m_hashb, textb);
}

/* Private methods */
/**
 * @brief Split @a text at '\n' and hash every resulting line.
 *
 * Lines are appended to @a vtext, each one terminated by '\n' (a
 * newline is added to a final line that lacks one). For every line a
 * hash value is appended to @a hashmap at the same position; the hash
 * is computed over the line without its newline using the hashpjw
 * algorithm by P. J. Weinberger, reduced modulo 211.
 *
 * For a non-empty @a text a blank sentinel line ("\n", hash 0) is
 * inserted before the first real line to simplify the LCS code. An
 * empty @a text leaves both containers untouched.
 *
 * @param vtext   receives the lines (appended, not cleared here)
 * @param hashmap receives one hash per line (appended, not cleared here)
 * @param text    the text to split
 */
void Diff_Generator::p_split_hash(VText_t & vtext, Hash_t & hashmap, ::std::string const &text)
{
    DIFF_GENERATOR_AUDIT_BLOCK_GUARD("p_split_hash");
    typedef ::std::string::size_type Pos_t;
    unsigned long int const hash_prime(211);
    unsigned long int const top_nibble(0xF0000000);

    // nothing to split: not even the sentinel line is added
    DIFF_GENERATOR_AUDIT_TRACE("check if input empty");
    if(text.empty()) return;

    // sentinel line at index 0
    vtext.push_back("\n");
    hashmap.push_back(0);

    DIFF_GENERATOR_AUDIT_TRACE("main loop");
    Pos_t const text_end(text.size());
    Pos_t line_start(0);
    while(line_start != text_end) {
        // the line covers [line_start, line_end); without a further
        // newline it extends to the end of the text
        Pos_t const newline_pos(text.find('\n', line_start));
        bool const has_newline(newline_pos != ::std::string::npos);
        Pos_t const line_end(has_newline ? newline_pos : text_end);

        // hashpjw over the characters of this line
        unsigned long int hash(0);
        for(Pos_t pos(line_start); pos != line_end; ++pos) {
            hash = (hash << 4) + text[pos];
            unsigned long int const overflow(hash & top_nibble);
            if(overflow != 0) {
                hash ^= overflow >> 24;
                hash ^= overflow;
            }
        }

        ::std::string const line(text, line_start, line_end - line_start);
        if(has_newline) {
            DIFF_GENERATOR_AUDIT_TRACE("adding line: \"" + line + "\"");
        }
        vtext.push_back(line + '\n');
        hashmap.push_back(hash % hash_prime);

        // continue behind the newline, or stop after an unterminated last line
        line_start = has_newline ? line_end + 1 : text_end;
    }
}

/**
 * @brief Compute the line-wise diff of m_vtexta and m_vtextb with the
 * Longest Common Subsequence (LCS) algorithm.
 *
 * The result is stored in m_diff_tupel. Each entry is a Match_t(a, b):
 * a is an index into m_vtexta and b an index into m_vtextb, and -1 in
 * either place means that the step has no line on that side. Entries
 * are produced by backtracking from the last lines, i.e. they are
 * stored back to front. If either text is empty, m_diff_tupel is left
 * empty.
 *
 * Nothing is done if the diff is already up to date (m_diff_done).
 * m_diff_done is set only when the computation succeeded, so a call
 * that failed with an exception is repeated (and fails again) on the
 * next request instead of leaving an empty result marked as valid.
 *
 * @throw Diff_Exception if the LCS matrix would have more cells than
 * can be addressed with an int index
 */
void Diff_Generator::p_do_diff() 
{
    DIFF_GENERATOR_AUDIT_BLOCK_GUARD("p_do_diff");

    if(m_diff_done) return;

    // reset data immediately
    m_diff_tupel.clear();

    // m_diff_tupel contains matches; if one of the texts is empty
    // there are no matches and the empty result is already final
    if(m_vtexta.empty() || m_vtextb.empty())
    {
        m_diff_done=true;
        return;
    }

    // LCSS matrix
    // All cells are addressed through int indices of the form
    // i + j*row_length, so the total number of cells must not exceed
    // the largest int. size_b is non-zero here, and the division-based
    // test also covers the case of a single dimension being too large.
    typedef ::std::vector< int >::size_type Size_t;
    Size_t const max_cells( static_cast< Size_t >( ::std::numeric_limits< int >::max() ) );
    Size_t const size_a( m_vtexta.size() );
    Size_t const size_b( m_vtextb.size() );
    if( size_a > max_cells / size_b ) throw Diff_Exception( "Text too large for diff" );

    int const row_length( static_cast< int >( size_a ) );
    int const col_count( static_cast< int >( size_b ) );
    ::std::vector< int > lcss_matrix( size_a * size_b );

    // LCSS -- fill the matrix
    // Cell (i, j) depends only on (i-1, j-1), (i, j-1) and (i-1, j).
    // i is the fast-running index of the storage layout, hence it is
    // iterated in the inner loop to walk the memory contiguously.
    for( int j = 0; j < col_count; ++j )
        for( int i = 0; i < row_length; ++i )
        {
            int & cell( lcss_matrix[ i + j*row_length ] );
            // compare the hashes first, the strings only if the hashes agree
            if( m_hasha[ i ] == m_hashb[ j ] && m_vtexta[ i ] == m_vtextb[ j ] )
            {
                cell = 1;
                if( j > 0 && i > 0 )
                    cell += lcss_matrix[ (i-1) + (j-1)*row_length ];
            }
            else
            {
                cell = 0;
                if( j > 0 )
                    cell = lcss_matrix[ i + (j-1)*row_length ];
                if( i > 0 )
                    cell = ::std::max( cell, lcss_matrix[ (i-1) + j*row_length ] );
            }
        }

#if DIFF_GENERATOR_AUDIT
    for( int j = 0; j < col_count; ++j )
    {
        for( int i = 0; i < row_length; ++i )
            ::std::cerr << lcss_matrix[ i + j*row_length ] << "\t";
        ::std::cerr << ::std::endl;
    }
#endif

    // LCSS -- backtracking
    // note that the order in case of multiple possible matches of the
    // if-clauses is important to provide a convenient ordering of - and + lines
    // (- goes before +)
    // note that 0,0 will not be pushed, thus the initial blank lines of m_vtexta
    // and m_vtextb will not be printed later on
    int i( row_length - 1 ), j( col_count - 1 );
    while( i > 0 || j > 0 )
    {
        // find next step
        if( i == 0 )
        {
            // only lines of text b are left
            m_diff_tupel.push_back(Match_t(-1, j));
            --j;
        }
        else if( j == 0 )
        {
            // only lines of text a are left
            m_diff_tupel.push_back(Match_t(i, -1));
            --i;
        }
        else
        {
            if( m_hasha[ i ] == m_hashb[ j ] && m_vtexta[ i ] == m_vtextb[ j ] )
            {
                // common line: step diagonally
                m_diff_tupel.push_back(Match_t(i, j));
                --j;
                --i;
            }
            else if( lcss_matrix[ i + (j-1)*row_length ] < lcss_matrix[ i + j*row_length ] )
            {
                // dropping line j of b would lose a match, so drop line i of a
                m_diff_tupel.push_back(Match_t(i, -1));
                --i;
            }
            else // yields going vertically after horizontal steps when done in-order later on
            {
                m_diff_tupel.push_back(Match_t(-1, j));
                --j;
            }
        }
#if DIFF_GENERATOR_AUDIT
        ::std::cerr << "Added " << m_diff_tupel.back().first << " " << m_diff_tupel.back().second << ::std::endl;
#endif
    }

    // only now is m_diff_tupel complete and valid
    m_diff_done=true;
}

void Diff_Generator::diff_text(::std::string & diff_text, bool const use_color)
{
    DIFF_GENERATOR_AUDIT_BLOCK_GUARD("diff_text");

    p_do_diff();

    diff_text = "";

    // actually we could use a const_reverse_iterator here, but some gcc
    // versions do not support this
    // (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11729)
    VText_t::const_iterator vta_iter( m_vtexta.empty() ? m_vtexta.begin() : ++(m_vtexta.begin()) ); 
    VText_t::const_iterator vtb_iter( m_vtextb.empty() ? m_vtextb.begin() : ++(m_vtextb.begin()) );
    for( ::std::vector< Match_t >::reverse_iterator iter( m_diff_tupel.rbegin() );
            iter != m_diff_tupel.rend(); ++iter )
    {
        if( iter->first != -1 && iter->second != -1 )
        {
            p_append_composed_line(diff_text, ' ', *vta_iter, use_color);
            ++vta_iter;
            ++vtb_iter;
        }
        else if( iter->first != -1 )
        {
            p_append_composed_line(diff_text, '-', *vta_iter, use_color);
            ++vta_iter;
        }
        else
        {
            p_append_composed_line(diff_text, '+', *vtb_iter, use_color);
            ++vtb_iter;
        }
    }
    // print remaining lines
    for(; vta_iter != m_vtexta.end(); ++vta_iter )
        p_append_composed_line(diff_text, '-', *vta_iter, use_color);
    for(; vtb_iter != m_vtextb.end(); ++vtb_iter )
        p_append_composed_line(diff_text, '+', *vtb_iter, use_color);
}

/**
 * @brief Append @a prefix followed by @a line to @a target.
 *
 * With @a use_color set, '+' lines are wrapped in the ANSI escape
 * sequences for green and '-' lines in those for red, each followed by
 * an ANSI reset; ' ' lines are appended without any colouring. Any
 * other prefix appends nothing in colour mode.
 *
 * @param target    string the composed line is appended to
 * @param prefix    one of '+', '-' or ' '
 * @param line      the line text, appended unchanged
 * @param use_color whether to emit ANSI colour sequences
 */
void Diff_Generator::p_append_composed_line(::std::string &target, char prefix, ::std::string const &line, bool const use_color)
{
    // the prefix as a C string, so that it can be concatenated with line
    char const prefixs[2] = { prefix, '\0' };
    ::std::string const marked_line( prefixs + line );

    DIFF_GENERATOR_AUDIT_TRACE(::std::string("p_append_composed_line: \"") + marked_line + "\"");

    // plain output, and unchanged lines in colour mode, carry no escape codes
    if(!use_color || prefix == ' '){
        target += marked_line;
        return;
    }

    char const * ansi_color;
    if(prefix == '+'){
        ansi_color = "\033[32m"; // green
    }else if(prefix == '-'){
        ansi_color = "\033[31m"; // red
    }else{
        // unknown prefixes produce no output in colour mode
        return;
    }

    target += ansi_color;
    target += marked_line;
    target += "\033[0m"; // reset
}


INTERNAL_NAMESPACE_END;
DIAGNOSTICS_NAMESPACE_END;

// vim:ts=4:sw=4
