#!/usr/local/bin/bash

echo "Setting up the Ensembl Functional Genomics pipeline environment..."

# Check that the following environment variables are set (non-empty).
# The top-level `return` below only works when this file is sourced; it
# now reports a non-zero status so the sourcing shell/script can detect
# that the environment was not set up.

for var in SRC EFG_SRC EFG_DATA EFG_SQL \
    EFG_HOST EFG_PORT EFG_READ_USER EFG_WRITE_USER \
    CORE_HOST CORE_PORT CORE_USER \
    EFG_GROUP EFG_LOCATION EFG_CONTACT \
    SPECIES SPECIES_COMMON VERSION ASSEMBLY; do

  # ${!var} is bash indirect expansion: the value of the variable whose
  # name is stored in $var (replaces the former eval/echo construct).
  if [ -z "${!var}" ]; then
      echo "ERROR: Environment variable $var not set."
      return 1
  fi

done

### ENV VARS ###

#Code/Data Directories

# eFG checkout: modules and scripts below $EFG_SRC
EFG_MODULES="${EFG_SRC}/modules"
EFG_SCRIPTS="${EFG_SRC}/scripts"
export EFG_MODULES EFG_SCRIPTS

# ensembl source directory
ENSEMBL_SRC="${SRC}/ensembl"
ENSEMBL_MODULES="${ENSEMBL_SRC}/modules"
ENSEMBL_SCRIPTS="${ENSEMBL_SRC}/scripts"
export ENSEMBL_SRC ENSEMBL_MODULES ENSEMBL_SCRIPTS

# ensembl-analysis source directory
ANALYSIS_SRC="${SRC}/ensembl-analysis"
ANALYSIS_MODULES="${ANALYSIS_SRC}/modules"
ANALYSIS_SCRIPTS="${ANALYSIS_SRC}/scripts"
export ANALYSIS_SRC ANALYSIS_MODULES ANALYSIS_SCRIPTS

# ensembl-pipeline source directory
PIPELINE_SRC="${SRC}/ensembl-pipeline"
PIPELINE_MODULES="${PIPELINE_SRC}/modules"
PIPELINE_SCRIPTS="${PIPELINE_SRC}/scripts"
export PIPELINE_SRC PIPELINE_MODULES PIPELINE_SCRIPTS

# Prepend the pipeline script directories to PATH.
# ${SCRIPTSDIR:+...} contributes nothing when SCRIPTSDIR is unset or empty;
# a bare "$SCRIPTSDIR:" would leave an empty leading PATH entry, which the
# shell treats as the current directory.
export PATH="\
${SCRIPTSDIR:+${SCRIPTSDIR}:}\
${EFG_SCRIPTS}:${EFG_SCRIPTS}/import:\
${PIPELINE_SCRIPTS}:\
${ANALYSIS_SCRIPTS}:\
${ENSEMBL_SCRIPTS}:\
$PATH" 

#Set PERL5LIB (any previous value is replaced). EDIT add ensembl(core) etc. if required
export PERL5LIB="\
${ENSEMBL_MODULES}:\
${EFG_MODULES}:\
${PIPELINE_MODULES}:\
${ANALYSIS_MODULES}:\
${PIPELINE_SCRIPTS}:\
${HOME}/lib/perl:${HOME}/lib/site_perl:\
${SRC}/bioperl-live"

# add to PERL5LIB for DAS server set up
#${SRC}/Bio-Das-ProServer/lib:\

# eFG database connection parameters
DATA_VERSION="${VERSION}_${ASSEMBLY}"
export DATA_VERSION

# Database names: a non-empty value already present in the environment is
# kept, otherwise the name is derived from species and data version.
: "${EFG_DBNAME:=${PREFIX}${SPECIES}_funcgen_${DATA_VERSION}}"
: "${CORE_DBNAME:=${SPECIES}_core_${DATA_VERSION}}"
export EFG_DBNAME CORE_DBNAME

# Host/port options for the mysql client
MYSQL_ARGS="-h${EFG_HOST} -P${EFG_PORT}"
export MYSQL_ARGS

# Connection options for the write and the read user (no password included)
WRITE_DB_ARGS="-host ${EFG_HOST} -port ${EFG_PORT} -user ${EFG_WRITE_USER}"
READ_DB_ARGS="-host ${EFG_HOST} -port ${EFG_PORT} -user ${EFG_READ_USER}"
export WRITE_DB_ARGS READ_DB_ARGS

# Analysis working directory
ANALYSIS_WORK_DIR="${EFG_DATA}/analysis/${EFG_DBNAME}"
export ANALYSIS_WORK_DIR

### Prompt
# Layout: "[<EFG_DBNAME> <\W>]$ ". The database name is wrapped in the
# ANSI sequences \033[31m ... \033[0m, each enclosed in \[ \] as bash
# expects for non-printing prompt characters. $EFG_DBNAME is expanded
# now, when PS1 is assigned.
PS1="[\[\033[31m\]${EFG_DBNAME}\[\033[0m\] \W\[\033[0m\]]$ "
export PS1

### ALIASES ###
#single quotes enable dynamic updating of commands: the variables are
#expanded each time the alias is used, not when it is defined

alias efg='cd "$EFG_SRC"'
alias efgd='cd "$EFG_DATA"'
alias efgm='cd "$EFG_SRC/modules/Bio/EnsEMBL/Funcgen"'
# $MYSQL_ARGS holds several options and is left unquoted on purpose
alias mysqlw='mysql $MYSQL_ARGS -u${EFG_WRITE_USER}'
alias mysqlro='mysql $MYSQL_ARGS -u${EFG_READ_USER}'

# single-quoted like the aliases above, so they follow later changes of
# EFG_DATA / SCRIPTSDIR instead of freezing the value seen at definition time
alias workdir='cd "$EFG_DATA"'
alias scriptsdir='cd "$SCRIPTSDIR"'


### Analysis Pipeline DB ###
# These are set first because PDB_SCRIPT_ARGS below embeds $PDBNAME at
# assignment time; defining PDBNAME afterwards left -dbname empty.
export PDBNAME=chip_pipeline_${PREFIX}${EFG_DBNAME}
export PDBHOST=${EFG_HOST}
export PDBPORT=${EFG_PORT}

#Copied from efg.env
#Can be removed when properly merged

#Only used for monitor, need to pass password on env init!
# NOTE(review): -dbpass is given the write user's *name* ($EFG_WRITE_USER),
# not a password - looks like a placeholder; confirm before relying on it.
export PDB_SCRIPT_ARGS="-dbhost $EFG_HOST -dbuser $EFG_READ_USER -dbpass $EFG_WRITE_USER -dbname $PDBNAME -dbport $EFG_PORT"
alias monitor='monitor $PDB_SCRIPT_ARGS -current -finished'

#check for vars then suggest defaults or take stdin?

echo "Welcome to eFG!"
echo "  << ${EFG_DBNAME} @ ${EFG_HOST}:${EFG_PORT} >>"

### Functions ###

# Drop the analysis pipeline database $PDBNAME.
#
# Arguments: $1 - password handed to mysqlw (alias defined above: mysql as
#                 $EFG_WRITE_USER)
# Globals:   PDBNAME, MYSQL_ARGS (read); PASS (written - kept global, as in
#            the other functions of this file)
# Returns:   1 on a usage error, otherwise the status of the mysql pipeline
DropPipelineDB(){

    if [ $# -ne 1 ]; then
        echo "Usage: DropPipelineDB <password>"
        return 1
    fi

    PASS=$1
    shift

    # The password is deliberately not echoed: it would end up in the
    # terminal scrollback and in any captured output.
    echo "Dropping pipeline database $PDBNAME ($MYSQL_ARGS)"
    # -p"$PASS" is quoted so passwords with blanks or glob characters
    # reach mysql as a single argument.
    echo "drop database $PDBNAME" | mysqlw -p"$PASS"

}

# Delete the 'pipeline.lock' entry from the meta table of $PDBNAME.
#
# Arguments: $1 - password handed to mysqlw (alias defined above)
# Globals:   PDBNAME, MYSQL_ARGS (read); PASS (written - kept global, as in
#            the other functions of this file)
# Returns:   1 on a usage error, otherwise the status of the mysql pipeline
RemoveLock(){

    if [ $# -ne 1 ]; then
        echo "Usage: RemoveLock <password>"
        return 1
    fi

    PASS=$1
    shift

    echo "Removing pipeline lock from $MYSQL_ARGS $PDBNAME"
    # quoted so the password and database name are passed as single words
    echo "delete from meta where meta_key = 'pipeline.lock';" \
        | mysqlw -p"$PASS" "$PDBNAME"

}

# Create the analysis pipeline database $PDBNAME, load the table
# definitions from $SRC/ensembl-pipeline/sql/efg.sql and record $VERSION
# as schema_version in its meta table.
#
# Arguments: $1 - password handed to mysqlw (alias defined above)
# Globals:   PDBNAME, MYSQL_ARGS, SRC, VERSION (read); PASS (written -
#            kept global, as in the other functions of this file)
# Returns:   1 on a usage error or as soon as one of the three mysql
#            steps fails (later steps are then skipped), 0 otherwise
CreatePipelineDB(){

    if [ $# -ne 1 ]; then
        echo "Usage: CreatePipelineDB <password>"
        return 1
    fi

    PASS=$1
    shift

    # The password is deliberately not echoed.
    echo "Creating pipeline database $PDBNAME ($MYSQL_ARGS)"
    echo "create database $PDBNAME" | mysqlw -p"$PASS"
    if [ $? -ne 0 ]; then
        echo "Error: while creating database $PDBNAME."
        return 1
    fi

    echo "Creating pipeline tables"
    mysqlw -p"$PASS" "$PDBNAME" < "$SRC/ensembl-pipeline/sql/efg.sql"
    if [ $? -ne 0 ]; then
        echo "Error: while loading $SRC/ensembl-pipeline/sql/efg.sql into $PDBNAME."
        return 1
    fi

    echo "Adding schema version $VERSION to database"
    echo "INSERT INTO meta (meta_key, meta_value) VALUES (\"schema_version\", \"$VERSION\");" \
        | mysqlw -p"$PASS" "$PDBNAME"
    if [ $? -ne 0 ]; then
        echo "Error: while adding schema version to $PDBNAME."
        return 1
    fi

    echo
    echo "Don't forget to write the pipeline config (write_pipeline_config.pl)"
    echo "before you run ImportPipelineConfig!!!"
    echo

}

# Insert $VERSION as schema_version into the meta table of $EFG_DBNAME.
#
# Arguments: $1 - password handed to mysqlw (alias defined above)
# Globals:   VERSION, EFG_DBNAME (read); PASS (written - kept global, as in
#            the other functions of this file)
# Returns:   1 on a usage error, otherwise the status of the mysql pipeline
AddSchemaVersion(){

    if [ $# -ne 1 ]; then
        echo "Usage: AddSchemaVersion <password>"
        return 1
    fi

    PASS=$1
    shift

    echo "Adding schema version $VERSION to database $EFG_DBNAME"
    # quoted so the password and database name are passed as single words
    echo "INSERT INTO meta (meta_key, meta_value) VALUES (\"schema_version\", \"$VERSION\");" \
        | mysqlw -p"$PASS" "$EFG_DBNAME"

}

#
# Before ImportPipelineConfig can be used, the analysis config file and the
# rules config file must have been generated with write_pipeline_config.pl, e.g.
# ./write_pipeline_config.pl -module Nessie -regexp 'Nessie_NG_' -slice

#
# make sure the directory for the config files exists
# (skipped when SCRIPTSDIR is unset or empty: the path would otherwise
# collapse to /conf)
if [ -z "${SCRIPTSDIR}" ]; then
    echo "WARNING: SCRIPTSDIR not set, not creating the config directory."
elif [ ! -d "${SCRIPTSDIR}/conf" ]; then
    mkdir -p "${SCRIPTSDIR}/conf"
fi


# Run write_pipeline_config.pl for one module and submit type.
#
# Arguments: $1 - password (stored in PASS only; it is not passed on the
#                 command line. NOTE(review): presumably picked up by
#                 write_pipeline_config.pl some other way - confirm)
#            $2 - module name, passed as -module
#            $3 - submit type: 'file', 'slice' or 'array' (passed as -<type>)
#            $4 - optional literal 'overwrite' (passed as -overwrite)
#            remaining arguments are forwarded unchanged
# Globals:   PASS (written - kept global, as in the other functions here)
# Returns:   1 on a usage error or invalid submit type, otherwise the
#            status of write_pipeline_config.pl
WritePipelineConfig (){

    if [ $# -lt 3 ]; then
        echo "Usage: WritePipelineConfig <password> <module> <SubmitType> ['overwrite']"
        echo "         where <SubmitType> is ['file'|'slice'|'array']"
        return 1
    fi

    PASS=$1
    shift

    local module=$1
    shift

    local submit=''
    case "$1" in
        'file'|'slice'|'array')
            submit=$1
            shift
            ;;
        *)
            echo "Error: Need to pass either 'file', 'slice', or 'array' as SubmitType!"
            return 1
            ;;
    esac

    # An array holds the optional flag so that nothing at all is passed
    # when 'overwrite' was not requested.
    local -a overwrite=()
    case "$1" in
        "overwrite")
            overwrite=(-overwrite)
            shift
            ;;
    esac

    # "$@" forwards the remaining arguments word for word; the former
    # unquoted $* re-split arguments containing blanks.
    write_pipeline_config.pl -module "$module" "-$submit" "${overwrite[@]}" "$@"

}


#
# locations of the config files generated by write_pipeline_config.pl
ANALYSIS_CONFIG="${SCRIPTSDIR}/conf/analysis.conf"
RULES_CONFIG="${SCRIPTSDIR}/conf/rules.conf"
BATCHQ_CONFIG="${SCRIPTSDIR}/conf/BatchQueue.conf"
export ANALYSIS_CONFIG RULES_CONFIG BATCHQ_CONFIG

# make sure the necessary pipeline scripts are executable
for f in analysis_setup.pl rule_setup.pl setup_batchqueue_outputdir.pl rulemanager.pl; do
    # Paths are quoted so a PIPELINE_SCRIPTS value containing blanks works.
    # A missing script is reported explicitly: `[ ! -x ]` is also true for
    # a file that does not exist, which used to end in a raw chmod error.
    if [ ! -e "${PIPELINE_SCRIPTS}/$f" ]; then
        echo "WARNING: pipeline script ${PIPELINE_SCRIPTS}/$f not found."
    elif [ ! -x "${PIPELINE_SCRIPTS}/$f" ]; then
        chmod +x "${PIPELINE_SCRIPTS}/$f"
    fi
done
    

# Import the generated pipeline configuration into $PDBNAME: runs
# analysis_setup.pl with $ANALYSIS_CONFIG, rule_setup.pl with
# $RULES_CONFIG and finally setup_batchqueue_outputdir.pl.
#
# Arguments: $1 - password for $EFG_WRITE_USER, passed as -dbpass
# Globals:   EFG_HOST, EFG_PORT, EFG_WRITE_USER, PDBNAME, ANALYSIS_CONFIG,
#            RULES_CONFIG (read); PASS (written - kept global, as in the
#            other functions of this file)
# Returns:   1 on a usage error or as soon as one step fails (later steps
#            are then skipped), 0 otherwise
ImportPipelineConfig (){

    if [ $# -ne 1 ]; then
        echo "Usage: ImportPipelineConfig <password>"
        return 1
    fi

    PASS=$1
    shift

    # An array keeps every value, the password included, as one word.
    local -a write_args=(
        -dbhost "$EFG_HOST" -dbport "$EFG_PORT"
        -dbuser "$EFG_WRITE_USER" -dbpass "$PASS"
    )

    echo "Setting up pipeline analysis"
    if ! analysis_setup.pl "${write_args[@]}" -dbname "$PDBNAME" -read -file "$ANALYSIS_CONFIG"; then
        echo "Error: while running analysis_setup.pl."
        return 1
    fi

    echo "Setting up pipeline rules"
    if ! rule_setup.pl "${write_args[@]}" -dbname "$PDBNAME" -read -file "$RULES_CONFIG"; then
        echo "Error: while running rule_setup.pl."
        return 1
    fi

    echo "setup the output directories from Bio::EnsEMBL::Pipeline::Config::BatchQueue"
    if ! setup_batchqueue_outputdir.pl; then
        echo "Error: while running setup_batchqueue_outputdir.pl."
        return 1
    fi

}

# Write input ids via create_input_ids.pl for the given submit type.
#
# Arguments: $1 - password, passed as -pass
#            $2 - submit type: 'file', 'slice' or 'array'
#            $3 - for 'file': directory to read files from (passed as -dir)
#                 for 'slice': 'encode' or 'toplevel'
#            then optionally <exp_regex> and <exp_suffix>
# Globals:   WRITE_DB_ARGS, EFG_DBNAME (read); PASS (written - kept
#            global, as in the other functions of this file)
# Returns:   1 on a usage/argument error, otherwise the status of
#            create_input_ids.pl
CreateInputIds(){

    if [ $# -lt 2 ]; then
        echo "Usage: CreateInputIds <password> <SubmitType> [<exp_regex> <exp_suffix>]"
        echo "       where <SubmitType> is either"
        echo "          slice ['encode'|'toplevel']  or  file <directory>  or  array"
        echo "       See efg_analysis.txt for details!"
        return 1
    fi

    PASS=$1
    shift

    local submit=$1
    shift

    # Options are collected in arrays so that values containing blanks
    # (e.g. a directory name) stay single arguments.
    local -a submit_args=()
    case "$submit" in
        'file')
            if [[ -z "$1" ]]; then
                echo "Error: SubmitType '$submit' takes a directory to read file from."
                return 1
            fi

            submit_args=("-$submit" -dir "$1")
            shift
            ;;

        'slice')
            case "$1" in
                'encode'|'toplevel')
                    submit_args=("-$submit" "-$1")
                    shift
                    ;;
                *)
                    echo "Error: SubmitType '$submit' takes only either 'encode' or 'toplevel' as parameter."
                    return 1
                    ;;
            esac
            ;;

        'array')
            submit_args=("-$submit")
            ;;

        *)
            echo "Error: Invalid SubmitType '$submit'!"
            return 1
            ;;
    esac
    echo "SubmitType: ${submit_args[*]}"

    local -a options=()
    if [ -n "$1" ]; then options+=(-exp_regex "$1"); echo "exp_regex: $1"; fi
    shift
    if [ -n "$1" ]; then options+=(-exp_suffix "$1"); echo "exp_suffix: $1"; fi
    shift

    ### determine analysis_id for SubmitType and write input_ids
    # $WRITE_DB_ARGS holds several options and is left unquoted on purpose.
    local status
    create_input_ids.pl $WRITE_DB_ARGS -dbname "$EFG_DBNAME" -pass "$PASS" \
        "${submit_args[@]}" "${options[@]}"
    status=$?

    if [ "$status" -eq 0 ]; then
        echo "Ready for RunAnalysis"
    else
        echo "An error occured while inserting input_ids. You may double check your"
        echo "analysis pipeline isn't configured properly (see WritePipelineConfig)"
        echo "or you are trying to import input ids that have already been imported"
        echo "(use CleanInputIds to drop input_ids and rerun CreateInputIds)."
    fi

    # hand the script's status to the caller instead of echo's
    return "$status"

}

# Empty the input_id_analysis table of $PDBNAME and remove the symbolic
# links found directly in ${ANALYSIS_WORK_DIR}/infiles.
#
# Arguments: $1 - password handed to mysqlw (alias defined above)
# Globals:   PDBNAME, ANALYSIS_WORK_DIR (read); PASS (written - kept
#            global, as in the other functions of this file)
# Returns:   1 on a usage error
CleanInputIds(){

    if [ $# -ne 1 ]; then
        echo "Usage: CleanInputIds <password>"
        return 1
    fi

    PASS=$1
    shift

    echo "Cleaning input_id_analysis table"
    echo "delete from input_id_analysis" | mysqlw -p"$PASS" "$PDBNAME"

    echo "removing links from infiles directory ..."
    local file
    for file in "${ANALYSIS_WORK_DIR}"/infiles/*; do
        # Only symbolic links are removed; regular files are left alone.
        # "$file" is quoted so link names containing blanks are handled.
        if [ -L "$file" ]; then
            rm -f "$file"
        fi
    done
}

# Run pipeline_sanity.pl against the pipeline database $PDBNAME.
#
# Arguments: $1 - password for $EFG_WRITE_USER, passed as -dbpass
# Globals:   PDBHOST, PDBPORT, EFG_WRITE_USER, PDBNAME (read); PASS
#            (written - kept global, as in the other functions here)
# Returns:   1 on a usage error, otherwise the status of pipeline_sanity.pl
CheckPipelineSanity(){

    if [ $# -ne 1 ]; then
        echo "Usage: CheckPipelineSanity <password>"
        return 1
    fi

    PASS=$1
    shift

    echo "Check pipeline sanity"
    # every value is quoted so it reaches the script as a single argument
    pipeline_sanity.pl -dbhost "$PDBHOST" -dbport "$PDBPORT" \
        -dbuser "$EFG_WRITE_USER" -dbpass "$PASS" -dbname "$PDBNAME"

}

# Time a single run of $PIPELINE_SCRIPTS/test_RunnableDB for one module,
# logic name and input id against the pipeline database $PDBNAME.
#
# Arguments: $1 - password for $EFG_WRITE_USER, passed as -dbpass
#            $2 - module, $3 - logic_name, $4 - input_id
#            remaining arguments are forwarded unchanged
# Globals:   PIPELINE_SCRIPTS, PDBHOST, PDBPORT, EFG_WRITE_USER, PDBNAME
#            (read); PASS (written and exported)
# Returns:   1 on a usage error, otherwise the status of test_RunnableDB
test_eFG_Runnable () {

    if [ $# -lt 4 ]; then
        echo "Usage: test_eFG_Runnable <password> <module> <logic_name> <input_id>"
        return 1
    fi

    export PASS=$1; shift
    local module=$1; shift
    local logic_name=$1; shift
    local input_id=$1; shift

    echo "MODULE: $module"
    echo "LOGIC_NAME: $logic_name"
    echo "INPUT_ID: ${input_id}"

    # "$@" forwards the extra arguments word for word; the former unquoted
    # $* re-split arguments containing blanks.
    time \
    "$PIPELINE_SCRIPTS/test_RunnableDB" -dbhost "$PDBHOST" -dbport "$PDBPORT" \
        -dbuser "$EFG_WRITE_USER" -dbpass "$PASS" -dbname "$PDBNAME" \
        -runnabledb_path Bio/EnsEMBL/Analysis/RunnableDB/Funcgen \
        -module "$module" \
        -logic_name "$logic_name" \
        -input_id "$input_id" \
        "$@"

}

# Start rulemanager.pl on the pipeline database $PDBNAME, optionally
# restricted to the analyses whose logic_name matches a regular expression.
#
# Arguments: $1 - password for $EFG_WRITE_USER, passed as -dbpass
#            $2 - optional regular expression; the matching logic_names
#                 are looked up in the analysis table (SQL RLIKE) and
#                 passed to rulemanager.pl as -analysis options
# Globals:   EFG_HOST, EFG_PORT, EFG_WRITE_USER, PDBNAME (read); PASS
#            (written and exported)
# Returns:   1 on a usage error or when a given expression matches no
#            analysis, otherwise the status of rulemanager.pl
RunAnalysis(){

    if [ $# -lt 1 ]; then
        echo "Usage: RunAnalysis <password> [<ln_regex>]"
        echo "          where is <ln_regex> is an optional regular expression to"
        echo "          match logic_name of analysis"
        return 1
    fi
    echo "Doing Analysis"

    export PASS=$1
    shift

    local ln_regex=$1
    shift

    # The command line is kept in arrays and executed directly instead of
    # being eval'ed from a string, so the password and the logic names can
    # neither be re-split nor interpreted as shell syntax.
    local -a conn_args=(-dbhost "$EFG_HOST" -dbport "$EFG_PORT" -dbuser "$EFG_WRITE_USER")
    local -a tail_args=(-dbname "$PDBNAME")

    if [ -n "$ln_regex" ]; then
        local name
        local matched=0
        # The query is only needed (and only run) when a regex was given.
        # Word splitting of the result is intended: one logic_name per word.
        for name in $(echo "select logic_name from analysis WHERE logic_name RLIKE \"$ln_regex\"" \
            | mysqlw -N -p"$PASS" "$PDBNAME"); do
            tail_args+=(-analysis "$name")
            matched=1
        done

        # Without this check an expression matching nothing would start
        # rulemanager.pl with no -analysis option, i.e. unrestricted.
        if [ "$matched" -eq 0 ]; then
            echo "Error: no analysis with a logic_name matching '$ln_regex' found in $PDBNAME."
            return 1
        fi
    fi

    # The password is masked in the message shown to the user.
    echo "executing rulemanager.pl ${conn_args[*]} -dbpass ******** ${tail_args[*]}"
    rulemanager.pl "${conn_args[@]}" -dbpass "$PASS" "${tail_args[@]}"

    return

}

# Set retry_count to 0 for all jobs of the analysis with the given
# logic_name in the pipeline database $PDBNAME.
#
# Arguments: $1 - password handed to mysqlw (alias defined above)
#            $2 - logic_name of the analysis
# Globals:   PDBNAME (read); PASS (written - kept global, as in the other
#            functions of this file)
# Returns:   1 on a usage error, otherwise the status of the mysql pipeline
ResetAnalysis() {

    if [ $# -ne 2 ]; then
        echo "Usage: ResetAnalysis <password> <LOGIC_NAME>"
        return 1
    fi

    PASS=$1
    shift

    local logic_name=$1
    shift

    echo "Resetting retry_count for logic_name $logic_name to 0"

    # quoted so the password and database name are passed as single words
    echo "update job, analysis set retry_count=0 where analysis.analysis_id=job.analysis_id and logic_name = \"${logic_name}\"" \
        | mysqlw -p"$PASS" "$PDBNAME"

}

# Run monitor against $PDBNAME on $EFG_HOST:$EFG_PORT as $EFG_READ_USER,
# with the -current and -finished flags.
Monitor(){

    monitor \
        -dbhost $EFG_HOST \
        -dbport $EFG_PORT \
        -dbuser $EFG_READ_USER \
        -dbname $PDBNAME \
        -current -finished

}

# Query $PDBNAME (via the mysqlro alias) for job_id, stderr_file and
# exec_host of the jobs whose is_current = 'y' status matches %FAIL%.
GetFailedJobs(){
    query="select job.job_id, stderr_file, exec_host from job, job_status"
    query="$query where job.job_id = job_status.job_id and is_current = 'y'"
    query="$query and job_status.status like \"%FAIL%\";"
    printf '%s\n' "$query" | mysqlro $PDBNAME

}

# Query $PDBNAME (via the mysqlro alias) for job_id, stderr_file and
# exec_host of the jobs whose is_current = 'y' status matches %RUN%.
GetRunningJobs(){
    query="select job.job_id, stderr_file, exec_host from job, job_status"
    query="$query where job.job_id = job_status.job_id and is_current = 'y'"
    query="$query and job_status.status like \"%RUN%\";"
    printf '%s\n' "$query" | mysqlro $PDBNAME

}
