#!/bin/bash
#=======================================================================
# * Project: MediaTex
# * Module : scripts
# *
# * This script manages the extraction of apache2 access logs
#
# MediaTex is an Electronic Records Management System
# Copyright (C) 2014 2015 2016 Nicolas Roche
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#=======================================================================
#set -x
set -e

BINDIR=/usr/bin
CACHEDIR=/var/cache/mediatex
MDTX=${MDTX-mdtx}
GITCLT=$CACHEDIR/$MDTX/git

# Private scratch directory; created on first use, removed on any exit path.
WORKDIR=

# Remove the scratch directory (if one was created).
cleanup() {
    [ -z "$WORKDIR" ] || /bin/rm -rf -- "$WORKDIR"
}
trap cleanup EXIT

# log_enabled VALUE
# Succeed when VALUE is one of the accepted affirmative spellings.
log_enabled() {
    case "$1" in
	yes|Yes|YES|y|Y|true|True|TRUE|t|T) return 0 ;;
	*) return 1 ;;
    esac
}

# file_size FILE
# Print the size of FILE in bytes (without parsing `ls` output).
file_size() {
    local bytes
    bytes=$(wc -c <"$1")
    # arithmetic expansion strips the padding some `wc` implementations add
    printf '%s\n' "$((bytes))"
}

# file_md5 FILE
# Print the md5 checksum of FILE (hexadecimal digest only).
file_md5() {
    local sum
    sum=$(/usr/bin/md5sum <"$1")
    printf '%s\n' "${sum%% *}"
}

# backup apache access logs
# (word-splitting of the collection list is intentional: one label per word)
for COLL in $($BINDIR/mediatex -c "$MDTX" ls coll); do
    # second field of the "logApache" line in the collection's servers.txt
    DO_LOG=$(grep -i logApache "$GITCLT/$MDTX-$COLL/servers.txt" | \
		 awk '{ print $2 }')
    log_enabled "$DO_LOG" || continue

    DATE=$(/bin/date +'%Y%m%d')
    HOST=$(/bin/hostname -f)
    NAME=apache_${DATE}_${HOST}

    if [ -z "$WORKDIR" ]; then
	# unpredictable directory instead of fixed names directly in /tmp
	WORKDIR=$(mktemp -d /tmp/mediatex-apache.XXXXXX)
	# NOTE(review): the upload may be carried out by another account;
	# keep the directory traversable as the former /tmp files were.
	chmod 755 "$WORKDIR"
    fi
    LOG=$WORKDIR/$NAME.log
    CATALOG=$WORKDIR/$NAME.cat
    RULES=$WORKDIR/$NAME.ext

    # Keep only the requests that mention this collection.  The label is
    # matched as a fixed string.  Best effort: "no match" and grep errors
    # both yield an empty file, which is skipped just below.
    /bin/grep -F -- "$MDTX-$COLL" /var/log/apache2/access.log \
	      >"$LOG" || true
    SIZE1=$(file_size "$LOG")

    # cannot archive empty file
    if [ "$SIZE1" -le 0 ]; then
	/bin/rm -f -- "$LOG"
	continue
    fi
    SIGN1=$(file_md5 "$LOG")

    # gzip replaces $LOG by $LOG.gz
    /bin/gzip -f -- "$LOG"
    SIGN2=$(file_md5 "$LOG.gz")
    SIZE2=$(file_size "$LOG.gz")

    # catalog metadata describing both the plain and the gzipped archive
    /bin/cat >"$CATALOG" <<EOF
Top Category "~mediatex"
Category "access": "~mediatex"
Document "$NAME": "access"
 "date" = "$(/bin/date)"
 "extracted from" = "$HOST"
 $SIGN1:$SIZE1
 $SIGN2:$SIZE2

Archive $SIGN1:$SIZE1
 "format" = "text"
Archive $SIGN2:$SIZE2
 "format" = "gz"
EOF

    # extraction rule: the gzipped archive contains the plain log
    TARGETDIR="mediatex/access"
    /bin/cat >"$RULES" <<EOF
(GZIP
$SIGN2:$SIZE2
=>
$SIGN1:$SIZE1 $TARGETDIR/$NAME.log
)
EOF

    $BINDIR/mediatex -c "$MDTX" upload++ \
		     file "$LOG.gz" as mediatex/backups/ \
		     catalog "$CATALOG" rules "$RULES" \
		     to coll "$COLL"

    # the same names are reused for the next collection
    /bin/rm -f -- "$LOG.gz" "$CATALOG" "$RULES"
done
