#! /bin/sh

#
# fshash                                                        (jh,30.04.2011)
#

#
#   fshash: checks a directory of hashsums against a directory of files
#   Copyright (C) 2010, 2011  Jochen Hepp <jochen.hepp@gmx.de>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#


# Name under which the script was invoked (basename of $0); it prefixes
# every diagnostic message.
script="${0##*/}"
version='0.0.2'
# Hash program to run.  A HASH value already present in the environment
# wins, otherwise md5sum is used.  The expansion is quoted so that its value
# is never word-split or glob-expanded into throw-away arguments of ':'.
: "${HASH:=md5sum}"


#
# --- usage ---
#

usage () {
	# Print the help text on stdout.
	# Reads:  script, HASH, DATAROOT, HASHNAME
	# Writes: HASHROOT - when unset or empty it is set to
	#         "$DATAROOT$HASHNAME"; in every case it is normalised to end in
	#         exactly one '/', so the help text can show the effective value.
	: "${HASHROOT:=$DATAROOT$HASHNAME}"
	HASHROOT="${HASHROOT%/}/"
	cat <<EOF
Usage: $script: OPTIONS COMMAND FILES ...

Options:
       --hash=HASH       use HASH, default is $HASH
       --hashroot=DIR    search hashes of files in DIR, default is $HASHROOT
       --dataroot=DIR    search files in DIR, default is $DATAROOT
       -V  --version     display version number
       -h  --help        display this help and exit

Command:
       generate          compute hash file from data files
       add               add new files to the corresponding hash files
       check             read hash files and check them with data files
       lost              a hash exists but the data file is lost
       lostquick         a hash exists but the data file or directory is lost
       found             no hash exists but a data file is found
       foundquick        no hash exists but a data file or directory is found
       lostfound
       foundlost         print lost and found files
       lostfoundquick
       foundlostquick    print lost and found files or directories
       hashsum           print stored hashsum(s) in hashroot of data file(s)
       mkdir             create corresponding directories in data- and hashroot
       rmdir             remove corresponding directories in data- and hashroot
       mv                move corresponding entries in data- and hashroot
       rm                remove corresponding entries in data- and hashroot
EOF
}


#
# --- version ---
#

print_version () {
	# Print program name, version and the short licence notice on stdout,
	# one argument per output line.  Reads the globals script and version.
	printf '%s\n' \
		"$script $version" \
		'' \
		'Copyright (C) 2011 Jochen Hepp' \
		'This program is distributed in the hope that it will be useful,' \
		'but WITHOUT ANY WARRANTY; without even the implied warranty of' \
		'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the' \
		'GNU General Public License for more details.' \
		'' \
		'Written by Jochen Hepp <jochen.hepp@gmx.de>.'
}


#
# --- check root DATAROOT ---
#

check_dataroot () {
	# Abort the script (exit 1) with a message on stderr unless DATAROOT is
	# an existing directory.  Reads the globals DATAROOT and script.
	if [ ! -d "$DATAROOT" ]; then
		# The trailing slash is dropped for display only.
		printf '%s\n' "$script: ${DATAROOT%/}: dataroot directory not found" >&2
		exit 1
	fi
}


#
# --- check root HASHROOT ---
#

check_hashroot () {
	# Abort the script (exit 1) with a message on stderr when
	#   - DATAROOT lies below HASHROOT (the two being equal is allowed), or
	#   - HASHROOT is not an existing directory.
	# Reads the globals HASHROOT, DATAROOT and script.
	#
	# Prefix test: stripping HASHROOT from the front of DATAROOT and gluing
	# it back on reproduces DATAROOT only if DATAROOT starts with HASHROOT.
	# The inner quotes make HASHROOT a literal string instead of a glob
	# pattern, so roots containing '[', '*' or '?' are compared correctly.
	if [ "$HASHROOT" != "$DATAROOT" ] && \
	   [ "$HASHROOT${DATAROOT#"$HASHROOT"}" = "$DATAROOT" ]; then
		printf '%s\n' "$script: dataroot is a subdirectory of hashroot" >&2
		exit 1
	fi
	if [ ! -d "$HASHROOT" ]; then
		# The trailing slash is dropped for display only.
		printf '%s\n' "$script: ${HASHROOT%/}: hashroot directory not found" >&2
		exit 1
	fi
}


#
# --- check HASHROOT and DATAROOT ---
#

check_roots () {
	# Validate the data root first, then the hash root.  Each helper ends
	# the script itself on failure and otherwise returns success, so the
	# second check is always reached when the first one passes.
	check_dataroot && check_hashroot
}


#
# --- check filedir ---
#

check_filedir () { # dir
	# Succeed when the given path exists (any file type); otherwise report
	# it on stderr and end the script with status 1.  An empty path is
	# shown as '.' in the message.
	local path="$1"

	[ -e "$path" ] && return 0

	echo "$script: ${path:-.}: no file or directory found" >&2
	exit 1
}


#
# --- generate a hash for a file or subdirectory ---
#

generate () { # file/directory
	local filedir="$1/"
	local hashdir
	local hashbasedir
	local datadir
	filedir="${filedir#$DATAROOT}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	hashdir="$HASHROOT$filedir"
	hashbasedir="${hashdir%/*}"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	if [ ! -d "$hashbasedir" ]; then
		mkdir -p "$hashbasedir"
	elif [ -d "$hashdir" ]; then
		echo "$script: ${filedir:-.}: hash in subdirectory exists" >&2
		exit 1
	fi

	# directory recursive
	if [ -d "$datadir" ]; then
		cd "$datadir" && \
		find . -path "./$HASHNAME" -prune -o -type f -print0 | \
		xargs -0 --no-run-if-empty $HASH > "$hashdir"
	# single file
	elif [ -f "$datadir" ]; then
		cat "$datadir" | \
		$HASH > "$hashdir"
	fi
}


#
# --- add a hash for a file or subdirectory and print added files ---
#

add () { # file/directory
	local filedir="$1/"
	local hashdir
	local hashbasedir
	local datadir
	local subdir
	local sedpat
	filedir="${filedir#$DATAROOT}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	hashdir="$HASHROOT$filedir"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	hashbasedir="$hashdir"
	while [ ! -e "$hashbasedir" ]; do
		hashdir="$hashbasedir"
		hashbasedir="${hashbasedir%/*}"
	done

	# directory recursive
	if [ -d "$hashbasedir" ]; then
		subdir="${hashdir#$HASHROOT}"
		generate "$subdir"
		sedpat="$(echo "$subdir" | sed 's%/%\\/%g')"
		sed "s%^[^ ]*  %%; s%./%%; s/^/$sedpat\//" "$hashdir"
	# single file with name
	elif [ -f "$hashbasedir" ]; then
		subdir="${hashbasedir#$HASHROOT}"
		cd "$DATAROOT$subdir" && \
		$HASH "./${filedir#$subdir/}" >> "$hashbasedir"
		echo "$filedir"
	fi
}


#
# --- search a hash for a file or subdirectory ---
#

search () { # file/directory
	# Print the stored hash lines ("<hash>  <path>") for a data entry, with
	# every path rewritten to be relative to DATAROOT.
	#
	# $1  path of the data entry, either below DATAROOT or relative to it.
	#
	#   - A hash entry exists at the corresponding place below HASHROOT:
	#     all hash files at or below it are printed.  A stored name of "-"
	#     (file hashed from stdin) is replaced by the hash file's own path;
	#     any other stored name is prefixed with the hash file's path.
	#   - Otherwise the nearest existing ancestor in the hash tree must be a
	#     hash file; only its lines for the requested entry (or entries
	#     below it) are printed.  Exits 1 with "no hash found" otherwise.
	# Reads the globals DATAROOT, HASHROOT and script.
	# NOTE: in the first case the working directory of the calling shell is
	# changed to HASHROOT.
	local filedir="$1/"
	local hashdir
	local hashbasedir
	local datadir
	local sedpat1
	local sedpat2
	# Quoted so that DATAROOT is removed literally, not as a glob pattern.
	filedir="${filedir#"$DATAROOT"}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	hashdir="$HASHROOT$filedir"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	# Walk up to the deepest existing entry of the hash tree.
	hashbasedir="$hashdir"
	while [ ! -e "$hashbasedir" ]; do
		hashbasedir="${hashbasedir%/*}"
	done

	# directory recursive
	if [ "$hashbasedir" = "$hashdir" ]; then
		# xargs substitutes the hash file's path for every {} in the sed
		# arguments.  The dot in "\./" is escaped so that only a literal
		# "./" prefix is removed; an unescaped dot would also swallow a
		# one-character leading directory such as "a/".  The block is
		# split over several -e options so that 'b' and '}' each end at a
		# newline.
		cd "$HASHROOT" && \
		find "${filedir:-.}" -type f -print0 | \
		xargs -0 -r -I {} \
		sed -e '/  -$/ {' \
		    -e 's%  -$%  {}%; s%  \./%  %' \
		    -e 'b' \
		    -e '}' \
		    -e 's%  \./%  %; s%  %  {}/%; s%  \./%  %' "{}"
	# single file
	else
		if [ ! -f "$hashbasedir" ]; then
			echo "$script: $filedir: no hash found" >&2
			exit 1
		fi

		# Path of the requested entry relative to the hash file, and the
		# hash file's path relative to HASHROOT; '/' is escaped because
		# both are spliced into '/'-delimited sed commands.
		filedir="${hashdir#"$hashbasedir"/}"
		sedpat1="$(printf '%s\n' "$filedir" | sed 's%/%\\/%g')"
		sedpat2="$(printf '%s\n' "${hashbasedir#"$HASHROOT"}" | sed 's%/%\\/%g')"
		sed -n -e 's%  \./%  %' \
		       -e "/  $sedpat1\(\/\|$\)/{ s/  /  $sedpat2\//; p }" \
		    "$hashbasedir"
	fi
}


#
# --- diff hash and filesystem files ---
#

diffhash () { # mode, filedir, tmpfile
	# Compare the file names recorded in the hash tree with the regular
	# files that exist below a data entry and print the differences.
	#
	# $1  mode: 'lost'  - print names that have a hash but no data file
	#           'found' - print names that have a data file but no hash
	#           other   - print both, prefixed with "lost: " / "found: "
	# $2  path of the data entry, either below DATAROOT or relative to it
	# $3  scratch file; it receives the sorted list of hashed names
	#
	# Reads the globals DATAROOT and HASHNAME.
	# NOTE: changes the working directory of the calling shell to DATAROOT.
	local mode="$1"
	local filedir="$2/"
	local tmpfile="$3"
	local datadir
	local sedpat
	# Quoted so that DATAROOT is removed literally, not as a glob pattern.
	filedir="${filedir#"$DATAROOT"}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	# sedpat post-processes the unified diff: '-' lines are names only in
	# the hash list (lost), '+' lines are names only on disk (found).
	# "t print" jumps to the final print when the substitution matched;
	# every other line is deleted.
	if [ "$mode" = 'lost' ]; then
		sedpat='s/^-//; t print; d'
	elif [ "$mode" = 'found' ]; then
		sedpat='s/^+//; t print; d'
	else
		sedpat='s/^+/found: /; s/^-/lost: /'
	fi

	# Hashed names: drop the hash column, sort into the scratch file.
	search "$datadir" | \
	sed 's/^[^ ]*  //' | \
	sort >"$tmpfile"
	# Names on disk.  Only a literal leading "./" is removed (escaped
	# dot); otherwise a one-character directory name would be stripped.
	# The ':print' label sits in its own -e so it ends at a newline.
	cd "$DATAROOT" && \
	find "${filedir:-.}" -path "./$HASHNAME" -prune -o -type f -print | \
	sed 's%^\./%%' | \
	sort | \
	diff -U0 "$tmpfile" - | \
	sed -n -e '/^\(@@\|--- \|+++ \)/d' \
	       -e "$sedpat" -e ':print' -e 'p'
}


#
# --- quick ---
#

quick () { # mode file/directory
	# Compare the layout of the hash tree with the data tree by name only;
	# the contents of the hash files are not read.
	#
	# $1  mode: 'lost'  - print entries that exist as a file in the hash
	#                     tree but have no counterpart in the data tree
	#           'found' - print data directories holding regular files for
	#                     which no hash file exists at or above them inside
	#                     the inspected subtree
	#           other   - both, prefixed with "lost: " / "found: "
	# $2  path of the data entry, either below DATAROOT or relative to it
	#
	# Reads the globals DATAROOT, HASHROOT and HASHNAME.
	# NOTE: changes the working directory of the calling shell.
	local mode="$1"
	local filedir="$2/"
	local hashdir
	local datadir
	local subdir
	local msg
	local sedpat
	local file
	# Quoted so that DATAROOT is removed literally, not as a glob pattern.
	filedir="${filedir#"$DATAROOT"}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	hashdir="$HASHROOT$filedir"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	if [ "$mode" != 'found' ]; then
		if [ "$mode" != 'lost' ]; then
			msg='lost: '
		fi
		# IFS= and -r keep leading blanks and backslashes in names intact.
		cd "$hashdir" && \
		find . -type f | \
		while IFS= read -r file; do
			if [ ! -e "$datadir/$file" ]; then
				printf '%s\n' "$msg$filedir${filedir:+/}${file#./}"
			fi
		done
	fi

	if [ "$mode" != 'lost' ]; then
		if [ "$mode" != 'found' ]; then
			msg='found: '
		fi
		subdir="${datadir#"$DATAROOT"}"
		# Escape '/' because sedpat is spliced into an s/.../.../ command.
		sedpat="$(printf '%s\n' "${subdir:-.}/" | sed 's%/%\\/%g')"
		# An entry with a hash file of the same relative name is pruned;
		# for every remaining regular file its directory is printed.  The
		# sed step prefixes the inspected subdirectory and normalises
		# "./" and trailing "/" (an empty result becomes ".").
		cd "$datadir" && \
		find . -mindepth 1 -path "./$HASHNAME" -prune -o \
		     \( -exec test -f "$hashdir/"{} \; -prune -o \
		     \( -type f -printf "%h\n" \) \) | \
		sort | \
		uniq | \
		sed -e "s/$/\//; s%^\./%%; s/^/$sedpat/; s%^\./%%; s%/$%%; s%^$%.%" \
		    -e "s/^/$msg/"
	fi
}


#
# --- file_name (absolute file name) ---
#

file_name () { # filename
	# Print the absolute, lexically normalised form of a file name.
	#
	# $1  file name; a name not starting with '/' is taken relative to the
	#     directory saved in the global pwd.
	#
	# Normalisation is textual only (the file system is not consulted):
	# a leading "./" and every "/./" are removed, "<name>/.." pairs are
	# collapsed repeatedly, and leftover leading "../" become "/".
	local file

	# absolute filename?
	case "$1" in
		/*) file="$1" ;;
		*)  file="$pwd/$1" ;;
	esac
	# printf instead of echo: echo may interpret backslashes in the name.
	# The ':up' label is on a line of its own so it ends at a newline.
	printf '%s\n' "$file" | \
	sed 's%^\./%%; s%/\./%/%g
	     :up
	     /\/[^/]\+\/\.\.\(\/\|$\)/ { s%/[^/]\+/\.\.\(/\|$\)%\1%g; b up; }
	     s%^\(\.\./\)\+%/%'
}


#
# --- create temporary file ---
#

tmpfile () {
	# Create a temporary file with mktemp and print its name on stdout.
	# Ends the script with status 1 and a message on stderr when no
	# regular file came out of it.
	local path
	path="$(mktemp)"

	if [ -f "$path" ]; then
		echo "$path"
	else
		echo "$script: unable to create temporary file" >&2
		exit 1
	fi
}


#
# --- main ---
#

# Upper-cased name of the hash program; the default hashroot is the entry of
# that name directly below DATAROOT.
HASHNAME="$(printf '%s\n' "$HASH" | tr 'a-z' 'A-Z')"
# DATAROOT defaults to '/' and always ends in exactly one slash.
DATAROOT="${DATAROOT:-/}"
DATAROOT="${DATAROOT%/}/"
# Directory the script was started in; file_name resolves relative names
# against it because several commands change the working directory.
pwd="$PWD"
cmd=

# Arguments are consumed left to right: options update the settings, the
# first command word takes all remaining arguments as its operands.
while [ $# -gt 0 ]; do
	cmd="$1"
	shift

	case "$cmd" in
		-h|--help)
			usage
			exit 0
			;;
		-V|--version)
			print_version
			exit 0
			;;
		--hash=*)
			HASH="${cmd#--hash=}"
			HASH="${HASH:-md5sum}"
			HASHNAME="$(printf '%s\n' "$HASH" | tr 'a-z' 'A-Z')"
			;;
		--dataroot=*)
			DATAROOT="${cmd#--dataroot=}"
			DATAROOT="${DATAROOT:-/}"
			DATAROOT="${DATAROOT%/}/"
			;;
		--hashroot=*)
			HASHROOT="${cmd#--hashroot=}"
			HASHROOT="${HASHROOT%/}/"
			;;
		generate)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			# Only the data root is checked; generate creates missing
			# hash directories itself.
			check_dataroot
			while [ $# -gt 0 ]; do
				generate "$(file_name "$1")"
				shift
			done
			;;
		add)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			hashfile="$(tmpfile)"
			newfile="$(tmpfile)"
			while [ $# -gt 0 ]; do
				# newfile collects the names add has already covered,
				# so a file handled as part of a freshly generated
				# directory is not added a second time.  Names are
				# read verbatim (IFS=, -r) and compared as fixed
				# strings (-F, -e) so that regex characters or a
				# leading '-' in a file name cannot be misread.
				diffhash 'found' "$(file_name "$1")" "$hashfile" | \
				while IFS= read -r file; do
					if ! grep -F -x -q -e "$file" "$newfile"; then
						add "$file" >> "$newfile"
					fi
				done
				shift
			done
			rm "$hashfile" "$newfile"
			;;
		hashsum)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			while [ $# -gt 0 ]; do
				# Names are printed with a leading "./".
				cd "$DATAROOT" && \
				search "$(file_name "$1")" | \
				sed 's%  %  ./%'
				shift
			done
			;;
		check)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			while [ $# -gt 0 ]; do
				# The hash program verifies the list from stdin; the
				# names in it are relative to DATAROOT.
				cd "$DATAROOT" && \
				search "$(file_name "$1")" | \
				$HASH -c --quiet -
				shift
			done
			;;
		lost|found|lost+found|lostfound|foundlost)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			hashfile="$(tmpfile)"
			while [ $# -gt 0 ]; do
				diffhash "$cmd" "$(file_name "$1")" "$hashfile"
				shift
			done
			rm "$hashfile"
			;;
		lostquick|foundquick|lostquick+foundquick|lostfoundquick|foundlostquick)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			# quick only distinguishes 'lost', 'found' and "anything
			# else", so dropping the trailing "quick" is sufficient.
			cmd="${cmd%quick}"
			while [ $# -gt 0 ]; do
				quick "$cmd" "$(file_name "$1")"
				shift
			done
			;;
		mkdir|rmdir|mv|rm)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			# Map the start directory into the hash tree, run the
			# command where the script was started and then, with the
			# same arguments, in the mapped hash directory.
			hashdir="$pwd/"
			hashdir="${hashdir#"$DATAROOT"}"
			hashdir="$HASHROOT${hashdir#/}"
			"$cmd" "$@" && \
			cd "$hashdir" && \
			"$cmd" "$@"
			# All remaining arguments belonged to the command.
			shift "$#"
			;;
		--*)
			echo "$script: unrecognized option \`$cmd'" >&2
			echo "$script: Try \`$script --help' for more information." >&2
			exit 1
			;;
		*)
			echo "$script: unrecognized command \`$cmd'" >&2
			echo "$script: Try \`$script --help' for more information." >&2
			exit 1
			;;
	esac
done

# Show the usage text when no command was given at all, i.e. there were no
# arguments or the last argument processed was a setting option.
if [ $# -eq 0 ]; then
	case "$cmd" in
		--hash=*|--dataroot=*|--hashroot=*)
			usage >&2
			exit 1
			;;
		*)
			if [ -z "$cmd" ]; then
				usage >&2
				exit 1
			fi
			;;
	esac
fi

# --- end ---

