#!/bin/ksh -u
# Backup to (NFS) mounted partition
# Author: Alain D D Williams, addw@phcomp.co.uk
# SCCS: @(#)BackupToDisk	1.17 11/23/09 22:07:59
# Copyright (c) the author, 1990 - 2009. This code is released under the GPL (version 2 or version 3), the copyright must be preserved.
# Generates a list of all files on the machine (unless -F is given).
# Backs up a set of directories as separate cpio archives into a (NFS mounted) directory. With -d
# this is a full backup on Mondays and, on the other days of the week, an incremental backup.
# Mysql will optionally be dumped.
# All backups are compressed with bzip2.
# This script assumes that it is running on a Linux system.

# Your crontab entry may look like this:
#	10	5	*	*	*	/usr/local/bin/BackupToDisk -dc

# To see options:
#	BackupToDisk -x

# The name that this script was run as, less any leading directories.
# It prefixes the messages that the script prints.
PROGNAME="${0##*/}"

# Debugging aid, a no-op (presumably so that the PID can be seen when tracing):
: PID=$$

# ---- Site configuration: edit the next few lines ----

# Directories to be backed up, eg: 'home usr/local etc'
# Left empty here, the script complains if nothing is specified.
DIRS=''

# Directory in which we (may) log what we do:
logdir='/var/log/backups/'

# Mount point of the backup disk (must have trailing /), the place for
# daily backups below it and the directory that is used by default:
base='/mnt/backup/'
basedaily="$base/daily/"
backupdir="$base"

# Expect to see a file $base/MOUNTED, if it is not there - abort.
# You need to create this file before first running the script.


# Date & time fields used to name the backup directory and the log file.
# The clock is read ONCE and all of the fields are cut from that one reading, so
# they cannot disagree with each other if the script starts as the minute (or the
# day) changes.
# Defeat SCCS: the '' in the format stops %Y% and %H% being seen as SCCS keywords.
StartStamp=$( date '+%Y''%m%d %H''%M %u' )	# eg: "20091123 2207 1"
yyyymmdd=${StartStamp%% *}	# First word
dayofweek=${StartStamp##* }	# Last word, Monday is 1
hhmm=${StartStamp#* }		# Drop the first word ...
hhmm=${hhmm% *}			# ... and the last one
IncrDays=$(( dayofweek - 1 ))	# Days since Monday
SecsInDay=86400

# Defaults. Most of these can be changed by an option on the command line.

# What gets backed up:
DoFull=n		# y: backup everything (-f)
incremental=n		# y: do incremental backup (-d)
DoFind=y		# y: generate a list of every file on the machine (-F sets n)
mydump=n		# y: do mysqldump (-m / -M)
MysqlOptions=		# Options for mysqldump (-p)

# Cleaning of old backups:
clean=n			# y: perform a clean operation (-c)
CleanDays=14		# Backups older than this (days) will be cleaned (removed) (-C)

# Logging:
DoCopyLog=n		# y: copy output to stderr as well as $logdir (-o)

# Not settable by an option:
CreateLinks=y		# Create a symlink of LATEST-???? to today's archive
IncFind=		# Incremental option to find
OutputToFile=n		# Set y once has been redirected. Old stderr is copied to FD 3

# Nothing should need changing below this line:

# Fatal error: print "$PROGNAME: <message>" and an "ABORTING at:" time stamp, then
# end the script with exit status 2.
# The two lines always go to the current stderr. Once output has been redirected
# ($OutputToFile is y) they are first written to FD 3 - what used to be stderr -
# as well.
# Arguments: the words of the message.
function Die {
	typeset when
	when=$( date )

	if [[ $OutputToFile = y ]]
	then	{
			echo "$PROGNAME: $*"
			echo "ABORTING at: $when"
		} >&3
	fi

	{
		echo "$PROGNAME: $*"
		echo "ABORTING at: $when"
	} >&2

	exit 2
}

# Tell the user something: "$PROGNAME: <message>" is written to the current stderr.
# If output has been redirected ($OutputToFile is y) it is written to FD 3 first, so
# that the user sees it even though stderr now goes elsewhere.
# Arguments: the words of the message.
function Note {
	if [[ $OutputToFile = y ]]
	then	echo "$PROGNAME: $*" >&3
	fi

	echo "$PROGNAME: $*" >&2
}

# Print the help text on stdout. The text shows the current values of $base,
# $basedaily, $CleanDays, $mydump and $logdir.
# The here-document is written hard against the left margin; the tabs inside the
# lines separate each option from its description and are part of the output.
function usage {
	cat <<EOF
Back the system up to disk under $base or $basedaily
Usage: $PROGNAME [-opts]
-b dir	Base directory where backups are written, default: $base
-c	Clean out $basedaily, remove anything more than $CleanDays days old
-C nn	Set the age for -c option
-d	Daily backup, ie write to $basedaily, incremental, full on Mondays
-D dirs	List of directories to backup
-F	Do NOT (find) generate a list of every file on the machine
-f	Full backup - regardless of the day of week
-m	Perform mysqldump, default $mydump
-M	Don't perform mysqldump
-o	Copy output to the current stderr as well as something in $logdir
-p dbs	dumP mysql databases dbs - a space separated list (in quotes)
-x	eXplain
--help	help message
Version: 1.17 11/23/09, latest at: http://www.phcomp.co.uk/Packages/BackupToDisk.html
EOF
}

# Parse options, recognise --help
# getopts only knows single letter options, so --help is spotted by hand and then
# treated as -x.
# The 'condition' of the while is a list of two commands:
#  1) put into A1 the argument that getopts will look at next (empty if there are
#     none left). NB: the braces in ${N} are needed, $10 would be taken as ${1}0.
#  2) set $opt, either by hand (--help) or with getopts. The loop ends when getopts
#     says that there are no more options.
while	[[ $# -ge $OPTIND ]] && eval A1=\${$OPTIND} || A1=
	if [[ $A1 = --help ]]
	then	opt=x	# --help is -x
	else	getopts b:cC:dD:FfmMop:x opt
	fi
do	case "$opt" in
	b)	base="$OPTARG"	;;
	c)	clean=y ;;
	C)	# The age must be a number: anything else would later be taken as
		# zero days and so clean out far too much.
		case "$OPTARG" in
		''|*[!0-9]*)	Die "Option -C needs a whole number of days, not '$OPTARG'" ;;
		esac
		CleanDays="$OPTARG" ;;
	d)	incremental=y	;;
	D)	DIRS="$OPTARG"	;;
	f)	DoFull=y ;;
	F)	DoFind=n ;;
	m)	mydump=y ;;
	M)	mydump=n ;;
	o)	DoCopyLog=y ;;
	p)	MysqlOptions="--databases $OPTARG"	;;
	x)	usage
		exit 0;;
	*)	# Name the argument that was being looked at (not always the first one)
		# and keep the whole complaint on stderr:
		echo "$PROGNAME: Unknown option '$A1'" >&2
		usage >&2
		exit 2;;
	esac
done
shift $((OPTIND - 1))

# base may have been changed (-b). Paths are later built by simply appending to
# it (eg $backupdir$yyyymmdd), so ensure that it ends with a '/' even if the user
# did not type one:
[[ $base = */ ]] || base=$base/
basedaily=${base}daily/
backupdir=$base

# Daily backups (-d) are written below $basedaily:
[[ $incremental = y ]] && backupdir=$basedaily

# -f forces a full backup. NB: this is done after the line above, so '-d -f' is a
# full backup that is written into the daily directory.
[[ $DoFull = y ]] && incremental=n

# Treat being able to write to / as meaning that we are root:
[[ ! -w / ]] && Die "Not being run as root"

[[ -n $MysqlOptions && $mydump = n ]] && Die "Mysql option given (-p) but mysqldump set 'no'"
[[ $mydump = y && -z $MysqlOptions ]] && MysqlOptions=--all-databases	# Dump all databases by default

[[ -z $DIRS && $mydump = n ]] && Die "Nothing to back up, missing -D option & no mysqldump"

# Fiddle with where the output of this program goes to.
# The point is that under normal operation we want to capture/redirect
# everything to a log file, when run manually we might want to do that
# but still have everything come to the terminal as well.
# The log file is $logdir$yyyymmdd-$hhmm. OutputToFile is set to y once stdout and
# stderr no longer go where they did when the script was started.
[[ -d $logdir ]] || mkdir -p $logdir || Die "Can't create logging directory: $logdir"
# Keep a copy of the original stderr on FD 3, Die & Note write to it once
# OutputToFile is y:
exec 3>&2	# So that we can use it later
# With -o (DoCopyLog is y): tee is started as a co-process (|&), stdout and stderr
# of this script are then sent to its input (>&p). tee copies what it reads to
# /proc/$$/fd/3 - FD 3 of this shell, ie the original stderr - and to its own
# stdout which is the log file. NB: needs /proc, the script assumes Linux.
if [[ $DoCopyLog = y ]]
then	tee /proc/$$/fd/3 > $logdir$yyyymmdd-$hhmm |&
	exec 1>&p 2>&1
	OutputToFile=y
else
	# If being run non interactively, send output to a log file:
	# (non interactively means: stderr is not a terminal. If it is a terminal
	# nothing is redirected and OutputToFile remains n.)
	[[ ! -t 2 ]] && exec > $logdir$yyyymmdd-$hhmm 2>&1 && OutputToFile=y
fi

echo "Backup starting at $( date )"

# Make sure that the backup partition is there: look for the marker file
# $base/MOUNTED. If it can't be seen, poke it with ls, wait a little and look again.
# This sometimes seems to work with an NFS filesystem that is automounted.
if [[ ! -f $base/MOUNTED ]]
then
	echo "Try a little harder to see: $base/MOUNTED"
	ls -l $base/MOUNTED
	sleep 2
	if [[ ! -f $base/MOUNTED ]]
	then
		Die "It looks as if $base is not mounted (I cannot see $base/MOUNTED)"
	fi
fi

# From here on the current directory is $backupdir.
cd $backupdir || Die "Can't cd to $backupdir"

# If there are LATEST-???? symlinks, get what they point at (find's %l prints the
# target of a symbolic link). Empty means: no such link.
lastestfulldir=
if [[ -L LATEST-FULL ]]
then
	lastestfulldir=$( find LATEST-FULL -printf %l ) || lastestfulldir=
fi

lastestincdir=
if [[ -L LATEST-INCREMENTAL ]]
then
	lastestincdir=$( find LATEST-INCREMENTAL -printf %l ) || lastestincdir=
fi

# Report what was found:
if [[ -n $lastestfulldir ]]
then
	echo "The last successful full backup was on $lastestfulldir"
else
	echo "The date of the last successful full backup is not known"
fi

if [[ -n $lastestincdir ]]
then
	echo "The last successful incremental backup was on $lastestincdir"
else
	echo "The date of the last successful incremental backup is not known"
fi

# Decide between a full and an incremental backup; if incremental, work out since
# when it is incremental from.
# Sets $suff, $SlinkName & $IncFind (and $IncrDays if incremental).
# Incremental only happens if asked for (-d without -f) and today is not Monday.
if [[ $incremental = y && $dayofweek != 1 ]]
then	# Incremental
	suff=incr
	SlinkName=LATEST-INCREMENTAL

	# Is there a full backup ? If so: when was it (seconds since the epoch at the
	# start of that day) ? $lastestfulldir is the name that LATEST-FULL points at,
	# it is expected to be a date that 'date --date=' understands (yyyymmdd).
	# The point is that if backups have been failing for some time
	# just blindly doing it since the last Monday will not back up
	# a lot that should be backed up.
	FullBackupDay=
	[[ -n $lastestfulldir ]] && FullBackupDay=$( date --date="$lastestfulldir" '+%s' )

	if [[ -n $FullBackupDay ]]
	then	Now=$( date '+%s' )
		# Round UP to whole days. 'find -mtime -N' takes files changed less than
		# N*24 hours ago, so rounding down could miss files that were changed on
		# the day of the full backup.
		IncrDays=$(( (Now - FullBackupDay + SecsInDay - 1) / SecsInDay ))
		# '-mtime -0' would match nothing (eg the clock has gone backwards):
		[[ $IncrDays -ge 1 ]] || IncrDays=1
	else	# Leave IncrDays as days since Monday
		echo "Do not know when the last full backup was, assuming last Monday"
	fi

	IncFind="-mtime -$IncrDays"

	echo "Backup will be incremental, $IncrDays days"

else	# Full backup
	suff=full
	SlinkName=LATEST-FULL
	IncFind=

	echo "Backup will be full - all files"
fi

echo ""

# Throw away old stuff: backup directories are named yyyymmdd, remove those that
# are dated more than $CleanDays days ago. The directories that the LATEST-????
# symlinks point at are never removed.
# NB PWD must be $backupdir
if [[ $clean = y ]]
then	# Refuse to clean if the age is not a number: perl would take it as zero
	# days and everything before today would be removed.
	case "$CleanDays" in
	''|*[!0-9]*)
		Note "Not cleaning old backups, '$CleanDays' is not a whole number of days"
		CleanBefore= ;;
	*)	# What date do we clean before ?
		# The age is given to perl as an argument, it is not pasted into the program.
		CleanBefore=$( perl -e 'my $Then = time - 86400 * $ARGV[0];
			my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime $Then;
			printf "%4.4d%2.2d%2.2d\n", $year + 1900, $mon + 1, $mday' "$CleanDays" )
		# Do not go near rm unless we really do have a date:
		case "$CleanBefore" in
		[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9])	;;
		*)	Note "Not cleaning old backups, cannot work out the date to clean before"
			CleanBefore= ;;
		esac ;;
	esac
fi

if [[ $clean = y && -n $CleanBefore ]]
then	echo "Cleaning old backups - Clean older than $CleanDays days, ie before: $CleanBefore"
	[[ -n $lastestincdir  ]] && echo "Protect LATEST-INCREMENTAL=$lastestincdir"
	[[ -n $lastestfulldir ]] && echo "Protect LATEST-FULL=$lastestfulldir"

	echo ""
	# Only look at names that are 8 digits starting with 2, so that the numeric
	# comparison below is never given anything else. Y3K bug here.
	# The pattern is expanded before the loop starts, ie we have the list of
	# names before anything is removed.
	for dir in 2[0-9][0-9][0-9][0-9][0-9][0-9][0-9]
	do	# Must be a directory. This also skips the pattern itself, which is
		# what we get if nothing matches it.
		[[ -d $dir ]] || continue

		# Leave if too young:
		[[ $dir -ge $CleanBefore ]] && continue

		# Leave if it is pointed to by LATEST-???? symlink, if backups have been
		# failing for some time this could be quite old:
		[[ $lastestfulldir = "$dir" ]] && continue
		[[ $lastestincdir  = "$dir" ]] && continue

		echo "Cleaning old backup: $dir"
		rm -rf -- "$dir"
	done

	echo "Completed cleanup of old backups at $( date )"
	echo ""
fi

# Today's backups go into a directory that is named after the date:
arch=$backupdir$yyyymmdd

# Make the backup directory if it doesn't exist. Use Die if that fails so that
# the user is told why, even if the output is going to a log file:
[[ -d $arch ]] || mkdir "$arch" || Die "Can't create backup directory: $arch"

# The directories to back up are given relative to /
cd / || Die "Can't cd to /"

Success=y	# Let's be optimistic

# The trouble with the error checks below is that they test that bzip2 worked, not the
# backup command, unless the shell can make a pipeline fail if anything in it fails.
# Real ksh (ksh93) can do this, pdksh cannot.
# Testing $KSH_VERSION does not tell them apart reliably: mksh sets it and has
# pipefail. NOTE(review): ksh93 also appears to set it (as another name for
# ${.sh.version}) - confirm for the ksh that is in use.
# So try the option in a subshell, where an error cannot hurt us, and set it
# here if that worked:
( set -o pipefail ) 2>/dev/null && set -o pipefail

# Write the name of every file on the machine to $arch/AllFiles.bz2 (unless -F).
if [[ $DoFind = y ]]
then
	echo "Generating list of every file on the machine at $( date )"

	# Getting a list of file systems is not so easy. We want to avoid network mounted
	# file systems (other than where we backup to) as they are slow & nothing to do
	# with us. Also want to avoid things like /proc & /sys.
	# So look at the file system type: perl prints the 3rd word of each line from
	# mount if the 5th word is one of the types below (presumably lines look
	# like: "device on /mountpoint type ext3 (options)").
	FileSystems=$( mount | perl -walne 'print $F[2] if($F[4] =~ /^(ext.|jfs|reiserfs|sysv|xfs)$/)' )
	echo "File systems seem to be mounted at: $FileSystems"

	# -mount: don't go into /proc & similar (assume that $base is (nfs) mounted
	# from elsewhere). $FileSystems is not quoted, each mount point is one argument.
	if ! find $FileSystems $base -mount | bzip2 > $arch/AllFiles.bz2
	then
		echo "Error in generating of file names on the machine"
		Success=n
	fi

	echo "Done $( date )"
	echo ""
fi

# Dump the mysql databases into $arch/MysqlDump.bz2 (if -m).
if [[ $mydump = y ]]
then
	echo "Taking copy of mysql databases at $( date )"

	# $MysqlOptions is not quoted: it can be several words (--databases a b c).
	if ! mysqldump $MysqlOptions | bzip2 > $arch/MysqlDump.bz2
	then
		echo "Error in dumping mysql database"
		Success=n
	fi

	echo "Done $( date )"
	echo ""
fi

# Back up each directory in $DIRS into its own compressed cpio archive:
#	$arch/<name>.cpio.$suff.bz2
# where <name> is the directory with every / changed to - (RFS for '.') and $suff
# is full or incr.
# Names below are relative to '/' - ie not start with a '/'.
for Dir in $DIRS
do	echo "Starting $Dir at $( date )"

	name=$( echo $Dir | sed -e 's:/:-:g' )
	[[ $Dir = . ]] && name=RFS

	ArchFile=$arch/$name.cpio.$suff.bz2
	PartFile=$ArchFile.partial

	# Done on an earlier run today ?
	if [[ -f $ArchFile ]]
	then	echo "$name.cpio.$suff.bz2 already exists, skipping"
		continue
	fi

	# Write the archive under a temporary name and only give it its real name
	# if all went well. Otherwise what is left by a failed run would be taken
	# for a good archive (and skipped, above) when the script is run again.
	# $IncFind is not quoted: it is either empty or two words (-mtime -N).
	if find $Dir -mount $IncFind -print0 | cpio -oBa -H crc --null | bzip2 > "$PartFile" &&
		mv -f "$PartFile" "$ArchFile"
	then	:
	else	Note "Error in backup of $Dir"
		rm -f "$PartFile"
		Success=n
	fi
	echo ""
done

echo "Directory backups completed at $( date )"
echo ""

# Create/maintain the symlink LATEST-FULL and LATEST-INCREMENTAL to the latest *successful*
# full or incremental backup ($SlinkName is the appropriate name).
# If the above backup failed, don't reassign to today's since the next one that works
# will not find the files that it needs to link to - so we will get them recreated
# which takes more execution time and loads of disk to hold the files that we already
# have an unchanged copy of.
if [[ $CreateLinks != y ]]
then	# Not an error, do not claim that it is one:
	echo "Not creating links, CreateLinks is '$CreateLinks'"
elif [[ $Success = y ]]
then	echo "Creating links; $SlinkName -> $yyyymmdd"
	rm -f "$backupdir/$SlinkName"
	if ! ln -s "$yyyymmdd" "$backupdir/$SlinkName"
	then	Note "Can't create link $backupdir/$SlinkName"
		Success=n
	fi
else	Note "Not creating links due to error in backup"
fi
echo ""

echo "Completed backup at $( date )"

# Something went wrong:
if [[ $Success = n ]]
then	# If output is NOT going to stderr (ie on a terminal) - copy it there.
	# This is prob being run from cron which will mail the output somewhere suitable.
	if [[ $OutputToFile = y ]]
	then	( echo ""
		  echo "**** Error in backup ****"
		  echo ""
		  cat "$logdir$yyyymmdd-$hhmm"
		) >&3
	fi

	# Let whatever ran us know that the backup is not good:
	exit 1
fi

# end
