#!/bin/bash
set -e
			usage () {
			cat <<END
usage:
  expire-iso8601 [<options>] <number>x<interval> [<number>x<interval> ...]
options:
   -u<unitlen>  <interval> is measured in units of <unitlen> seconds
                   (default is 86400, so <interval> is in days)
   -s<slop>     allow kept items to be <slop> seconds shorter apart than
                   specified; default is 10% of <unitlen>
   -n           do not really delete
   -r           recursive removal (rm -r)
example:
   /home/ian/junk/expire-iso8601 14x1 4x7
      uses units of 86400s (1 day) with a slop of 8640
      it keeps 14 daily items
       (that is 14 items, dated no less than 86400-8640 apart)
      and 7 weekly items
       (that is 7 items, dated no less than 7*86400-8640 apart)
      the 14 daily and 7 weekly items may be the same, or not
   There is no need to sort the list of <number>x<interval> pairs.
exit status:
   0                   ok
   4                   rm failed
   8                   bad usage
   16                  catastrophic failure
END
			}

# Copyright 2006 Ian Jackson <ian@chiark.greenend.org.uk>
#
# This script and its documentation (if any) are free software; you
# can redistribute it and/or modify them under the terms of the GNU
# General Public License as published by the Free Software Foundation;
# either version 3, or (at your option) any later version.
# 
# chiark-named-conf and its manpage are distributed in the hope that
# it will be useful, but WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE.  See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along
# with this program; if not, consult the Free Software Foundation's
# website at www.fsf.org, or the GNU Project website at www.gnu.org.


trap 'exit 16' 0
badusage () { echo >&2 "bad usage: $*"; usage >&2; trap '' 0; exit 8; }

#-------------------- argument parsing --------------------

alldigits () {
	[ "x${2##*[^0-9]}" = "x$2" ] || \
		badusage "bad $1 \`$2'; must be all digits"
	[ "$2" ] || badusage "bad $2; must be nonempty"
	eval $1='$2'
}

rm=rm
recurse=''
unit=86400
slop=''

while [ $# -ge 1 ]; do
	arg=$1; shift
	case "$arg" in
	--|-)	break ;;
	--help)	usage; exit 0 ;;
	--*)	badusage "unknown option $arg" ;;
	-*)
		val=${arg#-?}
		case "$arg" in
		-n*)	rm=: ;;
		-r*)	recurse=-r ;;
		-u*)	alldigits unit "$val"; arg='' ;;
		-s*)	alldigits slop "$val"; arg='' ;;
		*)	badusage "unknown option ${1:0:2}" ;;
		esac
		arg=-${arg#-?}
		if test "x$arg" != x-; then set -- "$arg" "$@"; fi
		;;
	*)	set "$arg" "$@"; break ;;
	esac
done

[ $# -ge 1 ] || badusage 'too few arguments'
[ "$slop" ] || slop=$(( $unit / 10 ))

for ni in "$@"; do
	case "$ni" in *x*);; *) badusage "bad <number>x<interval> $ni";; esac
	alldigits number "${ni%%x*}"
	alldigits interval "${ni#*x}"
done

#-------------------- scanning the directory ----------

# We build in $l a list of the relevant filenames and the time_t's
# they represent.
#
# Each entry in $l is $time_t/$filename, and the list is
# newline-separated for the benefit of sort(1).

ls=0
for cn in [0-9]*; do
	case "$cn" in
	????-??-??)
		conv="$cn";;
	????-??-??T[0-2][0-9]+[0-9][0-9][0-9][0-9]|\
	????-??-??T[0-2][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9]|\
	????-??-??T[0-2][0-9]:[0-6][0-9]:[0-6][0-9]+[0-9][0-9][0-9][0-9])
		conv="${cn%T*} ${cn#*T}";;
	*)
		echo >&2 "ignoring $cn"
		continue;;
	esac
	cs=$(date -d "$conv" +%s)
	l="$cs/$cn
$l"
done

#-------------------- main computation --------------------

# We process each minimum/extent pair, to have it select a bunch of
# versions to keep.  We annotate entries in $l: if we are keeping
# an entry we prepend a colon; temporarily, if we are keeping an entry
# because of this particular minimum/extent, we prepend a comma.

# For each minimum/extent pair we look at the list from most recent
# to least recent,
#   ie in order of increasing age
#   ie in order of decreasing time_t
# and each time we're more than min older than the last item we kept,
# we mark the item to keep, until we have as many as we want.
#
# We build the new list (space-separated) in lnew.

l=$(sort -nr <<END
$l
END
)

for ni in "$@"; do
	wantcount=${ni%x*}

	div=1

	while true; do
		min=$(( (${ni#*x} * $unit) / $div - $slop ))

		ls=''
		lnew=''
		skipped=0
		for ce in $l; do
			cn=${ce#*/}; cl=${ce%%/*}
			cs=${cl#,}; cs=${cs#:}
			case $cl in ,*)	ls=$cs; continue;; esac
			if [ $wantcount != 0 ]; then
				if ! [ "$ls" ] || \
				   [ $(( $ls - $cs )) -ge $min ]; then
					echo "keep (for $ni) $cn"
					ce=,$ce
					ls=$cs
					wantcount=$(( $wantcount - 1 ))
				else
					skipped=$(( $skipped+1 ))
				fi
			fi
			lnew="$lnew $ce"
		done
		l=$lnew

		if [ $wantcount = 0 ]; then break; fi
		printf "%s" "insufficient (for $ni) by $wantcount"
		if [ $skipped = 0 ]; then echo; break; fi
		div=$(( $div * 2 ))
		echo " shortening interval ${div}x"
	done

	# s/([,:]+).*/:\1/g
	lnew=''
	for ce in $l; do
		case $ce in ,*) ce=:${ce#,};; esac
		case $ce in ::*) ce=${ce#:};; esac
		lnew="$lnew $ce"
	done
	l=$lnew
done

#-------------------- execution --------------------

trap '' 0
exitstatus=0

nonbroken_echo () { (echo "$@"); }
# While we have subprocesses, we have to avoid bash calling write(1,...)
# because of a bug in bash (Debian #382798), so we arrange for a subshell
# for each echo.

jobs=''
for ce in $l; do
	case $ce in
	:*);;
	*)
		cn=${ce#*/}
		nonbroken_echo "expire $cn"
		$rm $recurse -- $cn &
		jobs="$jobs $!"
		;;
	esac
done

if [ "$jobs" ]; then
	nonbroken_echo "all running"
fi

for job in $jobs; do
	wait $job || exitstatus=4
done

if [ $exitstatus = 0 ]; then
	echo "complete"
else
	echo "complete, but problems deleting"
fi

exit $exitstatus
