#!/bin/bash
# Version of this script; printed by -V/--version and shown in the prompt
# of less when the full documentation is displayed (-H/--Help).
Version=1.07
# Name under which the script was invoked, without its directory part;
# used as message prefix, as getopt's program name and in the name of the
# temporary directory.
Myname="${0##*/}"

:<<'DOC'
= clzip - remove common files in two zip archives from the first

= Synopsis
clzip [options] cleanedfile[.zip] otherfile[.zip]	

== Options
-h,--help	print this help and exit
-H,--Help	print full documentation via less and exit
-V,--version	print version and exit
-d,--dry	do a dry run, keeping temporary files

= Description
clzip removes files that are common in two zip archives from the
first archive. This may be useful if you make full backups from time to
time: many files that occur in the first backup will also occur in
following backups.

clzip considers two files to be identical if they have the same size and
modification date and time.

= Author
[Wybo Dekker](wybodekker@me.com)

= Copyright
Released under the [GNU General Public License](https://www.gnu.org/copyleft/gpl.html)
DOC

# Escape sequences (interpreted by echo -e): switch to red, back to normal.
REd='\e[38;5;9m' Nor='\e[0m'

# die [message...]
# Write each message on a line of its own to stderr, prefixed with the
# script name and coloured red, then exit with status 1. Without any
# message nothing is printed, but the script still exits with status 1.
die() {
   local msg
   for msg in "$@"; do
      echo -e "$Myname: $REd$msg$Nor"
   done 1>&2
   exit 1
}

# helpsrt
# Print the short help and exit: the lines of this script from the
# "= Synopsis" heading up to the next single-"=" heading, both headings
# left out.
helpsrt() {
   sed -n '/^= Synopsis/,/^= /p' "$0" |
      sed '1d;$d'
   exit
}

# helpall
# Show the full documentation (the body of the first DOC here-document of
# this script) in less and exit. The prompt of less shows the script name
# and the version, with the first dot of the version replaced by "·".
helpall() {
   local prompt="$Myname-${Version/./·} (press h for help, q to quit)"
   sed -n "/^:<<'DOC'$/,/^DOC/p" "$0" |
      sed '1d;$d' |
      less -P"$prompt"
   exit
}

:<<'DOC' #----------------------------------------------------------------------
= excheck
synopsis:	 excheck executable1 [executable2...]
description:	check if all needed execs are there,
		getopt is GNU, and zip and unzip are not obsolete.
DOC
#-------------------------------------------------------------------------------
excheck() {
   # Check the environment before doing any work.
   # Arguments: names of executables that must be available.
   # Every missing executable is reported on stderr; if any is missing,
   # if getopt is not the GNU version, or if unzip (< 6) or zip (< 3) is
   # too old, the script is terminated through die.
   local ok=true i major
   for i; do
      command -v "$i" > /dev/null && continue
      # Report all missing tools before giving up, not just the first one.
      printf '%s: Missing executable: %s\n' "$Myname" "$i" >&2
      ok=false
   done
   $ok || die

   # GNU getopt identifies itself with exit status 4 on -T; a traditional
   # getopt may print something instead, which is of no interest here.
   getopt -T > /dev/null
   [[ $? -eq 4 ]] || die "Your getopt is not GNU"

   # The version is the word following "UnZip" in the help text. Compare
   # the complete major number (everything before the first dot), so that
   # a two-digit major version is not mistaken for an old one. 10# forces
   # decimal interpretation in case of leading zeros.
   i=$(unzip -h | awk '$1 == "UnZip" { print $2 }')
   major=${i%%.*}
   [[ $major =~ ^[0-9]+$ ]] && (( 10#$major >= 6 )) ||
      die "Your unzip is too old (version $i)"

   # Same check for zip, whose help text has a line starting with "Zip".
   i=$(zip -h | awk '$1 == "Zip" { print $2 }')
   major=${i%%.*}
   [[ $major =~ ^[0-9]+$ ]] && (( 10#$major >= 3 )) ||
      die "Your zip is too old (version $i)"
}

:<<'DOC' #----------------------------------------------------------------------
= handle_options
synopsis:	 handle_options "$@"
description:	Handle the options
globals used:	 Myname Version args dry
DOC
#-------------------------------------------------------------------------------
handle_options() {
   # Parse the command line given as arguments.
   # Sets the global "dry" (true/false) and stores the remaining,
   # non-option arguments in the global array "args". The help and version
   # options print their output and terminate the script.
   local parsed
   # getopt is given $Myname as the program name (-n); when it rejects the
   # command line the script exits with status 1.
   parsed=$(getopt -n "$Myname" -o hHVId -l help,Help,version,dry -- "$@") ||
      exit 1
   # Replace the positional parameters by getopt's normalised, quoted list.
   eval set -- "$parsed"
   dry=false
   while :; do
      case $1 in
         -h|--help)     # print this help and exit
            helpsrt ;;
         -H|--Help)     # print full documentation via less and exit
            helpall ;;
         -V|--version)  # print version and exit
            echo $Version
            exit ;;
         -d|--dry)      # do a dry run, keeping temporary files
            dry=true
            shift ;;
         -I)            # instscript is not defined in this file
            instscript "$0" || die 'the -I option is for developers only'
            exit ;;
         --)            # end of the options
            shift
            break ;;
         *)
            break ;;
      esac
   done
   args=( "$@" )
}

excheck zip unzip mktemp tac 
handle_options "$@"
set -- "${args[@]}"

test $# -eq 2 || die "I need 2 input files, not $#"

# The archive names may be given with or without the .zip extension.
a="${1%.zip}.zip"
b="${2%.zip}.zip"
[[ -e $a ]] || die "$a does not exist"
[[ -e $b ]] || die "$b does not exist"
# Besides equal names, also refuse two different paths that lead to the
# same file (-ef): every entry would be "common" and the archive would be
# emptied.
[[ $a = "$b" || $a -ef $b ]] && die "zip files $a and $b are the same file!"

tmp=$(mktemp -dt "$Myname.XXXXXXXXXX") ||
   die "Could not create a temporary directory"
# A dry run keeps the temporary files, so no cleanup is installed then.
# On a signal the script now really stops; the EXIT trap does the cleanup.
if ! $dry; then
   trap 'rm -rf -- "$tmp"' EXIT
   trap 'exit 1' HUP INT TERM
fi

# list both zips, dropping the first 3 and the last 2 lines of the
# unzip listing so that only the sorted entry lines remain:
unzip -l "$a" |tail -n +4|tac|tail -n +3|sort >"$tmp/1"
unzip -l "$b" |tail -n +4|tac|tail -n +3|sort >"$tmp/2"

# make a backup:
$dry || cp -i "$a" "$a~" || die "No permission to write in $PWD"

# files to be deleted: entry lines that are identical in both listings
# (same size, date, time and name), reduced to the name alone. comm
# compares whole lines, so names containing "|" are handled correctly.
comm -12 "$tmp/1" "$tmp/2" |
sed -e 's/\s*[0-9]\+  ....-..-.. ..:..   //' >"$tmp/3"

# list of cleaned zip is as long as list of deleted files?
# Then nothing would be left in the archive, so remove it altogether.
if [ "$(wc -l < "$tmp/1")" -eq "$(wc -l < "$tmp/3")" ]; then 
  echo cleaned zip became empty - deleted \(there is a backup\)
  $dry || rm "$a"
  exit
fi

# doit, saving in temporary directory if it's a dry run:
if $dry; then
   sed 's/^/deleting: /' "$tmp/3"
   # die only serves to print the notice; a dry run thus exits with status 1
   die "This was a dry run, nothing changed" "temp files in $tmp"
fi
# Delete the listed names (read from stdin), taken literally, not as
# wildcard patterns.
zip --no-wild -d@ "$a" <"$tmp/3"
zip -o "$a"	# set modification time back
