#!/bin/bash
set -eo pipefail

# Prints all arguments, joined by single spaces, as one line on stderr.
echoerr() {
  # pin IFS so "$*" always joins with a space, whatever the caller set
  local IFS=' '
  printf '%s\n' "$*" >&2
}

# Prints the help text on stdout and terminates the script with status 0.
usage() {
  printf '%s\n' \
    " lostfiles 5.0" \
    " Usage: $0 [-s] [-z] [-h]" \
    "      Calling without an option runs in relaxed mode sorting by name" \
    "  -h  display this help" \
    "  -s  optionally define strict mode" \
    "  -z  optionally sort results by size"
  exit 0
}

# Defaults which the user can override with switches.
# postprocess: last stage of the output pipeline (plain sort by name)
postprocess="sort"
# make_filter: name of the function that builds the search/exclude lists
make_filter="make_relaxed_filter"
# post-filter applied to the candidate list; relaxed mode overrides this
postfilter="cat"

while getopts 'hsz' OPTION; do
  case "$OPTION" in
    z)
      postprocess="sort_by_size"
      ;;
    s)
      make_filter="make_strict_filter"
      ;;
    h)
      usage
      ;;
    *)
      # Unknown switch: getopts has already reported it on stderr. usage
      # exits 0 by itself, so run it in a subshell to get the help text on
      # stderr and then fail with a non-zero status instead of "success".
      (usage) >&2
      exit 1
      ;;
  esac
done
# drop the parsed switches so only operands (if any) remain in "$@"
shift $((OPTIND - 1))

# Refuse to continue unless the invoking user has uid 0.
if [[ "$UID" -ne 0 ]]; then
  echoerr "You must run this script as root."
  exit 1
fi

# Sorts a list of file names (one per line on stdin) by disk usage, largest
# first. Every output line is the record printed by `du -s` for one path.
sort_by_size() {
  # -0/-n1: NUL-separated names, one du invocation per path.
  # -r: do not run du at all when the list is empty; without it GNU xargs
  #     still runs `du -s` once, which reports the current directory.
  # --: the path handed to du is never parsed as an option.
  tr '\n' '\0' | xargs -0 -r -n1 du -s -- | sort -rn -k1
}

# Reads a list of paths from the configuration file at the specified path
# and appends them to the global include/exclude list variables.
# Arguments: $1 - path of the configuration file; if it is not a regular
#                 file the call is a no-op
# Globals:   include_list, exclude_list (appended to)
# Exits the script with status 1 when a line starts with anything other than
# '#', a space, '+' or '-' (empty lines are accepted).
read_config() {
  local conf_file=$1
  # scratch arrays stay local so repeated calls leave no stray globals behind
  local -a added_includes added_excludes

  [ -f "$conf_file" ] || return 0

  if grep -q -- '^[^# +-]' "$conf_file"; then
    echoerr 'Invalid configuration file.'
    echoerr 'All lines in '"$conf_file"' must start with #, + or -.'
    exit 1
  fi

  # keep only the lines carrying the marker and strip that marker
  readarray -t added_includes < <(LC_ALL=C sed -n 's/^+//p' -- "$conf_file")
  include_list+=("${added_includes[@]}")

  readarray -t added_excludes < <(LC_ALL=C sed -n 's/^-//p' -- "$conf_file")
  exclude_list+=("${added_excludes[@]}")
}

# Filters the candidate list on stdin, dropping every symbolic link found
# under /etc/systemd. `systemctl enable` creates these and they are not owned
# by any package. The rule is type-based, so it cannot be written as an fd
# --exclude glob; instead the links of that small subtree are collected once
# and handed to grep as fixed, whole-line patterns, which is far cheaper than
# testing each candidate line in bash. With no links found the pattern list
# is empty and every input line passes through.
drop_systemd_symlinks() {
  local grep_status=0

  LC_ALL=C grep -v -x -F -f <(
    fd --absolute-path --hidden --no-ignore --no-global-ignore-file \
      --type symlink '' /etc/systemd 2>/dev/null | sed -e 's|/$||'
  ) - || grep_status=$?

  # grep's status 1 only means "nothing was printed"; anything above that is
  # a real failure and is reported to the caller as status 1.
  case "$grep_status" in
    0 | 1) return 0 ;;
    *) return 1 ;;
  esac
}

# Reads the configuration files and builds the relaxed include/exclude lists.
# relaxed mode is more forgiving about hits, and excludes files generated by
# various apps.
# Globals: include_list, exclude_list (extended through read_config),
#          search_paths and postfilter (assigned)
make_relaxed_filter() {
  # keep the working variables out of the global namespace
  local local_conf f kernel version

  # a lostfiles.conf next to the script replaces the system-wide configuration
  local_conf="$(dirname -- "$0")/lostfiles.conf"

  if [ -f "$local_conf" ]; then
    read_config "$local_conf"
  else
    read_config "/etc/lostfiles.conf"

    # read drop-in configuration overrides from /etc/lostfiles.d/*.conf
    if [ -d /etc/lostfiles.d ]; then
      for f in /etc/lostfiles.d/*.conf; do
        # also false for the literal pattern an unmatched glob leaves behind
        if [ -f "$f" ]; then
          read_config "$f"
        fi
      done
    fi
  fi

  # exclude the in-use module directory for each installed kernel; these hold
  # generated files (modules.dep, dkms builds) that are not owned by a package
  for kernel in /boot/vmlinuz-linux*; do
    [ -e "$kernel" ] || continue
    # Best effort: the script runs with `set -eo pipefail`, so a failing
    # `file` would otherwise abort the whole run; skip that kernel instead.
    # NOTE(review): assumes the release string is the 9th space-separated
    # field of file(1)'s description -- confirm for non-x86 kernel images.
    version=$(file "$kernel" | cut -d ' ' -f 9) || version=
    if [ -n "$version" ]; then
      exclude_list+=("/usr/lib/modules/$version")
    fi
  done

  search_paths=("${include_list[@]}")

  # drop symbolic links under /etc/systemd from the candidate list
  postfilter="drop_systemd_symlinks"
}

# Strict mode: no excludes are configured; only the default Arch paths are
# set as the search roots.
make_strict_filter() {
  search_paths=(
    /boot
    /efi
    /etc
    /opt
    /srv
    /usr
    /var
  )
}

# run the filter builder chosen during option parsing (relaxed unless -s was
# given); it fills in search_paths
$make_filter

# Lists every file under the configured search paths, honoring the excludes.
#
# fd matches --exclude globs gitignore-style, i.e. relative to the search path
# rather than as absolute paths, so we run fd once per search root and anchor
# each relevant exclude to that root by stripping its prefix (keeping the
# leading slash). An exclude equal to a whole root drops that root entirely.
#
# Globals: search_paths, exclude_list (read)
# Outputs: whatever fd prints for each root, on stdout
# Returns: 1 (with a message on stderr) when fd is not available, else 0
list_files() {
  local root prefix e skip_root
  local -a fd_excludes

  # fd's own stderr is silenced below, so a missing binary would otherwise
  # look exactly like "no files found"
  if ! command -v fd >/dev/null 2>&1; then
    printf '%s\n' "lostfiles: the 'fd' command is required but was not found." >&2
    return 1
  fi

  for root in "${search_paths[@]}"; do
    # Tolerate a trailing slash on the configured root. The filesystem root
    # "/" strips down to an empty prefix; keep "/" as the path to search in
    # that case instead of ending up with an empty (non-existent) root.
    prefix="${root%/}"
    if [ -n "$prefix" ]; then
      root="$prefix"
    fi
    [ -e "$root" ] || continue

    fd_excludes=()
    skip_root=
    for e in "${exclude_list[@]}"; do
      if [ "$e" = "$root" ]; then
        skip_root=1
        break
      fi
      case "$e" in
        "$prefix"/*)
          # strip root prefix, keep leading slash to anchor
          fd_excludes+=(--exclude "${e#"$prefix"}")
          ;;
      esac
    done
    if [ -n "$skip_root" ]; then
      continue
    fi

    # best effort: fd's diagnostics and exit status are deliberately ignored
    fd --absolute-path --hidden --no-ignore --no-global-ignore-file \
      "${fd_excludes[@]}" '' "$root" 2>/dev/null || :
  done
}

# Every path pacman records as owned by a package, byte-sorted and unique.
# Trailing slashes are stripped so both lists are normalized the same way.
owned_paths() {
  LC_ALL=C pacman -Qlq \
    | sed -e 's|/$||' \
    | LC_ALL=C sort -u
}

# Every path found on disk after the mode-specific post-filter, byte-sorted
# and unique. fd appends a trailing slash to directories; it is stripped here
# so the entries compare equal to the normalized pacman list.
candidate_paths() {
  list_files \
    | sed -e 's|/$||' \
    | $postfilter \
    | LC_ALL=C sort -u
}

# comm -13 keeps only the lines unique to the second list, i.e. paths on disk
# that no package owns; the result goes through the selected sorter.
LC_ALL=C comm -13 <(owned_paths) <(candidate_paths) | $postprocess

# vim:set ts=2 sw=2 et:
