#!/bin/bash
#  file - file and directory support library

set -e

lib_load 'core/qsort'


######
# Settings

core_file_config_init() {
	# file_find_opts[] - extra find(1) arguments that file_find adds after
	# its own arguments.
	lib_setting_arrays file_find_opts

	# file_find_sort - command word that file_find runs to decide whether
	# to sort its result; off by default.
	lib_setting_vars file_find_sort
	file_find_sort=false

	# file_hashbin - hashing program run by file_hash_raw.
	lib_setting_vars file_hashbin
	file_hashbin=sha1sum

	# Parallel per-file result arrays written by _file_hash_split.
	lib_setting_arrays file_hash_texts file_hash_names file_hash_sizes file_hash_modes
}


######
# Directory helpers

# run_mkdir - Creates the given directories, including missing parents,
# through run.  mkdir's stdout is sent to stderr; -v is added when $debug
# succeeds as a command.
run_mkdir() {
	local vflag
	if $debug; then
		vflag=-v
	fi
	# $vflag is expanded unquoted so that it disappears when it is unset.
	run mkdir -p $vflag "$@" >&2
}

# run_pushd - Enters directory $1 with pushd (through run), hiding the
# directory-stack listing; a failure is reported through error.
run_pushd() {
	if ! run pushd "$1" >/dev/null; then
		error "$1: pushd failed"
	fi
}
# run_popd - Returns to the previous directory with popd (through run),
# hiding the directory-stack listing.
run_popd() {
	run popd >/dev/null
}

# short_path - Prints an abbreviated form of path $1 for display.
#
# The directory part of $1 is resolved with realpath(1) and reduced to its
# last component, giving ".../<parent>/<name>".  When the resolved parent is
# the root directory the result is "/<name>".
short_path() {
	local name=$1
	local dir
	dir=$(dirname "$name")
	# realpath always prints an absolute path, so a "." parent never
	# survives this step and needs no special case.
	dir=$(realpath "$dir")
	name=$(basename "$name")

	if [ "$dir" = '/' ]; then
		# basename of "/" is "/" itself; printing ".../" "/" "/name"
		# would give "...///name".  ${name#/} also covers $1 = "/".
		printf '%s\n' "/${name#/}"
		return
	fi

	dir=$(basename "$dir")
	printf '%s\n' ".../$dir/$name"
}


# file_mkdir - Makes sure the directory part of file name $1 exists.
# Only the directory is created (through run_mkdir), never the file itself.
file_mkdir() {
	has_args 1 "$@"
	local parent
	parent=$(dirname "$1")
	if [ ! -d "$parent" ]; then
		run_mkdir "$parent"
	fi
}


######
# Search

# file_find - Fills an array with names of things found by find(1).
#   $1     name of the array variable to fill, one entry per output line
#   $2...  arguments for find(1); file_find_opts[] and -print follow them
# The array is handed to qsort_list when $file_find_sort succeeds as a
# command.  Entries are split on newlines, so a name that itself contains a
# newline ends up in more than one entry.
file_find() {
	min_args 2 "$@"
	local var=$1
	shift

	# find's output goes through a temporary file and is then loaded into
	# the array in this shell.
	local fftmp
	fftmp=$(cmd_tempfile)
	find "$@" "${file_find_opts[@]}" -print >"$fftmp"

	# No other locals are declared here: a local array would shadow a
	# caller's array of the same name and leave that one unfilled.
	readarray -t "$var" <"$fftmp"

	if $file_find_sort; then
		qsort_list "$var"
	fi
}


######
# Checking

# file_size - Prints the stat(1) "%s" value (size) of each given file, one
# per line.
file_size() {
	stat -c '%s' "$@"
}

# file_test - Applies test(1) operator $1 to each remaining argument and
# reports every argument that fails it through error.
file_test() {
	min_args 2 "$@"

	local tst=$1
	shift

	local item
	for item in "$@"; do
		# $tst is expanded unquoted, so it is subject to word splitting.
		if ! test $tst "$item"; then
			error "$item: not found"
		fi
	done
}

# file_is_terminal - Succeeds when a file descriptor refers to a terminal.
#   $1  descriptor number, or one of the names stdin, stdout, stderr
file_is_terminal() {
	has_args 1 "$@"
	local fd=$1
	case "$1" in
	(stdin) fd=0 ;;
	(stdout) fd=1 ;;
	(stderr) fd=2 ;;
	esac
	# Quoted on purpose: with an empty $1 an unquoted "[ -t ]" becomes a
	# one-argument test of the string "-t", which always succeeds.
	[ -t "$fd" ]
}


######
# Hashing

# file_hash_raw - Runs the hash program named by $file_hashbin over the
# given files through run, passing its output along unchanged.
file_hash_raw() {
	min_args 1 "$@"
	# $file_hashbin is expanded unquoted, so it is subject to word splitting.
	run $file_hashbin "$@"
}

# file_hash_list - Stores the file_hash_raw output for files $2... in the
# array named by $1, one entry per output line.
file_hash_list() {
	local var=$1
	shift

	# The output is written to a temporary file first and loaded from
	# there into the array.
	local tmp
	tmp=$(cmd_tempfile)
	file_hash_raw "$@" >"$tmp"

	mapfile -t "$var" <"$tmp"
}

# file_hash - Hashes the given files and feeds every resulting output line
# to _file_hash_split, which records it in the file_hash_* arrays.
file_hash() {
	min_args 1 "$@"

	# One entry per line produced by the hash program.
	local -a hash_lines
	file_hash_list hash_lines "$@"

	for_each _file_hash_split "${hash_lines[@]}"
}
# _file_hash_split - Takes one hash-program output line ($1) apart and
# appends the pieces to the file_hash_* arrays.
# The line is read as "<hash> <mode><name>": the hash ends at the first
# space, and the single character after that space is the mode.
_file_hash_split() {
	local line=$1

	# Hash text: everything up to the first space.
	local text=${line%% *}

	# Remainder after the first space: one mode character, then the name.
	local rest=${line#* }
	local mode=${rest:0:1}
	local name=${rest:1}

	local size
	size=$(file_size "$name")

	list_append file_hash_names "$name"
	list_append file_hash_modes "$mode"
	list_append file_hash_texts "$text"
	list_append file_hash_sizes "$size"
}

# file_hash_print - Prints every entry held in the file_hash_* arrays, one
# line per entry, by calling _file_hash_print_index for each index.
file_hash_print() {
	# Use the array's own indices rather than "seq 0 N-1": no external
	# process is needed, and an empty array simply yields no indices.
	for_each _file_hash_print_index "${!file_hash_names[@]}"
}
# _file_hash_print_index - Prints entry $1 of the file_hash_* arrays in the
# form "<hash> <mode><name> (<size>)".
_file_hash_print_index() {
	local idx=$1
	printf '%s %s%s (%s)\n' \
		"${file_hash_texts[$idx]}" \
		"${file_hash_modes[$idx]}" \
		"${file_hash_names[$idx]}" \
		"${file_hash_sizes[$idx]}"
}


######
# Duplicates

# file_duplicate_scan - Finds regular files with identical hashes.
#   $1...  arguments for find(1), passed on through file_find
# Prints one group per hash that occurs more than once:
#   -<name>   the first file found with that hash
#   +<name>   every further file with the same hash
# Progress is reported through info.  The file_hash_* arrays are emptied and
# refilled with the results of this scan.
file_duplicate_scan() {
	min_args 1 "$@"

	info "duplicate scan started..."

	local -a fd_files
	local file_find_opts=( -type f )
	file_find fd_files "$@"

	# Nothing found: there is nothing to hash, and file_hash insists on at
	# least one argument.
	if [ "${#fd_files[@]}" -eq 0 ]; then
		info "no files found."
		info "duplicate scan... done."
		return 0
	fi

	info "hashing ${#fd_files[@]} files..."

	# The helpers below look entries up in the file_hash_* arrays by
	# fd_files index, and file_hash only ever adds to those arrays, so
	# leftovers from an earlier file_hash call would shift every index.
	file_hash_texts=()
	file_hash_names=()
	file_hash_sizes=()
	file_hash_modes=()

	file_hash "${fd_files[@]}"
	! $pretend || error "file duplicate detection"

	# fd_hashmap: hash -> first file seen; fd_dups: later file -> hash.
	# Both are filled by _file_duplicate_scan_index via dynamic scoping.
	local -A fd_hashmap
	local -A fd_dups
	for_each _file_duplicate_scan_index "${!fd_files[@]}"

	info "printing list of duplicates..."

	# The values of fd_dups are the hashes that occur more than once.
	local -a dups=( "${fd_dups[@]}" )
	list_unique dups
	qsort_list dups

	for_each _file_duplicate_scan_check "${dups[@]}"

	info "duplicate scan... done."
}

# _file_duplicate_scan_index - Classifies hashed file number $1.
# Operates on the caller's associative arrays: fd_hashmap (hash -> first
# file seen with it) and fd_dups (later file -> its hash).
_file_duplicate_scan_index() {
	local idx=$1
	local hash=${file_hash_texts[$idx]}
	local fname=${file_hash_names[$idx]}

	# First file with this hash: record it as the owner and stop.
	if [[ -z ${fd_hashmap[$hash]} ]]; then
		fd_hashmap[$hash]=$fname
		return
	fi

	# The hash already has an owner, so this file is a duplicate.
	fd_dups[$fname]=$hash
}

# _file_duplicate_scan_check - Prints the duplicate group for hash $1.
# Emits "-<name>" for the file that fd_hashmap records for the hash, then
# "+<name>" for every other hashed file with the same hash.  Reads the
# caller's fd_hashmap and fd_files plus the file_hash_* arrays.
_file_duplicate_scan_check() {
	local dup_text=$1
	local dup_name=${fd_hashmap[$dup_text]}
	# All loop state is local so that the caller's variables (in
	# particular a caller-side "i") are left alone.
	local i text name

	# printf instead of echo: "echo -n" / "echo -e" would be taken as
	# options, silently dropping files named "n" or "e".
	printf '%s\n' "-$dup_name"
	for ((i = 0; i < ${#fd_files[@]}; i++)); do
		text=${file_hash_texts[$i]}
		[[ "$text" == "$dup_text" ]] || continue
		name=${file_hash_names[$i]}
		[[ "$name" != "$dup_name" ]] || continue
		printf '%s\n' "+$name"
	done
}

# file_duplicate_scan_list - Runs file_duplicate_scan on $2... and stores
# its output lines in the array named by $1.
file_duplicate_scan_list() {
	local var=$1
	shift

	# The scan output is written to a temporary file first and loaded
	# from there into the array.
	local tmp
	tmp=$(cmd_tempfile)
	file_duplicate_scan "$@" >"$tmp"

	mapfile -t "$var" <"$tmp"
}

# for_each_file_duplicate - Calls function $1 once per duplicate pair.
# $2... are items in the format printed by file_duplicate_scan:
#   -<name>  becomes the current source
#   +<name>  is a duplicate; runs "$1 <source> <name>"
# Any other item is reported through error.
for_each_file_duplicate() {
	local func=$1
	shift

	local item src dst
	for item in "$@"; do
		if [[ $item == -* ]]; then
			src=${item:1}
		elif [[ $item == +* ]]; then
			dst=${item:1}
			$func "$src" "$dst"
		else
			error "$item: unknown duplicate item"
		fi
	done
}

# file_duplicate_symlink - Runs file_duplicate_link with the link type set
# to "soft"; all arguments are passed through unchanged.
file_duplicate_symlink() {
	# Local here, yet visible to file_duplicate_link via dynamic scoping.
	local file_duplicate_link_type
	file_duplicate_link_type=soft
	file_duplicate_link "$@"
}
# file_duplicate_link - Replaces duplicate files with links to the first copy.
#   $1     confirmation function; called with the duplicate list and must
#          succeed, otherwise the operation is reported as aborted
#   $2...  arguments handed to file_duplicate_scan_list
# The kind of link comes from $file_duplicate_link_type: "hard" (used when
# the variable is unset or empty) or "soft".
file_duplicate_link() {
	local confirm_func=$1
	shift

	# Check the link type before scanning, so a bad value is reported
	# without hashing anything first.  The ln option is local so that "-s"
	# from a soft run cannot carry over into a later hard run;
	# _file_duplicate_link_item still sees it through dynamic scoping.
	local file_duplicate_link_type=${file_duplicate_link_type:-hard}
	local file_duplicate_link_opt=
	case "$file_duplicate_link_type" in
	hard) ;;
	soft) file_duplicate_link_opt="-s" ;;
	*) error "$file_duplicate_link_type: unknown link type" ;;
	esac

	local -a dups
	file_duplicate_scan_list dups "$@"

	$confirm_func "${dups[@]}" || error "$__FUNCNAME__: aborted"

	for_each_file_duplicate _file_duplicate_link_item "${dups[@]}"
}

# _file_duplicate_link_item - Replaces duplicate $2 with a link to $1.
# The kind of link follows $file_duplicate_link_opt: "-s" gives a symbolic
# link, an empty value a hard link.
_file_duplicate_link_item() {
	local src=$1
	local dst=$2
	local target=$src

	# A symbolic link's target is resolved relative to the directory that
	# holds the link, not the current directory, so a relative $src would
	# dangle whenever $dst lives in another directory.  Store an absolute
	# target instead; it is computed before $dst is removed.
	if [[ ${file_duplicate_link_opt} == -s ]]; then
		target=$(realpath "$src")
	fi

	info "linking: '$src' -> '$dst'"
	# NOTE(review): if ln fails after the rm, $dst is gone; linking under
	# a temporary name and renaming it over $dst would avoid that.
	run rm "$dst"
	run ln ${file_duplicate_link_opt} "$target" "$dst"
}

# file_duplicate_delete - Removes the duplicates reported by the scan,
# leaving the first file of each group in place.
#   $1     confirmation function; called with the duplicate list, and a
#          failure is reported through error as "aborted"
#   $2...  arguments handed to file_duplicate_scan_list
file_duplicate_delete() {
	local confirm_func=$1
	shift

	local -a dups
	file_duplicate_scan_list dups "$@"

	if ! $confirm_func "${dups[@]}"; then
		error "$__FUNCNAME__: aborted"
	fi

	for_each_file_duplicate _file_duplicate_delete_item "${dups[@]}"
}

# _file_duplicate_delete_item - Removes the duplicate ($2) through run.
# $1 (the file the duplicate was matched against) is not used.
_file_duplicate_delete_item() {
	run rm "$2"
}

# View the Script Reference Index


# Generated on Tue Apr 25 21:20:06 PDT 2017 by mcsh i7 v0.18.0.