#!/bin/bash
# file - file and directory support library
set -e
lib_load 'core/qsort'
######
# Library Settings
# core_file_config_init() - Declares the settings used by this library and
# assigns the default values of the scalar ones.
#
# NOTE(review): lib_setting_vars() and lib_setting_arrays() are defined
# outside this file; presumably they register the named variables/arrays as
# library settings -- confirm against their definitions. The defaults are
# assigned after the corresponding lib_setting_vars() call.
core_file_config_init() {
# $file_find_opts[] - Passed to find(1) when called by file_find.
lib_setting_arrays file_find_opts
# $file_find_sort - If set to `true`, output from ``file_find()``
# will be sorted. Defaults to `false`.
lib_setting_vars file_find_sort
file_find_sort=false
# $file_hashbin - Name of default hashing program. Defaults to `sha1sum`.
lib_setting_vars file_hashbin
file_hashbin=sha1sum
# $file_hash_texts[] - Hashing algorithm output (see ``file_hash()``).
# $file_hash_names[] - Hashed file names (see ``file_hash()``).
# $file_hash_sizes[] - Hashed file sizes (see ``file_hash()``).
# $file_hash_modes[] - Hashed file modes (see ``file_hash()``).
lib_setting_arrays \
file_hash_texts file_hash_names \
file_hash_sizes file_hash_modes
}
######
# File I/O
# file_read() - Reads file ($2) into an array variable ($1), one element
# per line, with the trailing newline of each line removed.
#
# The positional parameters are used directly instead of being copied into
# locals: a local would shadow a caller's array of the same name (for
# example `var` or `name`), and the caller would never see the lines.
#
# $1 - Name of array variable to fill
# $2 - Name of file to read
file_read() {
  readarray -t "$1" <"$2"
}
# file_read_func() - Loads the lines of a file ($1) and invokes a command
# (remaining arguments) with those lines appended as extra arguments.
#
# $1 - File name
# $@ - Command (and leading arguments) that receives one argument per line
file_read_func() {
  local src_file=$1
  shift
  local -a content=()
  file_read content "$src_file"
  # The caller's command words come first, then one word per line read.
  "$@" "${content[@]}"
}
# file_capture_func() - Runs a command with its stdout redirected into a
# newly obtained temporary file, and stores that file's name in the
# variable named by $1.
#
# $1 - Name of variable to receive the name of the temporary file.
# $2 - Command to execute
# $@ - Command arguments (optional)
#
# The temporary file name comes from cmd_tempfile(); error() is called when
# that yields an empty name.
file_capture_func() {
  min_args 2 "$@"
  local -n name=$1
  shift
  name=$(cmd_tempfile)
  if [[ -z $name ]]; then
    error "unable to create temporary file"
  fi
  "$@" >"$name"
}
# file_capture() - Copies everything arriving on `stdin` into a temporary
# file by handing `cat` to file_capture_func(); the temporary file's name
# ends up in the variable named by $1.
#
# $1 - Name of variable to receive the name of the temporary file.
file_capture() {
  has_args 1 "$@"
  file_capture_func "${1}" cat
}
######
# File Variable I/O
# file_var_func() - Captures the output of a command ($@) and assigns it
# to the given variable ($1) as a single string.
#
# The output is collected line by line through file_array_func() and the
# lines are then joined with the first character of $IFS (a space by
# default), so newlines in the output become that separator.
#
# $1 - Name of variable to receive output from the command.
# $2 - Command to execute
# $@ - Command arguments (optional)
file_var_func() {
  min_args 2 "$@"
  local -n var=$1
  shift
  local -a lines=()
  # Forward the command and its arguments verbatim, one word per argument.
  file_array_func lines "$@"
  var="${lines[*]}"
}
# file_array_func() - Runs a command ($@) and stores its output, one
# element per line, in the array variable named by $1.
#
# The output is first written to a temporary file by file_capture_func()
# and then loaded back with file_read().
#
# $1 - Name of array variable to receive lines from the command.
# $2 - Command to execute
# $@ - Command arguments (optional)
file_array_func() {
  min_args 2 "$@"
  local dest=$1
  shift
  local capture_path
  file_capture_func capture_path "$@"
  file_read $dest "$capture_path"
}
######
# Directory and Path Functions
# run_mkdir() - Creates directories through ``run mkdir -p``. Prefer this
# function over the bare command in most cases.
#
# $@ - Directories to create
#
# When ``$debug`` runs successfully as a command (e.g. it is `true`), ``-v``
# is added to the mkdir options. The command's stdout is sent to stderr.
run_mkdir() {
  local -a extra=()
  if $debug; then
    extra=(-v)
  fi
  run mkdir -p "${extra[@]}" "$@" >&2
}
# run_pushd() - Runs ``pushd`` on a directory ($1) with its stdout
# discarded; calls error() with a descriptive message if that fails.
#
# $1 - Directory to push onto the directory stack
run_pushd() {
  if ! run pushd "$1" >/dev/null; then
    error "$1: pushd failed"
  fi
}
# run_popd() - Runs ``popd`` with its stdout discarded.
run_popd() {
  run popd 1>/dev/null
}
# run_in_dir() - Runs the command ($@) after changing to a new directory
# ($1), then returns to the previous directory.
#
# $1 - New directory
# $@ - Command to run
#
# Returns the exit status of the command. The previous directory is
# restored even when the command fails, and the command is not run at all
# if the directory change fails.
run_in_dir() {
  min_args 2 "$@"
  local dir=$1
  shift
  run_pushd "$dir" || return
  # Remember the command's status so the popd below cannot mask it.
  local status=0
  run "$@" || status=$?
  run_popd || return
  return "$status"
}
# short_path() - Prints an abbreviated form of a path ($1): the last
# component of the resolved parent directory followed by the file name,
# prefixed with ``.../``.
#
# $1 - Path name to shorten
#
# NOTE(review): the parent is passed through realpath(1) before it is
# compared with '.', so the comparison looks like it always succeeds --
# confirm whether a bare './name' form was ever intended.
short_path() {
  local path=$1
  local parent leaf lead
  parent=$(dirname "$path")
  parent=$(realpath "$parent")
  if [[ $parent != . ]]; then
    parent=$(basename "$parent")
    lead=".../"
  fi
  leaf=$(basename "$path")
  printf '%s\n' "${lead}${parent}/${leaf}"
}
# file_mkdir() - Ensures that the directory part of a file name ($1)
# exists, creating it with run_mkdir() when it is not already a directory.
#
# $1 - File name whose parent directory is wanted
file_mkdir() {
  has_args 1 "$@"
  local target=$1
  local parent
  parent=$(dirname "$target")
  if [[ ! -d $parent ]]; then
    run_mkdir "$parent"
  fi
}
######
# Finding Files
# file_find() - Fills an array with the names of things found by
# ``find(1)``.
#
# The ``$file_find_opts[]`` array is appended to the find command line
# (after the paths, before ``-print``), which allows additional tests and
# options to be passed. When ``$file_find_sort`` runs successfully as a
# command (e.g. it is `true`), the array is passed to qsort_list() after
# it has been filled.
#
# $1 - Name of array variable
# $@ - Path names to search
#
# The find output is staged in a temporary file obtained from
# cmd_tempfile() and read back one element per line.
file_find() {
  min_args 2 "$@"
  local var=$1
  shift
  local fftmp
  fftmp=$(cmd_tempfile)
  find "$@" "${file_find_opts[@]}" -print >"$fftmp"
  # No scratch array is declared here: a local array would hide a caller's
  # array of the same name and leave it unfilled.
  readarray -t "$var" <"$fftmp"
  if $file_find_sort; then
    qsort_list "$var"
  fi
}
######
# File Checking
# file_size() - Prints the size of each given file as reported by
# ``stat`` with the ``%s`` format, one line per file.
#
# $@ - File names
file_size() {
  stat -c '%s' "$@"
}
# file_test() - Applies an existential test operator ($1) of the ``test``
# shell command to each of the given files and calls error() for every
# file that fails it.
#
# $1 - Test operator (for example ``-f`` or ``-d``)
# $@ - File names to check
file_test() {
  min_args 2 "$@"
  local op=$1
  shift
  local path
  for path; do
    # $op is deliberately left unquoted, exactly as it is handed to test.
    if ! [ $op "$path" ]; then
      error "$path: not found"
    fi
  done
}
# file_is_terminal() - Succeeds when the given file descriptor ($1) refers
# to a terminal, as determined by ``test -t``.
#
# $1 - File descriptor number, or one of the names `stdin`, `stdout`,
#      `stderr` (mapped to 0, 1 and 2 respectively)
file_is_terminal() {
  has_args 1 "$@"
  local fd=$1
  if [[ $1 == stdin ]]; then
    fd=0
  elif [[ $1 == stdout ]]; then
    fd=1
  elif [[ $1 == stderr ]]; then
    fd=2
  fi
  test -t $fd
}
######
# File Hashing
# file_hash_raw() - Invokes the ``$file_hashbin`` program, through run(),
# on the given files.
#
# $@ - File names to hash (at least one)
file_hash_raw() {
  min_args 1 "$@"
  # $file_hashbin stays unquoted, so a value containing spaces is split
  # into several command words.
  run $file_hashbin "$@"
}
# file_hash_list() - Hashes the given files with file_hash_raw() and loads
# the resulting output lines into the array variable named by $1.
#
# $1 - Name of array variable to receive one output line per element
# $@ - File names to hash
#
# The output is staged in a temporary file obtained from cmd_tempfile().
file_hash_list() {
  local dest=$1
  shift
  local scratch
  scratch=$(cmd_tempfile)
  file_hash_raw "$@" >"$scratch"
  mapfile -t $dest <"$scratch"
}
# file_hash() - Hashes the given files and feeds every resulting line to
# _file_hash_split(), which appends the pieces to the ``file_hash_*``
# arrays.
#
# $@ - File names to hash (at least one)
file_hash() {
  min_args 1 "$@"
  local -a hash_lines=()
  file_hash_list hash_lines "$@"
  for_each _file_hash_split "${hash_lines[@]}"
}
# _file_hash_split() - Breaks one line of hash program output ($1) into
# its parts and appends them to the ``$file_hash_*[]`` arrays.
#
# $1 - Output line: the hash text, a space, one mode character, and then
#      the file name
#
# The file's size is obtained from file_size() and stored as well.
_file_hash_split() {
  local entry=$1
  # Everything before the first space is the hash text.
  local digest=${entry%% *}
  # What follows that space starts with the mode character.
  local rest=${entry#* }
  local flag=${rest:0:1}
  local path=${rest:1:${#rest}-1}
  local bytes
  bytes=$(file_size "$path")
  list_append file_hash_names "$path"
  list_append file_hash_modes "$flag"
  list_append file_hash_texts "$digest"
  list_append file_hash_sizes "$bytes"
}
# file_hash_print() - Prints the stored hash information by calling
# _file_hash_print_index() for each index from 0 up to the number of
# entries in ``$file_hash_names[]`` minus one.
# This function exists primarily for verification purposes.
file_hash_print() {
  local total=${#file_hash_names[@]}
  local -a slots=()
  local slot
  for ((slot = 0; slot < total; slot++)); do
    slots+=("$slot")
  done
  for_each _file_hash_print_index "${slots[@]}"
}
# _file_hash_print_index() - Prints the stored hash information of one
# file as ``<text> <mode><name> (<size>)``.
#
# $1 - Index into ``$file_hash_*[]`` arrays
_file_hash_print_index() {
  local slot=$1
  echo "${file_hash_texts[$slot]} ${file_hash_modes[$slot]}${file_hash_names[$slot]} (${file_hash_sizes[$slot]})"
}
######
# Duplicate Detection
# file_duplicate_scan() - Scans the provided paths ($@) for duplicate files.
#
# Every regular file found below the paths is hashed with file_hash(); files
# whose hash text equals that of an earlier file are reported on stdout.
# Each group of duplicates is printed as one line ``-<name>`` for the first
# file with that hash, followed by one line ``+<name>`` per further file.
#
# $@ - Path names to search (at least one)
#
# file_hash() appends to the shared ``$file_hash_*[]`` arrays, so this
# function remembers where its own entries start (``$fd_base``) and only
# looks at those. Entries left by an earlier file_hash() call or an earlier
# scan are therefore ignored rather than mistaken for this scan's files.
# error() is called when ``$pretend`` runs successfully as a command.
file_duplicate_scan() {
  min_args 1 "$@"
  info "duplicate scan started..."
  local -a fd_files
  local file_find_opts=( -type f )
  file_find fd_files "$@"
  info "hashing ${#fd_files[@]} files..."
  # Index of the first entry file_hash() is about to append.
  local fd_base=${#file_hash_names[@]}
  file_hash "${fd_files[@]}"
  ! $pretend || error "file duplicate detection"
  local -A fd_hashmap
  local -A fd_dups
  local -a fd_indices=()
  local fd_index
  for ((fd_index = fd_base; fd_index < fd_base + ${#fd_files[@]}; fd_index++)); do
    fd_indices+=("$fd_index")
  done
  for_each _file_duplicate_scan_index "${fd_indices[@]}"
  info "printing list of duplicates..."
  # The values of fd_dups are hash texts; reduce them to a sorted unique list.
  local -a dups=( "${fd_dups[@]}" )
  list_unique dups
  qsort_list dups
  for_each _file_duplicate_scan_check "${dups[@]}"
  info "duplicate scan... done."
}
# _file_duplicate_scan_index() - Records whether the file at one index
# duplicates an earlier file.
# $1 - Index into ``$file_hash_*[]`` arrays
#
# The first file seen with a given hash text is stored in ``$fd_hashmap``
# (hash text -> name); any later file with that hash text is stored in
# ``$fd_dups`` (name -> hash text). Both maps belong to the calling
# file_duplicate_scan().
_file_duplicate_scan_index() {
  local i=$1
  local text name
  text=${file_hash_texts[$i]}
  name=${file_hash_names[$i]}
  if [ "${fd_hashmap[$text]}" ]; then
    fd_dups[$name]=$text
  else
    fd_hashmap[$text]=$name
  fi
}
# _file_duplicate_scan_check() - Prints the full list of duplicate files
# for one hash text ($1).
# The first of a group of duplicate files is prefixed with ``-``; the
# remaining duplicates in the group are prefixed with ``+``.
# $1 - Hash text shared by the group
#
# Only the entries appended by the current scan are examined: the range
# starts at ``$fd_base`` (0 when unset) and spans ``${#fd_files[@]}``
# entries.
_file_duplicate_scan_check() {
  local dup_text=$1
  local dup_name=${fd_hashmap[$dup_text]}
  # printf rather than echo: echo would treat a name such as "n" as the
  # option "-n" and print nothing.
  printf '%s\n' "-$dup_name"
  local first=${fd_base:-0}
  local last=$((first + ${#fd_files[@]}))
  local i text name
  for ((i = first; i < last; i++)); do
    text=${file_hash_texts[$i]}
    [[ "$text" == "$dup_text" ]] || continue
    name=${file_hash_names[$i]}
    [[ "$name" != "$dup_name" ]] || continue
    printf '%s\n' "+$name"
  done
}
# file_duplicate_scan_list() - Runs ``file_duplicate_scan "$@"`` and loads
# its output lines into the array variable named by $1.
#
# $1 - Name of array variable to receive one output line per element
# $@ - Path names handed to file_duplicate_scan()
#
# The output is staged in a temporary file obtained from cmd_tempfile().
file_duplicate_scan_list() {
  local dest=$1
  shift
  local scratch
  scratch=$(cmd_tempfile)
  file_duplicate_scan "$@" >"$scratch"
  mapfile -t $dest <"$scratch"
}
# for_each_file_duplicate() - Walks a list of duplicates in the format
# produced by ``file_duplicate_scan_list()`` and calls a function ($1)
# once per duplicate with two arguments: the original file name and the
# duplicate file name.
#
# $1 - Function (or command) to call
# $@ - List items: ``-<name>`` sets the current original, ``+<name>``
#      names a duplicate of it; anything else is passed to error()
for_each_file_duplicate() {
  local func=$1
  shift
  local entry src dst
  for entry; do
    if [[ $entry == -* ]]; then
      src=${entry:1}
    elif [[ $entry == +* ]]; then
      dst=${entry:1}
      $func "$src" "$dst"
    else
      error "$entry: unknown duplicate item"
    fi
  done
}
######
# Duplicate Elimination
# file_duplicate_symlink() - Calls file_duplicate_link() with the link
# type set to `soft`, so that duplicates get replaced by symbolic links.
#
# $@ - Passed unchanged to file_duplicate_link()
file_duplicate_symlink() {
  local file_duplicate_link_type
  file_duplicate_link_type=soft
  file_duplicate_link "$@"
}
# file_duplicate_link() - Finds all duplicates and eliminates them
# by replacing them with links.
#
# $1 - Confirmation function; it is called with the list of duplicates and
#      must succeed for the linking to go ahead
# $@ - Path names to scan for duplicates
#
# The link type is taken from ``$file_duplicate_link_type`` (`hard` when
# unset or empty, or `soft`); any other value is passed to error(). The
# matching ln option is kept in a local ``$file_duplicate_link_opt`` that
# _file_duplicate_link_item() reads. Keeping it local means a soft-link run
# cannot leave ``-s`` behind for a later hard-link run. The link type is
# checked before the scan so that a bad value is reported without scanning.
#
# NOTE(review): ``$__FUNCNAME__`` is not set anywhere in this file;
# presumably the surrounding framework provides it -- confirm.
file_duplicate_link() {
  local confirm_func=$1
  shift
  local file_duplicate_link_type=${file_duplicate_link_type:-hard}
  local file_duplicate_link_opt=
  case "$file_duplicate_link_type" in
    hard) ;;
    soft) file_duplicate_link_opt="-s" ;;
    *) error "$file_duplicate_link_type: unknown link type" ;;
  esac
  local -a dups
  file_duplicate_scan_list dups "$@"
  $confirm_func "${dups[@]}" || error "$__FUNCNAME__: aborted"
  for_each_file_duplicate _file_duplicate_link_item "${dups[@]}"
}
# _file_duplicate_link_item() - Replaces one duplicate file with a link to
# the original: the duplicate is removed and then re-created with ``ln``.
#
# $1 - Original file name (link target)
# $2 - Duplicate file name (replaced by the link)
#
# ``$file_duplicate_link_opt`` is expanded unquoted, so an empty value adds
# no argument to the ln command line.
# NOTE(review): with ``-s`` and a relative $1, the link target is resolved
# relative to the directory of $2 -- confirm callers pass suitable paths.
_file_duplicate_link_item() {
  local original=$1 duplicate=$2
  info "linking: '$original' -> '$duplicate'"
  run rm "$duplicate"
  run ln ${file_duplicate_link_opt} "$original" "$duplicate"
}
# file_duplicate_delete() - Scans for duplicates and removes each
# duplicate file, leaving the original of every group in place.
#
# $1 - Confirmation function; it is called with the list of duplicates,
#      and error() is called when it fails
# $@ - Path names to scan for duplicates
file_duplicate_delete() {
  local confirm=$1
  shift
  local -a found
  file_duplicate_scan_list found "$@"
  if ! $confirm "${found[@]}"; then
    error "$__FUNCNAME__: aborted"
  fi
  for_each_file_duplicate _file_duplicate_delete_item "${found[@]}"
}
# _file_duplicate_delete_item() - Removes one duplicate file through
# ``run rm``.
#
# $1 - Original file name (not used)
# $2 - Duplicate file name to remove
_file_duplicate_delete_item() {
  local victim=$2
  run rm "$victim"
}
# Generated on Wed Jun 28 07:39:38 PDT 2017 by mcsh d14 v0.20.0.