#!/bin/bash

# A script to find duplicate files in a directory and its subdirectories
# by comparing their MD5 hashes. It takes the directory path as an argument.

# Check if a directory path is provided as an argument
# Require a non-empty first argument. The usage text is a diagnostic, so it
# is written to stderr to keep stdout reserved for the duplicate listing.
# ${1:-} keeps the test well-defined even when no argument was passed.
if [[ -z "${1:-}" ]]; then
    printf 'Usage: %s <directory_path>\n' "$0" >&2
    exit 1
fi

# Check if the provided path is a valid directory
# Abort when the first argument does not name an existing directory.
# The message is a diagnostic, so it goes to stderr rather than stdout;
# printf '%s' prints the path verbatim whatever characters it contains.
if [[ ! -d "$1" ]]; then
    printf "Error: The provided path '%s' is not a valid directory.\n" "$1" >&2
    exit 1
fi

# Tell the user what is being scanned and that it may be slow.
# One printf call emits both lines, each followed by a newline.
printf '%s\n' \
    "Scanning for duplicate files in '$1'..." \
    "This may take a while depending on the number and size of files."

#######################################
# Print groups of regular files under a directory that share an MD5 hash.
#
# Pipeline stages:
#   find   - lists every regular file, NUL-terminated so any name is safe.
#   xargs  - runs md5sum on batches of names; -r skips the run entirely when
#            find produced nothing.
#   md5sum - -z emits NUL-terminated records and disables file name escaping,
#            so the 32-character hash always starts at column 1 (without -z,
#            names containing a backslash or newline get a leading '\' that
#            shifts the hash and hides those duplicates from uniq).
#   sort   - groups identical hashes; LC_ALL=C gives byte-wise, locale-
#            independent ordering.
#   uniq   - -w 32 compares only the hash; --all-repeated=separate prints
#            every member of each repeated group, groups separated by an
#            empty record.
#   tr     - converts the NUL terminators back to newlines for display.
#
# Requires GNU findutils and GNU coreutils.
# Arguments: $1 - directory to scan
# Outputs:   "<md5>  <path>" lines on stdout, one blank line between groups.
#            A file name that itself contains a newline spans several lines.
# Returns:   exit status of the last pipeline stage (tr).
#######################################
find_duplicates() {
    local dir=$1

    # A path starting with '-' would be parsed by find as an expression;
    # anchoring it with './' makes it unambiguous.
    case "$dir" in
        -*) dir="./$dir" ;;
    esac

    # 2>/dev/null is intentional: files that cannot be read are skipped
    # silently instead of interleaving errors with the results.
    find "$dir" -type f -print0 \
        | xargs -0 -r md5sum -z -- 2>/dev/null \
        | LC_ALL=C sort -z \
        | uniq -z -w 32 --all-repeated=separate \
        | tr '\0' '\n'
}

find_duplicates "$1"
