git_cleaner.py:递归对所有仓库运行 git gc
这个脚本 git_cleaner.py 会递归搜索指定目录(默认为当前目录)下的 Git 仓库,并对每个仓库执行 git gc。它会在运行 git gc 前后分别记录 .git 目录的大小和文件数量,让你清楚地看到回收了多少空间、删除了多少文件。脚本最后还会输出所有已处理仓库的汇总报告。
git_cleaner.py
#!/usr/bin/env python3
import os
import subprocess
import argparse
import sys
def get_dir_stats(path):
    """Return a ``(total_bytes, file_count)`` tuple for the tree under *path*.

    The tree is traversed with ``os.walk``. Symbolic links are excluded from
    both the byte total and the file count. A file that cannot be stat'ed
    (for example because it was deleted while the walk was in progress) is
    skipped instead of aborting the whole measurement.

    Args:
        path: Directory to measure. A path that does not exist yields
            ``(0, 0)`` because ``os.walk`` produces nothing for it.

    Returns:
        Tuple of (sum of file sizes in bytes, number of files counted).
    """
    total_size = 0
    file_count = 0
    for dirpath, _, filenames in os.walk(path):
        for name in filenames:
            file_path = os.path.join(dirpath, name)
            if os.path.islink(file_path):
                continue
            try:
                size = os.path.getsize(file_path)
            except OSError:
                # The file vanished or is unreadable; leave it out of both
                # totals so size and count stay consistent with each other.
                continue
            total_size += size
            file_count += 1
    return total_size, file_count
def format_size(bytes_size):
    """Convert a byte count into a human-readable string.

    The value is divided by 1024 until it fits the current unit, walking
    through B, KB, MB and GB; anything larger is reported in TB. Negative
    values keep their sign and are scaled by magnitude.

    The unit decision is made on the value *as it will be printed* (two
    decimals), so a number that rounds up to 1024.00 is promoted to the next
    unit instead of being shown as e.g. "1024.00 KB".

    Args:
        bytes_size: Size in bytes (int or float, may be negative).

    Returns:
        A string such as ``"1.50 KB"``.
    """
    value = bytes_size
    for unit in ("B", "KB", "MB", "GB"):
        rendered = f"{value:.2f}"
        # Compare the rounded text, not the raw value, against the threshold.
        if abs(float(rendered)) < 1024:
            return f"{rendered} {unit}"
        value /= 1024
    return f"{value:.2f} TB"
def main():
    """Run ``git gc`` in every repository found below a directory.

    Command line: one optional positional argument, the directory to search
    (default ``.``). Any option the parser does not recognise is appended to
    the ``git gc`` command line unchanged.

    For each directory that contains a ``.git`` sub-directory the size and
    file count of that ``.git`` directory are measured before and after
    ``git gc``; the per-repository difference and a final summary are
    printed. Only positive differences are added to the summary totals.

    Exits with status 1 when the target is not a directory or when the
    ``git`` executable cannot be started.
    """
    parser = argparse.ArgumentParser(
        description="递归运行 'git gc',并报告节省的空间和文件数。"
    )
    parser.add_argument(
        "target_dir",
        nargs="?",
        default=".",
        help="要搜索的目录(默认:'.')"
    )
    # Unknown options are collected rather than rejected so they can be
    # forwarded to `git gc` below.
    args, unknown_args = parser.parse_known_args()

    target_path = os.path.abspath(args.target_dir)
    if not os.path.isdir(target_path):
        print(f"错误:{target_path} 不是目录。")
        sys.exit(1)

    total_saved_bytes = 0
    total_files_removed = 0
    repos_processed = 0

    print(f"--- 正在扫描:{target_path} ---")
    for root, dirs, _ in os.walk(target_path):
        if ".git" not in dirs:
            continue

        # Prune in place so os.walk does not descend into the .git directory.
        dirs.remove(".git")

        repos_processed += 1
        git_dir = os.path.join(root, ".git")

        # 1. Record the statistics before optimisation.
        size_before, files_before = get_dir_stats(git_dir)
        print(f"\n[{repos_processed}] 正在优化:{root}")

        # 2. Run git gc; -C makes git operate inside the given directory.
        cmd = ["git", "-C", root, "gc"] + unknown_args
        try:
            subprocess.run(cmd, check=True, capture_output=True)
        except FileNotFoundError:
            # The git executable itself could not be started; no repository
            # can be processed, so stop instead of failing once per repo.
            print(" [!] 错误:未找到 git 可执行文件。")
            sys.exit(1)
        except subprocess.CalledProcessError as e:
            # git output is not guaranteed to be valid UTF-8.
            message = e.stderr.decode(errors="replace").strip()
            print(f" [!] 错误:{message}")
            continue

        # 3. Record the statistics after optimisation.
        size_after, files_after = get_dir_stats(git_dir)
        saved_size = size_before - size_after
        removed_files = files_before - files_after

        # A repository that grew must not reduce the overall totals.
        total_saved_bytes += max(0, saved_size)
        total_files_removed += max(0, removed_files)

        print(f" 文件:{files_before} -> {files_after}(删除 {removed_files} 个)")
        print(f" 大小:{format_size(size_before)} -> {format_size(size_after)}(节省 {format_size(saved_size)})")

    # Final summary table.
    print("\n" + "="*45)
    print(f"{'最终汇总':^45}")
    print("-" * 45)
    print(f" 已处理仓库数 : {repos_processed}")
    print(f" 总回收空间 : {format_size(total_saved_bytes)}")
    print(f" 总删除文件数 : {total_files_removed}")
    print("="*45)
if __name__ == "__main__":
    main()

示例输出
git_cleaner_output.txt
[...]
[433] Optimizing: /home/uli/dev/FlareDNS
Files: 73 -> 33 (40 removed)
Size: 70.14 KB -> 47.03 KB (23.10 KB saved)
=============================================
FINAL SUMMARY
---------------------------------------------
Repositories Processed : 433
Total Space Reclaimed : 238.14 MB
Total Files Removed : 21612
=============================================
If this post helped you, please consider buying me a coffee or donating via PayPal to support research & publishing of new posts on TechOverflow