#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2025 Bradley Dean <bjdean@bjdean.id.au>
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""
Git Commit Message Regenerator

This script analyzes all commits in a git repository, uses Claude to generate
better commit messages based on diffs, and outputs a Python script to apply the changes.

Uses claude_agent_sdk which leverages your local Claude Code session (no API charges).
"""

import asyncio
import subprocess
import json
import sys
import argparse
import os
import shutil
from datetime import datetime
from pathlib import Path
from claude_agent_sdk import query, ClaudeAgentOptions, AssistantMessage, TextBlock

# Global output log file handle
# NOTE(review): appears unused — main() holds its own file handle and passes
# it to TeeOutput directly; confirm and consider removing this global.
_output_log = None

class TeeOutput:
    """File-like object that duplicates every write to stdout and a log file.

    Installed over sys.stdout so that plain print() calls are mirrored
    into the session log. A falsy log handle disables the mirroring.
    """

    def __init__(self, log_file):
        # Capture the real stdout at construction time so writes still
        # reach the terminal after sys.stdout is replaced with this object.
        self.terminal = sys.stdout
        self.log = log_file

    def write(self, message):
        """Write *message* to the terminal and, if present, the log file."""
        self.terminal.write(message)
        if not self.log:
            return
        self.log.write(message)
        # Flush per write so the log file stays current even on an abort.
        self.log.flush()

    def flush(self):
        """Flush both underlying streams."""
        self.terminal.flush()
        if self.log:
            self.log.flush()

def print_and_log(message='', **kwargs):
    """Print *message* to stdout (mirrored into the log via TeeOutput).

    sys.stdout is redirected through TeeOutput while main() runs, so a
    single print() reaches both the terminal and the session log file.
    """
    print(message, **kwargs)

def log_error(message):
    """Report an error on stderr and mirror a tagged copy into the log.

    The second print goes to sys.stdout, which TeeOutput redirects into
    the session log file, so errors appear in both places.
    """
    # Primary error channel for the user watching the terminal.
    print(message, file=sys.stderr)
    # Duplicate into stdout so the tee'd session log records it as well.
    print(f"ERROR: {message}", file=sys.stdout)

def run_git_command(args, check=True):
    """Run ``git`` with *args* and return its stripped stdout.

    Args:
        args: list of git arguments, e.g. ``['status', '--porcelain']``.
        check: when True, raise subprocess.CalledProcessError on a
            non-zero exit status; when False, failures yield ''.

    Returns:
        The command's stdout with surrounding whitespace removed.
    """
    completed = subprocess.run(
        ['git'] + args,
        capture_output=True,
        text=True,
        check=check,
    )
    return completed.stdout.strip()

def check_git_filter_repo():
    """Return True if the git-filter-repo extension is installed.

    Probes by running ``git filter-repo --version`` and checking the exit
    status. BUG FIX: if ``git`` itself is missing from PATH, subprocess
    raises FileNotFoundError; treat that as "not installed" rather than
    crashing the safety check.
    """
    try:
        result = subprocess.run(
            ['git', 'filter-repo', '--version'],
            capture_output=True,
            text=True
        )
    except FileNotFoundError:
        # git binary not found at all.
        return False
    return result.returncode == 0

def check_repo_is_clean():
    """Return True when the working tree has no uncommitted changes.

    ``git status --porcelain`` prints one line per dirty path; an empty
    result therefore means the repository is clean.
    """
    return not run_git_command(['status', '--porcelain'])

def get_uncommitted_files():
    """Return the list of paths with uncommitted changes.

    Parses ``git status --porcelain`` output; returns [] for a clean tree.
    """
    status = run_git_command(['status', '--porcelain'])
    if not status:
        return []
    # Porcelain format is "XY filename": two status characters (staged /
    # unstaged) plus a separator, so the path begins at column 3.
    return [line[3:].strip() for line in status.split('\n') if line.strip()]

def is_safe_uncommitted_file(filename):
    """Return True for uncommitted files this script itself generates.

    The regeneration run creates a JSONL change log and a session output
    log in the working tree; both are safe to leave uncommitted.
    """
    # (prefix, suffix) pairs describing the two script-generated file kinds.
    generated_patterns = (
        ('commit-regeneration-log-', '.jsonl'),
        ('commit-regeneration-output-', '.log'),
    )
    return any(
        filename.startswith(prefix) and filename.endswith(suffix)
        for prefix, suffix in generated_patterns
    )

def get_current_branch():
    """Return the name of the currently checked-out branch."""
    branch = run_git_command(['branch', '--show-current'])
    return branch

def check_for_unpushed_commits():
    """Report whether HEAD contains commits the upstream branch lacks.

    Returns:
        True when unpushed commits exist, False when fully pushed, and
        None when no upstream branch is configured.
    """
    upstream = run_git_command(['rev-parse', '--abbrev-ref', '@{upstream}'], check=False)
    if not upstream:
        # No tracking branch configured for the current branch.
        return None

    # List commits reachable from HEAD but not the upstream ref.
    ahead = run_git_command(['log', f'{upstream}..HEAD', '--oneline'], check=False)
    if not ahead:
        return False
    return len(ahead) > 0

def get_all_commits():
    """Return all commit hashes reachable from HEAD, oldest first.

    Note: ``rev-list --reverse`` emits oldest-to-newest order (the
    original docstring claimed the opposite).
    """
    output = run_git_command(['rev-list', '--reverse', 'HEAD'])
    # BUG FIX: ''.split('\n') yields [''], not []; guard the empty case so
    # callers never see a bogus empty-string "hash".
    return output.split('\n') if output else []

def get_commit_info(commit_hash):
    """Collect the message, author date, and diff for one commit.

    Returns a dict with keys 'hash', 'message', 'author_date', 'diff'.
    """
    return {
        'hash': commit_hash,
        # %B: the raw commit message (subject plus body).
        'message': run_git_command(['log', '-1', '--format=%B', commit_hash]),
        # %aD: author date in RFC 2822 format.
        'author_date': run_git_command(['log', '-1', '--format=%aD', commit_hash]),
        # Empty --format suppresses the header so only the patch remains.
        'diff': run_git_command(['show', '--format=', commit_hash]),
    }

def is_message_useful(message):
    """Heuristically decide whether a commit message is already meaningful.

    A message is considered useful only when it is not one of the known
    throwaway phrases AND is longer than 20 characters with at least one
    space (i.e. multi-word).
    """
    generic_messages = [
        'updated notes',
        'update notes',
        'updates',
        'update',
        'wip',
        'work in progress',
        'fixes',
        'fix',
        'changes',
        'misc',
        'temp'
    ]

    text = message.strip()

    # Known filler messages are never useful, regardless of length.
    if text.lower() in generic_messages:
        return False

    # Reasonably descriptive: more than 20 chars and contains a space.
    return len(text) > 20 and ' ' in text

def validate_message_changed(original_message, new_message, commit_hash):
    """Check that *new_message* genuinely differs from *original_message*.

    Guards against silent API failures: a regeneration that returns the
    original text unchanged, or that merely repeats the original inside a
    "Claude Notes:" section, indicates something went wrong upstream.

    Returns True when the change is meaningful, False otherwise (logging
    the details via log_error).
    """
    original_normalized = original_message.strip()
    new_normalized = new_message.strip()

    # Case 1: nothing changed at all.
    if original_normalized == new_normalized:
        log_error(f"VALIDATION ERROR for {commit_hash}: New message is identical to original")
        log_error(f"  Original: {original_normalized}")
        log_error(f"  New: {new_normalized}")
        return False

    # Case 2: "original\n\nClaude Notes: original" — the enhanced form
    # where the notes just duplicate the original message.
    marker = '\n\nClaude Notes: '
    if marker in new_message:
        head, _, tail = new_message.partition(marker)
        claude_notes = tail.strip()
        if head.strip() == original_normalized and claude_notes == original_normalized:
            log_error(f"VALIDATION ERROR for {commit_hash}: Claude Notes section is identical to original message")
            log_error(f"  Original: {original_normalized}")
            log_error(f"  Claude Notes: {claude_notes}")
            return False

    return True

async def generate_commit_message(diff, original_message, commit_hash, wait_and_retry=False):
    """Ask Claude to produce an improved commit message for one commit.

    Args:
        diff: full diff text of the commit (truncated to ~4000 chars for
            the prompt).
        original_message: existing message, included as context and used
            as the fallback return value.
        commit_hash: used only in error reporting.
        wait_and_retry: when True, quota/rate-limit errors trigger up to
            5 retries with exponential backoff instead of aborting.

    Returns the generated message, or *original_message* when generation
    fails for a non-quota reason or yields empty output. Calls sys.exit(1)
    on unrecoverable quota errors or when retries are exhausted.
    """
    # Truncate diff if too long (max ~4000 chars to stay within reasonable token limits)
    diff_preview = diff[:4000]
    if len(diff) > 4000:
        diff_preview += "\n\n... (diff truncated)"

    prompt = f"""Analyze this git diff and generate a concise commit message (1-2 sentences max).

Original commit message: {original_message}

Diff:
{diff_preview}

Generate a clear, specific commit message that describes what changed and why (if apparent).
Use imperative mood (e.g., "Add feature" not "Added feature").
Be concise but informative. Do not include commit hash or metadata.
Return ONLY the commit message text, nothing else."""

    options = ClaudeAgentOptions(
        max_turns=1,
        system_prompt="You are a git commit message expert. Generate concise, meaningful commit messages based on diffs. Be brief and specific."
    )

    max_retries = 5 if wait_and_retry else 0
    retry_count = 0
    base_wait = 60  # Start with 60 seconds

    while retry_count <= max_retries:
        generated = ""

        try:
            # Keep only the final TextBlock — later blocks overwrite earlier ones.
            async for message in query(prompt=prompt, options=options):
                if isinstance(message, AssistantMessage):
                    for block in message.content:
                        if isinstance(block, TextBlock):
                            generated = block.text.strip()

            # Strip one pair of surrounding quotes Claude might have added.
            # BUG FIX: the length check prevents mangling a lone quote char
            # (e.g. '"'[1:-1] would become '').
            if len(generated) >= 2:
                if generated.startswith('"') and generated.endswith('"'):
                    generated = generated[1:-1]
                if generated.startswith("'") and generated.endswith("'"):
                    generated = generated[1:-1]

            return generated if generated else original_message

        except Exception as e:
            error_str = str(e).lower()

            # Heuristic match on quota / rate-limit error wording.
            is_quota_error = any(keyword in error_str for keyword in [
                'quota', 'rate limit', 'rate_limit', 'too many requests',
                '429', 'limit exceeded', 'usage limit'
            ])

            if is_quota_error:
                if wait_and_retry and retry_count < max_retries:
                    wait_time = base_wait * (2 ** retry_count)  # Exponential backoff
                    log_error(f"Quota/rate limit error for {commit_hash}: {e}")
                    log_error(f"Waiting {wait_time} seconds before retry {retry_count + 1}/{max_retries}...")
                    # BUG FIX: time.sleep() inside a coroutine blocks the
                    # whole event loop; asyncio.sleep yields control instead.
                    await asyncio.sleep(wait_time)
                    retry_count += 1
                    continue
                else:
                    log_error(f"Quota/rate limit error for {commit_hash}: {e}")
                    log_error("Aborting. Use --wait-and-retry to automatically retry after quota errors, or use --continue to resume later.")
                    sys.exit(1)
            else:
                # Non-quota error - log and return original
                log_error(f"Error generating message for {commit_hash}: {e}")
                return original_message

    # If we exhausted all retries
    log_error(f"Exhausted all {max_retries} retries for {commit_hash}")
    log_error("Aborting. Use --continue to resume later.")
    sys.exit(1)

def parse_args():
    """Parse command line arguments.

    Returns an argparse.Namespace with: continue_from, apply_log,
    dry_run, test, force, wait_and_retry.
    """
    # RawDescriptionHelpFormatter preserves the hand-formatted epilog below.
    parser = argparse.ArgumentParser(
        description="Regenerate git commit messages using Claude AI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s                                    # Start new regeneration
  %(prog)s --dry-run                          # Preview changes without applying
  %(prog)s --test 5                           # Test on first 5 commits only
  %(prog)s --continue log.jsonl               # Continue from existing log
  %(prog)s --apply log.jsonl                  # Apply changes from existing log
  %(prog)s --apply log.jsonl --dry-run        # Preview what would be applied

The script generates:
  - commit-regeneration-log-TIMESTAMP.jsonl  # Log of all changes

IMPORTANT - Backup Your Repository First:
  This script DESTRUCTIVELY rewrites git history. The built-in git backup mechanism
  does NOT work because git-filter-repo rewrites all refs, including backup branches.

  Before applying changes, create a FILESYSTEM backup:
    tar -czf ../repo-backup-$(date +%%Y%%m%%d_%%H%%M%%S).tar.gz .

  Or clone the entire repository:
    cp -r /path/to/repo /path/to/repo-backup

Safety Features:
  - Uses git-filter-repo
  - Dry-run mode to preview changes
  - Explicit "yes" confirmation required before applying
  - Validates repository state before modifications
  - Test mode to try on subset of commits first

After Applying Changes - Pushing to Remote:

  The script automatically restores your remote configuration and branch tracking
  after git-filter-repo removes them. You still need to restore remote tracking refs:

    git fetch origin                         # Restore remote tracking refs
    git push --force-with-lease              # Safe force-push

  After fetch, 'git pull' and 'git push' commands will work normally.

  The --force-with-lease flag:
    ✓ Checks you have latest remote commits before overwriting
    ✓ Prevents accidentally destroying unseen work
    ✓ Perfect for personal repos or single contributors
    ✓ Requires git fetch first (to restore remote tracking refs)

  Important if you have multiple clones or collaborators:
    - Other local clones must be updated:
      git fetch origin && git reset --hard origin/main
    - Coordinate with collaborators before force-pushing
    - Their unpushed work will need rebasing onto new history

  Recovery if something goes wrong:
    # Extract your backup tarball to a safe location
    tar -xzf ../repo-backup-TIMESTAMP.tar.gz -C ../repo-restored
    # Or copy from your cloned backup
    cp -r /path/to/repo-backup/.git .
    git push --force-with-lease              # Restore remote

Both log files are updated incrementally, so you can abort and review progress.
"""
    )
    # dest is needed because 'continue' is a Python reserved word.
    parser.add_argument(
        '--continue',
        dest='continue_from',
        metavar='LOGFILE',
        help='Continue from an existing log file'
    )
    parser.add_argument(
        '--apply',
        dest='apply_log',
        metavar='LOGFILE',
        help='Apply changes from an existing log file'
    )
    # --dry-run combines with --apply to preview without rewriting history.
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Preview changes without applying them'
    )
    parser.add_argument(
        '--test',
        type=int,
        metavar='N',
        help='Test mode: only process first N commits'
    )
    parser.add_argument(
        '--force',
        action='store_true',
        help='Skip interactive confirmation (use with caution)'
    )
    parser.add_argument(
        '--wait-and-retry',
        action='store_true',
        help='When quota or rate limit errors occur, wait and retry instead of aborting'
    )
    return parser.parse_args()

def load_processed_commits(log_filename):
    """Read a JSONL change log and return the set of commit hashes in it.

    Exits the process when the file is missing. Malformed lines are
    skipped with a warning on stderr rather than aborting, so a log that
    was truncated mid-write can still be resumed.
    """
    if not os.path.exists(log_filename):
        print(f"Error: Log file not found: {log_filename}", file=sys.stderr)
        sys.exit(1)

    processed = set()
    with open(log_filename, 'r') as f:
        for line in f:
            try:
                processed.add(json.loads(line.strip())['commit'])
            except (json.JSONDecodeError, KeyError) as e:
                print(f"Warning: Skipping malformed log entry: {e}", file=sys.stderr)
    return processed

def perform_safety_checks(args):
    """Perform comprehensive safety checks before modifying repository.

    Checks (in order): git-filter-repo availability, working-tree
    cleanliness (script-generated log files are tolerated), and unpushed
    commits. May prompt interactively unless args.force is set.
    Returns True when it is safe to proceed, False otherwise.
    """
    print("\nPerforming safety checks...")
    print("=" * 60)

    # Check for git-filter-repo
    if check_git_filter_repo():
        print("✓ git-filter-repo is installed")
    else:
        print("❌  git-filter-repo not found, install PyPI package: git-filter-repo")
        return False

    # Check repository state
    uncommitted_files = get_uncommitted_files()
    if uncommitted_files:
        # Separate safe files from potentially problematic ones
        safe_files = [f for f in uncommitted_files if is_safe_uncommitted_file(f)]
        unsafe_files = [f for f in uncommitted_files if not is_safe_uncommitted_file(f)]

        if unsafe_files:
            print("❌ Repository has uncommitted changes:")
            for f in unsafe_files[:10]:  # Show first 10
                print(f"   - {f}")
            if len(unsafe_files) > 10:
                print(f"   ... and {len(unsafe_files) - 10} more")
            print("\nPlease commit or stash your changes before proceeding.")
            print("(Script-generated log files are automatically ignored)")
            return False
        elif safe_files:
            # Only script-generated files
            print("⚠️  Uncommitted files detected (script-generated):")
            for f in safe_files:
                print(f"   - {f}")
            if not args.force:
                response = input("\nThese files are safe to ignore. Continue? [y/N]: ")
                if response.lower() != 'y':
                    print("Aborted.")
                    return False
            print("✓ Repository is clean (ignoring script-generated files)")
    else:
        print("✓ Repository is clean")

    # Check for unpushed commits.
    # Order matters: a truthy value means unpushed commits exist; None
    # means no upstream is configured, so the truthiness test runs first.
    unpushed = check_for_unpushed_commits()
    if unpushed:
        print("⚠️  Warning: You have unpushed commits")
        if not args.force:
            response = input("Rewriting history will require force-push. Continue? [y/N]: ")
            if response.lower() != 'y':
                print("Aborted.")
                return False
    elif unpushed is None:
        print("ℹ️  No upstream branch configured")
    else:
        print("✓ All commits are pushed")

    print("=" * 60)
    return True

def show_change_summary(log_filename):
    """Print counts and a few example entries from a JSONL change log.

    Each log line is a JSON object with 'commit', 'original', 'new',
    and 'action' ('regenerated' or 'enhanced') keys.
    """
    print("\nChange Summary:")
    print("=" * 60)

    with open(log_filename, 'r') as f:
        changes = [json.loads(line.strip()) for line in f]

    total = len(changes)
    regenerated = len([c for c in changes if c['action'] == 'regenerated'])
    enhanced = len([c for c in changes if c['action'] == 'enhanced'])

    print(f"Total commits: {total}")
    print(f"  Regenerated: {regenerated}")
    print(f"  Enhanced: {enhanced}")
    print()

    # Show at most three worked examples, truncated for readability.
    print("Example changes (first 3):")
    for i, change in enumerate(changes[:3], 1):
        print(f"\n{i}. Commit: {change['commit'][:12]}")
        print(f"   Original: {change['original'][:60]}...")
        print(f"   New: {change['new'][:60]}...")

    if total > 3:
        print(f"\n... and {total - 3} more changes")

    print("=" * 60)

def save_git_remotes():
    """Snapshot remote URLs/refspecs and branch tracking configuration.

    git-filter-repo strips remotes as a safety measure; this snapshot
    lets restore_git_remotes() re-create them afterwards.

    Returns a dict: {'remotes': {name: {'url', 'fetch'}},
                     'branches': {name: {'remote', 'merge'}}}.
    """
    config = {'remotes': {}, 'branches': {}}

    # Record each configured remote's URL and fetch refspec.
    remote_list = run_git_command(['remote'], check=False)
    for remote in (remote_list.split('\n') if remote_list else []):
        if not remote:
            continue
        url = run_git_command(['remote', 'get-url', remote], check=False)
        fetch = run_git_command(['config', '--get', f'remote.{remote}.fetch'], check=False)
        if url:
            # Fall back to the conventional refspec when none is configured.
            config['remotes'][remote] = {
                'url': url,
                'fetch': fetch or f'+refs/heads/*:refs/remotes/{remote}/*',
            }

    # Record which remote/merge ref each local branch tracks.
    branches = run_git_command(['for-each-ref', '--format=%(refname:short)', 'refs/heads/'], check=False)
    for branch in (branches.split('\n') if branches else []):
        if not branch:
            continue
        remote = run_git_command(['config', '--get', f'branch.{branch}.remote'], check=False)
        merge = run_git_command(['config', '--get', f'branch.{branch}.merge'], check=False)
        if remote:
            config['branches'][branch] = {'remote': remote, 'merge': merge}

    return config

def restore_git_remotes(config):
    """Re-create remotes and branch tracking from a save_git_remotes() snapshot.

    All git calls use check=False: re-adding an existing remote or
    setting config on a missing branch should not abort the restore.
    """
    # Re-register each remote and its fetch refspec.
    for remote, remote_config in config.get('remotes', {}).items():
        run_git_command(['remote', 'add', remote, remote_config['url']], check=False)
        fetch_spec = remote_config.get('fetch')
        if fetch_spec:
            run_git_command(['config', f'remote.{remote}.fetch', fetch_spec], check=False)

    # Re-attach branch tracking configuration.
    for branch, branch_config in config.get('branches', {}).items():
        tracked_remote = branch_config.get('remote')
        if tracked_remote:
            run_git_command(['config', f'branch.{branch}.remote', tracked_remote], check=False)
        merge_ref = branch_config.get('merge')
        if merge_ref:
            run_git_command(['config', f'branch.{branch}.merge', merge_ref], check=False)

def apply_changes_with_filter_repo(commit_messages):
    """Rewrite commit messages using git-filter-repo's Python API.

    Args:
        commit_messages: mapping of original commit hash -> new message.

    Returns True on success, or None when git_filter_repo cannot be
    imported (the caller treats None as "fall back to another method").
    Raises subprocess.CalledProcessError if the filter run fails.
    """
    try:
        import git_filter_repo as fr
    except ImportError:
        return None  # Signal to use fallback method

    import tempfile

    # Build a one-shot filter script. Running it in a subprocess keeps
    # git-filter-repo's process-global state out of this interpreter.
    script_source = (
        "#!/usr/bin/env python3\n"
        "import sys\n"
        "import git_filter_repo as fr\n\n"
        f"commit_messages = {commit_messages!r}\n\n"
        "def message_callback(commit, metadata):\n"
        "    original_id = commit.original_id.decode('utf-8')\n"
        "    if original_id in commit_messages:\n"
        "        commit.message = commit_messages[original_id].encode('utf-8')\n\n"
        "args = fr.FilteringOptions.parse_args(['--force'])\n"
        "filter = fr.RepoFilter(args, commit_callback=message_callback)\n"
        "filter.run()\n"
    )
    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as tf:
        tf.write(script_source)
        filter_script = tf.name

    try:
        os.chmod(filter_script, 0o755)
        subprocess.run([sys.executable, filter_script], check=True)
        return True
    finally:
        # Always remove the temporary script, even on failure.
        os.unlink(filter_script)

def apply_changes(log_filename, dry_run=False, force=False):
    """Apply commit message changes using git-filter-repo.

    Args:
        log_filename: JSONL log produced by the analysis phase.
        dry_run: when True, only show the summary — no history rewrite.
        force: when True, skip the interactive 'yes' confirmation.

    Returns True on success (or dry run), False on abort/failure.
    """

    if dry_run:
        print("\n🔍 DRY RUN MODE - No changes will be applied")
        show_change_summary(log_filename)
        return True

    # Show summary
    show_change_summary(log_filename)

    # Get confirmation
    if not force:
        print("\n" + "=" * 60)
        print("⚠️  CRITICAL WARNING: DESTRUCTIVE OPERATION")
        print("=" * 60)
        print("\nThis will PERMANENTLY REWRITE git history!")
        print("Built-in git backups DO NOT WORK - git-filter-repo rewrites ALL refs.")
        print("\nBefore proceeding, you MUST create a filesystem backup:")
        print("  tar -czf ../repo-backup-$(date +%Y%m%d_%H%M%S).tar.gz .")
        print("  OR")
        print("  cp -r . ../repo-backup")
        print("\nWithout a filesystem backup, recovery is IMPOSSIBLE.")
        print("=" * 60)
        # Require an exact 'yes' (not just 'y') for this destructive step.
        response = input("\nType 'yes' to confirm you have backed up your repository: ")
        if response != 'yes':
            print("Aborted. You must type exactly 'yes' to proceed.")
            return False

    # Save git remote configuration before git-filter-repo removes it
    print("\nSaving git remote and branch tracking configuration...")
    saved_config = save_git_remotes()
    num_remotes = len(saved_config.get('remotes', {}))
    num_branches = len(saved_config.get('branches', {}))
    if num_remotes > 0:
        print(f"✓ Saved {num_remotes} remote(s): {', '.join(saved_config['remotes'].keys())}")
        if num_branches > 0:
            print(f"✓ Saved tracking info for {num_branches} branch(es): {', '.join(saved_config['branches'].keys())}")
    else:
        print("ℹ️  No remotes configured")

    # Load commit message mappings (original hash -> new message)
    commit_messages = {}
    with open(log_filename, 'r') as f:
        for line in f:
            entry = json.loads(line.strip())
            commit_messages[entry['commit']] = entry['new']

    try:
        print("\nApplying changes...")
        print("This may take a while for large repositories...\n")

        # None means git_filter_repo was not importable; True means success.
        result = apply_changes_with_filter_repo(commit_messages)
        if result is None:
            print("\n❌ Failed to apply changes")
            return False

        if result:
            # Restore git remote configuration
            if num_remotes > 0:
                print("\nRestoring git remote and branch tracking configuration...")
                restore_git_remotes(saved_config)
                print(f"✓ Restored {num_remotes} remote(s)")
                if num_branches > 0:
                    print(f"✓ Restored tracking info for {num_branches} branch(es)")

            print("\n✅ Commit messages updated successfully!")
            if num_remotes > 0:
                print("\n⚠️  Note: Remote configuration restored. To push changes:")
                print("  git fetch origin                     # Restore remote tracking refs")
                print("  git push --force-with-lease          # Safe force-push")
                if num_branches > 0:
                    print("\n✓ Branch tracking restored - 'git pull' and 'git push' should work normally")
            print("\nTo restore from your backup if needed:")
            print("  tar -xzf ../repo-backup-TIMESTAMP.tar.gz -C ../repo-restored")
            return True
        else:
            print("\n❌ Failed to apply changes")
            return False

    except subprocess.CalledProcessError as e:
        # The filter subprocess failed partway; history may be partially rewritten.
        print(f"\n❌ Error applying changes: {e}")
        print("Restore from your filesystem backup to recover.")
        return False

async def main():
    """Main function to process commits and apply changes.

    Flow: set up tee'd output logging -> verify we are in a git repo ->
    either apply an existing log (--apply) or analyze commits with Claude
    and write a new JSONL change log incrementally.
    """
    args = parse_args()

    # Set up output logging
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_log_filename = f"commit-regeneration-output-{timestamp}.log"
    output_log_file = open(output_log_filename, 'w')

    # Redirect stdout to write to both terminal and log file
    original_stdout = sys.stdout
    sys.stdout = TeeOutput(output_log_file)

    try:
        print("Git Commit Message Regenerator")
        print("=" * 60)
        print(f"Output logged to: {output_log_filename}")
        print("=" * 60)
        print()

        # Check if we're in a git repo
        try:
            run_git_command(['rev-parse', '--git-dir'])
        except subprocess.CalledProcessError:
            print("Error: Not in a git repository", file=sys.stderr)
            sys.exit(1)

        # Handle --apply mode
        if args.apply_log:
            if not os.path.exists(args.apply_log):
                print(f"Error: Log file not found: {args.apply_log}", file=sys.stderr)
                sys.exit(1)

            # Perform safety checks before applying
            # (skipped for dry runs, which modify nothing)
            if not args.dry_run and not perform_safety_checks(args):
                sys.exit(1)

            success = apply_changes(
                args.apply_log,
                dry_run=args.dry_run,
                force=args.force
            )
            sys.exit(0 if success else 1)

        # Analysis mode: Generate commit message proposals
        print("Analysis Mode: Generating improved commit messages...")
        print()

        # Get all commits
        print("Fetching commit history...")
        commits = get_all_commits()

        # Apply test limit if specified
        if args.test:
            commits = commits[:args.test]
            print(f"🧪 Test mode: Processing first {args.test} commits only")

        print(f"Found {len(commits)} commits to analyze\n")

        # Determine if we're continuing or starting fresh
        if args.continue_from:
            if not os.path.exists(args.continue_from):
                print(f"Error: Log file not found: {args.continue_from}", file=sys.stderr)
                sys.exit(1)

            print(f"Continuing from: {args.continue_from}\n")
            processed_commits = load_processed_commits(args.continue_from)
            print(f"Already processed: {len(processed_commits)} commits\n")
            log_filename = args.continue_from
            log_mode = 'a'  # append to the existing log
        else:
            # Starting fresh
            data_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            log_filename = f"commit-regeneration-log-{data_timestamp}.jsonl"
            processed_commits = set()
            log_mode = 'w'

        print(f"Log file: {log_filename}\n")

        # Process commits
        with open(log_filename, log_mode) as log_file:
            # Skip anything already recorded in the log (resume support).
            commits_to_process = [c for c in commits if c not in processed_commits]

            if not commits_to_process:
                print("All commits already processed!")
                print()
            else:
                print(f"Processing {len(commits_to_process)} remaining commits...\n")

                for i, commit_hash in enumerate(commits_to_process, 1):
                    total_processed = len(processed_commits) + i
                    print(f"Processing commit {total_processed}/{len(commits)}: {commit_hash[:8]}...")

                    info = get_commit_info(commit_hash)
                    original_msg = info['message']

                    log_entry = {
                        'commit': commit_hash,
                        'original': original_msg,
                        'new': None,
                        'action': None
                    }

                    # Check if message is already useful
                    if is_message_useful(original_msg):
                        # Keep original, add Claude notes
                        print(f"  ✓ Message already useful, adding notes...")
                        claude_notes = await generate_commit_message(info['diff'], original_msg, commit_hash, args.wait_and_retry)
                        new_message = f"{original_msg}\n\nClaude Notes: {claude_notes}"
                        log_entry['action'] = 'enhanced'
                        log_entry['new'] = new_message
                    else:
                        # Generate new message
                        print(f"  → Generating new message...")
                        new_message = await generate_commit_message(info['diff'], original_msg, commit_hash, args.wait_and_retry)
                        log_entry['action'] = 'regenerated'
                        log_entry['new'] = new_message

                    # Validate that the message actually changed
                    if not validate_message_changed(original_msg, new_message, commit_hash):
                        log_error("Message validation failed. This likely indicates an API error that wasn't properly detected.")
                        log_error("Aborting to prevent corrupted data. Use --continue to resume after fixing the issue.")
                        sys.exit(1)

                    # Write log entry and flush so progress survives an abort.
                    log_file.write(json.dumps(log_entry) + '\n')
                    log_file.flush()

                    print(f"  New: {new_message[:60]}...")
                    print()

        # Show summary and next steps
        print()
        print("=" * 60)
        print("Analysis Complete!")
        print("=" * 60)
        print()
        show_change_summary(log_filename)
        print()
        print("Next Steps:")
        print(f"  1. Review changes: {log_filename}")
        print(f"  2. Preview application: {sys.argv[0]} --apply {log_filename} --dry-run")
        print(f"  3. CREATE FILESYSTEM BACKUP (REQUIRED!):")
        print(f"       tar -czf ../repo-backup-$(date +%Y%m%d_%H%M%S).tar.gz .")
        print(f"  4. Apply changes: {sys.argv[0]} --apply {log_filename}")
        print()
        print("Safety Features Active:")
        print("  ✓ Dry-run mode available for preview")
        print("  ✓ Explicit 'yes' confirmation required before applying")
        print("  ✓ Repository state validation")
        print("  ⚠ Manual filesystem backup REQUIRED (git backups don't work!)")
        print()
        if args.test:
            print(f"ℹ️  Test mode was used - only {args.test} commits analyzed")
            print(f"   Run without --test to analyze all commits")

    finally:
        # Restore stdout and close log file
        sys.stdout = original_stdout
        output_log_file.close()
        print(f"\nSession output saved to: {output_log_filename}")

if __name__ == '__main__':
    # Entry point: main() is a coroutine, so drive it with asyncio.run().
    asyncio.run(main())
