"""
Fix tool_config paths in Tool Shed repository metadata.

This script updates tool_config paths in RepositoryMetadata that reference an old
file_path location. When the Tool Shed's file_path configuration changes, tool_config
paths in the metadata may become invalid. This script identifies invalid paths and
updates them to use the current file_path configuration.

Run this script from the root folder:

$ python scripts/tool_shed/fix_tool_config_paths.py -c config/tool_shed.yml --dry-run

Options:
  -c, --config       Path to Tool Shed configuration file (required)
  --dry-run          Report changes without modifying the database
  -d, --debug        Enable debug logging
  --backup-dir       Directory to save original metadata JSON files (default: ./metadata_backups);
                     backups are also written in --dry-run mode

This script expects the Tool Shed's runtime virtualenv to be active.
"""

import argparse
import json
import logging
import os
import re
import sys
from datetime import datetime

sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "lib")))

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm.attributes import flag_modified

from galaxy.util.script import (
    app_properties_from_args,
    populate_config_args,
)
from tool_shed.webapp import config as ts_config
from tool_shed.webapp.model import RepositoryMetadata

# Module-wide logger: this is the root logger, with a handler that writes records to stdout.
# Its level is set later, in parse_arguments(), depending on the --debug flag.
log = logging.getLogger()
log.addHandler(logging.StreamHandler(sys.stdout))


class PathFixerStats:
    """Plain bag of counters filled in while tool_config paths are examined."""

    def __init__(self):
        # Record-level counters.
        self.total_repo_metadata = 0
        self.repo_metadata_with_tools = 0

        # Tool-level counters.
        self.total_tools = 0
        self.invalid_paths = 0
        self.updated_paths = 0
        self.paths_with_existing_files = 0
        self.paths_with_missing_files = 0
        self.already_valid_paths = 0
        # Incremented when building the replacement path raised an error.
        self.unfixable_paths = 0

    def print_summary(self):
        """Log every reported counter at INFO level, framed by separator rules."""
        rule = "=" * 70
        report_lines = (
            rule,
            "SUMMARY",
            rule,
            f"Total RepositoryMetadata records processed: {self.total_repo_metadata}",
            f"RepositoryMetadata with tools: {self.repo_metadata_with_tools}",
            f"Total tools examined: {self.total_tools}",
            f"Tools with correct paths (unchanged): {self.already_valid_paths}",
            f"Tools with paths updated: {self.updated_paths}",
            f"  - Tool file exists on disk: {self.paths_with_existing_files}",
            f"  - Tool file missing (removed in later changeset): {self.paths_with_missing_files}",
            f"Tools with path construction errors: {self.unfixable_paths}",
            rule,
        )
        for line in report_lines:
            log.info(line)


def construct_new_path(old_path, repository, current_file_path):
    """
    Rebase a stored tool_config path onto the repository directory under the current file_path.

    The part of ``old_path`` that follows the ``repo_<id>/`` directory is kept and
    appended to ``repository.hg_repository_path(current_file_path)``.

    Layout: ${hash_dirs}/repo_${id}/relative_tool_path
    Example: 000/repo_123/filtering.xml

    Args:
        old_path: The tool config path currently stored in the metadata
        repository: Repository object (its ``id`` and ``hg_repository_path()`` are used)
        current_file_path: Current file_path configuration value

    Returns:
        str: The path rebuilt below the repository's current directory
    """
    repo_root = repository.hg_repository_path(current_file_path)

    # Strategy 1: literal "repo_<id>/" marker; keep whatever follows its first occurrence.
    marker = f"repo_{repository.id}/"
    _, found, tail = old_path.partition(marker)
    if found:
        return os.path.join(repo_root, tail)

    # Strategy 2: regex for .../000/repo_123/tool.xml or .../000/123/456/repo_789/tool.xml,
    # accepted only when the numeric id in the path equals this repository's id.
    layout_hit = re.search(r".*/(\d+/)*(repo_\d+/.*)", old_path)
    if layout_hit is not None:
        repo_relative = layout_hit.group(2)  # e.g., "repo_123/filtering.xml"
        id_hit = re.search(r"repo_(\d+)/", repo_relative)
        if id_hit is not None and int(id_hit.group(1)) == repository.id:
            # Drop the leading "repo_<id>" component, keep the rest.
            return os.path.join(repo_root, repo_relative.partition("/")[2])

    # Strategy 3: nothing recognisable in the old path, so keep only the file name.
    new_path = os.path.join(repo_root, os.path.basename(old_path))
    log.debug(f"Using fallback (filename only) for path construction: {old_path} -> {new_path}")
    return new_path


def save_metadata_backup(repo_metadata, backup_dir):
    """
    Dump one RepositoryMetadata record to a JSON file inside ``backup_dir``.

    The directory is created when missing. The file is named after the record id
    and changeset revision, so a later run for the same record overwrites it.

    Args:
        repo_metadata: RepositoryMetadata object
        backup_dir: Directory to save backup files
    """
    os.makedirs(backup_dir, exist_ok=True)

    target = os.path.join(
        backup_dir,
        f"repo_metadata_{repo_metadata.id}_{repo_metadata.changeset_revision}.json",
    )

    # Key order is kept stable so the written files are easy to compare.
    snapshot = {}
    snapshot["id"] = repo_metadata.id
    snapshot["repository_id"] = repo_metadata.repository_id
    snapshot["changeset_revision"] = repo_metadata.changeset_revision
    snapshot["metadata"] = repo_metadata.metadata
    snapshot["backup_time"] = datetime.now().isoformat()

    with open(target, "w") as handle:
        json.dump(snapshot, handle, indent=2)

    log.debug(f"Saved metadata backup to {target}")


def report_change(repo_metadata, tool, old_path, new_path, tool_file_exists, stats):
    """
    Log one pending path rewrite and count it in ``stats``.

    Args:
        repo_metadata: RepositoryMetadata object
        tool: Tool dictionary from metadata
        old_path: Old tool_config path
        new_path: New tool_config path
        tool_file_exists: Whether the specific tool file exists on disk
        stats: PathFixerStats object
    """
    if tool_file_exists:
        file_status = "FILE EXISTS"
    else:
        file_status = "FILE MISSING (removed in later changeset)"

    tool_id = tool.get("id", "unknown")
    tool_name = tool.get("name", "unknown")

    # One multi-line record: header, old path, new path, file status.
    message = "\n".join(
        (
            f"[REPO_METADATA {repo_metadata.id}] Tool '{tool_id}' ({tool_name}):",
            f"  OLD: {old_path}",
            f"  NEW: {new_path}",
            f"  STATUS: {file_status}",
        )
    )
    log.info(message)

    stats.updated_paths += 1
    if not tool_file_exists:
        stats.paths_with_missing_files += 1
    else:
        stats.paths_with_existing_files += 1


def process_repository_metadata(session, current_file_path, dry_run, backup_dir, stats):
    """
    Process all RepositoryMetadata records and fix tool_config paths.

    For every record that carries a "tools" list and whose repository directory
    exists under ``current_file_path``, each tool's ``tool_config`` is compared with
    the path rebuilt by ``construct_new_path``. Differing paths are reported
    (dry run) or rewritten in place (real run). The caller is responsible for
    committing the session.

    A JSON backup of each such record is written to ``backup_dir`` BEFORE any tool
    entry is modified, so the backup always holds the original metadata. Backups
    are written in dry-run mode as well.

    Args:
        session: SQLAlchemy session
        current_file_path: Current file_path configuration value
        dry_run: If True, only report changes without modifying database
        backup_dir: Directory to save metadata backups
        stats: PathFixerStats object
    """
    log.info(f"Current file_path: {current_file_path}")
    log.info(f"Backup directory: {backup_dir}")
    log.info(f"Dry run mode: {dry_run}")
    log.info("-" * 70)

    # Load all RepositoryMetadata items
    repo_metadata_list = session.query(RepositoryMetadata).all()
    stats.total_repo_metadata = len(repo_metadata_list)

    log.info(f"Found {stats.total_repo_metadata} RepositoryMetadata records")

    for repo_metadata in repo_metadata_list:
        # Skip if no metadata or no tools
        if not repo_metadata.metadata or "tools" not in repo_metadata.metadata:
            continue

        stats.repo_metadata_with_tools += 1
        # A "tools" key holding None is treated like an empty list instead of crashing the loop.
        tools = repo_metadata.metadata.get("tools") or []
        modified = False

        # Get the repository directory path (ending with repo_${id})
        try:
            repo_dir = repo_metadata.repository.hg_repository_path(current_file_path)
            repo_dir_exists = os.path.exists(repo_dir)
        except Exception as e:
            log.error(f"[REPO_METADATA {repo_metadata.id}] Error getting repository path: {e}")
            continue

        if not repo_dir_exists:
            log.warning(
                f"[REPO_METADATA {repo_metadata.id}] Repository directory does not exist: {repo_dir}. "
                f"Skipping all tools for this repository."
            )
            continue

        # Back up the original metadata if there are tools. This must happen before the
        # loop below: tool entries are mutated in place, so a snapshot taken afterwards
        # would contain the NEW paths and be useless for restoring the old state.
        if tools:
            save_metadata_backup(repo_metadata, backup_dir)

        for tool in tools:
            stats.total_tools += 1

            old_path = tool.get("tool_config")
            if not old_path:
                log.debug(f"[REPO_METADATA {repo_metadata.id}] Tool has no tool_config: {tool.get('id')}")
                continue

            # Construct expected new path
            try:
                new_path = construct_new_path(old_path, repo_metadata.repository, current_file_path)
            except Exception as e:
                log.error(f"[REPO_METADATA {repo_metadata.id}] Error constructing new path for {old_path}: {e}")
                stats.unfixable_paths += 1
                continue

            # Check if this is already using the correct base path
            if old_path == new_path:
                log.debug(f"[REPO_METADATA {repo_metadata.id}] Path already correct: {old_path}")
                stats.already_valid_paths += 1
                continue

            # Path needs updating
            stats.invalid_paths += 1

            # Check if the specific tool file exists (for informational purposes only)
            tool_file_exists = os.path.exists(new_path)

            # Report or update
            if dry_run:
                report_change(repo_metadata, tool, old_path, new_path, tool_file_exists, stats)
            else:
                file_status_msg = (
                    "tool file exists"
                    if tool_file_exists
                    else "tool file missing - may have been removed in later changeset"
                )
                log.info(
                    f"[REPO_METADATA {repo_metadata.id}] Updating tool '{tool.get('id')}': "
                    f"{old_path} -> {new_path} ({file_status_msg})"
                )
                tool["tool_config"] = new_path
                modified = True
                stats.updated_paths += 1
                if tool_file_exists:
                    stats.paths_with_existing_files += 1
                else:
                    stats.paths_with_missing_files += 1

        # `modified` is only ever set on the non-dry-run branch above. The nested dict was
        # changed in place, so the attribute is flagged explicitly to make sure SQLAlchemy
        # includes the JSON column in the next flush.
        if modified:
            flag_modified(repo_metadata, "metadata")

    log.info("-" * 70)


def parse_arguments():
    """
    Parse the command line and load the Tool Shed configuration.

    Besides the parsed options, the returned namespace carries ``config_section``
    (always "tool_shed"), ``dburi`` (the configured database connection) and
    ``file_path`` (the configured file_path). The log level is set here as a side
    effect: DEBUG with --debug, INFO otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Fix tool_config paths in Tool Shed repository metadata after file_path config changes."
    )
    # Shared Galaxy helper that registers the configuration-file option(s).
    populate_config_args(parser)
    parser.add_argument(
        "--dry-run", action="store_true", default=False, help="Report changes without modifying the database"
    )
    parser.add_argument("-d", "--debug", action="store_true", default=False, help="Enable debug logging")
    parser.add_argument(
        "--backup-dir",
        type=str,
        default="./metadata_backups",
        help="Directory to save original metadata JSON files (default: ./metadata_backups)",
    )
    args = parser.parse_args()

    # Logging level comes first so everything below is emitted at the right verbosity.
    log.setLevel(logging.DEBUG if args.debug else logging.INFO)

    config_file = getattr(args, "config_file", None)
    log.info(f"Loading configuration from: {config_file}")

    # Read properties from the 'tool_shed' section of the configuration.
    args.config_section = "tool_shed"
    app_properties = app_properties_from_args(args)

    if args.debug:
        log.debug("Loaded app_properties:")
        for prop_name, prop_value in sorted(app_properties.items()):
            log.debug(f"  {prop_name}: {prop_value}")

    config = ts_config.ToolShedAppConfiguration(**app_properties)

    # Expose the resolved settings the rest of the script needs.
    args.dburi = config.database_connection
    args.file_path = config.file_path

    if args.debug:
        final_lines = (
            "Final configuration:",
            f"  Config file: {config_file}",
            f"  Database: {args.dburi}",
            f"  File path: {args.file_path}",
            f"  Dry run: {args.dry_run}",
            f"  Backup dir: {args.backup_dir}",
        )
        for line in final_lines:
            log.debug(line)

    return args


def main():
    """Run the path fixer end to end: parse options, process records, commit, summarise."""
    args = parse_arguments()

    # One session bound to the configured Tool Shed database.
    session_factory = sessionmaker(bind=create_engine(args.dburi))
    session = session_factory()

    stats = PathFixerStats()

    try:
        process_repository_metadata(
            session=session,
            current_file_path=args.file_path,
            dry_run=args.dry_run,
            backup_dir=args.backup_dir,
            stats=stats,
        )

        # Only a real run persists anything.
        if args.dry_run:
            log.info("Dry run complete - no changes made to database")
        else:
            session.commit()
            log.info("Changes committed to database")
    except Exception as e:
        # Undo any pending changes, then let the error propagate to the caller.
        log.error(f"Error during processing: {e}")
        session.rollback()
        raise
    finally:
        session.close()

    # Reached only when processing succeeded.
    stats.print_summary()


# Run the fixer only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()