""" NGC CLI Utility Module Provides utilities for working with NVIDIA GPU Cloud (NGC) CLI to download NeMo resources, datasets, and other NGC catalog resources. This module handles: - NGC CLI detection and installation - Resource download from NGC catalog - Configuration management - Error handling and retry logic """ import logging import os import shutil import subprocess from dataclasses import dataclass from pathlib import Path from typing import Any logger = logging.getLogger(__name__) @dataclass class NGCConfig: """NGC CLI configuration""" api_key: str | None = None org: str | None = None team: str | None = None class NGCCLIError(Exception): """Base exception for NGC CLI operations""" class NGCCLINotFoundError(NGCCLIError): """NGC CLI not found or not installed""" class NGCCLIAuthError(NGCCLIError): """NGC CLI authentication error""" class NGCCLIDownloadError(NGCCLIError): """NGC CLI download error""" class NGCCLI: """ NGC CLI wrapper for downloading resources from NVIDIA GPU Cloud. Supports multiple installation methods: 1. System-installed ngc in PATH 2. Local installation at ~/ngc-cli/ngc 3. Python package via uv (ngc-python-cli) """ def __init__(self, use_uv: bool = True): """ Initialize NGC CLI wrapper. Args: use_uv: If True, prefer uv-based installation if ngc not in PATH """ self.use_uv = use_uv self.ngc_cmd: str | None = None self.uv_cmd: str | None = None self._detect_ngc_cli() def _detect_ngc_cli(self) -> None: """Detect and set up NGC CLI command""" # Method 1: Check if ngc is in PATH if shutil.which("ngc"): self.ngc_cmd = "ngc" logger.info("Found NGC CLI in PATH") return # Method 2: Check common installation location home_ngc = Path.home() / "ngc-cli" / "ngc" if home_ngc.exists(): self.ngc_cmd = str(home_ngc) # Add to PATH for subprocess calls env_path = os.environ.get("PATH", "") os.environ["PATH"] = f"{home_ngc.parent}:{env_path}" logger.info(f"Found NGC CLI at {home_ngc}") return # Method 3: Use uv to run ngc (if enabled) if self.use_uv: self._setup_uv_ngc() def _setup_uv_ngc(self) -> None: """Set up NGC CLI via uv""" # Find uv if shutil.which("uv"): self.uv_cmd = "uv" elif (Path.home() / ".local" / "bin" / "uv").exists(): self.uv_cmd = str(Path.home() / ".local" / "bin" / "uv") elif (Path.home() / ".cargo" / "bin" / "uv").exists(): self.uv_cmd = str(Path.home() / ".cargo" / "bin" / "uv") else: logger.warning("uv not found, cannot use uv-based NGC CLI") return # Check if ngc is installed via uv try: result = subprocess.run( [self.uv_cmd, "pip", "list"], capture_output=True, text=True, check=False, ) if "ngc" in result.stdout.lower(): self.ngc_cmd = f"{self.uv_cmd} run ngc" logger.info("Found NGC CLI via uv") return except Exception as e: logger.debug(f"Error checking uv packages: {e}") # Note: NGC CLI is not a Python package on PyPI # It must be downloaded from https://catalog.ngc.nvidia.com # We can only check if it's available in PATH or local installation # The uv method here is for running Python-based NGC SDK if available logger.debug("NGC CLI must be installed separately from NVIDIA website") def is_available(self) -> bool: """Check if NGC CLI is available""" return self.ngc_cmd is not None def ensure_available(self) -> None: """Ensure NGC CLI is available, raise error if not""" if not self.is_available(): raise NGCCLINotFoundError( "NGC CLI not found. Please install it:\n" " 1. Download from https://catalog.ngc.nvidia.com\n" " 2. Or install to ~/ngc-cli/ directory\n" " 3. Or add to system PATH\n" "\n" "Note: NGC CLI is not available as a PyPI package.\n" "You must download it directly from NVIDIA." ) def check_config(self) -> dict[str, Any]: """ Check NGC CLI configuration. Returns: Configuration dictionary with API key status, org, team, etc. Raises: NGCCLINotFoundError: If NGC CLI is not available NGCCLIAuthError: If authentication is not configured """ self.ensure_available() if self.ngc_cmd is None: raise NGCCLINotFoundError("NGC CLI command not set") try: result = subprocess.run( [*self.ngc_cmd.split(), "config", "current"], capture_output=True, text=True, check=True, ) config = {} # Parse the table format output lines = result.stdout.strip().split("\n") current_key = None for line in lines: if "|" in line and "| key " not in line.lower() and "---" not in line: parts = [part.strip() for part in line.split("|") if part.strip()] if len(parts) >= 3: # key | value | source key, value, source = parts[0], parts[1], parts[2] if key: # New key current_key = key config[key] = value elif current_key and value: # Continuation of previous key config[current_key] += value elif len(parts) == 1 and current_key: # Just a value continuation config[current_key] += parts[0] # Check if API key is configured (it will be masked with asterisks) # If we have any config and apikey exists (even masked), consider it configured if config and ("apikey" in config or "API key" in config): return config raise NGCCLIAuthError( "NGC CLI not configured. Run: ngc config set\n" "Get your API key from: https://catalog.ngc.nvidia.com" ) return config except subprocess.CalledProcessError as e: raise NGCCLIAuthError(f"Failed to check NGC config: {e.stderr}") from e def set_config( self, api_key: str, _org: str | None = None, _team: str | None = None ) -> None: """ Configure NGC CLI with API key. Args: api_key: NGC API key from https://catalog.ngc.nvidia.com _org: Optional organization name (reserved for future use) _team: Optional team name (reserved for future use) """ self.ensure_available() if self.ngc_cmd is None: raise NGCCLINotFoundError("NGC CLI command not set") # Set API key try: subprocess.run( [*self.ngc_cmd.split(), "config", "set"], input=f"{api_key}\n", text=True, check=True, capture_output=True, ) logger.info("NGC CLI configured successfully") except subprocess.CalledProcessError as e: raise NGCCLIAuthError(f"Failed to configure NGC CLI: {e.stderr}") from e def download_resource( self, resource_path: str, version: str | None = None, output_dir: Path | None = None, extract: bool = True, # noqa: ARG002 ) -> Path: """ Download a resource from NGC catalog. Args: resource_path: Resource path in format "org/team/resource" or "nvidia/nemo-microservices/nemo-microservices-quickstart" version: Optional version tag (e.g., "25.10") output_dir: Optional output directory (defaults to current directory) extract: Whether to extract downloaded archive Returns: Path to downloaded/extracted resource Raises: NGCCLINotFoundError: If NGC CLI is not available NGCCLIAuthError: If authentication failed NGCCLIDownloadError: If download failed """ self.ensure_available() # Check config first try: self.check_config() except NGCCLIAuthError: logger.warning("NGC CLI not configured. Attempting download anyway...") if output_dir is None: output_dir = Path.cwd() else: output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) if self.ngc_cmd is None: raise NGCCLINotFoundError("NGC CLI command not set") # Build download command cmd = [*self.ngc_cmd.split(), "registry", "resource", "download-version"] resource_spec = f"{resource_path}:{version}" if version else resource_path cmd.append(resource_spec) # Change to output directory for download original_cwd = Path.cwd() try: return self._execute_download_in_directory(output_dir, resource_spec, cmd) finally: os.chdir(original_cwd) def _execute_download_in_directory( self, output_dir: Path, resource_spec: str, cmd: list[str] ) -> Path: """ Execute download command in the specified directory and locate the downloaded resource. Args: output_dir: Directory to download into resource_spec: Resource specification string for logging cmd: Command to execute Returns: Path to the downloaded resource (most recently modified item, or output_dir if empty) Raises: NGCCLIDownloadError: If download fails """ os.chdir(output_dir) logger.info(f"Downloading {resource_spec} to {output_dir}...") result = subprocess.run(cmd, capture_output=True, text=True, check=False) if result.returncode != 0: error_msg = result.stderr or result.stdout raise NGCCLIDownloadError( f"Failed to download {resource_spec}:\n{error_msg}" ) logger.info(f"Successfully downloaded {resource_spec}") if downloaded_items := list(output_dir.iterdir()): # Return the most recently modified item return max(downloaded_items, key=lambda p: p.stat().st_mtime) return output_dir def list_resources( self, org: str | None = None, team: str | None = None ) -> list[dict[str, Any]]: """ List available resources in NGC catalog. Args: org: Optional organization filter team: Optional team filter Returns: List of resource dictionaries """ self.ensure_available() if self.ngc_cmd is None: raise NGCCLINotFoundError("NGC CLI command not set") cmd = [*self.ngc_cmd.split(), "registry", "resource", "list"] if org: cmd.extend(["--org", org]) if team: cmd.extend(["--team", team]) try: subprocess.run(cmd, capture_output=True, text=True, check=True) # Parse output (format may vary) # TODO: Implement proper parsing based on actual NGC CLI output format return [] except subprocess.CalledProcessError as e: logger.warning(f"Failed to list resources: {e.stderr}") return [] def get_ngc_cli(use_uv: bool = True) -> NGCCLI: """ Get an NGC CLI instance. Args: use_uv: If True, prefer uv-based installation Returns: NGCCLI instance """ return NGCCLI(use_uv=use_uv) def ensure_ngc_cli_configured(api_key: str | None = None) -> NGCCLI: """ Ensure NGC CLI is available and configured. Args: api_key: Optional API key to configure (if not already configured) Returns: Configured NGCCLI instance Raises: NGCCLINotFoundError: If NGC CLI cannot be found or installed NGCCLIAuthError: If configuration fails """ cli = get_ngc_cli() if not cli.is_available(): raise NGCCLINotFoundError( "NGC CLI not available. Install with:\n" " uv pip install nvidia-pyindex nvidia-nim ngc-python-cli\n" "Or download from: https://catalog.ngc.nvidia.com" ) # Check if already configured try: cli.check_config() return cli except NGCCLIAuthError as err: if api_key: cli.set_config(api_key) return cli raise NGCCLIAuthError( "NGC CLI not configured. Provide API key or run: ngc config set\n" "Get API key from: https://catalog.ngc.nvidia.com" ) from err