| | """ |
| | NGC CLI Utility Module |
| | |
| | Provides utilities for working with NVIDIA GPU Cloud (NGC) CLI to download |
| | NeMo resources, datasets, and other NGC catalog resources. |
| | |
| | This module handles: |
| | - NGC CLI detection and installation |
| | - Resource download from NGC catalog |
| | - Configuration management |
| | - Error handling and retry logic |
| | """ |
| |
|
| | import logging |
| | import os |
| | import shutil |
| | import subprocess |
| | from dataclasses import dataclass |
| | from pathlib import Path |
| | from typing import Any |
| |
|
| | logger = logging.getLogger(__name__) |
| |
|
| |
|
| | @dataclass |
| | class NGCConfig: |
| | """NGC CLI configuration""" |
| |
|
| | api_key: str | None = None |
| | org: str | None = None |
| | team: str | None = None |
| |
|
| |
|
class NGCCLIError(Exception):
    """Common base class for every error raised by NGC CLI operations."""
| |
|
| |
|
class NGCCLINotFoundError(NGCCLIError):
    """Raised when the NGC CLI is not found or not installed."""
| |
|
| |
|
class NGCCLIAuthError(NGCCLIError):
    """Raised when NGC CLI authentication or configuration fails."""
| |
|
| |
|
class NGCCLIDownloadError(NGCCLIError):
    """Raised when an NGC CLI download does not succeed."""
| |
|
| |
|
| | class NGCCLI: |
| | """ |
| | NGC CLI wrapper for downloading resources from NVIDIA GPU Cloud. |
| | |
| | Supports multiple installation methods: |
| | 1. System-installed ngc in PATH |
| | 2. Local installation at ~/ngc-cli/ngc |
| | 3. Python package via uv (ngc-python-cli) |
| | """ |
| |
|
| | def __init__(self, use_uv: bool = True): |
| | """ |
| | Initialize NGC CLI wrapper. |
| | |
| | Args: |
| | use_uv: If True, prefer uv-based installation if ngc not in PATH |
| | """ |
| | self.use_uv = use_uv |
| | self.ngc_cmd: str | None = None |
| | self.uv_cmd: str | None = None |
| | self._detect_ngc_cli() |
| |
|
| | def _detect_ngc_cli(self) -> None: |
| | """Detect and set up NGC CLI command""" |
| | |
| | if shutil.which("ngc"): |
| | self.ngc_cmd = "ngc" |
| | logger.info("Found NGC CLI in PATH") |
| | return |
| |
|
| | |
| | home_ngc = Path.home() / "ngc-cli" / "ngc" |
| | if home_ngc.exists(): |
| | self.ngc_cmd = str(home_ngc) |
| | |
| | env_path = os.environ.get("PATH", "") |
| | os.environ["PATH"] = f"{home_ngc.parent}:{env_path}" |
| | logger.info(f"Found NGC CLI at {home_ngc}") |
| | return |
| |
|
| | |
| | if self.use_uv: |
| | self._setup_uv_ngc() |
| |
|
| | def _setup_uv_ngc(self) -> None: |
| | """Set up NGC CLI via uv""" |
| | |
| | if shutil.which("uv"): |
| | self.uv_cmd = "uv" |
| | elif (Path.home() / ".local" / "bin" / "uv").exists(): |
| | self.uv_cmd = str(Path.home() / ".local" / "bin" / "uv") |
| | elif (Path.home() / ".cargo" / "bin" / "uv").exists(): |
| | self.uv_cmd = str(Path.home() / ".cargo" / "bin" / "uv") |
| | else: |
| | logger.warning("uv not found, cannot use uv-based NGC CLI") |
| | return |
| |
|
| | |
| | try: |
| | result = subprocess.run( |
| | [self.uv_cmd, "pip", "list"], |
| | capture_output=True, |
| | text=True, |
| | check=False, |
| | ) |
| | if "ngc" in result.stdout.lower(): |
| | self.ngc_cmd = f"{self.uv_cmd} run ngc" |
| | logger.info("Found NGC CLI via uv") |
| | return |
| | except Exception as e: |
| | logger.debug(f"Error checking uv packages: {e}") |
| |
|
| | |
| | |
| | |
| | |
| | logger.debug("NGC CLI must be installed separately from NVIDIA website") |
| |
|
| | def is_available(self) -> bool: |
| | """Check if NGC CLI is available""" |
| | return self.ngc_cmd is not None |
| |
|
| | def ensure_available(self) -> None: |
| | """Ensure NGC CLI is available, raise error if not""" |
| | if not self.is_available(): |
| | raise NGCCLINotFoundError( |
| | "NGC CLI not found. Please install it:\n" |
| | " 1. Download from https://catalog.ngc.nvidia.com\n" |
| | " 2. Or install to ~/ngc-cli/ directory\n" |
| | " 3. Or add to system PATH\n" |
| | "\n" |
| | "Note: NGC CLI is not available as a PyPI package.\n" |
| | "You must download it directly from NVIDIA." |
| | ) |
| |
|
| | def check_config(self) -> dict[str, Any]: |
| | """ |
| | Check NGC CLI configuration. |
| | |
| | Returns: |
| | Configuration dictionary with API key status, org, team, etc. |
| | |
| | Raises: |
| | NGCCLINotFoundError: If NGC CLI is not available |
| | NGCCLIAuthError: If authentication is not configured |
| | """ |
| | self.ensure_available() |
| |
|
| | if self.ngc_cmd is None: |
| | raise NGCCLINotFoundError("NGC CLI command not set") |
| | try: |
| | result = subprocess.run( |
| | [*self.ngc_cmd.split(), "config", "current"], |
| | capture_output=True, |
| | text=True, |
| | check=True, |
| | ) |
| |
|
| | config = {} |
| | |
| | lines = result.stdout.strip().split("\n") |
| | current_key = None |
| |
|
| | for line in lines: |
| | if "|" in line and "| key " not in line.lower() and "---" not in line: |
| | parts = [part.strip() for part in line.split("|") if part.strip()] |
| | if len(parts) >= 3: |
| | key, value, source = parts[0], parts[1], parts[2] |
| | if key: |
| | current_key = key |
| | config[key] = value |
| | elif current_key and value: |
| | config[current_key] += value |
| | elif len(parts) == 1 and current_key: |
| | config[current_key] += parts[0] |
| |
|
| | |
| | |
| | if config and ("apikey" in config or "API key" in config): |
| | return config |
| |
|
| | raise NGCCLIAuthError( |
| | "NGC CLI not configured. Run: ngc config set\n" |
| | "Get your API key from: https://catalog.ngc.nvidia.com" |
| | ) |
| |
|
| | return config |
| | except subprocess.CalledProcessError as e: |
| | raise NGCCLIAuthError(f"Failed to check NGC config: {e.stderr}") from e |
| |
|
| | def set_config( |
| | self, api_key: str, _org: str | None = None, _team: str | None = None |
| | ) -> None: |
| | """ |
| | Configure NGC CLI with API key. |
| | |
| | Args: |
| | api_key: NGC API key from https://catalog.ngc.nvidia.com |
| | _org: Optional organization name (reserved for future use) |
| | _team: Optional team name (reserved for future use) |
| | """ |
| | self.ensure_available() |
| |
|
| | if self.ngc_cmd is None: |
| | raise NGCCLINotFoundError("NGC CLI command not set") |
| | |
| | try: |
| | subprocess.run( |
| | [*self.ngc_cmd.split(), "config", "set"], |
| | input=f"{api_key}\n", |
| | text=True, |
| | check=True, |
| | capture_output=True, |
| | ) |
| | logger.info("NGC CLI configured successfully") |
| | except subprocess.CalledProcessError as e: |
| | raise NGCCLIAuthError(f"Failed to configure NGC CLI: {e.stderr}") from e |
| |
|
| | def download_resource( |
| | self, |
| | resource_path: str, |
| | version: str | None = None, |
| | output_dir: Path | None = None, |
| | extract: bool = True, |
| | ) -> Path: |
| | """ |
| | Download a resource from NGC catalog. |
| | |
| | Args: |
| | resource_path: Resource path in format "org/team/resource" or "nvidia/nemo-microservices/nemo-microservices-quickstart" |
| | version: Optional version tag (e.g., "25.10") |
| | output_dir: Optional output directory (defaults to current directory) |
| | extract: Whether to extract downloaded archive |
| | |
| | Returns: |
| | Path to downloaded/extracted resource |
| | |
| | Raises: |
| | NGCCLINotFoundError: If NGC CLI is not available |
| | NGCCLIAuthError: If authentication failed |
| | NGCCLIDownloadError: If download failed |
| | """ |
| | self.ensure_available() |
| |
|
| | |
| | try: |
| | self.check_config() |
| | except NGCCLIAuthError: |
| | logger.warning("NGC CLI not configured. Attempting download anyway...") |
| |
|
| | if output_dir is None: |
| | output_dir = Path.cwd() |
| | else: |
| | output_dir = Path(output_dir) |
| | output_dir.mkdir(parents=True, exist_ok=True) |
| |
|
| | if self.ngc_cmd is None: |
| | raise NGCCLINotFoundError("NGC CLI command not set") |
| | |
| | cmd = [*self.ngc_cmd.split(), "registry", "resource", "download-version"] |
| |
|
| | resource_spec = f"{resource_path}:{version}" if version else resource_path |
| |
|
| | cmd.append(resource_spec) |
| |
|
| | |
| | original_cwd = Path.cwd() |
| | try: |
| | return self._execute_download_in_directory(output_dir, resource_spec, cmd) |
| | finally: |
| | os.chdir(original_cwd) |
| |
|
| | def _execute_download_in_directory( |
| | self, output_dir: Path, resource_spec: str, cmd: list[str] |
| | ) -> Path: |
| | """ |
| | Execute download command in the specified directory and locate the downloaded resource. |
| | |
| | Args: |
| | output_dir: Directory to download into |
| | resource_spec: Resource specification string for logging |
| | cmd: Command to execute |
| | |
| | Returns: |
| | Path to the downloaded resource (most recently modified item, or output_dir if empty) |
| | |
| | Raises: |
| | NGCCLIDownloadError: If download fails |
| | """ |
| | os.chdir(output_dir) |
| | logger.info(f"Downloading {resource_spec} to {output_dir}...") |
| |
|
| | result = subprocess.run(cmd, capture_output=True, text=True, check=False) |
| |
|
| | if result.returncode != 0: |
| | error_msg = result.stderr or result.stdout |
| | raise NGCCLIDownloadError( |
| | f"Failed to download {resource_spec}:\n{error_msg}" |
| | ) |
| |
|
| | logger.info(f"Successfully downloaded {resource_spec}") |
| |
|
| | if downloaded_items := list(output_dir.iterdir()): |
| | |
| | return max(downloaded_items, key=lambda p: p.stat().st_mtime) |
| |
|
| | return output_dir |
| |
|
| | def list_resources( |
| | self, org: str | None = None, team: str | None = None |
| | ) -> list[dict[str, Any]]: |
| | """ |
| | List available resources in NGC catalog. |
| | |
| | Args: |
| | org: Optional organization filter |
| | team: Optional team filter |
| | |
| | Returns: |
| | List of resource dictionaries |
| | """ |
| | self.ensure_available() |
| |
|
| | if self.ngc_cmd is None: |
| | raise NGCCLINotFoundError("NGC CLI command not set") |
| | cmd = [*self.ngc_cmd.split(), "registry", "resource", "list"] |
| |
|
| | if org: |
| | cmd.extend(["--org", org]) |
| | if team: |
| | cmd.extend(["--team", team]) |
| |
|
| | try: |
| | subprocess.run(cmd, capture_output=True, text=True, check=True) |
| |
|
| | |
| | |
| | return [] |
| | except subprocess.CalledProcessError as e: |
| | logger.warning(f"Failed to list resources: {e.stderr}") |
| | return [] |
| |
|
| |
|
def get_ngc_cli(use_uv: bool = True) -> NGCCLI:
    """
    Build a new NGCCLI wrapper.

    Args:
        use_uv: Forwarded to the NGCCLI constructor; when True the uv-based
            installation may be used

    Returns:
        The newly constructed NGCCLI instance
    """
    wrapper = NGCCLI(use_uv=use_uv)
    return wrapper
| |
|
| |
|
def ensure_ngc_cli_configured(api_key: str | None = None) -> NGCCLI:
    """
    Return an NGC CLI wrapper that is available and configured.

    The current configuration is checked first. Only when that check fails
    is ``api_key`` (if given) used to configure the CLI.

    Args:
        api_key: Optional API key to configure (if not already configured)

    Returns:
        Configured NGCCLI instance

    Raises:
        NGCCLINotFoundError: If NGC CLI is not available
        NGCCLIAuthError: If the CLI is unconfigured and no API key was
            given, or if configuring it fails
    """
    ngc = get_ngc_cli()

    if not ngc.is_available():
        raise NGCCLINotFoundError(
            "NGC CLI not available. Install with:\n"
            "  uv pip install nvidia-pyindex nvidia-nim ngc-python-cli\n"
            "Or download from: https://catalog.ngc.nvidia.com"
        )

    try:
        ngc.check_config()
    except NGCCLIAuthError as auth_failure:
        # Without a key there is nothing to fall back on.
        if not api_key:
            raise NGCCLIAuthError(
                "NGC CLI not configured. Provide API key or run: ngc config set\n"
                "Get API key from: https://catalog.ngc.nvidia.com"
            ) from auth_failure
        ngc.set_config(api_key)

    return ngc
| |
|