#!/usr/bin/python3
"""
Hardware information collector for Prometheus Node Exporter.

This module collects detailed hardware information about CPUs, memory, network
interfaces, disks, and GPUs/accelerators, and outputs them in Prometheus metric format.
"""
import subprocess
import re
import os
from typing import Dict, List, Optional, Tuple, Any
from pathlib import Path


# ============================================================================
# Constants
# ============================================================================

# Virtual environment vendors.
# Compared as case-insensitive substrings against the DIMM manufacturer
# (parse_dmidecode) and against the CPU vendor/model strings (parse_lscpu).
VIRTUAL_VENDORS = [
    'Virtuozzo', 'VMware', 'QEMU', 'Microsoft', 'Xen', 'KVM', 
    'VirtualBox', 'Parallels', 'Red Hat'
]

# Virtual CPU models.
# Compared as case-insensitive substrings against the lscpu "Model name".
VIRTUAL_CPU_MODELS = [
    'Virtual CPU', 'Common KVM processor', 'QEMU Virtual CPU', 
    'VMware Virtual Platform'
]

# Virtual network drivers.
# Compared by exact match against the kernel driver name of an interface.
# NOTE(review): 'e1000' and 'pcnet32' are emulated by hypervisors but are
# also drivers for physical NICs - confirm that treating them as virtual is
# intended.
VIRTUAL_NETWORK_DRIVERS = [
    'virtio_net', 'vmxnet3', 'e1000', 'pcnet32', 'xen-netfront'
]

# Virtual network vendors (for NICs).
# Compared by exact, case-sensitive match against the NIC vendor string.
VIRTUAL_NIC_VENDORS = ['Red Hat', 'QEMU', 'VMware', 'Microsoft', 'Xen']

# Virtual network models.
# Compared as case-sensitive substrings of the NIC model string.
VIRTUAL_NIC_MODELS = ['Virtio', 'VMware', 'Virtual', 'Xen']

# Virtual speed map for network interfaces.
# Maps a driver name to the speed reported for virtual interfaces; values are
# strings because they are returned directly as the speed_mbps value.
VIRTUAL_SPEED_MAP = {
    'virtio_net': '10000',
    'vmxnet3': '10000',
    'e1000': '1000',
    'pcnet32': '100'
}

# Virtual disk vendors.
# Compared as case-insensitive substrings against the disk vendor string.
# NOTE(review): 'Tenstorrent' is listed alongside hypervisor/cloud vendors -
# confirm it is meant to mark a disk as virtual.
VIRTUAL_DISK_VENDORS = [
    'QEMU', 'VMware', 'Virtual', 'VirtualBox', 'Xen', 'Amazon', 
    'Google', 'Microsoft', 'Red Hat', 'Virtio', 'Tenstorrent'
]

# Virtual disk models.
# Compared as case-insensitive substrings against the disk model string.
VIRTUAL_DISK_MODELS = [
    'Virtual disk', 'VBOX', 'VMware', 'Amazon EC2', 
    'Google PersistentDisk', 'Virtio'
]

# PCI vendor ID to name mapping.
# Keys must match the text of a device's sysfs "vendor" attribute exactly
# (lower-case hex with a "0x" prefix); unknown IDs fall back to 'Unknown'
# at the lookup site.
PCI_VENDOR_MAP = {
    '0x8086': 'Intel',
    '0x14e4': 'Broadcom',
    '0x10ee': 'Xilinx',
    '0x15b3': 'Mellanox',
    '0x1924': 'Solarflare',
    '0x1af4': 'Red Hat',
    '0x1d0f': 'Amazon',
    '0x1b36': 'QEMU'
}

# Disk vendor patterns for extraction from model names.
# Each entry is (upper-case pattern, display name). The list is scanned in
# order against the upper-cased model string, so ORDER MATTERS: short
# patterns such as 'ST' can shadow later, more specific entries.
DISK_VENDOR_PATTERNS = [
    ('INTEL', 'Intel'),
    ('SAMSUNG', 'Samsung'),
    ('WDC', 'Western Digital'),
    ('ST', 'Seagate'),
    ('HGST', 'HGST'),
    ('TOSHIBA', 'Toshiba'),
    ('CRUCIAL', 'Crucial'),
    ('KINGSTON', 'Kingston'),
    ('SANDISK', 'SanDisk'),
    ('MICRON', 'Micron'),
    ('SK HYNIX', 'SK Hynix'),
    ('HYNIX', 'SK Hynix'),
    ('ADATA', 'ADATA'),
    ('CORSAIR', 'Corsair'),
    ('TRANSCEND', 'Transcend'),
    ('PLEXTOR', 'Plextor'),
    ('OCZ', 'OCZ'),
    ('PATRIOT', 'Patriot'),
    ('MUSHKIN', 'Mushkin'),
    ('SEAGATE', 'Seagate'),
    ('HITACHI', 'Hitachi'),
    ('MAXTOR', 'Maxtor'),
    ('FUJITSU', 'Fujitsu')
]

# Disk PCI vendor map.
# Same key format as PCI_VENDOR_MAP. Its consumer is outside this section of
# the file - presumably the sysfs-based disk vendor lookup; verify there.
# NOTE(review): the ID-to-name pairs have not been cross-checked against the
# PCI ID database.
DISK_PCI_VENDOR_MAP = {
    '0x1af4': 'Red Hat (Virtio)',
    '0x1b36': 'QEMU',
    '0x1df0': 'Tenstorrent',
    '0x8086': 'Intel',
    '0x1022': 'AMD',
    '0x1095': 'Silicon Image',
    '0x1b4b': 'Marvell',
    '0x15b7': 'SanDisk',
    '0x144d': 'Samsung',
    '0x1179': 'Toshiba',
    '0x1c5c': 'SK Hynix',
    '0xc0a9': 'Micron',
    '0x1987': 'Phison',
    '0x126f': 'Silicon Motion',
    '0x1d97': 'Realtek',
    '0x1cc1': 'ADATA'
}

# RAID controller identifiers.
# Lower-case driver names searched as substrings of a disk's resolved,
# lower-cased sysfs device path.
RAID_CONTROLLERS = ['megaraid', 'mpt2sas', 'mpt3sas', 'aacraid', 'hpsa', 'cciss']

# Devices to exclude from disk scanning.
# Kept as a tuple because it is passed directly to str.startswith() when
# /sys/block is scanned.
EXCLUDED_DISK_PREFIXES = ('loop', 'ram', 'dm-', 'sr', 'zram', 'nbd')

# GPU data center keywords.
# All entries are lower-case. The GPU detection code that consumes the GPU_*
# and *_GPU_* tables below is outside this section of the file - presumably
# they are matched against lower-cased lspci text; verify there.
GPU_DATACENTER_KEYWORDS = [
    '3d controller', 'processing accelerator', 'co-processor', 'neural network',
    'tesla', 'a100', 'a40', 'a30', 'a10', 'a16', 'a2',
    'h100', 'h200', 'v100', 'p100', 'p40', 't4', 'l4', 'l40',
    'instinct', 'mi100', 'mi200', 'mi250', 'mi300',
    'ponte vecchio', 'arctic sound', 'flex', 'data center gpu', 
    'max 1', 'max 2', 'max 3', 'gaudi', 'goya', 'ipu', 'tpu',
    'alveo', 'versal'
]

# GPU exclude keywords (storage, network, audio).
# Lower-case device-class phrases for PCI devices that are not GPUs.
GPU_EXCLUDE_KEYWORDS = [
    'sata controller', 'ahci', 'ide interface', 'raid bus controller',
    'mass storage controller', 'non-volatile memory controller',
    'ethernet controller', 'network controller', 'fibre channel',
    'usb controller', 'serial attached scsi', 'sas controller',
    'audio', 'audio device', 'multimedia'
]

# Consumer GPU skip patterns.
# Lower-case product-line fragments identifying desktop/integrated graphics.
CONSUMER_GPU_PATTERNS = [
    'geforce', 'radeon rx', 'hd graphics', 'uhd graphics', 
    'iris', 'arc a3', 'arc a5', 'arc a7'
]

# Virtual GPU vendors and keywords.
# Vendors are capitalised display names; keywords are lower-case fragments.
VIRTUAL_GPU_VENDORS = [
    'VMware', 'Red Hat', 'QEMU', 'Microsoft', 'Xen', 
    'VirtualBox', 'Cirrus Logic'
]

VIRTUAL_GPU_KEYWORDS = [
    'virtio', 'virtual', 'vmware', 'qemu', 'cirrus', 'vga compatible',
    'cirrus logic', 'vmware svga', 'qxl', 'virtio-gpu', 'bochs'
]

# Intel Data Center GPU device ID map.
# Maps a PCI device ID ("0x"-prefixed lower-case hex) to a
# (marketing name, architecture codename) tuple.
INTEL_DATACENTER_GPU_MAP = {
    # Ponte Vecchio (Max Series)
    '0x0bd0': ('Intel Data Center GPU Max 1550', 'Ponte Vecchio'),
    '0x0bd4': ('Intel Data Center GPU Max 1550', 'Ponte Vecchio'),
    '0x0bd5': ('Intel Data Center GPU Max 1100', 'Ponte Vecchio'),
    '0x0bd6': ('Intel Data Center GPU Max 1100C', 'Ponte Vecchio'),
    '0x0bd7': ('Intel Data Center GPU Max 1350', 'Ponte Vecchio'),
    '0x0bd8': ('Intel Data Center GPU Max 1450', 'Ponte Vecchio'),
    '0x0bd9': ('Intel Data Center GPU Max 1550', 'Ponte Vecchio'),
    '0x0bda': ('Intel Data Center GPU Max 1100', 'Ponte Vecchio'),
    '0x0bdb': ('Intel Data Center GPU Max 1550', 'Ponte Vecchio'),
    # Arctic Sound-M (Flex Series)
    '0x56c0': ('Intel Data Center GPU Flex 170', 'Arctic Sound-M'),
    '0x56c1': ('Intel Data Center GPU Flex 140', 'Arctic Sound-M'),
    '0x56c2': ('Intel Data Center GPU Flex Series', 'Arctic Sound-M'),
    # DG1
    '0x4905': ('Intel DG1', 'DG1'),
    '0x4906': ('Intel DG1', 'DG1'),
    '0x4907': ('Intel DG1', 'DG1'),
    '0x4908': ('Intel DG1', 'DG1'),
}


# ============================================================================
# Helper Functions
# ============================================================================

def _run_command(cmd: List[str], timeout: Optional[int] = None) -> Optional[str]:
    """
    Run a command and return its standard output.

    Standard error is discarded. Every failure mode is reported as None so
    that callers can fall back to another data source instead of crashing.

    Args:
        cmd: Command and arguments as a list
        timeout: Optional timeout in seconds

    Returns:
        Command output as string, or None if the command list is empty, the
        command cannot be started (missing, not executable, ...), it exits
        with a non-zero status, or it times out
    """
    if not cmd:
        return None

    try:
        return subprocess.check_output(
            cmd,
            text=True,
            # Hardware tools sometimes emit bytes that are invalid in the
            # locale encoding (e.g. raw DMI strings); substitute them rather
            # than letting a UnicodeDecodeError escape.
            errors='replace',
            stderr=subprocess.DEVNULL,
            timeout=timeout,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        # OSError covers FileNotFoundError (tool not installed) as well as
        # PermissionError and other launch failures.
        return None


def _read_sysfs_file(path: str) -> Optional[str]:
    """
    Return the text of a sysfs attribute file with surrounding whitespace removed.

    Args:
        path: Path to the sysfs file

    Returns:
        The stripped file contents, or None when the file cannot be opened
        or read, or when it contains nothing but whitespace
    """
    try:
        with open(path, 'r') as handle:
            text = handle.read().strip()
    except (FileNotFoundError, PermissionError, OSError):
        return None

    # An empty attribute carries no information; report it like a missing one.
    return text or None


def _get_pci_path(pci_addr: str, *subpaths: str) -> Path:
    """
    Build the sysfs path of a PCI device, optionally extended by sub-components.

    Args:
        pci_addr: PCI address (e.g., "0000:01:00.0" or "01:00.0")
        *subpaths: Optional path components appended below the device directory

    Returns:
        Path object pointing at the device directory or the requested entry
    """
    # An address with a single colon ("bus:dev.fn") lacks the PCI domain;
    # sysfs always uses the full form, so prepend the default domain.
    has_domain = pci_addr.count(':') != 1
    full_addr = pci_addr if has_domain else f"0000:{pci_addr}"

    return Path(f"/sys/bus/pci/devices/{full_addr}").joinpath(*subpaths)


def _try_pci_formats(pci_addr: str, operation, *args, **kwargs) -> Optional[Any]:
    """
    Call an operation with each plausible spelling of a PCI address.

    The candidates depend on how many colon-separated parts the address has:
    none -> domain-prefixed only; two parts -> domain-prefixed, then as given;
    three parts -> as given; anything else -> no candidates.

    Args:
        pci_addr: PCI address to try
        operation: Function to call with the formatted PCI address
        *args: Additional arguments for the operation
        **kwargs: Additional keyword arguments for the operation

    Returns:
        The first non-None result, or None if every candidate fails
    """
    if ':' not in pci_addr:
        candidates = [f"0000:{pci_addr}"]
    else:
        part_count = len(pci_addr.split(':'))
        if part_count == 2:
            candidates = [f"0000:{pci_addr}", pci_addr]
        elif part_count == 3:
            candidates = [pci_addr]
        else:
            candidates = []

    for candidate in candidates:
        try:
            outcome = operation(candidate, *args, **kwargs)
        except Exception:
            # Best effort: a failing spelling just means "try the next one".
            continue
        if outcome is not None:
            return outcome

    return None


# ============================================================================
# Memory Information
# ============================================================================

def parse_dmidecode() -> List[Dict[str, Any]]:
    """
    Parse dmidecode output and return memory information for each DIMM.

    The output is read as a stream of sections: a "Physical Memory Array"
    section supplies the ECC type for the "Memory Device" sections that
    follow it. A device is stored when the next section header is reached,
    so that DIMMs are kept on systems with more than one memory array.

    Returns:
        List of dictionaries containing DIMM information (vendor, type, size,
        etc.). Only populated slots are returned, i.e. entries with a numeric
        size and a known locator. Empty list if dmidecode cannot be run.
    """
    result: List[Dict[str, Any]] = []
    output = _run_command(['dmidecode', '-t', 'memory'])
    if not output:
        print("# ERROR: Failed to run dmidecode")
        return result

    required_keys = ('vendor', 'type', 'size', 'hardware', 'locator', 'speed')

    def store(entry: Dict[str, Any]) -> None:
        """Fill in missing fields and record a finished device entry."""
        if not entry:
            return
        for key in required_keys:
            if not entry.get(key):
                entry[key] = 'Unknown'
        result.append(entry)

    dimm: Dict[str, Any] = {}
    ecc = "Unknown"

    for line in output.splitlines():
        line = line.strip()
        if line.startswith('Physical Memory Array'):
            # A new array starts: keep the last device of the previous array
            # instead of silently dropping it.
            store(dimm)
            dimm = {}
        elif line.startswith('Error Correction Type:'):
            ecc = line.split(':', 1)[1].strip() or 'Unknown'
        elif line.startswith('Memory Device'):
            store(dimm)
            dimm = {'ecc': ecc}
        elif line.startswith('Manufacturer:'):
            vendor = line.split(':', 1)[1].strip() or 'Unknown'
            dimm['vendor'] = vendor
            vendor_lower = vendor.lower()
            is_virtual = any(v.lower() in vendor_lower for v in VIRTUAL_VENDORS)
            dimm['hardware'] = 'false' if is_virtual else 'true'
        elif line.startswith('Type:'):
            dimm['type'] = line.split(':', 1)[1].strip() or 'Unknown'
        elif line.startswith('Size:') and 'No' not in line:
            # "Size: No Module Installed" marks an empty slot and is skipped.
            # NOTE(review): the unit printed after the number is dropped
            # here - confirm consumers expect the bare number.
            size_match = re.search(r'(\d+)', line)
            dimm['size'] = int(size_match.group(1)) if size_match else 'Unknown'
        elif line.startswith('Locator:'):
            dimm['locator'] = line.split(':', 1)[1].strip() or 'Unknown'
        elif line.startswith('Speed:'):
            dimm['speed'] = line.split(':', 1)[1].strip() or 'Unknown'

    # The final device has no following header to trigger its storage.
    store(dimm)

    return [
        d for d in result
        if isinstance(d.get('size'), int) and d.get('locator') not in [None, 'Unknown']
    ]


def parse_lscpu() -> List[Dict[str, Any]]:
    """
    Parse lscpu output and return CPU information for each socket.

    lscpu reports one set of values for the whole machine, so every socket
    entry carries the same model/vendor/core data; only the socket index and
    the microcode version (looked up per socket) differ.

    Returns:
        List of dictionaries containing CPU information for each socket,
        or an empty list if lscpu cannot be run
    """
    output = _run_command(['lscpu'])
    if not output:
        print("# ERROR: Failed to run lscpu")
        return []

    info: Dict[str, str] = {}
    for line in output.splitlines():
        key, separator, value = line.partition(':')
        if not separator:
            continue
        info[key.strip()] = value.strip()

    def count(field: str) -> int:
        """Read an integer field, treating missing/non-numeric values as 1."""
        # Some lscpu builds print "-" for values they cannot determine; that
        # must not abort the whole collector.
        try:
            return int(info.get(field, '1'))
        except ValueError:
            return 1

    # Extract required fields
    model = info.get('Model name', 'Unknown')
    # "BIOS Vendor ID" is preferred because it is compared against the
    # virtual vendor names below, but it is not reported everywhere; fall
    # back to the CPUID vendor rather than reporting "Unknown".
    vendor = info.get('BIOS Vendor ID') or info.get('Vendor ID') or 'Unknown'
    numa_nodes = info.get('NUMA node(s)', 'Unknown')
    threads_per_core = count('Thread(s) per core')
    cores_per_socket = count('Core(s) per socket')
    sockets = count('Socket(s)')
    features = info.get('Flags', 'Unknown')

    # Detect virtual environment
    vendor_lower = vendor.lower()
    model_lower = model.lower()
    is_virtual = (
        any(v.lower() in vendor_lower or v.lower() in model_lower for v in VIRTUAL_VENDORS)
        or any(vm.lower() in model_lower for vm in VIRTUAL_CPU_MODELS)
    )
    is_hardware = 'false' if is_virtual else 'true'

    # Get microcode info from /proc/cpuinfo
    microcode_info = _get_cpu_microcode_info()
    multithreading = 'true' if threads_per_core > 1 else 'false'

    # Create separate entry for each socket
    return [
        {
            'model': model,
            'vendor': vendor,
            'numa_nodes': numa_nodes,
            'multithreading': multithreading,
            'cores': str(cores_per_socket),
            'features': features,
            'socket': str(socket_id),
            'hardware': is_hardware,
            'microcode': microcode_info.get(str(socket_id), 'Unknown'),
        }
        for socket_id in range(sockets)
    ]


def _get_cpu_microcode_info() -> Dict[str, str]:
    """
    Collect the microcode version of each CPU socket from /proc/cpuinfo.

    /proc/cpuinfo is split on blank lines into per-processor records. Each
    record that has both a "physical id" and a "microcode" field contributes
    one entry; later records for the same socket overwrite earlier ones.

    Returns:
        Dictionary mapping socket ID to microcode version (empty when the
        file is missing or unreadable)
    """
    versions_by_socket: Dict[str, str] = {}
    try:
        with open('/proc/cpuinfo', 'r') as cpuinfo:
            raw = cpuinfo.read()

        for record in raw.split('\n\n'):
            if not record.strip():
                continue

            fields = {}
            for entry in record.split('\n'):
                name, separator, value = entry.partition(':')
                if separator:
                    fields[name.strip()] = value.strip()

            if 'physical id' in fields and 'microcode' in fields:
                versions_by_socket[fields['physical id']] = fields['microcode']
    except (FileNotFoundError, PermissionError):
        pass

    return versions_by_socket


# ============================================================================
# Network Interface Information
# ============================================================================

def _get_lshw_nic_info() -> Dict[str, Dict[str, str]]:
    """
    Collect per-interface hardware details from `lshw -class network`.

    Each "*-network" header starts a new device record. The description,
    product, vendor and logical name fields are captured, and a record is
    kept only if it has a logical name (which becomes its key).

    Returns:
        Dictionary mapping interface name to hardware info; empty if lshw
        produces no output
    """
    by_interface: Dict[str, Dict[str, str]] = {}
    listing = _run_command(['lshw', '-class', 'network'])
    if not listing:
        return by_interface

    captured_fields = ('description', 'product', 'vendor', 'logical name')

    def commit(record: Dict[str, str]) -> None:
        """Store a finished record under its logical name, if it has one."""
        if record and 'logical name' in record:
            by_interface[record['logical name']] = record

    record: Dict[str, str] = {}
    for raw_line in listing.splitlines():
        text = raw_line.strip()
        if not text:
            continue

        if text.startswith('*-network'):
            commit(record)
            record = {}
            continue

        for field in captured_fields:
            if text.startswith(f'{field}:'):
                record[field] = text.split(':', 1)[1].strip()
                break

    # The last device has no following header to trigger its storage.
    commit(record)

    return by_interface


def _get_network_interfaces() -> List[str]:
    """
    Get list of network interfaces excluding loopback.

    Names are taken from the header lines of `ip link show`, which have the
    form "INDEX: NAME: FLAGS ..." or, for stacked/paired devices such as
    VLANs and veth pairs, "INDEX: NAME@PEER: FLAGS ...".

    Returns:
        List of network interface names as they appear under /sys/class/net
        (without any "@PEER" suffix); empty if `ip` cannot be run
    """
    interfaces: List[str] = []
    output = _run_command(['ip', 'link', 'show'])
    if not output:
        return interfaces

    for line in output.splitlines():
        # Indented lines are continuation lines (link/ether ...), not headers.
        if line.startswith(' '):
            continue
        parts = line.split(':')
        if len(parts) < 2:
            continue

        # Drop the "@PEER" decoration: it is not part of the kernel device
        # name, and keeping it breaks every later sysfs/ethtool lookup.
        interface_name = parts[1].strip().split('@', 1)[0]

        # Only the loopback device itself is skipped; a prefix test would
        # also hide real interfaces whose names merely begin with "lo".
        if interface_name and interface_name != 'lo':
            interfaces.append(interface_name)

    return interfaces


def _get_driver_info(interface: str) -> str:
    """
    Look up the kernel driver bound to a network interface.

    The driver is identified by the target of the interface's
    device/driver symlink in sysfs.

    Args:
        interface: Network interface name

    Returns:
        Driver name or 'Unknown'
    """
    driver_link = f"/sys/class/net/{interface}/device/driver"
    try:
        if not os.path.exists(driver_link):
            return 'Unknown'
        link_target = os.readlink(driver_link)
    except (FileNotFoundError, OSError):
        return 'Unknown'

    return os.path.basename(link_target)


def _is_virtual_interface(vendor: str, driver: str, model: str) -> bool:
    """
    Decide whether a network interface looks virtual.

    Vendor and driver are compared by exact match against the known virtual
    lists; the model is checked for any known virtual marker as a
    case-sensitive substring.

    Args:
        vendor: Vendor name
        driver: Driver name
        model: Model name

    Returns:
        True if interface is virtual, False otherwise
    """
    if vendor in VIRTUAL_NIC_VENDORS:
        return True
    if driver in VIRTUAL_NETWORK_DRIVERS:
        return True

    for marker in VIRTUAL_NIC_MODELS:
        if marker in model:
            return True
    return False


def _get_firmware_version(interface: str) -> str:
    """
    Determine the firmware version of a network interface.

    `ethtool -i` is consulted first; if it gives no usable value, the
    device's sysfs firmware_version attribute is used instead.

    Args:
        interface: Network interface name

    Returns:
        Firmware version or 'Unknown'
    """
    placeholders = ['', 'N/A', 'n/a']

    # Preferred source: ethtool driver information
    ethtool_report = _run_command(['ethtool', '-i', interface])
    for row in (ethtool_report or '').splitlines():
        if not row.startswith('firmware-version:'):
            continue
        reported = row.split(':', 1)[1].strip()
        if reported and reported not in placeholders:
            return reported

    # Secondary source: sysfs attribute
    sysfs_value = _read_sysfs_file(f"/sys/class/net/{interface}/device/firmware_version")
    return sysfs_value if sysfs_value else 'Unknown'


def _get_speed_mbps(interface: str, driver: str, is_hardware: str) -> str:
    """
    Determine the link speed of a network interface in Mbps.

    Virtual interfaces get a fixed per-driver value. Hardware interfaces are
    queried through ethtool and, failing that, the sysfs speed attribute.

    Args:
        interface: Network interface name
        driver: Driver name
        is_hardware: 'true' if hardware, 'false' if virtual

    Returns:
        Speed in Mbps or 'Unknown'
    """
    if is_hardware == 'false':
        # Virtual NICs have no physical link; use the per-driver table.
        return VIRTUAL_SPEED_MAP.get(driver, 'Unknown')

    # Hardware: first usable "Speed:" line from ethtool wins
    ethtool_report = _run_command(['ethtool', interface])
    for row in (ethtool_report or '').splitlines():
        if 'Speed:' not in row:
            continue
        digits = re.search(r'(\d+)', row)
        if digits is None:
            # e.g. "Speed: Unknown!" when the link is down
            continue
        value = digits.group(1)
        # Values given in Gb/s are scaled to Mbps
        return str(int(value) * 1000) if 'Gb/s' in row else value

    # Hardware: sysfs reports -1 when the speed is not known
    sysfs_speed = _read_sysfs_file(f"/sys/class/net/{interface}/speed")
    if sysfs_speed and sysfs_speed != '-1' and sysfs_speed.isdigit():
        return sysfs_speed

    return 'Unknown'


def _get_link_status(interface: str) -> str:
    """
    Report whether a network interface currently has link.

    The sysfs carrier attribute is authoritative when readable; otherwise
    the "Link detected" line of ethtool is used.

    Args:
        interface: Network interface name

    Returns:
        'true' if link is up, 'false' otherwise
    """
    # Primary source: sysfs carrier flag
    carrier_flag = _read_sysfs_file(f"/sys/class/net/{interface}/carrier")
    if carrier_flag:
        if carrier_flag == '1':
            return 'true'
        return 'false'

    # Secondary source: ethtool
    ethtool_report = _run_command(['ethtool', interface])
    for row in (ethtool_report or '').splitlines():
        if 'Link detected:' in row:
            answer = row.split(':', 1)[1].strip().lower()
            if answer == 'yes':
                return 'true'
            return 'false'

    return 'false'


def _check_rdma_capability(interface: str) -> str:
    """
    Report whether a network interface is backed by an RDMA-capable device.

    Two indicators are checked: an InfiniBand class device whose resolved
    sysfs path contains the interface's PCI address, or a "roce" entry in
    the interface's device directory.

    Args:
        interface: Network interface name

    Returns:
        'true' if RDMA capable, 'false' otherwise
    """
    infiniband_root = "/sys/class/infiniband"
    nic_device = f"/sys/class/net/{interface}/device"

    # Indicator 1: an InfiniBand class device on the same PCI function
    try:
        if os.path.exists(infiniband_root) and os.path.exists(nic_device):
            pci_addr = os.path.basename(os.path.realpath(nic_device))

            for rdma_name in os.listdir(infiniband_root):
                rdma_device = f"{infiniband_root}/{rdma_name}/device"
                if not os.path.exists(rdma_device):
                    continue
                if pci_addr in os.path.realpath(rdma_device):
                    return 'true'
    except (FileNotFoundError, OSError):
        pass

    # Indicator 2: RoCE entry on the device
    if os.path.exists(f"/sys/class/net/{interface}/device/roce"):
        return 'true'
    return 'false'


def _check_sriov_capability(interface: str) -> str:
    """
    Report whether a network interface supports SR-IOV.

    The device counts as capable when either its sriov_totalvfs attribute
    (supported VFs) or its sriov_numvfs attribute (configured VFs) holds a
    positive number. They are checked in that order.

    Args:
        interface: Network interface name

    Returns:
        'true' if SR-IOV capable, 'false' otherwise
    """
    vf_counters = (
        f"/sys/class/net/{interface}/device/sriov_totalvfs",
        f"/sys/class/net/{interface}/device/sriov_numvfs",
    )

    for counter_path in vf_counters:
        vf_count = _read_sysfs_file(counter_path)
        if vf_count and vf_count.isdigit() and int(vf_count) > 0:
            return 'true'

    return 'false'


def _get_vendor_from_pci(interface: str) -> str:
    """
    Translate an interface's PCI vendor ID into a vendor name.

    The ID is read from the interface's sysfs device/vendor attribute and
    looked up in PCI_VENDOR_MAP.

    Args:
        interface: Network interface name

    Returns:
        Vendor name or 'Unknown'
    """
    vendor_id = _read_sysfs_file(f"/sys/class/net/{interface}/device/vendor")
    if not vendor_id:
        return 'Unknown'
    return PCI_VENDOR_MAP.get(vendor_id, 'Unknown')


def parse_nic_info() -> List[Dict[str, Any]]:
    """
    Gather details for every non-loopback network interface.

    For each interface the driver, lshw model/vendor, firmware, speed, link
    state and RDMA/SR-IOV capability are collected. A missing model falls
    back to the driver name and a missing vendor to the PCI vendor lookup.
    Interfaces for which neither model nor vendor could be determined are
    omitted.

    Returns:
        List of dictionaries containing NIC information
    """
    lshw_by_name = _get_lshw_nic_info()
    nics: List[Dict[str, Any]] = []

    for name in _get_network_interfaces():
        driver = _get_driver_info(name)

        # lshw data, when present for this interface
        lshw_entry = lshw_by_name.get(name, {})
        model = lshw_entry.get('product', 'Unknown')
        vendor = lshw_entry.get('vendor', 'Unknown')

        # Initial classification; the speed lookup below depends on it
        hardware = 'false' if _is_virtual_interface(vendor, driver, model) else 'true'

        firmware_version = _get_firmware_version(name)
        speed_mbps = _get_speed_mbps(name, driver, hardware)
        link = _get_link_status(name)
        rdma = _check_rdma_capability(name)
        sriov = _check_sriov_capability(name)

        # Model fallback: use the driver name
        if model == 'Unknown':
            model = driver

        # Vendor fallback: PCI ID lookup, then re-classify with the new data
        if vendor == 'Unknown':
            vendor = _get_vendor_from_pci(name)
            if _is_virtual_interface(vendor, driver, model):
                hardware = 'false'

        # Skip interfaces about which nothing meaningful is known
        if model == 'Unknown' and vendor == 'Unknown':
            continue

        nics.append({
            'device': name,
            'model': model,
            'vendor': vendor,
            'driver': driver,
            'firmware_version': firmware_version,
            'speed_mbps': speed_mbps,
            'link': link,
            'rdma': rdma,
            'sriov': sriov,
            'hardware': hardware,
        })

    return nics


def _escape_prometheus_label(value: Any) -> str:
    """
    Escape a value for use inside a double-quoted Prometheus label.

    The text exposition format requires backslash, double quote and line
    feed to be written as \\\\, \\" and \\n respectively.

    Args:
        value: Label value; converted with str() first

    Returns:
        The escaped label text
    """
    # Backslashes go first so the escapes added afterwards are not doubled.
    return (
        str(value)
        .replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace('\n', '\\n')
    )


def _print_labelled_info_metric(metric: str, help_text: str,
                                label_names: Tuple[str, ...],
                                rows: List[Dict[str, Any]]) -> None:
    """
    Print an info-style gauge: HELP/TYPE header plus one sample per row.

    Args:
        metric: Metric name
        help_text: Text for the HELP line
        label_names: Labels to emit, in output order; each is also the key
            looked up in a row (missing keys become an empty label value)
        rows: Dictionaries supplying the label values
    """
    print(f'# HELP {metric} {help_text}')
    print(f'# TYPE {metric} gauge')
    for row in rows:
        labels = ','.join(
            f'{name}="{_escape_prometheus_label(row.get(name, ""))}"'
            for name in label_names
        )
        print(f'{metric}{{{labels}}} 1')


def print_memory_prometheus_metric(dimms: List[Dict[str, Any]]) -> None:
    """
    Print memory information in Prometheus metric format.

    Emits the node_hardware_memory_info gauge with value 1 for every DIMM;
    all information is carried in the (escaped) labels.

    Args:
        dimms: List of DIMM information dictionaries
    """
    _print_labelled_info_metric(
        'node_hardware_memory_info',
        'Memory info for each DIMM',
        ('vendor', 'type', 'ecc', 'hardware', 'locator', 'speed', 'size'),
        dimms,
    )


def print_cpu_prometheus_metric(cpu_infos: List[Dict[str, Any]]) -> None:
    """
    Print CPU information in Prometheus metric format.

    Emits the node_hardware_cpu_info gauge with value 1 for every CPU entry;
    all information is carried in the (escaped) labels.

    Args:
        cpu_infos: List of CPU information dictionaries
    """
    _print_labelled_info_metric(
        'node_hardware_cpu_info',
        'CPU info',
        ('model', 'vendor', 'numa_nodes', 'multithreading', 'cores',
         'features', 'socket', 'hardware', 'microcode'),
        cpu_infos,
    )


# ============================================================================
# Disk Information
# ============================================================================

def _get_disk_devices() -> List[str]:
    """
    List whole-disk block devices.

    `lsblk` is the primary source: only entries whose TYPE is "disk" are
    kept. If lsblk yields no output, /sys/block is scanned instead and
    entries with an excluded prefix are skipped. Network block devices are
    removed in either case.

    Returns:
        List of disk device names
    """
    listing = _run_command(['lsblk', '-d', '-n', '-o', 'NAME,TYPE'])

    if listing:
        columns_per_row = (row.split() for row in listing.splitlines())
        found = [
            columns[0]
            for columns in columns_per_row
            if len(columns) >= 2 and columns[1] == 'disk'
        ]
    else:
        # lsblk unavailable: derive the list from sysfs
        found = []
        try:
            block_root = "/sys/block"
            if os.path.exists(block_root):
                found = [
                    entry
                    for entry in os.listdir(block_root)
                    if not entry.startswith(EXCLUDED_DISK_PREFIXES)
                ]
        except (FileNotFoundError, OSError):
            pass

    # lsblk reports NBD devices as type "disk", so filter them here as well
    return [name for name in found if not name.startswith('nbd')]


def _get_disk_smartctl_info(device: str) -> Dict[str, str]:
    """
    Get disk information from smartctl.

    smartctl is invoked directly rather than through a helper that rejects
    non-zero exit codes: its exit status is a bitmask that is non-zero when,
    among other things, the disk is failing. Discarding the output in that
    case would lose the health verdict and identity fields exactly when
    they matter most.

    Args:
        device: Disk device name (without the /dev/ prefix)

    Returns:
        Dictionary containing disk information from smartctl; fields that
        could not be determined are 'Unknown'
    """
    info = {
        'vendor': 'Unknown',
        'model': 'Unknown',
        'serial_number': 'Unknown',
        'firmware': 'Unknown',
        'disk_status': 'Unknown'
    }

    try:
        completed = subprocess.run(
            ['smartctl', '-i', '-H', f'/dev/{device}'],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            errors='replace',
            # The exit status is informational here; the output is parsed
            # regardless of it.
            check=False,
        )
    except OSError:
        # smartctl is not installed or cannot be executed
        return info

    def value_of(text: str) -> str:
        """Return the text after the first colon, or 'Unknown' if empty."""
        return text.split(':', 1)[1].strip() or 'Unknown'

    for line in (completed.stdout or '').splitlines():
        line = line.strip()
        if line.startswith('Vendor:'):
            info['vendor'] = value_of(line)
        elif line.startswith(('Device Model:', 'Model Number:', 'Product:')):
            info['model'] = value_of(line)
        elif line.startswith(('Serial Number:', 'Serial number:')):
            info['serial_number'] = value_of(line)
        elif line.startswith(('Firmware Version:', 'Revision:')):
            info['firmware'] = value_of(line)
        elif 'SMART overall-health' in line or 'SMART Health Status:' in line:
            # ATA/NVMe report "PASSED", SCSI reports "OK"
            healthy = 'PASSED' in line or 'OK' in line
            info['disk_status'] = 'ok' if healthy else 'failed'

    return info


def _get_disk_type(device: str) -> str:
    """
    Classify a disk as nvme, virtio, ssd or hdd.

    The device name decides first ("nvme*", "vd*"), then the resolved sysfs
    device path (virtio), and finally the queue's rotational flag.

    Args:
        device: Disk device name

    Returns:
        Disk controller type string, or 'Unknown' if none of the checks apply
    """
    if device.startswith('nvme'):
        return 'nvme'

    # Virtio is decided before the rotational flag, because virtio devices
    # may report a rotational status of their own.
    if device.startswith('vd'):
        return 'virtio'

    sysfs_device = f"/sys/block/{device}/device"
    if os.path.exists(sysfs_device):
        try:
            resolved = os.path.realpath(sysfs_device)
        except OSError:
            resolved = ''
        if 'virtio' in resolved.lower():
            return 'virtio'

    # rotational: "0" = non-rotating (SSD), "1" = rotating (HDD)
    rotational = _read_sysfs_file(f"/sys/block/{device}/queue/rotational")
    return {'0': 'ssd', '1': 'hdd'}.get(rotational, 'Unknown')


def _is_virtual_disk(vendor: str, model: str, device: str) -> bool:
    """
    Decide whether a disk looks virtual.

    A disk is treated as virtual when its vendor or model contains a known
    virtual marker (case-insensitive), when its name starts with "vd" or
    contains "virtio", or when its resolved sysfs device path mentions
    virtio.

    Args:
        vendor: Disk vendor name
        model: Disk model name
        device: Disk device name

    Returns:
        True if disk is virtual, False otherwise
    """
    # Vendor / model markers
    vendor_lower = vendor.lower()
    for known_vendor in VIRTUAL_DISK_VENDORS:
        if known_vendor.lower() in vendor_lower:
            return True

    model_lower = model.lower()
    for known_model in VIRTUAL_DISK_MODELS:
        if known_model.lower() in model_lower:
            return True

    # Device name markers
    if device.startswith('vd') or 'virtio' in device:
        return True

    # Sysfs topology marker
    sysfs_device = f"/sys/block/{device}/device"
    if not os.path.exists(sysfs_device):
        return False
    try:
        resolved = os.path.realpath(sysfs_device)
    except OSError:
        return False
    return 'virtio' in resolved.lower()


def _check_hardware_raid(device: str) -> str:
    """
    Check if disk is part of a hardware RAID array.

    Two indicators are used: a known RAID controller driver name in the
    disk's resolved sysfs device path, or an `lsscsi` line for this exact
    device node that mentions "raid" or "logical volume".

    Args:
        device: Disk device name (without the /dev/ prefix)

    Returns:
        'true' if part of hardware RAID, 'false' otherwise
    """
    # Check if device is under a RAID controller
    device_path = f"/sys/block/{device}/device"
    if os.path.exists(device_path):
        try:
            resolved = os.path.realpath(device_path).lower()
        except OSError:
            resolved = ''
        # Common RAID controller identifiers in path
        if any(rc in resolved for rc in RAID_CONTROLLERS):
            return 'true'

    # Check using lsscsi for RAID controllers
    output = _run_command(['lsscsi'])
    if output:
        device_node = f'/dev/{device}'
        for line in output.splitlines():
            # Match the device node as a whole column: a substring test
            # would let "sda" match the line for "/dev/sdaa" as well.
            if device_node not in line.split():
                continue
            lowered = line.lower()
            if 'raid' in lowered or 'logical volume' in lowered:
                return 'true'

    return 'false'


def _check_jbod(device: str) -> str:
    """
    Report whether a disk is standalone (JBOD) rather than a RAID member.

    A disk is considered NOT JBOD when any of the following holds:
      * _check_hardware_raid() reports it as hardware RAID,
      * /sys/block/<device>/md exists,
      * /sys/block/<device>/slaves is a non-empty directory,
      * /sys/block/<device>/holders contains an entry starting with 'md'.

    Args:
        device: Disk device name

    Returns:
        'true' if JBOD, 'false' otherwise
    """
    # Hardware RAID membership rules out JBOD immediately
    if _check_hardware_raid(device) == 'true':
        return 'false'

    def _has_md_dir() -> bool:
        # Software RAID (MD) attribute directory on the device itself
        return os.path.exists(f"/sys/block/{device}/md")

    def _has_slaves() -> bool:
        # A populated slaves directory means the device is built from others
        slaves_dir = f"/sys/block/{device}/slaves"
        return bool(os.path.exists(slaves_dir) and os.listdir(slaves_dir))

    def _held_by_md() -> bool:
        # Only 'md*' holders count; other holders (LVM, etc.) are ignored
        holders_dir = f"/sys/block/{device}/holders"
        if not os.path.exists(holders_dir):
            return False
        return any(entry.startswith('md') for entry in os.listdir(holders_dir))

    # Each probe is best-effort: a failing probe is skipped, not fatal
    for probe in (_has_md_dir, _has_slaves, _held_by_md):
        try:
            if probe():
                return 'false'
        except Exception:
            continue

    # Nothing indicated RAID membership: treat as a standalone disk
    return 'true'


def _get_instance_name() -> str:
    """
    Return the name of this instance/node as reported by `hostname`.

    Returns:
        The stripped output of the `hostname` command, or 'Unknown' when
        the command produced no output
    """
    hostname_output = _run_command(['hostname'])
    if not hostname_output:
        return 'Unknown'
    return hostname_output.strip()


def _extract_vendor_from_model(model: str) -> str:
    """
    Guess the disk vendor from the model string.

    The model is upper-cased and compared against DISK_VENDOR_PATTERNS; a
    pattern matches when the model starts with it or contains it preceded
    by a space. The first matching pattern wins.

    Args:
        model: Disk model name

    Returns:
        Extracted vendor name or 'Unknown'
    """
    if not model:
        return 'Unknown'
    if model == 'Unknown':
        return 'Unknown'

    normalized = model.upper()
    candidates = (
        vendor_name
        for pattern, vendor_name in DISK_VENDOR_PATTERNS
        if normalized.startswith(pattern) or f' {pattern}' in normalized
    )
    return next(candidates, 'Unknown')


def _get_disk_vendor_model_from_sysfs(device: str) -> Dict[str, str]:
    """
    Get vendor and model from sysfs if smartctl fails.

    Reads /sys/block/<device>/device/vendor and .../model. The vendor value
    is passed through DISK_PCI_VENDOR_MAP so that a PCI vendor ID is
    translated to a name; any value not present in the map (for example a
    vendor name string) is used unchanged.

    Args:
        device: Disk device name

    Returns:
        Dictionary with 'vendor' and 'model' keys; each stays 'Unknown'
        when the corresponding sysfs attribute yields nothing
    """
    info = {'vendor': 'Unknown', 'model': 'Unknown'}

    device_path = f"/sys/block/{device}/device"

    # The original code retried the very same path in an else-branch when
    # this read came back empty; that retry could never succeed, so a
    # single read is enough.
    if os.path.exists(device_path):
        vendor_value = _read_sysfs_file(os.path.join(device_path, "vendor"))
        if vendor_value:
            info['vendor'] = DISK_PCI_VENDOR_MAP.get(vendor_value, vendor_value)

    # Model is read unconditionally, as before
    model = _read_sysfs_file(f"/sys/block/{device}/device/model")
    if model:
        info['model'] = model

    return info


def parse_disk_info() -> List[Dict[str, Any]]:
    """
    Collect one information record per disk device.

    For every device, defaults are filled in first, then overridden by
    smartctl data, then by sysfs data (vendor/model only, and only where
    still 'Unknown'), then by a vendor guessed from the model string.
    Finally the controller type, virtual/hardware flag and RAID/JBOD flags
    are added.

    Returns:
        List of dictionaries containing disk information (empty when no
        devices were found)
    """
    disks: List[Dict[str, Any]] = []
    device_names = _get_disk_devices()
    hostname = _get_instance_name()

    for name in device_names or ():
        entry: Dict[str, Any] = dict(
            device=name,
            vendor='Unknown',
            model='Unknown',
            firmware='Unknown',
            hardware='true',
            jbod='false',
            hardware_raid='false',
            disk_controller='Unknown',
            disk_status='Unknown',
            instance=hostname,
            serial_number='Unknown',
        )

        # smartctl is the primary source and may override any default
        entry.update(_get_disk_smartctl_info(name))

        # sysfs fills whichever of vendor/model smartctl left unknown
        if 'Unknown' in (entry['vendor'], entry['model']):
            fallback = _get_disk_vendor_model_from_sysfs(name)
            for key in ('vendor', 'model'):
                if entry[key] == 'Unknown':
                    entry[key] = fallback[key]

        # A missing or generic vendor (like "ATA") may be recoverable from
        # the model name
        vendor_is_generic = entry['vendor'] in ['Unknown', 'ATA', '']
        if vendor_is_generic and entry['model'] != 'Unknown':
            guessed_vendor = _extract_vendor_from_model(entry['model'])
            if guessed_vendor != 'Unknown':
                entry['vendor'] = guessed_vendor

        entry['disk_controller'] = _get_disk_type(name)

        # 'hardware' defaults to 'true' and is flipped for virtual disks
        if _is_virtual_disk(entry['vendor'], entry['model'], name):
            entry['hardware'] = 'false'

        entry['hardware_raid'] = _check_hardware_raid(name)
        entry['jbod'] = _check_jbod(name)

        disks.append(entry)

    return disks


def _escape_prom_label_value(value: Any) -> str:
    """
    Escape a value for use inside a quoted Prometheus label.

    The text exposition format requires backslash, double quote and line
    feed to be written as \\\\, \\" and \\n inside label values; anything
    else is passed through.
    """
    text = str(value)
    # Backslash first, otherwise the escapes added below would be doubled
    return text.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')


def _print_prom_info_metric(metric: str, help_text: str,
                            label_names: Tuple[str, ...],
                            rows: List[Dict[str, Any]]) -> None:
    """
    Print one gauge "info" metric: HELP/TYPE header plus one sample per row.

    Each sample has value 1 and carries the given labels, in order; a label
    missing from a row is emitted as an empty string.
    """
    print(f'# HELP {metric} {help_text}')
    print(f'# TYPE {metric} gauge')
    for row in rows:
        labels = ','.join(
            f'{name}="{_escape_prom_label_value(row.get(name, ""))}"'
            for name in label_names
        )
        print(f'{metric}{{{labels}}} 1')


def print_nic_prometheus_metric(nic_infos: List[Dict[str, Any]]) -> None:
    """
    Print NIC information in Prometheus metric format.

    Emits the `node_hardware_nic_info` gauge with one sample per NIC.
    Label values are escaped so that quotes, backslashes or newlines in
    model/firmware strings cannot corrupt the output.

    Args:
        nic_infos: List of NIC information dictionaries
    """
    _print_prom_info_metric(
        'node_hardware_nic_info',
        'Network interface card info',
        (
            'device', 'driver', 'firmware_version', 'speed_mbps', 'link',
            'model', 'vendor', 'rdma', 'sriov', 'hardware',
        ),
        nic_infos,
    )


def print_disk_prometheus_metric(disk_infos: List[Dict[str, Any]]) -> None:
    """
    Print disk information in Prometheus metric format.

    Emits the `node_hardware_disk_info` gauge with one sample per disk.
    Label values are escaped so that quotes, backslashes or newlines in
    model/firmware strings cannot corrupt the output.

    Args:
        disk_infos: List of disk information dictionaries
    """
    _print_prom_info_metric(
        'node_hardware_disk_info',
        'Disk info for each storage device',
        (
            'vendor', 'model', 'firmware', 'hardware', 'jbod',
            'hardware_raid', 'disk_controller', 'device', 'disk_status',
            'instance', 'serial_number',
        ),
        disk_infos,
    )


# ============================================================================
# GPU/Accelerator Information
# ============================================================================

def _get_gpu_devices() -> List[Dict[str, str]]:
    """
    Get list of GPU/accelerator devices from lspci.

    Focuses on data center GPUs and AI accelerators, excluding consumer GPUs.
    A line from `lspci -nn` is selected when it does not contain any of
    GPU_EXCLUDE_KEYWORDS and either
      * contains one of GPU_DATACENTER_KEYWORDS, or
      * describes a '3d controller' / 'processing accelerator' and is not a
        VGA/display controller.

    Returns:
        List of dictionaries with PCI address ('pci_addr') and raw lspci
        line ('raw_line')
    """
    gpus: List[Dict[str, str]] = []

    output = _run_command(['lspci', '-nn'])
    if not output:
        return gpus

    for line in output.splitlines():
        # The PCI address is the first whitespace-separated token. Blank or
        # whitespace-only lines have none and are skipped (indexing the
        # empty token list would raise IndexError).
        tokens = line.split()
        if not tokens:
            continue
        pci_addr = tokens[0]
        line_lower = line.lower()

        # Explicitly exclude storage and network controllers
        if any(keyword in line_lower for keyword in GPU_EXCLUDE_KEYWORDS):
            continue

        is_datacenter = any(keyword in line_lower for keyword in GPU_DATACENTER_KEYWORDS)

        # VGA/display controllers are only kept when they match a data
        # center keyword; 3D controllers and processing accelerators are
        # kept by default.
        is_vga = 'vga compatible' in line_lower or 'display controller' in line_lower
        is_compute_class = (
            '3d controller' in line_lower or 'processing accelerator' in line_lower
        )

        if is_datacenter or (is_compute_class and not is_vga):
            gpus.append({'pci_addr': pci_addr, 'raw_line': line})

    return gpus


def _get_nvidia_gpu_info(pci_addr: str) -> Dict[str, str]:
    """
    Get detailed information for NVIDIA data center GPU using nvidia-smi.
    
    Args:
        pci_addr: PCI address of the GPU
        
    Returns:
        Dictionary containing GPU information
    """
    info = {}
    
    # Try multiple PCI address formats for nvidia-smi
    pci_formats = []
    if ':' in pci_addr:
        parts = pci_addr.split(':')
        if len(parts) == 2:
            # Short format like "d8:00.0" -> try both "0000:d8:00.0" and just the address
            pci_formats.append(f"0000:{pci_addr}")
            pci_formats.append(pci_addr)
        elif len(parts) == 3:
            # Already full format "0000:d8:00.0"
            pci_formats.append(pci_addr)
    else:
        pci_formats.append(f"0000:{pci_addr}")
    
    # Try nvidia-smi with different PCI address formats
    nvidia_smi_success = False
    for pci_formatted in pci_formats:
        try:
            # Try nvidia-smi with data center specific queries
            output = subprocess.check_output(
                ['nvidia-smi', '--query-gpu=gpu_name,driver_version,vbios_version,compute_cap,memory.total,'
                 'pcie.link.gen.max,pcie.link.width.max,mig.mode.current,compute_mode,ecc.mode.current,'
                 'persistence_mode,power.limit,clocks.max.sm,clocks.max.memory',
                 '--format=csv,noheader', '-i', pci_formatted],
                text=True,
                stderr=subprocess.DEVNULL
            )
            
            fields = [f.strip() for f in output.strip().split(',')]
            if len(fields) >= 4:
                info['model'] = fields[0] if fields[0] and fields[0] != '[Not Supported]' else 'Unknown'
                info['driver'] = fields[1] if fields[1] and fields[1] != '[Not Supported]' else 'Unknown'
                info['firmware'] = fields[2] if fields[2] and fields[2] != '[Not Supported]' else 'Unknown'
                
                # Build comprehensive features string for data center GPUs
                features = []
                if len(fields) > 3 and fields[3] and fields[3] != '[Not Supported]':
                    features.append(f"compute_cap:{fields[3]}")
                if len(fields) > 4 and fields[4] and fields[4] != '[Not Supported]':
                    features.append(f"memory:{fields[4]}")
                if len(fields) > 5 and fields[5] and fields[5] != '[Not Supported]':
                    features.append(f"pcie_gen:{fields[5]}")
                if len(fields) > 6 and fields[6] and fields[6] != '[Not Supported]':
                    features.append(f"pcie_width:{fields[6]}")
                if len(fields) > 7 and fields[7] and fields[7] != '[Not Supported]':
                    features.append(f"mig:{fields[7]}")
                if len(fields) > 8 and fields[8] and fields[8] != '[Not Supported]':
                    features.append(f"compute_mode:{fields[8]}")
                if len(fields) > 9 and fields[9] and fields[9] != '[Not Supported]':
                    features.append(f"ecc:{fields[9]}")
                if len(fields) > 10 and fields[10] and fields[10] != '[Not Supported]':
                    features.append(f"persistence:{fields[10]}")
                if len(fields) > 11 and fields[11] and fields[11] != '[Not Supported]':
                    features.append(f"power_limit:{fields[11]}")
                if len(fields) > 12 and fields[12] and fields[12] != '[Not Supported]':
                    features.append(f"max_sm_clock:{fields[12]}")
                if len(fields) > 13 and fields[13] and fields[13] != '[Not Supported]':
                    features.append(f"max_mem_clock:{fields[13]}")
                
                info['features'] = ','.join(features) if features else 'Unknown'
                nvidia_smi_success = True
                
            # Also try to get GPU architecture
            if nvidia_smi_success:
                try:
                    arch_output = subprocess.check_output(
                        ['nvidia-smi', '--query-gpu=gpu_name', '--format=csv,noheader', '-i', pci_formatted],
                        text=True,
                        stderr=subprocess.DEVNULL
                    ).strip()
                    
                    # Determine architecture from model name
                    arch = 'Unknown'
                    if any(x in arch_output.upper() for x in ['H100', 'H200']):
                        arch = 'Hopper'
                    elif any(x in arch_output.upper() for x in ['A100', 'A40', 'A30', 'A10', 'A16', 'A2']):
                        arch = 'Ampere'
                    elif any(x in arch_output.upper() for x in ['V100']):
                        arch = 'Volta'
                    elif any(x in arch_output.upper() for x in ['P100', 'P40']):
                        arch = 'Pascal'
                    elif any(x in arch_output.upper() for x in ['T4', 'TESLA T4']):
                        arch = 'Turing'
                    elif any(x in arch_output.upper() for x in ['L4', 'L40']):
                        arch = 'Ada'
                    
                    if arch != 'Unknown' and info.get('features'):
                        info['features'] = f"arch:{arch}," + info['features']
                    elif arch != 'Unknown':
                        info['features'] = f"arch:{arch}"
                except Exception:
                    pass
            
            # If we got valid info, break the loop
            if nvidia_smi_success:
                break
                
        except Exception:
            continue
    
    # If nvidia-smi didn't work with PCI address, try without specifying device (last resort)
    # This will only work if there's a single GPU, but better than nothing
    if not nvidia_smi_success or info.get('driver') == 'Unknown' or info.get('firmware') == 'Unknown':
        try:
            output = subprocess.check_output(
                ['nvidia-smi', '--query-gpu=gpu_name,driver_version,vbios_version',
                 '--format=csv,noheader'],
                text=True,
                stderr=subprocess.DEVNULL
            )
            
            lines = output.strip().split('\n')
            # If there's exactly one GPU, we can safely use this info
            if len(lines) == 1:
                fields = [f.strip() for f in lines[0].split(',')]
                if len(fields) >= 3:
                    if not info.get('model') or info['model'] == 'Unknown':
                        info['model'] = fields[0] if fields[0] and fields[0] != '[Not Supported]' else info.get('model', 'Unknown')
                    if not info.get('driver') or info['driver'] == 'Unknown':
                        info['driver'] = fields[1] if fields[1] and fields[1] != '[Not Supported]' else info.get('driver', 'Unknown')
                    if not info.get('firmware') or info['firmware'] == 'Unknown':
                        info['firmware'] = fields[2] if fields[2] and fields[2] != '[Not Supported]' else info.get('firmware', 'Unknown')
        except Exception:
            pass
    
    return info


def _get_amd_gpu_info(pci_addr: str) -> Dict[str, str]:
    """
    Get detailed information for AMD Instinct data center GPU using rocm-smi.

    Model, firmware (VBIOS) and driver version come from a single
    `rocm-smi` invocation; extra features come from the device's sysfs
    directory under /sys/bus/pci/devices.

    Note: the rocm-smi call is not restricted to `pci_addr`, so when its
    output covers several GPUs the last value seen for each field is used.

    Args:
        pci_addr: PCI address of the GPU (with or without domain)

    Returns:
        Dictionary containing any of 'model', 'firmware', 'driver' and
        'features' (comma-separated "label:value" items)
    """
    info: Dict[str, str] = {}

    # (lower-cased label as printed by rocm-smi, internal key)
    field_labels = (
        ('card series:', 'series'),
        ('card model:', 'card_model'),
        ('vbios version:', 'firmware'),
        ('driver version:', 'driver'),
    )

    output = _run_command(['rocm-smi', '--showproductname', '--showvbios', '--showmeminfo', 'vram', '--showdriverversion'])
    if output:
        parsed: Dict[str, str] = {}
        for line in output.splitlines():
            line_lower = line.lower()
            for label, key in field_labels:
                pos = line_lower.find(label)
                if pos < 0:
                    continue
                # Take everything AFTER the label. Splitting the whole line
                # on ':' is wrong because per-device lines usually look
                # like "GPU[0] : Card series: <value>", where the second
                # ':'-separated piece is the label itself, not the value.
                value = line[pos + len(label):].strip()
                if value:
                    parsed[key] = value
                break

        # Prefer the series name; fall back to the card model field
        model = parsed.get('series') or parsed.get('card_model')
        if model:
            info['model'] = model
        if 'firmware' in parsed:
            info['firmware'] = parsed['firmware']
        if 'driver' in parsed:
            info['driver'] = parsed['driver']

    # Get additional info from sysfs
    features: List[str] = []
    try:
        # sysfs entries are always domain-qualified; add the default domain
        # only when the address does not already carry one
        sysfs_addr = pci_addr if pci_addr.count(':') >= 2 else f"0000:{pci_addr}"
        pci_path = f"/sys/bus/pci/devices/{sysfs_addr}"

        if os.path.exists(pci_path):
            # Memory info
            mem_path = os.path.join(pci_path, "mem_info_vram_total")
            if os.path.exists(mem_path):
                with open(mem_path, 'r') as f:
                    mem_bytes = int(f.read().strip())
                mem_gb = mem_bytes // (1024 * 1024 * 1024)
                features.append(f"memory:{mem_gb}GB")

            # GPU utilization capability
            if os.path.exists(os.path.join(pci_path, "gpu_busy_percent")):
                features.append("monitoring:supported")

            # PCIe info
            current_link_speed_path = os.path.join(pci_path, "current_link_speed")
            if os.path.exists(current_link_speed_path):
                with open(current_link_speed_path, 'r') as f:
                    link_speed = f.read().strip()
                if link_speed:
                    features.append(f"pcie:{link_speed}")

            # Presence of the 'ras' entry is reported as ECC support
            if os.path.exists(os.path.join(pci_path, "ras")):
                features.append("ecc:supported")
    except (OSError, ValueError):
        # Unreadable or malformed sysfs attribute: keep what was collected
        pass

    # Determine architecture from model name
    if 'model' in info:
        arch = 'Unknown'
        model_upper = info['model'].upper()
        if any(x in model_upper for x in ['MI300', 'MI3']):
            arch = 'CDNA3'
        elif any(x in model_upper for x in ['MI250', 'MI210', 'MI2']):
            arch = 'CDNA2'
        elif any(x in model_upper for x in ['MI100', 'MI1']):
            arch = 'CDNA'
        elif 'INSTINCT' in model_upper:
            arch = 'Instinct'

        if arch != 'Unknown':
            # Architecture always leads the feature list
            features.insert(0, f"arch:{arch}")

    if features:
        info['features'] = ','.join(features)

    return info


def _get_intel_gpu_info(pci_addr: str) -> Dict[str, str]:
    """
    Get detailed information for Intel data center GPU.

    Model and architecture come from looking up the sysfs PCI device id in
    INTEL_DATACENTER_GPU_MAP. Memory size is estimated from the first large
    (> 1 GiB) region among the first three lines of the sysfs `resource`
    file. PCIe link speed/width are read from sysfs, and the driver version
    is taken from `modinfo i915` when it reports one.

    Args:
        pci_addr: PCI address of the GPU (with or without domain)

    Returns:
        Dictionary containing any of 'model', 'driver' and 'features'
        (comma-separated "label:value" items)
    """
    info: Dict[str, str] = {}
    features: List[str] = []
    one_gib = 1024 * 1024 * 1024

    # Try the address with the default domain first, then as given
    for pci_format in [f"0000:{pci_addr}", pci_addr]:
        try:
            pci_path = f"/sys/bus/pci/devices/{pci_format}"
            if not os.path.exists(pci_path):
                continue

            # PCI device id -> (model name, architecture)
            device_id = _read_sysfs_file(os.path.join(pci_path, "device"))
            if device_id and device_id in INTEL_DATACENTER_GPU_MAP:
                model_name, arch = INTEL_DATACENTER_GPU_MAP[device_id]
                info['model'] = model_name
                features.append(f"arch:{arch}")

            # Memory estimate from the size of the first big PCI region
            resource_path = os.path.join(pci_path, "resource")
            if os.path.exists(resource_path):
                try:
                    with open(resource_path, 'r') as f:
                        region_lines = f.readlines()[:3]
                    for line in region_lines:
                        parts = line.split()
                        if len(parts) < 2:
                            continue
                        start = int(parts[0], 16)
                        end = int(parts[1], 16)
                        if not (start and end):
                            # Unassigned region
                            continue
                        size_bytes = end - start + 1
                        if size_bytes > one_gib:
                            size_gb = size_bytes // one_gib
                            if size_gb > 1:
                                features.append(f"memory:{size_gb}GB")
                                break
                except (OSError, ValueError):
                    # Unreadable file or non-hex content: skip the estimate
                    pass

            # PCIe information
            current_link_speed_path = os.path.join(pci_path, "current_link_speed")
            current_link_width_path = os.path.join(pci_path, "current_link_width")

            if os.path.exists(current_link_speed_path):
                with open(current_link_speed_path, 'r') as f:
                    link_speed = f.read().strip()
                if link_speed:
                    features.append(f"pcie_speed:{link_speed}")

            if os.path.exists(current_link_width_path):
                with open(current_link_width_path, 'r') as f:
                    link_width = f.read().strip()
                if link_width:
                    features.append(f"pcie_width:x{link_width}")

            # The device directory was found and processed; stop here
            break
        except Exception:
            # Best effort: any failure with this spelling falls through to
            # the next one
            continue

    # NOTE: an earlier revision also ran `intel_gpu_top -l` here with a 2 s
    # timeout and discarded its output. It contributed nothing to the
    # result and could only add latency, so the call was dropped.

    # Try to get driver version from modinfo if i915 driver is in use
    try:
        output = subprocess.check_output(['modinfo', 'i915'], text=True, stderr=subprocess.DEVNULL)
    except (subprocess.SubprocessError, OSError, ValueError):
        output = ''
    for line in output.splitlines():
        if line.startswith('version:'):
            driver_version = line.split(':', 1)[1].strip()
            if driver_version:
                info['driver'] = f"i915:{driver_version}"
                break

    if features:
        info['features'] = ','.join(features)

    return info


def _get_other_accelerator_info(pci_addr: str, raw_line: str) -> Dict[str, str]:
    """
    Get information for other AI/compute accelerators (Habana, Graphcore, Xilinx, etc.).
    
    Args:
        pci_addr: PCI address of the accelerator
        raw_line: Raw lspci output line
        
    Returns:
        Dictionary containing accelerator information
    """
    info = {}
    features = []
    
    raw_lower = raw_line.lower() if raw_line else ''
    
    # Habana Gaudi/Goya
    if any(x in raw_lower for x in ['habana', 'gaudi', 'goya']):
        info['vendor'] = 'Habana Labs (Intel)'
        if 'gaudi' in raw_lower:
            if 'gaudi2' in raw_lower or 'gaudi 2' in raw_lower:
                info['model'] = 'Habana Gaudi2'
                features.append('arch:Gaudi2')
            elif 'gaudi3' in raw_lower or 'gaudi 3' in raw_lower:
                info['model'] = 'Habana Gaudi3'
                features.append('arch:Gaudi3')
            else:
                info['model'] = 'Habana Gaudi'
                features.append('arch:Gaudi')
        elif 'goya' in raw_lower:
            info['model'] = 'Habana Goya'
            features.append('arch:Goya')
    
    # Graphcore IPU
    elif any(x in raw_lower for x in ['graphcore', 'ipu']):
        info['vendor'] = 'Graphcore'
        if 'mk2' in raw_lower:
            info['model'] = 'Graphcore IPU Mk2'
            features.append('arch:Colossus Mk2')
        else:
            info['model'] = 'Graphcore IPU'
            features.append('arch:Colossus')
    
    # Xilinx/AMD FPGAs (Alveo, Versal)
    elif any(x in raw_lower for x in ['xilinx', 'alveo', 'versal']):
        info['vendor'] = 'Xilinx (AMD)'
        if 'alveo' in raw_lower:
            # Try to extract model (U50, U250, U280, etc.)
            for model in ['u30', 'u50', 'u55', 'u200', 'u250', 'u280', 'u520']:
                if model in raw_lower:
                    info['model'] = f'Xilinx Alveo {model.upper()}'
                    break
            if 'model' not in info:
                info['model'] = 'Xilinx Alveo'
            features.append('type:FPGA')
        elif 'versal' in raw_lower:
            info['model'] = 'Xilinx Versal'
            features.append('type:ACAP')
    
    # Google TPU (if exposed via PCIe)
    elif 'tpu' in raw_lower or 'tensor processing unit' in raw_lower:
        info['vendor'] = 'Google'
        info['model'] = 'Tensor Processing Unit'
        features.append('type:TPU')
    
    # Cerebras (if exposed)
    elif 'cerebras' in raw_lower:
        info['vendor'] = 'Cerebras'
        info['model'] = 'Wafer Scale Engine'
        features.append('type:WSE')
    
    # SambaNova
    elif 'sambanova' in raw_lower:
        info['vendor'] = 'SambaNova'
        info['model'] = 'DataScale'
        features.append('type:RDU')
    
    # Try to get generic PCIe info
    try:
        pci_path = f"/sys/bus/pci/devices/0000:{pci_addr}"
        
        if os.path.exists(pci_path):
            # PCIe information
            current_link_speed_path = os.path.join(pci_path, "current_link_speed")
            current_link_width_path = os.path.join(pci_path, "current_link_width")
            
            if os.path.exists(current_link_speed_path):
                with open(current_link_speed_path, 'r') as f:
                    link_speed = f.read().strip()
                    if link_speed:
                        features.append(f"pcie_speed:{link_speed}")
            
            if os.path.exists(current_link_width_path):
                with open(current_link_width_path, 'r') as f:
                    link_width = f.read().strip()
                    if link_width:
                        features.append(f"pcie_width:x{link_width}")
    except Exception:
        pass
    
    if features:
        info['features'] = ','.join(features)
    
    return info


def _is_virtual_gpu(vendor: str, model: str, raw_line: str) -> bool:
    """
    Decide whether a GPU looks like an emulated/virtual adapter.

    Checks, in order and stopping at the first hit: the vendor against
    VIRTUAL_GPU_VENDORS, the model against VIRTUAL_GPU_KEYWORDS, and the
    raw lspci line against a fixed list of virtual display adapter names.
    All comparisons are case-insensitive substring matches.

    Args:
        vendor: GPU vendor name
        model: GPU model name
        raw_line: Raw lspci output line (may be empty)

    Returns:
        True if GPU is virtual, False otherwise
    """
    for virtual_vendor in VIRTUAL_GPU_VENDORS:
        if virtual_vendor.lower() in vendor.lower():
            return True

    for keyword in VIRTUAL_GPU_KEYWORDS:
        if keyword.lower() in model.lower():
            return True

    # Without an lspci line there is nothing left to inspect
    if not raw_line:
        return False

    description = raw_line.lower()
    virtual_markers = ['cirrus logic', 'vmware svga', 'qxl', 'virtio-gpu', 'red hat', 'bochs']
    for marker in virtual_markers:
        if marker in description:
            return True
    return False


def _get_gpu_vendor_model_from_lspci(pci_addr: str, raw_line: str) -> Tuple[str, str]:
    """
    Get vendor and model from lspci output.
    
    Args:
        pci_addr: PCI address of the GPU
        raw_line: Raw lspci output line
        
    Returns:
        Tuple of (vendor, model)
    """
    vendor = 'Unknown'
    model = 'Unknown'
    
    try:
        # Extract vendor and model from raw line
        # Format: "01:00.0 VGA compatible controller: NVIDIA Corporation GP107 [GeForce GTX 1050 Ti] (rev a1)"
        if ':' in raw_line:
            parts = raw_line.split(':', 2)
            if len(parts) >= 3:
                desc = parts[2].strip()
                # Split by first bracket or use full description
                if '[' in desc:
                    vendor_part = desc.split('[')[0].strip()
                    model_part = desc.split('[')[1].split(']')[0].strip() if '[' in desc else desc
                    
                    # Extract vendor (usually first word or two)
                    vendor_words = vendor_part.split()
                    if vendor_words:
                        # Common vendors
                        if 'NVIDIA' in vendor_part or 'nvidia' in vendor_part.lower():
                            vendor = 'NVIDIA'
                        elif 'AMD' in vendor_part or 'Advanced Micro Devices' in vendor_part:
                            vendor = 'AMD'
                        elif 'Intel' in vendor_part:
                            vendor = 'Intel'
                        elif 'ASPEED' in vendor_part:
                            vendor = 'ASPEED'
                        elif 'Matrox' in vendor_part:
                            vendor = 'Matrox'
                        elif 'VMware' in vendor_part:
                            vendor = 'VMware'
                        elif 'Red Hat' in vendor_part:
                            vendor = 'Red Hat'
                        else:
                            vendor = vendor_words[0]
                    
                    model = model_part
                else:
                    # No bracket, use the description
                    words = desc.split()
                    if len(words) > 0:
                        vendor = words[0]
                        model = ' '.join(words[1:]) if len(words) > 1 else 'Unknown'
        
        # Also try lspci with verbose mode for more details
        output = subprocess.check_output(['lspci', '-v', '-s', pci_addr], text=True, stderr=subprocess.DEVNULL)
        for line in output.splitlines():
            if line.startswith('\t') and 'Subsystem:' in line:
                subsystem = line.split('Subsystem:', 1)[1].strip()
                if model == 'Unknown' and subsystem:
                    model = subsystem
    except Exception:
        pass
    
    return vendor, model


# Lower-case substrings identifying consumer/integrated GPU models; devices
# whose model contains one of these are left out of the accelerator list.
_CONSUMER_GPU_MODEL_PATTERNS = (
    'geforce', 'radeon rx', 'hd graphics', 'uhd graphics', 'iris',
    'arc a3', 'arc a5', 'arc a7',
)


def _read_gpu_sysfs_attr(path: str) -> str:
    """Return the stripped text content of *path*, or '' if it cannot be read."""
    try:
        with open(path, 'r') as f:
            return f.read().strip()
    except (OSError, ValueError):
        # Missing file, permission problem or undecodable content: treat as absent.
        return ''


def _run_lspci_for_gpu(args: List[str]) -> str:
    """Run ``lspci`` with *args* and return its stdout, or '' if the call fails."""
    try:
        return subprocess.check_output(
            ['lspci', *args], text=True, stderr=subprocess.DEVNULL
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # lspci missing, non-zero exit status, or undecodable output.
        return ''


def _gpu_loaded_driver_status(vendor: str, driver: str) -> str:
    """Return 'suboptimal' for an NVIDIA device bound to nouveau, else 'loaded'."""
    if 'NVIDIA' in vendor and driver == 'nouveau':
        return 'suboptimal'
    return 'loaded'


def _detect_gpu_driver(pci_addr: str, sysfs_names: List[str],
                       vendor: str) -> Optional[Tuple[str, str]]:
    """
    Determine the kernel driver of a PCI device.

    The sysfs ``driver`` symlink is tried first for every name in
    *sysfs_names*; ``lspci -k`` is the fallback.

    Args:
        pci_addr: PCI address as passed to ``lspci -s``
        sysfs_names: Candidate directory names under /sys/bus/pci/devices
        vendor: Vendor string used to flag a suboptimal driver

    Returns:
        ``(driver, driver_status)`` or None when nothing could be determined
    """
    for dev in sysfs_names:
        try:
            driver = os.path.basename(os.readlink(f"/sys/bus/pci/devices/{dev}/driver"))
        except OSError:
            # No such device directory, or no driver bound to it.
            continue
        if driver:
            return driver, _gpu_loaded_driver_status(vendor, driver)

    available_modules = None
    for line in _run_lspci_for_gpu(['-k', '-s', pci_addr]).splitlines():
        if 'Kernel driver in use:' in line:
            driver = line.split(':', 1)[1].strip()
            if driver:
                return driver, _gpu_loaded_driver_status(vendor, driver)
        elif 'Kernel modules:' in line:
            available_modules = line.split(':', 1)[1].strip()

    # No driver bound, but the kernel knows modules that could drive the device.
    if available_modules:
        first_module = available_modules.split(',')[0].strip()
        return f"none (available: {first_module})", 'not_loaded'
    return None


def _gpu_firmware_from_lspci(pci_addr: str) -> str:
    """
    Look for a version string on the "Expansion ROM" line of ``lspci -vvv``.

    lspci annotates that line with bracketed flags such as ``[disabled]``,
    ``[virtual]``, ``[enhanced]`` and ``[size=512K]``. Those describe the ROM
    BAR, not a firmware version, so they are ignored; only a bracketed token
    that is none of these is returned.

    Returns:
        The version token, or '' if none is present
    """
    for line in _run_lspci_for_gpu(['-vvv', '-s', pci_addr]).splitlines():
        text = line.strip()
        if 'Expansion ROM' not in text or 'disabled' in text.lower():
            continue
        for token in re.findall(r'\[([^\]]*)\]', text):
            token = token.strip()
            lowered = token.lower()
            if not token or lowered in ('virtual', 'enhanced') or lowered.startswith('size='):
                continue
            return token
    return ''


def _nvidia_vbios_from_proc(normalized_pci_addr: str) -> str:
    """
    Read the VBIOS version of one NVIDIA GPU from /proc/driver/nvidia/gpus.

    Only the entry belonging to *normalized_pci_addr* is consulted, so a GPU
    never reports the VBIOS of a different card.
    NOTE(review): assumes the per-GPU directories are named by PCI address
    (domain:bus:device.function), as described in the NVIDIA driver README.

    Returns:
        The VBIOS version, or '' if it is not available
    """
    base_dir = '/proc/driver/nvidia/gpus'
    try:
        entries = os.listdir(base_dir)
    except OSError:
        return ''

    wanted = normalized_pci_addr.lower()
    for entry in entries:
        # endswith tolerates a wider zero-padded domain in the directory name.
        if not entry.lower().endswith(wanted):
            continue
        try:
            with open(os.path.join(base_dir, entry, 'information'), 'r') as f:
                for line in f:
                    if 'VBIOS Version' in line or 'Video BIOS' in line:
                        vbios = line.partition(':')[2].strip()
                        if vbios:
                            return vbios
        except (OSError, ValueError):
            continue
    return ''


def _detect_gpu_firmware(pci_addr: str, normalized_pci_addr: str,
                         sysfs_names: List[str], vendor: str) -> str:
    """
    Determine a firmware/VBIOS identifier for a PCI device.

    Sources, in order: ``lspci -vvv``; then for every sysfs name the
    ``firmware_version`` and ``vbios_version`` attributes, the NVIDIA proc
    interface (NVIDIA vendors only), and finally the PCI ``revision`` of a
    device that exposes a ``rom`` attribute.

    Returns:
        The firmware string, or 'Unknown' if no source yields one
    """
    firmware = _gpu_firmware_from_lspci(pci_addr)
    if firmware:
        return firmware

    for dev in sysfs_names:
        base = f"/sys/bus/pci/devices/{dev}"

        for attr in ('firmware_version', 'vbios_version'):
            firmware = _read_gpu_sysfs_attr(f"{base}/{attr}")
            if firmware:
                return firmware

        if 'NVIDIA' in vendor:
            firmware = _nvidia_vbios_from_proc(normalized_pci_addr)
            if firmware:
                return firmware

        # Last resort: report the PCI revision when the device has a ROM.
        if os.path.exists(f"{base}/rom"):
            rev = _read_gpu_sysfs_attr(f"{base}/revision")
            if rev and rev != '0x00':
                return f"rev:{rev}"

    return 'Unknown'


def _collect_gpu_pcie_features(normalized_pci_addr: str) -> str:
    """
    Build the generic feature string from the device's PCIe link attributes.

    Returns:
        Comma-separated ``pcie_speed:``/``pcie_width:`` entries, or 'Unknown'
        if neither attribute could be read
    """
    base = f"/sys/bus/pci/devices/{normalized_pci_addr}"
    features = []

    link_speed = _read_gpu_sysfs_attr(f"{base}/current_link_speed")
    if link_speed:
        features.append(f"pcie_speed:{link_speed}")

    link_width = _read_gpu_sysfs_attr(f"{base}/current_link_width")
    if link_width:
        features.append(f"pcie_width:x{link_width}")

    return ','.join(features) if features else 'Unknown'


def parse_gpu_info() -> List[Dict[str, Any]]:
    """
    Parse GPU/accelerator information and return data center GPU/accelerator details.

    For every device reported by ``_get_gpu_devices()`` the vendor and model
    are taken from lspci, refined by the vendor-specific collectors, and then
    completed with generic driver, firmware and PCIe link information for any
    field that is still 'Unknown'. Devices whose model matches a
    consumer/integrated pattern are dropped.

    Returns:
        List of dictionaries containing GPU/accelerator information. Every
        value is a string; ``device_index`` is the position of the device in
        the ``_get_gpu_devices()`` list (skipped devices keep their index).
    """
    result = []

    for gpu_index, gpu in enumerate(_get_gpu_devices() or []):
        pci_addr = gpu['pci_addr']
        raw_line = gpu['raw_line']

        # Normalize PCI address to include domain
        normalized_pci_addr = f"0000:{pci_addr}" if pci_addr.count(':') == 1 else pci_addr

        # Directory names to try under /sys/bus/pci/devices, most likely first.
        sysfs_names = [normalized_pci_addr]
        if pci_addr != normalized_pci_addr:
            sysfs_names.append(pci_addr)

        # Get basic vendor and model from lspci
        vendor, model = _get_gpu_vendor_model_from_lspci(pci_addr, raw_line)

        gpu_info = {
            'vendor': vendor,
            'model': model,
            'firmware': 'Unknown',
            'hardware': 'true',
            'features': 'Unknown',
            'pci_addr': normalized_pci_addr,
            'device_index': str(gpu_index),
            'driver': 'Unknown',
            'driver_status': 'unknown'  # Can be: loaded, not_loaded, suboptimal, unknown
        }

        # Check for other accelerators first (Habana, Graphcore, Xilinx, etc.),
        # then fall back to the vendor-specific collector.
        other_accel_info = _get_other_accelerator_info(pci_addr, raw_line)
        if other_accel_info.get('vendor') and other_accel_info.get('vendor') != 'Unknown':
            details = other_accel_info
        elif 'nvidia' in vendor.lower():
            details = _get_nvidia_gpu_info(pci_addr)
        elif 'AMD' in vendor or 'Advanced Micro Devices' in vendor:
            details = _get_amd_gpu_info(pci_addr)
        elif 'Intel' in vendor:
            details = _get_intel_gpu_info(pci_addr)
        else:
            details = {}
        # Empty values must not overwrite what is already known.
        gpu_info.update({k: v for k, v in details.items() if v})

        if gpu_info['driver'] == 'Unknown':
            detected = _detect_gpu_driver(pci_addr, sysfs_names, gpu_info['vendor'])
            if detected is not None:
                gpu_info['driver'], gpu_info['driver_status'] = detected

        # Check if virtual
        if _is_virtual_gpu(gpu_info['vendor'], gpu_info['model'], raw_line):
            gpu_info['hardware'] = 'false'

        if gpu_info['firmware'] == 'Unknown':
            gpu_info['firmware'] = _detect_gpu_firmware(
                pci_addr, normalized_pci_addr, sysfs_names, gpu_info['vendor']
            )

        # Build generic features if not already set
        if gpu_info['features'] == 'Unknown':
            gpu_info['features'] = _collect_gpu_pcie_features(normalized_pci_addr)

        # Skip devices we know nothing about.
        if gpu_info['vendor'] == 'Unknown' and gpu_info['model'] == 'Unknown':
            continue

        # Filter out consumer GPUs that might have slipped through
        model_lower = gpu_info['model'].lower()
        if any(pattern in model_lower for pattern in _CONSUMER_GPU_MODEL_PATTERNS):
            continue

        result.append(gpu_info)

    return result


def print_gpu_prometheus_metric(gpu_infos: List[Dict[str, Any]]) -> None:
    """
    Print GPU/accelerator information in Prometheus metric format.

    The HELP and TYPE lines are always printed, followed by one sample with
    value 1 per entry in *gpu_infos*. Label values are escaped as required by
    the Prometheus text exposition format, so a quote, backslash or newline
    in a value cannot produce an invalid line.

    Args:
        gpu_infos: List of GPU/accelerator information dictionaries
    """
    def escape_label_value(value: Any) -> str:
        # Backslash must be escaped first so the escapes added for the quote
        # and the newline are not themselves doubled.
        return (
            str(value)
            .replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
        )

    # (label name, value used when the key is missing), in output order.
    label_defaults = (
        ('device_index', ''),
        ('vendor', ''),
        ('model', ''),
        ('driver', ''),
        ('driver_status', 'unknown'),
        ('firmware', ''),
        ('hardware', ''),
        ('pci_addr', ''),
        ('features', ''),
    )

    metric = 'node_hardware_accelerator_info'
    print(f'# HELP {metric} Data center GPU and AI accelerator info')
    print(f'# TYPE {metric} gauge')
    for gpu_info in gpu_infos:
        labels = ','.join(
            f'{name}="{escape_label_value(gpu_info.get(name, default))}"'
            for name, default in label_defaults
        )
        print(f'{metric}{{{labels}}} 1')


if __name__ == "__main__":
    # Each collector is paired with the printer of its metric family. The
    # pairs run in order (memory, CPU, NIC, disk, GPU/accelerator), and a
    # family is printed only when its collector returned a non-empty result.
    collectors_and_printers = (
        (parse_dmidecode, print_memory_prometheus_metric),
        (parse_lscpu, print_cpu_prometheus_metric),
        (parse_nic_info, print_nic_prometheus_metric),
        (parse_disk_info, print_disk_prometheus_metric),
        (parse_gpu_info, print_gpu_prometheus_metric),
    )
    for collect, emit in collectors_and_printers:
        collected = collect()
        if collected:
            emit(collected)
