"""Build script for aphrodite-engine and its CMake-driven native extensions."""
import io
import os
import re
import subprocess
import sys
from shutil import which
from typing import List, Set  # noqa: F401

import torch
from packaging.version import parse, Version
from setuptools import setup, find_packages, Extension
from setuptools.command.build_ext import build_ext
from torch.utils.cpp_extension import CUDA_HOME

ROOT_DIR = os.path.dirname(__file__)

# Builds against this CUDA version get no "+cuXYZ" local version suffix.
MAIN_CUDA_VERSION = "12.1"


def is_sccache_available() -> bool:
    """Return True if an ``sccache`` executable is on PATH."""
    return which("sccache") is not None


def is_ccache_available() -> bool:
    """Return True if a ``ccache`` executable is on PATH."""
    return which("ccache") is not None


def is_ninja_available() -> bool:
    """Return True if a ``ninja`` executable is on PATH."""
    return which("ninja") is not None


def remove_prefix(text, prefix):
    """Return ``text`` without a leading ``prefix`` (unchanged if absent).

    Kept as a helper because ``str.removeprefix`` needs Python 3.9 while
    this package declares ``python_requires=">=3.8"``.
    """
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


class CMakeExtension(Extension):
    """A setuptools extension whose sources are built by CMake.

    No sources are passed to setuptools; only the directory holding the
    CMakeLists.txt is recorded (as an absolute path).
    """

    def __init__(self, name: str, cmake_lists_dir: str = '.', **kwa) -> None:
        super().__init__(name, sources=[], **kwa)
        self.cmake_lists_dir = os.path.abspath(cmake_lists_dir)


class cmake_build_ext(build_ext):
    """``build_ext`` command that configures and builds extensions via CMake."""

    # A dict of extension directories that have been configured.
    # Intentionally shared at class level so each CMakeLists.txt directory
    # is configured only once.
    did_config = {}

    def compute_num_jobs(self):
        """Determine the number of compile jobs and nvcc compile threads.

        Returns:
            A ``(num_jobs, nvcc_threads)`` tuple. ``nvcc_threads`` is None
            unless this is a CUDA build with nvcc >= 11.2.
        """
        try:
            # os.sched_getaffinity() isn't universally available, so fall back
            # to os.cpu_count() if we get an error here.
            num_jobs = len(os.sched_getaffinity(0))
        except AttributeError:
            num_jobs = os.cpu_count()
        # os.cpu_count() may return None when the count is undetermined.
        num_jobs = num_jobs or 1

        # Only query nvcc on CUDA builds; on other builds CUDA_HOME may be
        # unset and nvcc unavailable.
        if _is_cuda() and get_nvcc_cuda_version() >= Version("11.2"):
            # Clamp to >= 1 so NVCC_THREADS=0 cannot cause a division by zero.
            nvcc_threads = max(1, int(os.getenv("NVCC_THREADS", 8)))
            num_jobs = max(1, round(num_jobs / (nvcc_threads / 4)))
        else:
            nvcc_threads = None

        return num_jobs, nvcc_threads

    def configure(self, ext: CMakeExtension) -> None:
        """Run the cmake configuration step for a single extension.

        Does nothing if the extension's CMakeLists.txt directory has already
        been configured by this command class.
        """
        # If we've already configured using the CMakeLists.txt for
        # this extension, exit early.
        if ext.cmake_lists_dir in cmake_build_ext.did_config:
            return

        cmake_build_ext.did_config[ext.cmake_lists_dir] = True

        # Select the build type.
        # Note: optimization level + debug info are set by the build type
        default_cfg = "Debug" if self.debug else "RelWithDebInfo"
        cfg = os.getenv("CMAKE_BUILD_TYPE", default_cfg)

        # where .so files will be written, should be the same for all
        # extensions that use the same CMakeLists.txt.
        outdir = os.path.abspath(
            os.path.dirname(self.get_ext_fullpath(ext.name)))

        cmake_args = [
            f'-DCMAKE_BUILD_TYPE={cfg}',
            f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={outdir}',
            f'-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={self.build_temp}',
        ]

        verbose = bool(int(os.getenv('VERBOSE', '0')))
        if verbose:
            cmake_args += ['-DCMAKE_VERBOSE_MAKEFILE=ON']

        # Prefer sccache over ccache when both are installed.
        if is_sccache_available():
            cmake_args += [
                '-DCMAKE_CXX_COMPILER_LAUNCHER=sccache',
                '-DCMAKE_CUDA_COMPILER_LAUNCHER=sccache',
            ]
        elif is_ccache_available():
            cmake_args += [
                '-DCMAKE_CXX_COMPILER_LAUNCHER=ccache',
                '-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache',
            ]

        # Pass the python executable to cmake so it can find an exact
        # match.
        cmake_args += [f'-DAPHRODITE_PYTHON_EXECUTABLE={sys.executable}']

        if _install_punica():
            cmake_args += ['-DAPHRODITE_INSTALL_PUNICA_KERNELS=ON']

        if _install_hadamard():
            cmake_args += ['-DAPHRODITE_INSTALL_HADAMARD_KERNELS=ON']

        #
        # Setup parallelism and build tool
        #
        num_jobs, nvcc_threads = self.compute_num_jobs()

        if nvcc_threads:
            cmake_args += [f'-DNVCC_THREADS={nvcc_threads}']

        if is_ninja_available():
            build_tool = ['-G', 'Ninja']
            cmake_args += [
                '-DCMAKE_JOB_POOL_COMPILE:STRING=compile',
                f'-DCMAKE_JOB_POOLS:STRING=compile={num_jobs}',
            ]
        else:
            # Default build tool to whatever cmake picks.
            build_tool = []

        subprocess.check_call(
            ['cmake', ext.cmake_lists_dir, *build_tool, *cmake_args],
            cwd=self.build_temp)

    def build_extensions(self) -> None:
        """Configure and build every registered extension with CMake.

        Raises:
            RuntimeError: if the ``cmake`` executable cannot be run.
        """
        # Ensure that CMake is present and working
        try:
            subprocess.check_output(['cmake', '--version'])
        except OSError as e:
            raise RuntimeError('Cannot find CMake executable') from e

        # Create build directory if it does not exist.
        os.makedirs(self.build_temp, exist_ok=True)

        # The job count does not depend on the extension, and computing it
        # may spawn nvcc, so do it once rather than per extension.
        num_jobs, _ = self.compute_num_jobs()

        # Build all the extensions
        for ext in self.extensions:
            self.configure(ext)

            ext_target_name = remove_prefix(ext.name, "aphrodite.")

            build_args = [
                '--build', '.', '--target', ext_target_name, '-j',
                str(num_jobs)
            ]

            subprocess.check_call(['cmake', *build_args], cwd=self.build_temp)


def _is_hip() -> bool:
    """Return True if the installed torch reports a HIP version."""
    return torch.version.hip is not None


def _is_cuda() -> bool:
    """Return True if the installed torch reports a CUDA version."""
    return torch.version.cuda is not None


def _install_punica() -> bool:
    """Return True if APHRODITE_INSTALL_PUNICA_KERNELS is a non-zero int."""
    return bool(int(os.getenv("APHRODITE_INSTALL_PUNICA_KERNELS", "0")))


def _install_hadamard() -> bool:
    """Return True if APHRODITE_INSTALL_HADAMARD_KERNELS is a non-zero int."""
    return bool(int(os.getenv("APHRODITE_INSTALL_HADAMARD_KERNELS", "0")))


def get_path(*filepath) -> str:
    """Join the given path components onto the directory of this script."""
    return os.path.join(ROOT_DIR, *filepath)


def find_version(filepath: str) -> str:
    """Extract version information from the given filepath.

    Adapted from https://github.com/ray-project/ray/blob/0b190ee1160eeca9796bc091e07eaebf4c85b511/python/setup.py

    Raises:
        RuntimeError: if no ``__version__ = "..."`` line is found.
    """
    with open(filepath) as fp:
        version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                                  fp.read(), re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError("Unable to find version string.")


def get_hipcc_rocm_version():
    """Return the HIP version string reported by ``hipcc --version``.

    Returns None (after printing a message) if hipcc cannot be run, exits
    with a non-zero status, or its output has no "HIP version:" line.
    """
    # Run the hipcc --version command
    try:
        result = subprocess.run(['hipcc', '--version'],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                text=True)
    except OSError:
        # hipcc is missing or not executable; report it the same way as the
        # other failure modes instead of raising.
        print("Error running 'hipcc --version'")
        return None

    # Check if the command was executed successfully
    if result.returncode != 0:
        print("Error running 'hipcc --version'")
        return None

    # Extract the version using a regular expression
    match = re.search(r'HIP version: (\S+)', result.stdout)
    if match:
        # Return the version string
        return match.group(1)

    print("Could not find HIP version in the output")
    return None


def get_nvcc_cuda_version() -> Version:
    """Get the CUDA version from nvcc.

    Adapted from https://github.com/NVIDIA/apex/blob/8b7a1ff183741dd8f9b87e7bafd04cfde99cea28/setup.py

    Raises:
        RuntimeError: if CUDA_HOME is not set, so nvcc cannot be located.
    """
    if CUDA_HOME is None:
        raise RuntimeError(
            "CUDA_HOME is not set; cannot locate nvcc to determine the "
            "CUDA version.")
    nvcc_output = subprocess.check_output(
        [os.path.join(CUDA_HOME, "bin", "nvcc"), "-V"],
        universal_newlines=True)
    output = nvcc_output.split()
    # The version token follows the word "release", e.g. "release 12.1,".
    release_idx = output.index("release") + 1
    nvcc_cuda_version = parse(output[release_idx].split(",")[0])
    return nvcc_cuda_version


def get_aphrodite_version() -> str:
    """Return the package version, with a local suffix for the toolchain.

    Appends "+rocmXYZ" on HIP builds, or "+cuXYZ" on other builds, when the
    detected toolchain version differs from MAIN_CUDA_VERSION.
    """
    version = find_version(get_path("aphrodite", "__init__.py"))

    if _is_hip():
        # Get the HIP version; if hipcc cannot be queried, fall back to the
        # HIP version torch was built with rather than crashing on None.
        hipcc_version = get_hipcc_rocm_version() or torch.version.hip
        if hipcc_version and hipcc_version != MAIN_CUDA_VERSION:
            rocm_version_str = hipcc_version.replace(".", "")[:3]
            version += f"+rocm{rocm_version_str}"
    else:
        cuda_version = str(get_nvcc_cuda_version())
        if cuda_version != MAIN_CUDA_VERSION:
            cuda_version_str = cuda_version.replace(".", "")[:3]
            version += f"+cu{cuda_version_str}"

    return version


def read_readme() -> str:
    """Read the README file if present."""
    p = get_path("README.md")
    if not os.path.isfile(p):
        return ""
    # Use a context manager so the file handle is closed deterministically.
    with io.open(p, "r", encoding="utf-8") as f:
        return f.read()


def get_requirements() -> List[str]:
    """Get Python package dependencies from the requirements file.

    Reads requirements-rocm.txt on HIP builds and requirements.txt
    otherwise.
    """
    filename = "requirements-rocm.txt" if _is_hip() else "requirements.txt"
    with open(get_path(filename)) as f:
        return f.read().strip().split("\n")


ext_modules = []

if _is_cuda():
    ext_modules.append(CMakeExtension(name="aphrodite._moe_C"))

    # NOTE(review): the optional kernel extensions are registered only for
    # CUDA builds here - confirm this nesting matches the intended layout.
    if _install_punica():
        ext_modules.append(CMakeExtension(name="aphrodite._punica_C"))
    if _install_hadamard():
        ext_modules.append(CMakeExtension(name="aphrodite._hadamard_C"))

ext_modules.append(CMakeExtension(name="aphrodite._C"))

setup(
    name="aphrodite-engine",
    version=get_aphrodite_version(),
    author="PygmalionAI",
    license="AGPL 3.0",
    description="The inference engine for PygmalionAI models",
    long_description=read_readme(),
    long_description_content_type="text/markdown",
    url="https://github.com/PygmalionAI/aphrodite-engine",
    project_urls={
        "Homepage": "https://pygmalion.chat",
        "Documentation": "https://docs.pygmalion.chat",
        "GitHub": "https://github.com/PygmalionAI",
        "Huggingface": "https://huggingface.co/PygmalionAI",
    },
    classifiers=[
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",  # noqa: E501
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    packages=find_packages(exclude=("kernels", "examples", "tests")),
    python_requires=">=3.8",
    install_requires=get_requirements(),
    ext_modules=ext_modules,
    cmdclass={"build_ext": cmake_build_ext},
    package_data={
        "aphrodite": [
            "endpoints/kobold/klite.embd",
            "modeling/layers/quantization/hadamard.safetensors",
            "py.typed"
        ]
    },
    include_package_data=True,
)