123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314 |
- import io
- import os
- import re
- import subprocess
- from typing import List, Set
- import sys
- from typing import List
- from packaging.version import parse, Version
- from setuptools import setup, find_packages, Extension
- from setuptools.command.build_ext import build_ext
- from shutil import which
- import torch
- from torch.utils.cpp_extension import CUDA_HOME
# Directory part of this file's path; get_path() joins relative paths onto it.
ROOT_DIR = os.path.dirname(__file__)
# Toolkit version string that get_aphrodite_version() compares the detected
# CUDA/HIP version against; a differing version adds a "+cu..." / "+rocm..."
# suffix to the package version.
MAIN_CUDA_VERSION = "12.1"
def is_sccache_available() -> bool:
    """Return True when an ``sccache`` executable can be found on PATH."""
    sccache_path = which("sccache")
    return sccache_path is not None
def is_ccache_available() -> bool:
    """Return True when a ``ccache`` executable can be found on PATH."""
    ccache_path = which("ccache")
    return ccache_path is not None
def is_ninja_available() -> bool:
    """Return True when a ``ninja`` executable can be found on PATH."""
    ninja_path = which("ninja")
    return ninja_path is not None
def remove_prefix(text, prefix):
    """Return ``text`` without a leading ``prefix``.

    When ``text`` does not start with ``prefix`` it is returned unchanged.
    """
    if not text.startswith(prefix):
        return text
    cut = len(prefix)
    return text[cut:]
class CMakeExtension(Extension):
    """Extension with no Python-level sources, described by a CMakeLists dir.

    Args:
        name: Extension name, forwarded to ``Extension``.
        cmake_lists_dir: Directory holding the CMakeLists.txt; stored as an
            absolute path in ``self.cmake_lists_dir``.
        **extension_kwargs: Extra keyword arguments forwarded to ``Extension``.
    """

    def __init__(self,
                 name: str,
                 cmake_lists_dir: str = '.',
                 **extension_kwargs) -> None:
        # The source list is left empty on purpose.
        super().__init__(name, sources=[], **extension_kwargs)
        absolute_dir = os.path.abspath(cmake_lists_dir)
        self.cmake_lists_dir = absolute_dir
class cmake_build_ext(build_ext):
    """``build_ext`` command that configures and builds extensions with CMake.

    Each ``CMakeExtension`` is configured once per CMakeLists.txt directory
    and then built as a CMake target named after the extension (without the
    ``aphrodite.`` prefix).
    """

    # Maps the ``cmake_lists_dir`` of every extension that has already been
    # configured to ``True``. Stored on the class, so the record is shared by
    # every instance of this command.
    did_config = {}

    def compute_num_jobs(self):
        """Determine the number of build jobs and optional nvcc threads.

        Returns:
            A ``(num_jobs, nvcc_threads)`` tuple. ``nvcc_threads`` is ``None``
            unless this is a CUDA build with nvcc >= 11.2, in which case it is
            read from the ``NVCC_THREADS`` environment variable (default 8)
            and ``num_jobs`` is scaled down accordingly (never below 1).
        """
        try:
            # os.sched_getaffinity() isn't universally available, so fall back
            # to os.cpu_count() if we get an error here.
            num_jobs = len(os.sched_getaffinity(0))
        except AttributeError:
            # os.cpu_count() returns None when the count is undetermined.
            num_jobs = os.cpu_count() or 1

        # Only probe nvcc on CUDA builds: on other builds (e.g. ROCm)
        # CUDA_HOME may be unset and invoking nvcc would fail.
        if _is_cuda() and get_nvcc_cuda_version() >= Version("11.2"):
            nvcc_threads = int(os.getenv("NVCC_THREADS", 8))
            num_jobs = max(1, round(num_jobs / (nvcc_threads / 4)))
        else:
            nvcc_threads = None

        return num_jobs, nvcc_threads

    def configure(self, ext: CMakeExtension) -> None:
        """Run the CMake configure step for ``ext``'s CMakeLists.txt directory.

        The step runs at most once per ``cmake_lists_dir``; extensions that
        share an already-configured directory return immediately.

        Raises:
            subprocess.CalledProcessError: If the ``cmake`` command exits with
                a non-zero status.
        """
        # If we've already configured using the CMakeLists.txt for
        # this extension, exit early.
        if ext.cmake_lists_dir in cmake_build_ext.did_config:
            return

        cmake_build_ext.did_config[ext.cmake_lists_dir] = True

        # Select the build type.
        # Note: optimization level + debug info are set by the build type
        default_cfg = "Debug" if self.debug else "RelWithDebInfo"
        cfg = os.getenv("CMAKE_BUILD_TYPE", default_cfg)

        # Where .so files will be written; should be the same for all
        # extensions that use the same CMakeLists.txt.
        outdir = os.path.abspath(
            os.path.dirname(self.get_ext_fullpath(ext.name)))

        cmake_args = [
            f'-DCMAKE_BUILD_TYPE={cfg}',
            f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={outdir}',
            f'-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={self.build_temp}',
        ]

        verbose = bool(int(os.getenv('VERBOSE', '0')))
        if verbose:
            cmake_args += ['-DCMAKE_VERBOSE_MAKEFILE=ON']

        # Prefer sccache over ccache when both are installed.
        if is_sccache_available():
            cmake_args += [
                '-DCMAKE_CXX_COMPILER_LAUNCHER=sccache',
                '-DCMAKE_CUDA_COMPILER_LAUNCHER=sccache',
            ]
        elif is_ccache_available():
            cmake_args += [
                '-DCMAKE_CXX_COMPILER_LAUNCHER=ccache',
                '-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache',
            ]

        # Pass the python executable to cmake so it can find an exact
        # match.
        cmake_args += [f'-DAPHRODITE_PYTHON_EXECUTABLE={sys.executable}']

        if _install_punica():
            cmake_args += ['-DAPHRODITE_INSTALL_PUNICA_KERNELS=ON']

        if _install_hadamard():
            cmake_args += ['-DAPHRODITE_INSTALL_HADAMARD_KERNELS=ON']

        #
        # Setup parallelism and build tool
        #
        num_jobs, nvcc_threads = self.compute_num_jobs()

        if nvcc_threads:
            cmake_args += [f'-DNVCC_THREADS={nvcc_threads}']

        if is_ninja_available():
            build_tool = ['-G', 'Ninja']
            cmake_args += [
                '-DCMAKE_JOB_POOL_COMPILE:STRING=compile',
                f'-DCMAKE_JOB_POOLS:STRING=compile={num_jobs}',
            ]
        else:
            # Default build tool to whatever cmake picks.
            build_tool = []

        subprocess.check_call(
            ['cmake', ext.cmake_lists_dir, *build_tool, *cmake_args],
            cwd=self.build_temp)

    def build_extensions(self) -> None:
        """Configure and build every extension in ``self.extensions``.

        Raises:
            RuntimeError: If the ``cmake`` executable cannot be launched.
            subprocess.CalledProcessError: If a ``cmake`` command exits with
                a non-zero status.
        """
        # Ensure that CMake is present and working
        try:
            subprocess.check_output(['cmake', '--version'])
        except OSError as e:
            raise RuntimeError('Cannot find CMake executable') from e

        # Create build directory if it does not exist.
        os.makedirs(self.build_temp, exist_ok=True)

        if not self.extensions:
            return

        # The job count does not depend on the extension, so compute it once
        # instead of re-running the toolchain probe for every extension.
        num_jobs, _ = self.compute_num_jobs()

        # Build all the extensions
        for ext in self.extensions:
            self.configure(ext)

            ext_target_name = remove_prefix(ext.name, "aphrodite.")

            build_args = [
                '--build', '.', '--target', ext_target_name, '-j',
                str(num_jobs)
            ]

            subprocess.check_call(['cmake', *build_args], cwd=self.build_temp)
def _is_hip() -> bool:
    """Return True when ``torch.version.hip`` is set (not None)."""
    hip_version = torch.version.hip
    return hip_version is not None
def _is_cuda() -> bool:
    """Return True when ``torch.version.cuda`` is set (not None)."""
    cuda_version = torch.version.cuda
    return cuda_version is not None
def _install_punica() -> bool:
    """Report whether the punica kernels were requested.

    Reads the ``APHRODITE_INSTALL_PUNICA_KERNELS`` environment variable
    (default ``"0"``) as an integer; any non-zero value enables the kernels.
    """
    raw_flag = os.getenv("APHRODITE_INSTALL_PUNICA_KERNELS", "0")
    return int(raw_flag) != 0
def _install_hadamard() -> bool:
    """Report whether the hadamard kernels were requested.

    Reads the ``APHRODITE_INSTALL_HADAMARD_KERNELS`` environment variable
    (default ``"0"``) as an integer; any non-zero value enables the kernels.
    """
    raw_flag = os.getenv("APHRODITE_INSTALL_HADAMARD_KERNELS", "0")
    return int(raw_flag) != 0
def get_path(*filepath) -> str:
    """Join the given path components onto ``ROOT_DIR``."""
    components = (ROOT_DIR, ) + filepath
    return os.path.join(*components)
def find_version(filepath: str) -> str:
    """Return the ``__version__`` string assigned in the file at ``filepath``.

    Adapted from https://github.com/ray-project/ray/blob/0b190ee1160eeca9796bc091e07eaebf4c85b511/python/setup.py

    Raises:
        RuntimeError: If no line of the form ``__version__ = "<value>"`` is
            found at the start of a line.
    """
    with open(filepath) as source_file:
        contents = source_file.read()

    found = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", contents, re.M)
    if found is None:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)
def get_hipcc_rocm_version():
    """Return the HIP version reported by ``hipcc --version``.

    Returns:
        The text following ``HIP version: `` in the command output, or
        ``None`` (after printing a message) when the command exits with a
        non-zero status or its output has no such line.
    """
    # Capture stdout and stderr together as text.
    completed = subprocess.run(['hipcc', '--version'],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               text=True)

    if completed.returncode != 0:
        print("Error running 'hipcc --version'")
        return None

    found = re.search(r'HIP version: (\S+)', completed.stdout)
    if found is None:
        print("Could not find HIP version in the output")
        return None

    return found.group(1)
def get_nvcc_cuda_version() -> Version:
    """Get the CUDA version from nvcc.

    Runs ``<CUDA_HOME>/bin/nvcc -V`` and parses the token that follows the
    word ``release`` in its output.

    Adapted from https://github.com/NVIDIA/apex/blob/8b7a1ff183741dd8f9b87e7bafd04cfde99cea28/setup.py

    Raises:
        RuntimeError: If ``CUDA_HOME`` is ``None``, so nvcc cannot be located.
        subprocess.CalledProcessError: If nvcc exits with a non-zero status.
    """
    # Fail with a clear message instead of a TypeError from None + str.
    if CUDA_HOME is None:
        raise RuntimeError(
            "CUDA_HOME is not set; cannot locate nvcc to determine the CUDA "
            "version.")

    nvcc_path = os.path.join(CUDA_HOME, "bin", "nvcc")
    nvcc_output = subprocess.check_output([nvcc_path, "-V"],
                                          universal_newlines=True)
    output = nvcc_output.split()
    release_idx = output.index("release") + 1
    # The token after "release" looks like "12.1,"; drop the trailing comma.
    nvcc_cuda_version = parse(output[release_idx].split(",")[0])
    return nvcc_cuda_version
def get_aphrodite_version() -> str:
    """Return the package version, with a toolkit suffix when applicable.

    The base version is read from ``aphrodite/__init__.py``. On HIP builds the
    version reported by hipcc, and otherwise the version reported by nvcc, is
    compared with ``MAIN_CUDA_VERSION``. When it differs, ``+rocm<digits>`` or
    ``+cu<digits>`` is appended, where ``<digits>`` is the version with dots
    removed, truncated to three characters.
    """
    version = find_version(get_path("aphrodite", "__init__.py"))

    if _is_hip():
        # Get the HIP version.
        hipcc_version = get_hipcc_rocm_version()
        # get_hipcc_rocm_version() returns None when detection fails; skip
        # the suffix in that case rather than crashing on None.replace().
        if hipcc_version is not None and hipcc_version != MAIN_CUDA_VERSION:
            rocm_version_str = hipcc_version.replace(".", "")[:3]
            version += f"+rocm{rocm_version_str}"
    else:
        cuda_version = str(get_nvcc_cuda_version())
        if cuda_version != MAIN_CUDA_VERSION:
            cuda_version_str = cuda_version.replace(".", "")[:3]
            version += f"+cu{cuda_version_str}"

    return version
def read_readme() -> str:
    """Read the README file if present.

    Returns:
        The UTF-8 decoded contents of ``README.md`` under ``ROOT_DIR``, or an
        empty string when that file does not exist.
    """
    readme_path = get_path("README.md")
    if not os.path.isfile(readme_path):
        return ""
    # Use a context manager so the file handle is closed deterministically.
    with io.open(readme_path, "r", encoding="utf-8") as readme_file:
        return readme_file.read()
def get_requirements() -> List[str]:
    """Get Python package dependencies from the requirements file.

    HIP builds read ``requirements-rocm.txt``; all other builds read
    ``requirements.txt``. The stripped file contents are split on newlines.
    """
    filename = "requirements-rocm.txt" if _is_hip() else "requirements.txt"
    with open(get_path(filename)) as requirements_file:
        contents = requirements_file.read()
    return contents.strip().split("\n")
# Extensions handed to setup(); they are built by cmake_build_ext, which is
# registered as the build_ext command there.
ext_modules = []

# NOTE(review): the indentation of this section was lost in the source; the
# punica and hadamard checks are assumed to be nested under the CUDA check.
# Confirm against the upstream file.
if _is_cuda():
    ext_modules.append(CMakeExtension(name="aphrodite._moe_C"))

    # Optional kernels, enabled through environment variables.
    if _install_punica():
        ext_modules.append(CMakeExtension(name="aphrodite._punica_C"))
    if _install_hadamard():
        ext_modules.append(CMakeExtension(name="aphrodite._hadamard_C"))

# This extension is appended unconditionally.
ext_modules.append(CMakeExtension(name="aphrodite._C"))
# Static metadata, spelled out up front to keep the setup() call short.
_PROJECT_URLS = {
    "Homepage": "https://pygmalion.chat",
    "Documentation": "https://docs.pygmalion.chat",
    "GitHub": "https://github.com/PygmalionAI",
    "Huggingface": "https://huggingface.co/PygmalionAI",
}

_CLASSIFIERS = [
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",  # noqa: E501
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]

# Non-Python files shipped inside the ``aphrodite`` package.
_PACKAGE_DATA = {
    "aphrodite": [
        "endpoints/kobold/klite.embd",
        "modeling/layers/quantization/hadamard.safetensors",
        "py.typed",
    ]
}

# The computed arguments stay inline so they are evaluated in the same order
# as before: version, README, package discovery, requirements.
setup(
    name="aphrodite-engine",
    version=get_aphrodite_version(),
    author="PygmalionAI",
    license="AGPL 3.0",
    description="The inference engine for PygmalionAI models",
    long_description=read_readme(),
    long_description_content_type="text/markdown",
    url="https://github.com/PygmalionAI/aphrodite-engine",
    project_urls=_PROJECT_URLS,
    classifiers=_CLASSIFIERS,
    packages=find_packages(exclude=("kernels", "examples", "tests")),
    python_requires=">=3.8",
    install_requires=get_requirements(),
    ext_modules=ext_modules,
    cmdclass={"build_ext": cmake_build_ext},
    package_data=_PACKAGE_DATA,
    include_package_data=True,
)
|