"""Poll the deployment directory for untested deployments and test them.

For every deployment found, the runner creates a virtual environment,
installs the deployed wheel into it, runs the configured system tests with
pytest and copies the resulting reports and logs to the dev server via scp.
"""

import json
import logging
import os
import signal
import subprocess
import time
import venv
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Optional

import jsonschema

logger = logging.getLogger(__name__)

DEPLOYMENT_CONFIG_SCHEMA_FILE: Path = Path(__file__).parent.parent.parent / "deployment_config_schema.json"
DEPLOYMENT_DIR: str = os.path.expanduser(os.environ.get("CK_DEPLOYMENT_DIR", "~/deployment"))
TESTS_DIR: str = os.path.expanduser(os.environ.get("CK_TESTS_DIR", "~/gtat-tech-career-kickstarter/solution/tests"))
TEST_STARTED_FLAG: str = "started.flag"
CHECK_INTERVAL: int = 30  # seconds
WORKERS_COUNT: int = 4
TEST_TIMEOUT: int = 20 * 60  # 20 minutes
GRACEFUL_STOP_TIMEOUT: int = 30  # seconds


@dataclass
class DeployedInstance:
    """A single deployment directory together with the wheel it contains."""

    user_dir: Path
    deployment_dir: Path
    wheel_file: Path
    # Set by create_venv(); None until the virtual environment exists.
    venv_dir: Optional[Path] = None

    @property
    def py(self) -> str:
        """Return the path of the Python interpreter inside the venv.

        Raises:
            RuntimeError: If the virtual environment has not been created yet.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if self.venv_dir is None:
            raise RuntimeError(f"Virtual environment has not been created for {self}.")
        return str(self.venv_dir / "bin" / "python")

    def __repr__(self) -> str:
        return f"{self.user_dir}//{self.deployment_dir}"


def find_oldest_untested_deployment() -> Optional[DeployedInstance]:
    """Return the untested deployment with the oldest directory mtime.

    Scans ``DEPLOYMENT_DIR/<user>/<deployment>``. A deployment is a candidate
    when it has no ``TEST_STARTED_FLAG`` file and contains exactly one
    ``*.whl`` file. Directories with several wheels are logged and skipped so
    that one malformed deployment cannot block all the others.

    Returns:
        The oldest candidate, or ``None`` when there is none.
    """
    oldest_deployment: Optional[DeployedInstance] = None
    oldest_mtime = float("inf")

    for user_dir in Path(DEPLOYMENT_DIR).iterdir():
        if not user_dir.is_dir():
            continue
        for deployment_dir in user_dir.iterdir():
            if not deployment_dir.is_dir():
                continue

            if (deployment_dir / TEST_STARTED_FLAG).exists():
                logger.debug(f"Skipping {deployment_dir} as it has already been marked as started.")
                continue

            wheel_files = list(deployment_dir.glob("*.whl"))
            if not wheel_files:
                logger.debug(f"Skipping {deployment_dir} as it does not contain any wheel files.")
                continue
            if len(wheel_files) > 1:
                logger.error(f"Skipping {deployment_dir}: multiple wheel files found in the same directory.")
                continue

            # Remember the best mtime so the current winner is not re-stat'ed
            # on every iteration.
            mtime = deployment_dir.stat().st_mtime
            if mtime < oldest_mtime:
                oldest_deployment = DeployedInstance(user_dir, deployment_dir, wheel_files[0])
                oldest_mtime = mtime

    return oldest_deployment


def create_venv(deployed_instance: DeployedInstance) -> None:
    """Create ``<deployment_dir>/venv`` (with pip) and record it on the instance."""
    logger.info(f"Creating virtual environment for {deployed_instance}...")
    deployed_instance.venv_dir = deployed_instance.deployment_dir / "venv"
    venv.create(str(deployed_instance.venv_dir), with_pip=True)
    logger.info(f"Virtual environment created for {deployed_instance}.")


def install_wheel(deployed_instance: DeployedInstance) -> None:
    """Install the deployment's wheel into its virtual environment with pip.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits non-zero.
    """
    logger.info(f"Installing wheel {deployed_instance.wheel_file} into its virtual environment...")
    subprocess.run(
        [deployed_instance.py, "-m", "pip", "install", str(deployed_instance.wheel_file)],
        check=True,
    )
    logger.info(f"Wheel {deployed_instance.wheel_file} installed into its virtual environment.")


def _resolve_test_files(deployed_instance: DeployedInstance) -> list[Path]:
    """Return the system test files selected by the deployment's config.

    Loads ``deployment_config.json`` from the deployment directory, validates
    it against ``DEPLOYMENT_CONFIG_SCHEMA_FILE`` and maps every entry of
    ``systemTests`` to ``TESTS_DIR/test_<name>_system.py``.

    Raises:
        FileNotFoundError: If the config, the schema or any resolved test file
            does not exist.
        jsonschema.ValidationError: If the config does not match the schema.
    """
    config_file = deployed_instance.deployment_dir / "deployment_config.json"
    with config_file.open(encoding="utf-8") as f:
        config = json.load(f)
    with DEPLOYMENT_CONFIG_SCHEMA_FILE.open(encoding="utf-8") as f:
        schema = json.load(f)
    jsonschema.validate(instance=config, schema=schema)

    system_tests = config["systemTests"]
    test_files = [Path(TESTS_DIR) / f"test_{system_test}_system.py" for system_test in system_tests]
    missing = [str(p) for p in test_files if not p.exists()]
    if missing:
        raise FileNotFoundError(f"Test files not found: {missing}")
    return test_files


def run_pytest(deployed_instance: DeployedInstance) -> list[Path]:
    """Run the deployment's system tests and write the final report.

    pytest is started in its own session (and therefore its own process
    group) with the deployment directory as working directory. If it runs
    longer than ``TEST_TIMEOUT`` seconds it is stopped via
    ``_gracefully_stop_process``.

    Returns:
        The paths of the JUnit XML report and the final JSON report, in that
        order. The JUnit file is produced by pytest itself and is not checked
        for existence here.
    """
    logger.info(f"Running pytest for {deployed_instance}...")
    test_files = _resolve_test_files(deployed_instance)
    logger.info(f"Running system tests: {[str(f) for f in test_files]}")

    junit_report_file = deployed_instance.deployment_dir / "test_results.xml"
    final_report_file = deployed_instance.deployment_dir / "final_report.json"
    deployment_config_file = deployed_instance.deployment_dir / "deployment_config.json"

    # Options and their values are separate argv elements: no shell splits
    # "-n 4" for us, so a combined string would reach pytest with an embedded
    # space.
    command = [
        deployed_instance.py,
        "-m",
        "pytest",
        *[str(f) for f in test_files],
        "-n",
        str(WORKERS_COUNT),
        "-W",
        "error::pytest.PytestUnhandledThreadExceptionWarning",
        f"--venv-path={str(deployed_instance.venv_dir)}",
        f"--deployment-config={str(deployment_config_file)}",
        f"--junit-xml={junit_report_file}",
    ]
    logger.debug(f"Running command: {command}")

    # start_new_session makes the child a process-group leader, which is what
    # allows _gracefully_stop_process to signal the whole group via its PID.
    process = subprocess.Popen(command, cwd=deployed_instance.deployment_dir, start_new_session=True)
    logger.debug(f"Process started with PID: {process.pid}")

    started_at = time.monotonic()
    timed_out = False
    try:
        process.wait(timeout=TEST_TIMEOUT)
    except subprocess.TimeoutExpired:
        timed_out = True
        logger.warning(
            f"Pytest process {process.pid} exceeded timeout ({TEST_TIMEOUT} seconds) "
            f"for {deployed_instance}. Shutting down..."
        )
        _gracefully_stop_process(process, timeout=GRACEFUL_STOP_TIMEOUT)

    testing_duration_ms = int((time.monotonic() - started_at) * 1000)
    logger.debug(f"Testing done in {testing_duration_ms}ms")

    _write_final_report(
        final_report_file=final_report_file,
        testing_duration_ms=testing_duration_ms,
        testing_timed_out=timed_out,
    )
    logger.info(f"Testing completed for {deployed_instance}.")
    return [junit_report_file, final_report_file]


def _write_final_report(final_report_file: Path, testing_duration_ms: int, testing_timed_out: bool) -> None:
    """Write the test run duration and timeout status as JSON to ``final_report_file``."""
    report_content = {
        "testing_duration_ms": testing_duration_ms,
        "testing_timed_out": testing_timed_out,
    }
    with final_report_file.open("w", encoding="utf-8") as f:
        json.dump(report_content, f)
    logger.info(f"Final test run report written to {final_report_file}")


def _gracefully_stop_process(process: subprocess.Popen[bytes], timeout: int = GRACEFUL_STOP_TIMEOUT) -> int:
    """Stop the process group led by ``process``, escalating the signal.

    Sends SIGINT and waits ``timeout`` seconds, then SIGTERM and finally
    SIGKILL, waiting 5 seconds after each of the latter two. Signals go to the
    process group whose id equals ``process.pid``, so the process must have
    been started as a group leader (e.g. with ``start_new_session=True``).

    Returns:
        The exit code of the process.

    Raises:
        subprocess.TimeoutExpired: If the process is still running after the
            wait that follows SIGKILL.
    """
    FORCE_KILL_TIMEOUT: int = 5  # seconds

    if process.poll() is not None:
        return process.returncode

    logger.info(f"Sending SIGINT to process {process.pid}")
    os.killpg(process.pid, signal.SIGINT)
    try:
        process.wait(timeout=timeout)
        return process.returncode
    except subprocess.TimeoutExpired:
        logger.warning(
            f"Process {process.pid} did not stop after SIGINT grace period ({timeout} seconds). "
            "Sending SIGTERM..."
        )

    # Re-check before each escalation: the process may have exited between
    # the timed-out wait and now.
    if process.poll() is not None:
        return process.returncode
    os.killpg(process.pid, signal.SIGTERM)
    try:
        process.wait(timeout=FORCE_KILL_TIMEOUT)
        return process.returncode
    except subprocess.TimeoutExpired:
        logger.warning(f"Process {process.pid} still running after SIGTERM. Sending SIGKILL...")

    if process.poll() is not None:
        return process.returncode
    os.killpg(process.pid, signal.SIGKILL)
    process.wait(timeout=FORCE_KILL_TIMEOUT)
    return process.returncode


def copy_test_results_to_dev_server(report_files: list[Path], dev_server_host: str) -> None:
    """Copy every report file to ``/tmp/<file name>`` on the dev server via scp.

    Raises:
        subprocess.CalledProcessError: If any scp invocation exits non-zero.
    """
    logger.info(f"Copying test result files to dev server {dev_server_host}...")
    for report_file in report_files:
        subprocess.run(
            ["scp", str(report_file), f"{dev_server_host}:/tmp/{report_file.name}"],
            check=True,
        )


def copy_logs_to_dev_server(deployed_instance: DeployedInstance, dev_server_host: str) -> None:
    """Copy the files in ``<deployment_dir>/logs`` to the dev server via scp.

    The files are sent in a single scp call to ``/tmp/pytest.log``. The
    wildcard is expanded here because subprocess does not run a shell, so a
    literal ``logs/*`` argument would be looked up as a file named ``*``.
    Nothing is copied (and a warning is logged) when there are no log files.

    Raises:
        subprocess.CalledProcessError: If scp exits non-zero.
    """
    logger.info(f"Copying logs to dev server {dev_server_host}...")
    logs_dir = deployed_instance.deployment_dir / "logs"
    log_files = sorted(p for p in logs_dir.glob("*") if p.is_file())
    if not log_files:
        logger.warning(f"No log files found in {logs_dir}, nothing to copy.")
        return
    subprocess.run(
        ["scp", *[str(p) for p in log_files], f"{dev_server_host}:/tmp/pytest.log"],
        check=True,
    )


def _process_deployment(deployment: DeployedInstance) -> None:
    """Install, test and publish the results of a single deployment.

    Subprocess failures of each stage are logged and end the processing of
    this deployment; any other exception propagates to the caller.
    """
    try:
        create_venv(deployment)
        install_wheel(deployment)
    except subprocess.CalledProcessError as e:
        logger.exception(f"Error during installation: {e}")
        return

    try:
        report_files = run_pytest(deployment)
    except subprocess.SubprocessError as e:
        logger.exception(f"Error while running tests: {e}")
        return

    # The user directory name doubles as the scp host of that user's dev server.
    username = deployment.user_dir.name
    try:
        copy_test_results_to_dev_server(report_files, username)
        copy_logs_to_dev_server(deployment, username)
    except subprocess.CalledProcessError as e:
        logger.exception(f"Error copying test results to dev server: {e}")
        return
    logger.info(f"Test results copied to dev server for user: {username}")


def main() -> None:
    """Run the polling loop forever.

    Each iteration picks the oldest untested deployment, marks it as started
    and processes it. When nothing is pending the loop sleeps for
    ``CHECK_INTERVAL`` seconds before checking again.
    """
    logger.info("Starting test runner...")
    while True:
        logger.info("Checking for untested deployments...")
        untested_deployment = find_oldest_untested_deployment()
        if untested_deployment is None:
            logger.info(f"No untested deployments found. Retrying in {CHECK_INTERVAL} seconds...")
            time.sleep(CHECK_INTERVAL)
            continue

        logger.info(f"Found untested deployment in {untested_deployment.deployment_dir}")
        # Flag first, so a deployment that fails is never picked up again.
        started_flag = untested_deployment.deployment_dir / TEST_STARTED_FLAG
        started_flag.touch()

        try:
            _process_deployment(untested_deployment)
        except Exception:
            # Top-level boundary of a long-running service: a broken
            # deployment (invalid config, missing test file, ...) must not
            # stop the runner for everyone else.
            logger.exception(f"Unexpected error while processing {untested_deployment}; skipping it.")


def setup_logging() -> None:
    """Configure DEBUG logging to stderr and to a timestamped file under ``logs/``."""
    log_file_name = Path(__file__).parent / "logs" / f"test_finder_{datetime.now():%Y%m%d_%H%M%S}.log"
    # FileHandler does not create missing parent directories.
    log_file_name.parent.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[
            logging.FileHandler(log_file_name),
            logging.StreamHandler(),
        ],
    )
    logger.info(f"Logging setup complete. Log file: {log_file_name}")


if __name__ == "__main__":
    setup_logging()
    main()