gtat-tech-career-kickstarte.../solution/tests/continuous_deployment/test_finder.py

import json
import logging
import os
import signal
import time
import subprocess
import venv
from datetime import datetime
from pathlib import Path
from typing import Optional
from dataclasses import dataclass

import jsonschema

# Module-level logger; output handlers are configured on the root logger by setup_logging().
logger = logging.getLogger(__name__)


# JSON schema used to validate each deployment's deployment_config.json.
# It sits two directory levels above the directory that contains this file.
DEPLOYMENT_CONFIG_SCHEMA_FILE: Path = Path(__file__).parent.parent.parent / "deployment_config_schema.json"

# Root directory scanned for deployments; overridable via the CK_DEPLOYMENT_DIR environment variable.
DEPLOYMENT_DIR: str = os.path.expanduser(os.environ.get("CK_DEPLOYMENT_DIR", "~/deployment"))
# Directory containing the test_<name>_system.py files; overridable via the CK_TESTS_DIR environment variable.
TESTS_DIR: str = os.path.expanduser(os.environ.get("CK_TESTS_DIR", "~/gtat-tech-career-kickstarter/solution/tests"))
# Name of the marker file created inside a deployment directory before it is processed.
TEST_STARTED_FLAG: str = "started.flag"
# Pause between polling rounds of the main loop.
CHECK_INTERVAL: int = 30  # seconds

# Value passed to pytest's "-n" option.
WORKERS_COUNT: int = 4
# Maximum time the pytest process may run before it is stopped.
TEST_TIMEOUT: int = 20 * 60  # 20 minutes
# Time allowed for the pytest process to exit after SIGINT before escalating.
GRACEFUL_STOP_TIMEOUT: int = 30  # seconds


@dataclass
class DeployedInstance:
    user_dir: Path
    deployment_dir: Path
    wheel_file: Path
    venv_dir: Optional[Path] = None

    @property
    def py(self) -> str:
        assert self.venv_dir is not None
        return str(self.venv_dir / "bin" / "python")

    def __repr__(self) -> str:
        return f"{self.user_dir}//{self.deployment_dir}"


def find_oldest_untested_deployment() -> Optional[DeployedInstance]:
    """Return the untested deployment whose directory has the oldest mtime.

    Every ``<DEPLOYMENT_DIR>/<user>/<deployment>`` directory is inspected. A
    deployment directory is a candidate when it does not contain the
    ``TEST_STARTED_FLAG`` file and holds exactly one ``*.whl`` file.

    Returns:
        The candidate with the smallest directory modification time, or
        ``None`` when there is no candidate. On equal mtimes the candidate
        encountered first wins.

    Raises:
        ValueError: If an unstarted deployment directory contains more than
            one wheel file.
    """
    oldest_deployment: Optional[DeployedInstance] = None
    # mtime of ``oldest_deployment``; cached so the current best candidate is
    # not stat'ed again for every comparison.
    oldest_mtime: Optional[float] = None

    for user_dir in Path(DEPLOYMENT_DIR).iterdir():
        if not user_dir.is_dir():
            continue

        for deployment_dir in user_dir.iterdir():
            if not deployment_dir.is_dir():
                continue

            started_flag = deployment_dir / TEST_STARTED_FLAG
            if started_flag.exists():
                logger.debug(f"Skipping {deployment_dir} as it has already been marked as started.")
                continue

            wheels_files = list(deployment_dir.glob("*.whl"))
            if not wheels_files:
                logger.debug(f"Skipping {deployment_dir} as it does not contain any wheel files.")
                continue

            if len(wheels_files) > 1:
                # Name the offending directory so the problem can be located from the log alone.
                raise ValueError(f"Multiple wheel files found in the same directory: {deployment_dir}")

            mtime = deployment_dir.stat().st_mtime
            if oldest_mtime is None or mtime < oldest_mtime:
                oldest_deployment = DeployedInstance(user_dir, deployment_dir, wheels_files[0])
                oldest_mtime = mtime

    return oldest_deployment

def create_venv(deployed_instance: DeployedInstance) -> None:
    """Create a pip-enabled virtual environment in ``<deployment_dir>/venv``.

    The chosen location is stored on ``deployed_instance.venv_dir`` before the
    environment is created.
    """
    logger.info(f"Creating virtual environment for {deployed_instance}...")
    venv_location = deployed_instance.deployment_dir / "venv"
    deployed_instance.venv_dir = venv_location
    venv.create(str(venv_location), with_pip=True)
    logger.info(f"Virtual environment created for {deployed_instance}.")

def install_wheel(deployed_instance: DeployedInstance) -> None:
    """Install the deployment's wheel with pip, using the venv interpreter.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    wheel = deployed_instance.wheel_file
    logger.info(f"Installing wheel {wheel} into its virtual environment...")
    pip_install = [deployed_instance.py, "-m", "pip", "install", str(wheel)]
    subprocess.run(pip_install, check=True)
    logger.info(f"Wheel {wheel} installed into its virtual environment.")

def _resolve_test_files(deployed_instance: DeployedInstance) -> list[Path]:
    """Map the deployment's configured system tests to test files on disk.

    Reads ``deployment_config.json`` from the deployment directory, validates
    it against the schema stored in ``DEPLOYMENT_CONFIG_SCHEMA_FILE`` and turns
    every entry of its ``systemTests`` value into
    ``<TESTS_DIR>/test_<entry>_system.py``.

    Returns:
        The resolved test file paths, in the order given by the config.

    Raises:
        FileNotFoundError: If the config file, the schema file, or any
            resolved test file does not exist.
        json.JSONDecodeError: If the config or the schema is not valid JSON.
        jsonschema.ValidationError: If the config does not match the schema.
    """
    config_file = deployed_instance.deployment_dir / "deployment_config.json"
    # Read JSON as UTF-8 explicitly instead of relying on the locale's default
    # encoding; this matches how this module writes its own JSON report.
    with config_file.open(encoding="utf-8") as f:
        config = json.load(f)
    with DEPLOYMENT_CONFIG_SCHEMA_FILE.open(encoding="utf-8") as f:
        schema = json.load(f)
    jsonschema.validate(instance=config, schema=schema)

    system_tests = config["systemTests"]
    test_files = [Path(TESTS_DIR) / f"test_{system_test}_system.py" for system_test in system_tests]
    missing = [str(p) for p in test_files if not p.exists()]
    if missing:
        raise FileNotFoundError(f"Test files not found: {missing}")
    return test_files

def run_pytest(deployed_instance: DeployedInstance) -> list[Path]:
    """Run the deployment's system tests with pytest and write the reports.

    pytest is started with the deployment's venv interpreter, in its own
    session, with the deployment directory as working directory. If it does
    not finish within ``TEST_TIMEOUT`` seconds it is stopped via
    ``_gracefully_stop_process``. A ``final_report.json`` holding the duration
    and the timeout flag is written in either case.

    Returns:
        The report files that exist after the run: the JUnit XML report (when
        pytest produced one) followed by the final report JSON.

    Raises:
        subprocess.SubprocessError: If the pytest process cannot be stopped
            after a timeout.
        Exception: Whatever ``_resolve_test_files`` raises for a missing or
            invalid deployment config.
    """
    logger.info(f"Running pytest for {deployed_instance}...")
    test_files = _resolve_test_files(deployed_instance)
    logger.info(f"Running system tests: {[str(f) for f in test_files]}")

    deployment_dir = deployed_instance.deployment_dir
    junit_report_file = deployment_dir / "test_results.xml"
    final_report_file = deployment_dir / "final_report.json"
    deployment_config_file = deployment_dir / "deployment_config.json"
    command = [
        deployed_instance.py, "-m", "pytest", *[str(f) for f in test_files],
        # Option and value are separate argv entries: a single "-n 4" element
        # would hand the option parser a value with a leading space.
        "-n", str(WORKERS_COUNT),
        "-W", "error::pytest.PytestUnhandledThreadExceptionWarning",
        f"--venv-path={deployed_instance.venv_dir}",
        f"--deployment-config={deployment_config_file}",
        f"--junit-xml={junit_report_file}",
    ]

    logger.debug(f"Running command: {command}")
    # start_new_session puts pytest into its own process group, which is what
    # _gracefully_stop_process signals via os.killpg.
    process = subprocess.Popen(command, cwd=deployment_dir, start_new_session=True)
    logger.debug(f"Process started with PID: {process.pid}")

    started_at = time.monotonic()
    timed_out = False
    try:
        process.wait(timeout=TEST_TIMEOUT)
    except subprocess.TimeoutExpired:
        timed_out = True
        logger.warning(f"Pytest process {process.pid} exceeded timeout ({TEST_TIMEOUT} seconds) for {deployed_instance}. Shutting down...")
        _gracefully_stop_process(process, timeout=GRACEFUL_STOP_TIMEOUT)

    testing_duration_ms = int((time.monotonic() - started_at) * 1000)
    logger.debug(f"Testing done in {testing_duration_ms}ms")
    logger.debug(f"Pytest process exited with return code {process.returncode}")

    _write_final_report(
        final_report_file=final_report_file,
        testing_duration_ms=testing_duration_ms,
        testing_timed_out=timed_out,
    )

    # The JUnit file may be missing (e.g. pytest was killed before writing it).
    # Returning a non-existent path would make the later copy step fail before
    # it gets to the final report, so only existing files are returned.
    report_files = [final_report_file]
    if junit_report_file.exists():
        report_files.insert(0, junit_report_file)
    else:
        logger.warning(f"JUnit report {junit_report_file} was not produced and will not be returned.")

    logger.info(f"Testing completed for {deployed_instance}.")
    return report_files

def _write_final_report(final_report_file: Path, testing_duration_ms: int, testing_timed_out: bool) -> None:
    """Write the run summary as a UTF-8 JSON object to ``final_report_file``.

    The object has two keys: ``testing_duration_ms`` and
    ``testing_timed_out``.
    """
    serialized = json.dumps(
        {
            "testing_duration_ms": testing_duration_ms,
            "testing_timed_out": testing_timed_out,
        }
    )
    final_report_file.write_text(serialized, encoding="utf-8")
    logger.info(f"Final test run report written to {final_report_file}")

def _gracefully_stop_process(process: subprocess.Popen[bytes], timeout: int = GRACEFUL_STOP_TIMEOUT) -> int:
    """Stop the process group of ``process``, escalating SIGINT -> SIGTERM -> SIGKILL.

    SIGINT is given ``timeout`` seconds to take effect; SIGTERM and SIGKILL
    get a fixed 5 seconds each. Before every signal the process is polled and
    nothing is sent if it has already exited.

    Returns:
        The return code of ``process``.

    Raises:
        subprocess.TimeoutExpired: If the process has still not exited within
            the wait that follows SIGKILL.
    """
    FORCE_KILL_TIMEOUT: int = 5 # seconds

    def _exited_after(sig: int, wait_seconds: int) -> bool:
        # Signal the whole process group, then report whether the process
        # exited within ``wait_seconds``.
        os.killpg(process.pid, sig)
        try:
            process.wait(timeout=wait_seconds)
        except subprocess.TimeoutExpired:
            return False
        return True

    # Stage 1: SIGINT with the caller-supplied grace period.
    if process.poll() is not None:
        return process.returncode
    logger.info(f"Sending SIGINT to process {process.pid}")
    if _exited_after(signal.SIGINT, timeout):
        return process.returncode
    logger.warning(
        f"Process {process.pid} did not stop after SIGINT grace period ({timeout} seconds). "
        "Sending SIGTERM..."
    )

    # Stage 2: SIGTERM.
    if process.poll() is not None:
        return process.returncode
    if _exited_after(signal.SIGTERM, FORCE_KILL_TIMEOUT):
        return process.returncode
    logger.warning(f"Process {process.pid} still running after SIGTERM. Sending SIGKILL...")

    # Stage 3: SIGKILL; a timeout on this last wait propagates to the caller.
    if process.poll() is not None:
        return process.returncode
    os.killpg(process.pid, signal.SIGKILL)
    process.wait(timeout=FORCE_KILL_TIMEOUT)
    return process.returncode

def copy_test_results_to_dev_server(report_files: list[Path], dev_server_host: str) -> None:
    """Copy each report file via ``scp`` to ``/tmp/<file name>`` on the dev server.

    Raises:
        subprocess.CalledProcessError: If any ``scp`` call exits with a
            non-zero status; remaining files are then not copied.
    """
    logger.info(f"Copying test result files to dev server {dev_server_host}...")
    for local_path in report_files:
        remote_target = f"{dev_server_host}:/tmp/{local_path.name}"
        subprocess.run(["scp", str(local_path), remote_target], check=True)

def copy_logs_to_dev_server(deployed_instance: DeployedInstance, dev_server_host: str) -> None:
    """Copy the files in ``<deployment_dir>/logs`` to ``/tmp/pytest.log`` on the dev server.

    The ``logs/*`` pattern is expanded here in Python: ``subprocess.run`` with
    an argument list does not go through a shell, so passing the pattern
    through would make ``scp`` look for a file literally named ``*``.

    When the directory contains no files, a warning is logged and nothing is
    copied.

    Raises:
        subprocess.CalledProcessError: If ``scp`` exits with a non-zero status.
    """
    logger.info(f"Copying logs to dev server {dev_server_host}...")
    logs_dir = deployed_instance.deployment_dir / "logs"
    # Regular files only: scp without -r refuses directories.
    log_files = sorted(p for p in logs_dir.glob("*") if p.is_file())
    if not log_files:
        logger.warning(f"No log files found in {logs_dir}; nothing to copy.")
        return

    # All matches go to the same destination, exactly as a shell-expanded
    # "scp logs/* host:/tmp/pytest.log" would do.
    subprocess.run(
        ["scp", *[str(p) for p in log_files], f"{dev_server_host}:/tmp/pytest.log"],
        check=True,
    )

def main() -> None:
    """Poll for untested deployments forever and test them one at a time.

    For each deployment found: mark it as started, create a venv, install the
    wheel, run the system tests, then copy reports and logs to the dev server.
    A failure in any stage is logged and the loop moves straight on to the
    next check; after a fully successful run, or when nothing is found, the
    loop sleeps for ``CHECK_INTERVAL`` seconds.
    """
    logger.info("Starting test runner...")
    while True:
        logger.info("Checking for untested deployments...")
        untested_deployment = find_oldest_untested_deployment()
        if untested_deployment is None:
            logger.info(f"No untested deployments found. Retrying in {CHECK_INTERVAL} seconds...")
            time.sleep(CHECK_INTERVAL)
            continue

        logger.info(f"Found untested deployment in {untested_deployment.deployment_dir}")
        # Flag the deployment before doing any work: the finder skips flagged
        # directories, so a deployment that fails below is not retried forever.
        started_flag = untested_deployment.deployment_dir / TEST_STARTED_FLAG
        started_flag.touch()

        try:
            create_venv(untested_deployment)
            install_wheel(untested_deployment)
        except (subprocess.CalledProcessError, OSError) as e:
            logger.exception(f"Error during installation: {e}")
            continue

        try:
            report_files = run_pytest(untested_deployment)
        except Exception as e:
            # Deliberately broad: besides subprocess errors, a missing or
            # invalid deployment config surfaces here (FileNotFoundError, JSON
            # decode or schema validation errors). One bad deployment must not
            # bring down the whole runner.
            logger.exception(f"Error while running tests: {e}")
            continue

        # NOTE(review): the user directory name is passed as the scp host,
        # presumably a per-user SSH host alias; confirm.
        username = untested_deployment.user_dir.name
        try:
            copy_test_results_to_dev_server(report_files, username)
            copy_logs_to_dev_server(untested_deployment, username)
            logger.info(f"Test results copied to dev server for user: {username}")
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers the scp executable itself being unavailable.
            logger.exception(f"Error copying test results to dev server: {e}")
            continue

        time.sleep(CHECK_INTERVAL)

def setup_logging() -> None:
    """Configure root logging at DEBUG level to a timestamped file and the console.

    The log file is ``logs/test_finder_<YYYYmmdd_HHMMSS>.log`` next to this
    module. The ``logs`` directory is created when missing, because
    ``logging.FileHandler`` opens its file immediately and fails if the parent
    directory does not exist.
    """
    log_file_name = Path(__file__).parent / "logs" / f"test_finder_{datetime.now():%Y%m%d_%H%M%S}.log"
    log_file_name.parent.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[
            logging.FileHandler(log_file_name),
            logging.StreamHandler()
        ]
    )
    logger.info(f"Logging setup complete. Log file: {log_file_name}")


# Script entry point: configure logging first so that everything main() logs is captured.
if __name__ == "__main__":
    setup_logging()
    main()