# gtat-tech-career-kickstarte.../solution/tests/continuous_deployment/test_finder.py
#
# 250 lines
# 9.8 KiB
# Python

import json
import logging
import os
import signal
import time
import subprocess
import venv
from datetime import datetime
from pathlib import Path
from typing import Optional
from dataclasses import dataclass
import jsonschema
# Module-level logger; output handlers are configured in setup_logging().
logger = logging.getLogger(__name__)

# JSON schema used to validate each deployment's deployment_config.json.
# It is looked up in the grandparent of the directory containing this file.
DEPLOYMENT_CONFIG_SCHEMA_FILE: Path = Path(__file__).parent.parent.parent / "deployment_config_schema.json"

# Directory locations. Each can be overridden through an environment variable;
# a leading "~" is expanded to the current user's home directory.
DEPLOYMENT_DIR: str = os.path.expanduser(os.getenv("CK_DEPLOYMENT_DIR", "~/deployment"))
TESTS_DIR: str = os.path.expanduser(os.getenv("CK_TESTS_DIR", "~/gtat-tech-career-kickstarter/solution/tests"))

# Name of the marker file created in a deployment directory once testing starts.
TEST_STARTED_FLAG: str = "started.flag"

CHECK_INTERVAL: int = 30  # seconds
WORKERS_COUNT: int = 4
TEST_TIMEOUT: int = 20 * 60  # 20 minutes
GRACEFUL_STOP_TIMEOUT: int = 30  # seconds
@dataclass
class DeployedInstance:
    """A deployed wheel together with the virtual environment used to test it.

    ``venv_dir`` starts out as ``None``; ``create_venv`` assigns it.
    """

    user_dir: Path  # per-user directory found under DEPLOYMENT_DIR
    deployment_dir: Path  # one deployment directory inside ``user_dir``
    wheel_file: Path  # the wheel file found in ``deployment_dir``
    venv_dir: Optional[Path] = None  # virtual environment location, once assigned

    @property
    def py(self) -> str:
        """Return the path of the virtual environment's Python interpreter.

        Raises:
            RuntimeError: if ``venv_dir`` has not been assigned yet.
        """
        # Raise explicitly instead of using ``assert``: assertions are removed
        # under ``python -O``, which would turn this check into an obscure
        # TypeError on ``None / "bin"``.
        if self.venv_dir is None:
            raise RuntimeError(f"No virtual environment has been set for {self}.")
        return str(self.venv_dir / "bin" / "python")

    def __repr__(self) -> str:
        """Return ``<user_dir>//<deployment_dir>`` (the format used in log messages)."""
        return f"{self.user_dir}//{self.deployment_dir}"
def find_oldest_untested_deployment() -> Optional[DeployedInstance]:
    """Return the untested deployment whose directory has the oldest modification time.

    Scans ``DEPLOYMENT_DIR/<user>/<deployment>``. A deployment directory is a
    candidate when it does not contain the ``TEST_STARTED_FLAG`` file and holds
    exactly one ``*.whl`` file.

    Returns:
        The candidate with the smallest directory ``st_mtime`` (the first one
        encountered wins a tie), or ``None`` when there is no candidate.

    Raises:
        ValueError: if a deployment directory contains more than one wheel file.
    """
    oldest_deployment: Optional[DeployedInstance] = None
    # mtime of ``oldest_deployment``; cached so the current best directory is
    # not stat()-ed again for every comparison. Only meaningful once
    # ``oldest_deployment`` is set.
    oldest_mtime = 0.0

    for user_dir in Path(DEPLOYMENT_DIR).iterdir():
        if not user_dir.is_dir():
            continue
        for deployment_dir in user_dir.iterdir():
            if not deployment_dir.is_dir():
                continue

            if (deployment_dir / TEST_STARTED_FLAG).exists():
                logger.debug(f"Skipping {deployment_dir} as it has already been marked as started.")
                continue

            wheels_files = list(deployment_dir.glob("*.whl"))
            if not wheels_files:
                logger.debug(f"Skipping {deployment_dir} as it does not contain any wheel files.")
                continue
            if len(wheels_files) > 1:
                # Name the offending directory so the problem can be located.
                raise ValueError(f"Multiple wheel files found in the same directory: {deployment_dir}")

            mtime = deployment_dir.stat().st_mtime
            if oldest_deployment is None or mtime < oldest_mtime:
                oldest_deployment = DeployedInstance(user_dir, deployment_dir, wheels_files[0])
                oldest_mtime = mtime

    return oldest_deployment
def create_venv(deployed_instance: DeployedInstance) -> None:
    """Create a virtual environment with pip in ``<deployment_dir>/venv``.

    The location is stored on ``deployed_instance.venv_dir`` before the
    environment is built.
    """
    logger.info(f"Creating virtual environment for {deployed_instance}...")

    venv_location = deployed_instance.deployment_dir / "venv"
    deployed_instance.venv_dir = venv_location
    venv.create(str(venv_location), with_pip=True)

    logger.info(f"Virtual environment created for {deployed_instance}.")
def install_wheel(deployed_instance: DeployedInstance) -> None:
    """Install the deployment's wheel with pip, using the venv's interpreter.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    wheel = deployed_instance.wheel_file
    logger.info(f"Installing wheel {wheel} into its virtual environment...")

    pip_install = [deployed_instance.py, "-m", "pip", "install", str(wheel)]
    subprocess.run(pip_install, check=True)

    logger.info(f"Wheel {wheel} installed into its virtual environment.")
def _resolve_test_files(deployed_instance: DeployedInstance) -> list[Path]:
    """Return the system-test files selected by the deployment's configuration.

    Loads ``deployment_config.json`` from the deployment directory, validates
    it against the schema stored in ``DEPLOYMENT_CONFIG_SCHEMA_FILE`` and maps
    every entry of its ``systemTests`` value to
    ``TESTS_DIR/test_<entry>_system.py``.

    Raises:
        FileNotFoundError: if a resolved test file does not exist (also raised
            by the file reads when the config or schema file is missing).
        jsonschema.ValidationError: if the config does not match the schema.
    """
    config_file = deployed_instance.deployment_dir / "deployment_config.json"

    # Read JSON as UTF-8 explicitly instead of relying on the locale's default
    # encoding; this matches how _write_final_report writes its JSON file.
    with config_file.open(encoding="utf-8") as f:
        config = json.load(f)
    with DEPLOYMENT_CONFIG_SCHEMA_FILE.open(encoding="utf-8") as f:
        schema = json.load(f)

    jsonschema.validate(instance=config, schema=schema)

    system_tests = config["systemTests"]
    test_files = [Path(TESTS_DIR) / f"test_{system_test}_system.py" for system_test in system_tests]

    missing = [str(p) for p in test_files if not p.exists()]
    if missing:
        raise FileNotFoundError(f"Test files not found: {missing}")
    return test_files
def run_pytest(deployed_instance: DeployedInstance) -> list[Path]:
    """Run the deployment's system tests with pytest from its virtual environment.

    pytest is started in its own session with the deployment directory as the
    working directory. If it does not finish within ``TEST_TIMEOUT`` seconds it
    is stopped via ``_gracefully_stop_process``. A small JSON summary (duration
    and whether the run timed out) is written afterwards. The exit status of
    pytest is not inspected here.

    Returns:
        ``[junit_report_file, final_report_file]``, both located in the
        deployment directory.

    Raises:
        Whatever ``_resolve_test_files`` raises for an invalid configuration,
        and ``subprocess.TimeoutExpired`` if the process cannot be stopped.
    """
    logger.info(f"Running pytest for {deployed_instance}...")
    test_files = _resolve_test_files(deployed_instance)
    logger.info(f"Running system tests: {[str(f) for f in test_files]}")

    junit_report_file = deployed_instance.deployment_dir / "test_results.xml"
    final_report_file = deployed_instance.deployment_dir / "final_report.json"
    deployment_config_file = deployed_instance.deployment_dir / "deployment_config.json"

    # Options that take a value are passed as two argv elements (flag, value).
    # A single element such as "-n 4" hands the parser the value " 4" with a
    # leading space, which only works if the option's parser happens to strip it.
    # NOTE(review): --venv-path and --deployment-config are not built-in pytest
    # options; presumably the test suite registers them - confirm.
    command = [
        deployed_instance.py, "-m", "pytest",
        *(str(test_file) for test_file in test_files),
        "-n", str(WORKERS_COUNT),
        "-W", "error::pytest.PytestUnhandledThreadExceptionWarning",
        f"--venv-path={deployed_instance.venv_dir}",
        f"--deployment-config={deployment_config_file}",
        f"--junit-xml={junit_report_file}",
    ]
    logger.debug(f"Running command: {command}")

    # start_new_session=True puts pytest in its own process group, which is
    # what lets _gracefully_stop_process signal the whole group via os.killpg.
    process = subprocess.Popen(command, cwd=deployed_instance.deployment_dir, start_new_session=True)
    logger.debug(f"Process started with PID: {process.pid}")

    started_at = time.monotonic()
    timed_out = False
    try:
        process.wait(timeout=TEST_TIMEOUT)
    except subprocess.TimeoutExpired:
        timed_out = True
        logger.warning(f"Pytest process {process.pid} exceeded timeout ({TEST_TIMEOUT} seconds) for {deployed_instance}. Shutting down...")
        _gracefully_stop_process(process, timeout=GRACEFUL_STOP_TIMEOUT)

    # The measured duration includes the time spent stopping a timed-out run.
    testing_duration_ms = int((time.monotonic() - started_at) * 1000)
    logger.debug(f"Testing done in {testing_duration_ms}ms")

    _write_final_report(
        final_report_file=final_report_file,
        testing_duration_ms=testing_duration_ms,
        testing_timed_out=timed_out,
    )
    logger.info(f"Testing completed for {deployed_instance}.")
    return [junit_report_file, final_report_file]
def _write_final_report(final_report_file: Path, testing_duration_ms: int, testing_timed_out: bool) -> None:
    """Write the test-run summary to ``final_report_file`` as UTF-8 JSON.

    The JSON object has two keys: ``testing_duration_ms`` and
    ``testing_timed_out``. An existing file is overwritten.
    """
    summary = dict(
        testing_duration_ms=testing_duration_ms,
        testing_timed_out=testing_timed_out,
    )
    final_report_file.write_text(json.dumps(summary), encoding="utf-8")
    logger.info(f"Final test run report written to {final_report_file}")
def _gracefully_stop_process(process: subprocess.Popen[bytes], timeout: int = GRACEFUL_STOP_TIMEOUT) -> int:
    """Stop ``process`` by escalating SIGINT -> SIGTERM -> SIGKILL.

    Signals are sent with ``os.killpg`` using ``process.pid`` as the group id
    (run_pytest starts the child with ``start_new_session=True``). Before each
    signal the process is polled, and the function returns as soon as it has
    exited.

    Args:
        process: the child process to stop.
        timeout: seconds to wait after SIGINT before sending SIGTERM.

    Returns:
        The process's return code.

    Raises:
        subprocess.TimeoutExpired: if the process has still not exited within
            the final wait that follows SIGKILL.
    """
    force_kill_timeout = 5  # seconds; wait applied after SIGTERM and after SIGKILL
    pid = process.pid

    # Stage 1: SIGINT, then wait for the caller-supplied grace period.
    if process.poll() is not None:
        return process.returncode
    logger.info(f"Sending SIGINT to process {pid}")
    os.killpg(pid, signal.SIGINT)
    try:
        process.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        logger.warning(
            f"Process {pid} did not stop after SIGINT grace period ({timeout} seconds). "
            "Sending SIGTERM..."
        )
    else:
        return process.returncode

    # Stage 2: SIGTERM.
    if process.poll() is not None:
        return process.returncode
    os.killpg(pid, signal.SIGTERM)
    try:
        process.wait(timeout=force_kill_timeout)
    except subprocess.TimeoutExpired:
        logger.warning(f"Process {pid} still running after SIGTERM. Sending SIGKILL...")
    else:
        return process.returncode

    # Stage 3: SIGKILL; a timeout on this last wait propagates to the caller.
    if process.poll() is not None:
        return process.returncode
    os.killpg(pid, signal.SIGKILL)
    process.wait(timeout=force_kill_timeout)
    return process.returncode
def copy_test_results_to_dev_server(report_files: list[Path], dev_server_host: str) -> None:
    """Copy every report file to ``/tmp/<file name>`` on ``dev_server_host`` via ``scp``.

    Raises:
        subprocess.CalledProcessError: if an ``scp`` call exits with a non-zero
            status; files after the failing one are not copied.
    """
    logger.info(f"Copying test result files to dev server {dev_server_host}...")
    for local_report in report_files:
        remote_target = f"{dev_server_host}:/tmp/{local_report.name}"
        subprocess.run(["scp", str(local_report), remote_target], check=True)
def copy_logs_to_dev_server(deployed_instance: DeployedInstance, dev_server_host: str) -> None:
    """Copy the files in ``<deployment_dir>/logs`` to ``/tmp/pytest.log`` on the dev server.

    Only regular, non-hidden files directly inside the ``logs`` directory are
    copied (what a shell would pass to ``scp`` for ``logs/*``, minus
    directories). When there is nothing to copy a warning is logged and
    ``scp`` is not invoked.

    Raises:
        subprocess.CalledProcessError: if ``scp`` exits with a non-zero status.
    """
    logger.info(f"Copying logs to dev server {dev_server_host}...")

    # subprocess.run() with an argument list does not involve a shell, so a
    # literal "logs/*" argument is never expanded and scp would look for a file
    # actually named "*". Expand the pattern here instead.
    logs_dir = deployed_instance.deployment_dir / "logs"
    log_files = sorted(
        path for path in logs_dir.glob("*")
        if path.is_file() and not path.name.startswith(".")
    )
    if not log_files:
        logger.warning(f"No log files found in {logs_dir}, nothing to copy.")
        return

    # NOTE(review): with more than one source file scp needs the target to be
    # a directory, so the fixed "/tmp/pytest.log" target only suits a single
    # log file - confirm how many files the logs directory is expected to hold.
    subprocess.run(
        ["scp", *(str(path) for path in log_files), f"{dev_server_host}:/tmp/pytest.log"],
        check=True,
    )
def main() -> None:
    """Poll for untested deployments forever and test them one at a time.

    For each deployment returned by ``find_oldest_untested_deployment`` the
    loop:

    1. creates the ``TEST_STARTED_FLAG`` file so the deployment is not picked
       up again,
    2. creates a virtual environment and installs the wheel,
    3. runs the system tests,
    4. copies the reports and logs to the dev server, using the name of the
       user's directory as the ``scp`` host.

    A failure in steps 2-4 is logged and the loop moves on to the next
    deployment. When no untested deployment exists the loop sleeps for
    ``CHECK_INTERVAL`` seconds. This function never returns.
    """
    logger.info("Starting test runner...")
    while True:
        logger.info("Checking for untested deployments...")
        untested_deployment = find_oldest_untested_deployment()
        if untested_deployment is None:
            logger.info(f"No untested deployments found. Retrying in {CHECK_INTERVAL} seconds...")
            time.sleep(CHECK_INTERVAL)
            continue

        logger.info(f"Found untested deployment in {untested_deployment.deployment_dir}")
        # Mark the deployment before doing any work: a deployment that fails
        # below must not be selected again on the next iteration.
        started_flag = untested_deployment.deployment_dir / TEST_STARTED_FLAG
        started_flag.touch()

        try:
            create_venv(untested_deployment)
            install_wheel(untested_deployment)
        except (subprocess.SubprocessError, OSError) as e:
            # OSError covers filesystem failures while building the venv.
            logger.exception(f"Error during installation: {e}")
            continue

        try:
            report_files = run_pytest(untested_deployment)
        except Exception as e:
            # Deliberately broad at this top-level boundary: besides subprocess
            # errors, run_pytest raises for a malformed or schema-invalid
            # deployment config and for missing test files. One bad deployment
            # must not terminate the runner for everyone else.
            logger.exception(f"Error while running tests: {e}")
            continue

        username = untested_deployment.user_dir.name
        try:
            copy_test_results_to_dev_server(report_files, username)
            copy_logs_to_dev_server(untested_deployment, username)
        except (subprocess.SubprocessError, OSError) as e:
            # OSError covers e.g. the scp executable not being available.
            logger.exception(f"Error copying test results to dev server: {e}")
        else:
            logger.info(f"Test results copied to dev server for user: {username}")
def setup_logging() -> None:
    """Configure logging to a timestamped file and to the console.

    The log file is ``logs/test_finder_<YYYYmmdd_HHMMSS>.log`` next to this
    module; the ``logs`` directory is created when it does not exist. The root
    logger is set to ``DEBUG``.
    """
    log_file_name = Path(__file__).parent / "logs" / f"test_finder_{datetime.now():%Y%m%d_%H%M%S}.log"

    # FileHandler opens its file on construction and raises FileNotFoundError
    # when the parent directory is missing, so make sure it exists first.
    log_file_name.parent.mkdir(parents=True, exist_ok=True)

    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[
            logging.FileHandler(log_file_name),
            logging.StreamHandler(),
        ],
    )
    logger.info(f"Logging setup complete. Log file: {log_file_name}")
# Script entry point: configure logging first, then start the polling loop.
if __name__ == "__main__":
    setup_logging()
    main()