diff options
author | SooChan Lim <sc1.lim@samsung.com> | 2023-10-10 15:05:42 +0900 |
---|---|---|
committer | Xuelian Bai <xuelian.bai@samsung.com> | 2024-01-18 09:31:56 +0800 |
commit | 4e643fe7bf7c1e37078e69bc81a07176e4ff5780 (patch) | |
tree | 8593646c29ce51e4615161e57e8b4fe9bec2a46c /.gitlab-ci/lava | |
parent | 7a9577385e4bd0a54ec0cb12a509e0c47886294e (diff) | |
download | mesa-4e643fe7bf7c1e37078e69bc81a07176e4ff5780.tar.gz mesa-4e643fe7bf7c1e37078e69bc81a07176e4ff5780.tar.bz2 mesa-4e643fe7bf7c1e37078e69bc81a07176e4ff5780.zip |
remove gitlab stuffs
These are useless.
Change-Id: I33405f08ccd0b3cccc5a737a099aa4adc2d011ce
Diffstat (limited to '.gitlab-ci/lava')
-rw-r--r-- | .gitlab-ci/lava/__init__.py | 0 | ||||
-rw-r--r-- | .gitlab-ci/lava/exceptions.py | 29 | ||||
-rwxr-xr-x | .gitlab-ci/lava/lava-gitlab-ci.yml | 157 | ||||
-rwxr-xr-x | .gitlab-ci/lava/lava-pytest.sh | 22 | ||||
-rwxr-xr-x | .gitlab-ci/lava/lava-submit.sh | 61 | ||||
-rwxr-xr-x | .gitlab-ci/lava/lava_job_submitter.py | 537 | ||||
-rw-r--r-- | .gitlab-ci/lava/requirements-test.txt | 6 | ||||
-rw-r--r-- | .gitlab-ci/lava/requirements.txt | 2 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/__init__.py | 18 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/console_format.py | 10 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/gitlab_section.py | 103 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/lava_farm.py | 35 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/lava_job.py | 186 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/lava_job_definition.py | 150 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/lava_log_hints.py | 43 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/lava_proxy.py | 44 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/log_follower.py | 310 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/log_section.py | 113 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/ssh_job_definition.py | 208 | ||||
-rw-r--r-- | .gitlab-ci/lava/utils/uart_job_definition.py | 171 |
20 files changed, 0 insertions, 2205 deletions
diff --git a/.gitlab-ci/lava/__init__.py b/.gitlab-ci/lava/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 --- a/.gitlab-ci/lava/__init__.py +++ /dev/null diff --git a/.gitlab-ci/lava/exceptions.py b/.gitlab-ci/lava/exceptions.py deleted file mode 100644 index f877b024510..00000000000 --- a/.gitlab-ci/lava/exceptions.py +++ /dev/null @@ -1,29 +0,0 @@ -from datetime import timedelta - - -class MesaCIException(Exception): - pass - - -class MesaCITimeoutError(MesaCIException): - def __init__(self, *args, timeout_duration: timedelta) -> None: - super().__init__(*args) - self.timeout_duration = timeout_duration - - -class MesaCIRetryError(MesaCIException): - def __init__(self, *args, retry_count: int, last_job: None) -> None: - super().__init__(*args) - self.retry_count = retry_count - self.last_job = last_job - - -class MesaCIParseException(MesaCIException): - pass - - -class MesaCIKnownIssueException(MesaCIException): - """Exception raised when the Mesa CI script finds something in the logs that - is known to cause the LAVA job to eventually fail""" - - pass diff --git a/.gitlab-ci/lava/lava-gitlab-ci.yml b/.gitlab-ci/lava/lava-gitlab-ci.yml deleted file mode 100755 index de589595a99..00000000000 --- a/.gitlab-ci/lava/lava-gitlab-ci.yml +++ /dev/null @@ -1,157 +0,0 @@ -variables: - LAVA_SSH_CLIENT_IMAGE: "${CI_REGISTRY_IMAGE}/alpine/x86_64_lava_ssh_client:${ALPINE_X86_64_LAVA_SSH_TAG}--${MESA_TEMPLATES_COMMIT}" - - -.lava-test: - # Cancel job if a newer commit is pushed to the same branch - interruptible: true - variables: - GIT_STRATEGY: none # testing doesn't build anything from source - FDO_CI_CONCURRENT: 6 # should be replaced by per-machine definitions - # proxy used to cache data locally - FDO_HTTP_CACHE_URI: "http://caching-proxy/cache/?uri=" - # base system generated by the container build job, shared between many pipelines - BASE_SYSTEM_HOST_PREFIX: "${S3_HOST}/mesa-lava" - BASE_SYSTEM_MAINLINE_HOST_PATH: 
"${BASE_SYSTEM_HOST_PREFIX}/${FDO_UPSTREAM_REPO}/${DISTRIBUTION_TAG}/${DEBIAN_ARCH}" - BASE_SYSTEM_FORK_HOST_PATH: "${BASE_SYSTEM_HOST_PREFIX}/${CI_PROJECT_PATH}/${DISTRIBUTION_TAG}/${DEBIAN_ARCH}" - # per-job build artifacts - JOB_ROOTFS_OVERLAY_PATH: "${JOB_ARTIFACTS_BASE}/job-rootfs-overlay.tar.gz" - JOB_RESULTS_PATH: "${JOB_ARTIFACTS_BASE}/results.tar.zst" - S3_ARTIFACT_NAME: "mesa-${ARCH}-default-debugoptimized" - S3_RESULTS_UPLOAD: "${JOB_ARTIFACTS_BASE}" - PIGLIT_NO_WINDOW: 1 - VISIBILITY_GROUP: "Collabora+fdo" - script: - - ./artifacts/lava/lava-submit.sh - artifacts: - name: "${CI_PROJECT_NAME}_${CI_JOB_NAME}" - when: always - paths: - - results/ - exclude: - - results/*.shader_cache - reports: - junit: results/junit.xml - tags: - - $RUNNER_TAG - after_script: - - curl -L --retry 4 -f --retry-all-errors --retry-delay 60 -s "https://${JOB_RESULTS_PATH}" | tar --zstd -x - needs: - - alpine/x86_64_lava_ssh_client - - !reference [.required-for-hardware-jobs, needs] - -.lava-test:arm32: - variables: - ARCH: arm32 - DEBIAN_ARCH: armhf - KERNEL_IMAGE_NAME: zImage - KERNEL_IMAGE_TYPE: "zimage" - BOOT_METHOD: u-boot - extends: - - .use-debian/arm64_build # for same $MESA_ARTIFACTS_TAG as in kernel+rootfs_arm32 - - .use-debian/x86_64_build - - .lava-test - - .use-kernel+rootfs-arm - needs: - - !reference [.lava-test, needs] - - kernel+rootfs_arm32 - - debian/x86_64_build - - debian-arm32 - -.lava-test-deqp:arm32: - extends: - - .lava-test:arm32 - variables: - HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" - -.lava-test:arm64: - variables: - ARCH: arm64 - DEBIAN_ARCH: arm64 - KERNEL_IMAGE_NAME: Image - KERNEL_IMAGE_TYPE: "image" - BOOT_METHOD: u-boot - extends: - - .use-debian/arm64_build # for same $MESA_ARTIFACTS_TAG as in kernel+rootfs_arm64 - - .use-debian/x86_64_build - - .lava-test - - .use-kernel+rootfs-arm - dependencies: - - debian-arm64 - needs: - - !reference [.lava-test, needs] - - kernel+rootfs_arm64 - - debian/x86_64_build - - debian-arm64 - 
-.lava-test-deqp:arm64: - variables: - HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" - extends: - - .lava-test:arm64 - -.lava-test:x86_64: - variables: - ARCH: x86_64 - DEBIAN_ARCH: amd64 - KERNEL_IMAGE_NAME: bzImage - KERNEL_IMAGE_TYPE: "zimage" - BOOT_METHOD: u-boot - extends: - - .use-debian/x86_64_build-base # for same $MESA_ARTIFACTS_BASE_TAG as in kernel+rootfs_x86_64 - - .use-debian/x86_64_build - - .lava-test - - .use-kernel+rootfs-x86_64 - needs: - - !reference [.lava-test, needs] - - kernel+rootfs_x86_64 - - debian-testing - -.lava-test-deqp:x86_64: - variables: - HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" - extends: - - .lava-test:x86_64 - -.lava-traces-base: - variables: - HWCI_TEST_SCRIPT: "/install/piglit/piglit-traces.sh" - # until we overcome Infrastructure issues, give traces extra 5 min before timeout - DEVICE_HANGING_TIMEOUT_SEC: 600 - artifacts: - reports: - junit: results/junit.xml - -.lava-piglit: - variables: - PIGLIT_REPLAY_DEVICE_NAME: "gl-${GPU_VERSION}" - PIGLIT_RESULTS: "${GPU_VERSION}-${PIGLIT_PROFILES}" - HWCI_TEST_SCRIPT: "/install/piglit/piglit-runner.sh" - -.lava-piglit-traces:x86_64: - extends: - - .lava-test:x86_64 - - .lava-piglit - - .lava-traces-base - -.lava-piglit-traces:arm32: - extends: - - .lava-test:arm32 - - .lava-piglit - - .lava-traces-base - -.lava-piglit-traces:arm64: - extends: - - .lava-test:arm64 - - .lava-piglit - - .lava-traces-base - -.lava-piglit:x86_64: - extends: - - .lava-test:x86_64 - - .lava-piglit - -.lava-piglit:arm64: - extends: - - .lava-test:arm64 - - .lava-piglit diff --git a/.gitlab-ci/lava/lava-pytest.sh b/.gitlab-ci/lava/lava-pytest.sh deleted file mode 100755 index 786a669b917..00000000000 --- a/.gitlab-ci/lava/lava-pytest.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -# SPDX-License-Identifier: MIT -# © Collabora Limited -# Author: Guilherme Gallo <guilherme.gallo@collabora.com> - -# This script runs unit/integration tests related with LAVA CI tools -# shellcheck disable=SC1091 # The 
relative paths in this file only become valid at runtime. - -set -ex - -# Use this script in a python virtualenv for isolation -python3 -m venv .venv -. .venv/bin/activate -python3 -m pip install --break-system-packages -r "${CI_PROJECT_DIR}/.gitlab-ci/lava/requirements-test.txt" - -TEST_DIR=${CI_PROJECT_DIR}/.gitlab-ci/tests - -PYTHONPATH="${TEST_DIR}:${PYTHONPATH}" python3 -m \ - pytest "${TEST_DIR}" \ - -W ignore::DeprecationWarning \ - --junitxml=artifacts/ci_scripts_report.xml \ - -m 'not slow' diff --git a/.gitlab-ci/lava/lava-submit.sh b/.gitlab-ci/lava/lava-submit.sh deleted file mode 100755 index e02bcb24cba..00000000000 --- a/.gitlab-ci/lava/lava-submit.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2086 # we want word splitting - -set -ex - -# If we run in the fork (not from mesa or Marge-bot), reuse mainline kernel and rootfs, if exist. -BASE_SYSTEM_HOST_PATH="${BASE_SYSTEM_MAINLINE_HOST_PATH}" -if [ "$CI_PROJECT_PATH" != "$FDO_UPSTREAM_REPO" ]; then - if ! curl -s -X HEAD -L --retry 4 -f --retry-delay 60 \ - "https://${BASE_SYSTEM_MAINLINE_HOST_PATH}/done"; then - echo "Using kernel and rootfs from the fork, cached from mainline is unavailable." - BASE_SYSTEM_HOST_PATH="${BASE_SYSTEM_FORK_HOST_PATH}" - else - echo "Using the cached mainline kernel and rootfs." - fi -fi - -rm -rf results -mkdir -p results/job-rootfs-overlay/ - -cp artifacts/ci-common/capture-devcoredump.sh results/job-rootfs-overlay/ -cp artifacts/ci-common/init-*.sh results/job-rootfs-overlay/ -cp artifacts/ci-common/intel-gpu-freq.sh results/job-rootfs-overlay/ -cp artifacts/ci-common/kdl.sh results/job-rootfs-overlay/ -cp "$SCRIPTS_DIR"/setup-test-env.sh results/job-rootfs-overlay/ - -# Prepare env vars for upload. -section_start variables "Variables passed through:" -artifacts/ci-common/generate-env.sh | tee results/job-rootfs-overlay/set-job-env-vars.sh -section_end variables - -tar zcf job-rootfs-overlay.tar.gz -C results/job-rootfs-overlay/ . 
-ci-fairy s3cp --token-file "${CI_JOB_JWT_FILE}" job-rootfs-overlay.tar.gz "https://${JOB_ROOTFS_OVERLAY_PATH}" - -ARTIFACT_URL="${FDO_HTTP_CACHE_URI:-}https://${PIPELINE_ARTIFACTS_BASE}/${S3_ARTIFACT_NAME:?}.tar.zst" - -touch results/lava.log -tail -f results/lava.log & -PYTHONPATH=artifacts/ artifacts/lava/lava_job_submitter.py \ - submit \ - --dump-yaml \ - --pipeline-info "$CI_JOB_NAME: $CI_PIPELINE_URL on $CI_COMMIT_REF_NAME ${CI_NODE_INDEX}/${CI_NODE_TOTAL}" \ - --rootfs-url-prefix "https://${BASE_SYSTEM_HOST_PATH}" \ - --kernel-url-prefix "${KERNEL_IMAGE_BASE}/${DEBIAN_ARCH}" \ - --build-url "${ARTIFACT_URL}" \ - --job-rootfs-overlay-url "${FDO_HTTP_CACHE_URI:-}https://${JOB_ROOTFS_OVERLAY_PATH}" \ - --job-timeout-min ${JOB_TIMEOUT:-30} \ - --first-stage-init artifacts/ci-common/init-stage1.sh \ - --ci-project-dir "${CI_PROJECT_DIR}" \ - --device-type "${DEVICE_TYPE}" \ - --dtb-filename "${DTB}" \ - --jwt-file "${CI_JOB_JWT_FILE}" \ - --kernel-image-name "${KERNEL_IMAGE_NAME}" \ - --kernel-image-type "${KERNEL_IMAGE_TYPE}" \ - --boot-method "${BOOT_METHOD}" \ - --visibility-group "${VISIBILITY_GROUP}" \ - --lava-tags "${LAVA_TAGS}" \ - --mesa-job-name "$CI_JOB_NAME" \ - --structured-log-file "results/lava_job_detail.json" \ - --ssh-client-image "${LAVA_SSH_CLIENT_IMAGE}" \ - >> results/lava.log diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py deleted file mode 100755 index b2d8e5306e7..00000000000 --- a/.gitlab-ci/lava/lava_job_submitter.py +++ /dev/null @@ -1,537 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (C) 2020 - 2023 Collabora Limited -# Authors: -# Gustavo Padovan <gustavo.padovan@collabora.com> -# Guilherme Gallo <guilherme.gallo@collabora.com> -# -# SPDX-License-Identifier: MIT - -"""Send a job to LAVA, track it and collect log back""" - -import contextlib -import json -import pathlib -import sys -import time -from collections import defaultdict -from dataclasses import dataclass, fields -from datetime 
import datetime, timedelta -from io import StringIO -from os import environ, getenv, path -from typing import Any, Optional - -import fire -from lava.exceptions import ( - MesaCIException, - MesaCIParseException, - MesaCIRetryError, - MesaCITimeoutError, -) -from lava.utils import CONSOLE_LOG -from lava.utils import DEFAULT_GITLAB_SECTION_TIMEOUTS as GL_SECTION_TIMEOUTS -from lava.utils import ( - GitlabSection, - LAVAJob, - LogFollower, - LogSectionType, - call_proxy, - fatal_err, - generate_lava_job_definition, - hide_sensitive_data, - print_log, - setup_lava_proxy, -) -from lavacli.utils import flow_yaml as lava_yaml - -# Initialize structural logging with a defaultdict, it can be changed for more -# sophisticated dict-like data abstractions. -STRUCTURAL_LOG = defaultdict(list) - -try: - from ci.structured_logger import StructuredLogger -except ImportError as e: - print_log( - f"Could not import StructuredLogger library: {e}. " - "Falling back to defaultdict based structured logger." - ) - -# Timeout in seconds to decide if the device from the dispatched LAVA job has -# hung or not due to the lack of new log output. -DEVICE_HANGING_TIMEOUT_SEC = int(getenv("DEVICE_HANGING_TIMEOUT_SEC", 5*60)) - -# How many seconds the script should wait before try a new polling iteration to -# check if the dispatched LAVA job is running or waiting in the job queue. -WAIT_FOR_DEVICE_POLLING_TIME_SEC = int( - getenv("LAVA_WAIT_FOR_DEVICE_POLLING_TIME_SEC", 1) -) - -# How many seconds the script will wait to let LAVA finalize the job and give -# the final details. -WAIT_FOR_LAVA_POST_PROCESSING_SEC = int(getenv("LAVA_WAIT_LAVA_POST_PROCESSING_SEC", 5)) -WAIT_FOR_LAVA_POST_PROCESSING_RETRIES = int( - getenv("LAVA_WAIT_LAVA_POST_PROCESSING_RETRIES", 6) -) - -# How many seconds to wait between log output LAVA RPC calls. -LOG_POLLING_TIME_SEC = int(getenv("LAVA_LOG_POLLING_TIME_SEC", 5)) - -# How many retries should be made when a timeout happen. 
-NUMBER_OF_RETRIES_TIMEOUT_DETECTION = int( - getenv("LAVA_NUMBER_OF_RETRIES_TIMEOUT_DETECTION", 2) -) - - -def raise_exception_from_metadata(metadata: dict, job_id: int) -> None: - """ - Investigate infrastructure errors from the job metadata. - If it finds an error, raise it as MesaCIException. - """ - if "result" not in metadata or metadata["result"] != "fail": - return - if "error_type" in metadata: - error_type = metadata["error_type"] - if error_type == "Infrastructure": - raise MesaCIException( - f"LAVA job {job_id} failed with Infrastructure Error. Retry." - ) - if error_type == "Job": - # This happens when LAVA assumes that the job cannot terminate or - # with mal-formed job definitions. As we are always validating the - # jobs, only the former is probable to happen. E.g.: When some LAVA - # action timed out more times than expected in job definition. - raise MesaCIException( - f"LAVA job {job_id} failed with JobError " - "(possible LAVA timeout misconfiguration/bug). Retry." - ) - if "case" in metadata and metadata["case"] == "validate": - raise MesaCIException( - f"LAVA job {job_id} failed validation (possible download error). Retry." - ) - - -def raise_lava_error(job) -> None: - # Look for infrastructure errors, raise them, and retry if we see them. 
- results_yaml = call_proxy(job.proxy.results.get_testjob_results_yaml, job.job_id) - results = lava_yaml.load(results_yaml) - for res in results: - metadata = res["metadata"] - raise_exception_from_metadata(metadata, job.job_id) - - # If we reach this far, it means that the job ended without hwci script - # result and no LAVA infrastructure problem was found - job.status = "fail" - - -def show_final_job_data(job, colour=f"{CONSOLE_LOG['BOLD']}{CONSOLE_LOG['FG_GREEN']}"): - with GitlabSection( - "job_data", - "LAVA job info", - type=LogSectionType.LAVA_POST_PROCESSING, - start_collapsed=True, - colour=colour, - ): - wait_post_processing_retries: int = WAIT_FOR_LAVA_POST_PROCESSING_RETRIES - while not job.is_post_processed() and wait_post_processing_retries > 0: - # Wait a little until LAVA finishes processing metadata - time.sleep(WAIT_FOR_LAVA_POST_PROCESSING_SEC) - wait_post_processing_retries -= 1 - - if not job.is_post_processed(): - waited_for_sec: int = ( - WAIT_FOR_LAVA_POST_PROCESSING_RETRIES - * WAIT_FOR_LAVA_POST_PROCESSING_SEC - ) - print_log( - f"Waited for {waited_for_sec} seconds " - "for LAVA to post-process the job, it haven't finished yet. 
" - "Dumping it's info anyway" - ) - - details: dict[str, str] = job.show() - for field, value in details.items(): - print(f"{field:<15}: {value}") - job.refresh_log() - - -def fetch_logs(job, max_idle_time, log_follower) -> None: - is_job_hanging(job, max_idle_time) - - time.sleep(LOG_POLLING_TIME_SEC) - new_log_lines = fetch_new_log_lines(job) - parsed_lines = parse_log_lines(job, log_follower, new_log_lines) - - for line in parsed_lines: - print_log(line) - - -def is_job_hanging(job, max_idle_time): - # Poll to check for new logs, assuming that a prolonged period of - # silence means that the device has died and we should try it again - if datetime.now() - job.last_log_time > max_idle_time: - max_idle_time_min = max_idle_time.total_seconds() / 60 - - raise MesaCITimeoutError( - f"{CONSOLE_LOG['BOLD']}" - f"{CONSOLE_LOG['FG_YELLOW']}" - f"LAVA job {job.job_id} does not respond for {max_idle_time_min} " - "minutes. Retry." - f"{CONSOLE_LOG['RESET']}", - timeout_duration=max_idle_time, - ) - - -def parse_log_lines(job, log_follower, new_log_lines): - - if log_follower.feed(new_log_lines): - # If we had non-empty log data, we can assure that the device is alive. - job.heartbeat() - parsed_lines = log_follower.flush() - - # Only parse job results when the script reaches the end of the logs. - # Depending on how much payload the RPC scheduler.jobs.logs get, it may - # reach the LAVA_POST_PROCESSING phase. - if log_follower.current_section.type in ( - LogSectionType.TEST_CASE, - LogSectionType.LAVA_POST_PROCESSING, - ): - parsed_lines = job.parse_job_result_from_log(parsed_lines) - return parsed_lines - - -def fetch_new_log_lines(job): - - # The XMLRPC binary packet may be corrupted, causing a YAML scanner error. - # Retry the log fetching several times before exposing the error. 
- for _ in range(5): - with contextlib.suppress(MesaCIParseException): - new_log_lines = job.get_logs() - break - else: - raise MesaCIParseException - return new_log_lines - - -def submit_job(job): - try: - job.submit() - except Exception as mesa_ci_err: - raise MesaCIException( - f"Could not submit LAVA job. Reason: {mesa_ci_err}" - ) from mesa_ci_err - - -def wait_for_job_get_started(job): - print_log(f"Waiting for job {job.job_id} to start.") - while not job.is_started(): - time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC) - job.refresh_log() - print_log(f"Job {job.job_id} started.") - - -def bootstrap_log_follower() -> LogFollower: - gl = GitlabSection( - id="lava_boot", - header="LAVA boot", - type=LogSectionType.LAVA_BOOT, - start_collapsed=True, - ) - print(gl.start()) - return LogFollower(starting_section=gl) - - -def follow_job_execution(job, log_follower): - with log_follower: - max_idle_time = timedelta(seconds=DEVICE_HANGING_TIMEOUT_SEC) - # Start to check job's health - job.heartbeat() - while not job.is_finished: - fetch_logs(job, max_idle_time, log_follower) - structural_log_phases(job, log_follower) - - # Mesa Developers expect to have a simple pass/fail job result. - # If this does not happen, it probably means a LAVA infrastructure error - # happened. - if job.status not in ["pass", "fail"]: - raise_lava_error(job) - - # LogFollower does some cleanup after the early exit (trigger by - # `hwci: pass|fail` regex), let's update the phases after the cleanup. 
- structural_log_phases(job, log_follower) - - -def structural_log_phases(job, log_follower): - phases: dict[str, Any] = { - s.header.split(" - ")[0]: { - k: str(getattr(s, k)) for k in ("start_time", "end_time") - } - for s in log_follower.section_history - } - job.log["dut_job_phases"] = phases - - -def print_job_final_status(job): - if job.status == "running": - job.status = "hung" - - color = LAVAJob.COLOR_STATUS_MAP.get(job.status, CONSOLE_LOG["FG_RED"]) - print_log( - f"{color}" - f"LAVA Job finished with status: {job.status}" - f"{CONSOLE_LOG['RESET']}" - ) - - job.refresh_log() - show_final_job_data(job, colour=f"{CONSOLE_LOG['BOLD']}{color}") - - -def execute_job_with_retries( - proxy, job_definition, retry_count, jobs_log -) -> Optional[LAVAJob]: - last_failed_job = None - for attempt_no in range(1, retry_count + 2): - # Need to get the logger value from its object to enable autosave - # features, if AutoSaveDict is enabled from StructuredLogging module - jobs_log.append({}) - job_log = jobs_log[-1] - job = LAVAJob(proxy, job_definition, job_log) - STRUCTURAL_LOG["dut_attempt_counter"] = attempt_no - try: - job_log["submitter_start_time"] = datetime.now().isoformat() - submit_job(job) - wait_for_job_get_started(job) - log_follower: LogFollower = bootstrap_log_follower() - follow_job_execution(job, log_follower) - return job - - except (MesaCIException, KeyboardInterrupt) as exception: - job.handle_exception(exception) - - finally: - print_job_final_status(job) - # If LAVA takes too long to post process the job, the submitter - # gives up and proceeds. 
- job_log["submitter_end_time"] = datetime.now().isoformat() - last_failed_job = job - print_log( - f"{CONSOLE_LOG['BOLD']}" - f"Finished executing LAVA job in the attempt #{attempt_no}" - f"{CONSOLE_LOG['RESET']}" - ) - - return last_failed_job - - -def retriable_follow_job(proxy, job_definition) -> LAVAJob: - number_of_retries = NUMBER_OF_RETRIES_TIMEOUT_DETECTION - - last_attempted_job = execute_job_with_retries( - proxy, job_definition, number_of_retries, STRUCTURAL_LOG["dut_jobs"] - ) - - if last_attempted_job.exception is not None: - # Infra failed in all attempts - raise MesaCIRetryError( - f"{CONSOLE_LOG['BOLD']}" - f"{CONSOLE_LOG['FG_RED']}" - "Job failed after it exceeded the number of " - f"{number_of_retries} retries." - f"{CONSOLE_LOG['RESET']}", - retry_count=number_of_retries, - last_job=last_attempted_job, - ) - - return last_attempted_job - - -@dataclass -class PathResolver: - def __post_init__(self): - for field in fields(self): - value = getattr(self, field.name) - if not value: - continue - if field.type == pathlib.Path: - value = pathlib.Path(value) - setattr(self, field.name, value.resolve()) - - -@dataclass -class LAVAJobSubmitter(PathResolver): - boot_method: str - ci_project_dir: str - device_type: str - job_timeout_min: int # The job timeout in minutes - build_url: str = None - dtb_filename: str = None - dump_yaml: bool = False # Whether to dump the YAML payload to stdout - first_stage_init: str = None - jwt_file: pathlib.Path = None - kernel_image_name: str = None - kernel_image_type: str = "" - kernel_url_prefix: str = None - lava_tags: str = "" # Comma-separated LAVA tags for the job - mesa_job_name: str = "mesa_ci_job" - pipeline_info: str = "" - rootfs_url_prefix: str = None - validate_only: bool = False # Whether to only validate the job, not execute it - visibility_group: str = None # Only affects LAVA farm maintainers - job_rootfs_overlay_url: str = None - structured_log_file: pathlib.Path = None # Log file path with structured 
LAVA log - ssh_client_image: str = None # x86_64 SSH client image to follow the job's output - __structured_log_context = contextlib.nullcontext() # Structured Logger context - - def __post_init__(self) -> None: - super().__post_init__() - # Remove mesa job names with spaces, which breaks the lava-test-case command - self.mesa_job_name = self.mesa_job_name.split(" ")[0] - - if not self.structured_log_file: - return - - self.__structured_log_context = StructuredLoggerWrapper(self).logger_context() - self.proxy = setup_lava_proxy() - - def __prepare_submission(self) -> str: - # Overwrite the timeout for the testcases with the value offered by the - # user. The testcase running time should be at least 4 times greater than - # the other sections (boot and setup), so we can safely ignore them. - # If LAVA fails to stop the job at this stage, it will fall back to the - # script section timeout with a reasonable delay. - GL_SECTION_TIMEOUTS[LogSectionType.TEST_CASE] = timedelta( - minutes=self.job_timeout_min - ) - - job_definition = generate_lava_job_definition(self) - - if self.dump_yaml: - self.dump_job_definition(job_definition) - - validation_job = LAVAJob(self.proxy, job_definition) - if errors := validation_job.validate(): - fatal_err(f"Error in LAVA job definition: {errors}") - print_log("LAVA job definition validated successfully") - - return job_definition - - @classmethod - def is_under_ci(cls): - ci_envvar: str = getenv("CI", "false") - return ci_envvar.lower() == "true" - - def dump_job_definition(self, job_definition) -> None: - with GitlabSection( - "yaml_dump", - "LAVA job definition (YAML)", - type=LogSectionType.LAVA_BOOT, - start_collapsed=True, - ): - print(hide_sensitive_data(job_definition)) - - def submit(self) -> None: - """ - Prepares and submits the LAVA job. - If `validate_only` is True, it validates the job without submitting it. 
- If the job finishes with a non-pass status or encounters an exception, - the program exits with a non-zero return code. - """ - job_definition: str = self.__prepare_submission() - - if self.validate_only: - return - - with self.__structured_log_context: - last_attempt_job = None - try: - last_attempt_job = retriable_follow_job(self.proxy, job_definition) - - except MesaCIRetryError as retry_exception: - last_attempt_job = retry_exception.last_job - - except Exception as exception: - STRUCTURAL_LOG["job_combined_fail_reason"] = str(exception) - raise exception - - finally: - self.finish_script(last_attempt_job) - - def print_log_artifact_url(self): - base_url = "https://$CI_PROJECT_ROOT_NAMESPACE.pages.freedesktop.org/" - artifacts_path = "-/$CI_PROJECT_NAME/-/jobs/$CI_JOB_ID/artifacts/" - relative_log_path = self.structured_log_file.relative_to(pathlib.Path.cwd()) - full_path = f"{base_url}{artifacts_path}{relative_log_path}" - artifact_url = path.expandvars(full_path) - - print_log(f"Structural Logging data available at: {artifact_url}") - - def finish_script(self, last_attempt_job): - if self.is_under_ci() and self.structured_log_file: - self.print_log_artifact_url() - - if not last_attempt_job: - # No job was run, something bad happened - STRUCTURAL_LOG["job_combined_status"] = "script_crash" - current_exception = str(sys.exc_info()[0]) - STRUCTURAL_LOG["job_combined_fail_reason"] = current_exception - raise SystemExit(1) - - STRUCTURAL_LOG["job_combined_status"] = last_attempt_job.status - - if last_attempt_job.status != "pass": - raise SystemExit(1) - - -class StructuredLoggerWrapper: - def __init__(self, submitter: LAVAJobSubmitter) -> None: - self.__submitter: LAVAJobSubmitter = submitter - - def _init_logger(self): - STRUCTURAL_LOG["fixed_tags"] = self.__submitter.lava_tags - STRUCTURAL_LOG["dut_job_type"] = self.__submitter.device_type - STRUCTURAL_LOG["job_combined_fail_reason"] = None - STRUCTURAL_LOG["job_combined_status"] = "not_submitted" - 
STRUCTURAL_LOG["dut_attempt_counter"] = 0 - - # Initialize dut_jobs list to enable appends - STRUCTURAL_LOG["dut_jobs"] = [] - - @contextlib.contextmanager - def _simple_logger_context(self): - log_file = pathlib.Path(self.__submitter.structured_log_file) - log_file.parent.mkdir(parents=True, exist_ok=True) - try: - # Truncate the file - log_file.write_text("") - yield - finally: - log_file.write_text(json.dumps(STRUCTURAL_LOG, indent=2)) - - def logger_context(self): - context = contextlib.nullcontext() - try: - - global STRUCTURAL_LOG - STRUCTURAL_LOG = StructuredLogger( - self.__submitter.structured_log_file, truncate=True - ).data - except NameError: - context = self._simple_logger_context() - - self._init_logger() - return context - - -if __name__ == "__main__": - # given that we proxy from DUT -> LAVA dispatcher -> LAVA primary -> us -> - # GitLab runner -> GitLab primary -> user, safe to say we don't need any - # more buffering - sys.stdout.reconfigure(line_buffering=True) - sys.stderr.reconfigure(line_buffering=True) - # LAVA farm is giving datetime in UTC timezone, let's set it locally for the - # script run. - # Setting environ here will not affect the system time, as the os.environ - # lifetime follows the script one. 
- environ["TZ"] = "UTC" - time.tzset() - - fire.Fire(LAVAJobSubmitter) diff --git a/.gitlab-ci/lava/requirements-test.txt b/.gitlab-ci/lava/requirements-test.txt deleted file mode 100644 index 0ff561db901..00000000000 --- a/.gitlab-ci/lava/requirements-test.txt +++ /dev/null @@ -1,6 +0,0 @@ --r requirements.txt -freezegun==1.1.0 -hypothesis==6.67.1 -pytest==7.2.1 -pytest-cov==3.0.0 -PyYAML==5.3.1 diff --git a/.gitlab-ci/lava/requirements.txt b/.gitlab-ci/lava/requirements.txt deleted file mode 100644 index e89021f3fd5..00000000000 --- a/.gitlab-ci/lava/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -lavacli==1.5.2 -fire==0.5.0 diff --git a/.gitlab-ci/lava/utils/__init__.py b/.gitlab-ci/lava/utils/__init__.py deleted file mode 100644 index 349d2b32561..00000000000 --- a/.gitlab-ci/lava/utils/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -from .console_format import CONSOLE_LOG -from .gitlab_section import GitlabSection -from .lava_job import LAVAJob -from .lava_job_definition import generate_lava_job_definition -from .lava_proxy import call_proxy, setup_lava_proxy -from .log_follower import ( - LogFollower, - fatal_err, - fix_lava_gitlab_section_log, - hide_sensitive_data, - print_log, -) -from .log_section import ( - DEFAULT_GITLAB_SECTION_TIMEOUTS, - FALLBACK_GITLAB_SECTION_TIMEOUT, - LogSection, - LogSectionType, -) diff --git a/.gitlab-ci/lava/utils/console_format.py b/.gitlab-ci/lava/utils/console_format.py deleted file mode 100644 index 3ad7600591b..00000000000 --- a/.gitlab-ci/lava/utils/console_format.py +++ /dev/null @@ -1,10 +0,0 @@ -CONSOLE_LOG = { - "FG_GREEN": "\x1b[1;32;5;197m", - "FG_RED": "\x1b[1;38;5;197m", - "FG_YELLOW": "\x1b[1;33;5;197m", - "FG_MAGENTA": "\x1b[1;35;5;197m", - "RESET": "\x1b[0m", - "UNDERLINED": "\x1b[3m", - "BOLD": "\x1b[1m", - "DIM": "\x1b[2m", -} diff --git a/.gitlab-ci/lava/utils/gitlab_section.py b/.gitlab-ci/lava/utils/gitlab_section.py deleted file mode 100644 index 034afb4eb33..00000000000 --- 
a/.gitlab-ci/lava/utils/gitlab_section.py +++ /dev/null @@ -1,103 +0,0 @@ -from __future__ import annotations - -import re -from dataclasses import dataclass, field -from datetime import datetime, timedelta -from typing import TYPE_CHECKING, Optional - -from lava.utils.console_format import CONSOLE_LOG - -if TYPE_CHECKING: - from lava.utils.log_section import LogSectionType - - -# TODO: Add section final status to assist with monitoring -@dataclass -class GitlabSection: - id: str - header: str - type: LogSectionType - start_collapsed: bool = False - escape: str = "\x1b[0K" - colour: str = f"{CONSOLE_LOG['BOLD']}{CONSOLE_LOG['FG_GREEN']}" - __start_time: Optional[datetime] = field(default=None, init=False) - __end_time: Optional[datetime] = field(default=None, init=False) - - @classmethod - def section_id_filter(cls, value) -> str: - return str(re.sub(r"[^\w_-]+", "-", value)) - - def __post_init__(self): - self.id = self.section_id_filter(self.id) - - @property - def has_started(self) -> bool: - return self.__start_time is not None - - @property - def has_finished(self) -> bool: - return self.__end_time is not None - - @property - def start_time(self) -> datetime: - return self.__start_time - - @property - def end_time(self) -> Optional[datetime]: - return self.__end_time - - def get_timestamp(self, time: datetime) -> str: - unix_ts = datetime.timestamp(time) - return str(int(unix_ts)) - - def section(self, marker: str, header: str, time: datetime) -> str: - preamble = f"{self.escape}section_{marker}" - collapse = marker == "start" and self.start_collapsed - collapsed = "[collapsed=true]" if collapse else "" - section_id = f"{self.id}{collapsed}" - - timestamp = self.get_timestamp(time) - before_header = ":".join([preamble, timestamp, section_id]) - colored_header = f"{self.colour}{header}\x1b[0m" if header else "" - header_wrapper = "\r" + f"{self.escape}{colored_header}" - - return f"{before_header}{header_wrapper}" - - def __str__(self) -> str: - status = "NS" 
if not self.has_started else "F" if self.has_finished else "IP" - delta = self.delta_time() - elapsed_time = "N/A" if delta is None else str(delta) - return ( - f"GitlabSection({self.id}, {self.header}, {self.type}, " - f"SC={self.start_collapsed}, S={status}, ST={self.start_time}, " - f"ET={self.end_time}, ET={elapsed_time})" - ) - - def __enter__(self): - print(self.start()) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - print(self.end()) - - def start(self) -> str: - assert not self.has_finished, "Starting an already finished section" - self.__start_time = datetime.now() - return self.section(marker="start", header=self.header, time=self.__start_time) - - def end(self) -> str: - assert self.has_started, "Ending an uninitialized section" - self.__end_time = datetime.now() - assert ( - self.__end_time >= self.__start_time - ), "Section execution time will be negative" - return self.section(marker="end", header="", time=self.__end_time) - - def delta_time(self) -> Optional[timedelta]: - if self.__start_time and self.__end_time: - return self.__end_time - self.__start_time - - if self.has_started: - return datetime.now() - self.__start_time - - return None diff --git a/.gitlab-ci/lava/utils/lava_farm.py b/.gitlab-ci/lava/utils/lava_farm.py deleted file mode 100644 index dfd51ab9b92..00000000000 --- a/.gitlab-ci/lava/utils/lava_farm.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -import re -from enum import Enum - - -class LavaFarm(Enum): - """Enum class representing the different LAVA farms.""" - - LIMA = 1 - COLLABORA = 2 - UNKNOWN = 3 - - -LAVA_FARM_RUNNER_PATTERNS: dict[LavaFarm, str] = { - # Lima pattern comes first, since it has the same prefix as the - # Collabora pattern. 
- LavaFarm.LIMA: r"^mesa-ci-[\x01-\x7F]+-lava-lima$", - LavaFarm.COLLABORA: r"^mesa-ci-[\x01-\x7F]+-lava-[\x01-\x7F]+$", - LavaFarm.UNKNOWN: r"^[\x01-\x7F]+", -} - - -def get_lava_farm() -> LavaFarm: - """ - Returns the LAVA farm based on the RUNNER_TAG environment variable. - - :return: The LAVA farm - """ - runner_tag: str = os.getenv("RUNNER_TAG", "unknown") - - for farm, pattern in LAVA_FARM_RUNNER_PATTERNS.items(): - if re.match(pattern, runner_tag): - return farm - - raise ValueError(f"Unknown LAVA runner tag: {runner_tag}") diff --git a/.gitlab-ci/lava/utils/lava_job.py b/.gitlab-ci/lava/utils/lava_job.py deleted file mode 100644 index b69f8b9fbb7..00000000000 --- a/.gitlab-ci/lava/utils/lava_job.py +++ /dev/null @@ -1,186 +0,0 @@ -import re -import xmlrpc -from collections import defaultdict -from datetime import datetime -from typing import Any, Optional - -from lava.exceptions import ( - MesaCIException, - MesaCIKnownIssueException, - MesaCIParseException, - MesaCITimeoutError, -) -from lava.utils import CONSOLE_LOG -from lava.utils.log_follower import print_log -from lavacli.utils import flow_yaml as lava_yaml - -from .lava_proxy import call_proxy - - -class LAVAJob: - COLOR_STATUS_MAP: dict[str, str] = { - "pass": CONSOLE_LOG["FG_GREEN"], - "hung": CONSOLE_LOG["FG_YELLOW"], - "fail": CONSOLE_LOG["FG_RED"], - "canceled": CONSOLE_LOG["FG_MAGENTA"], - } - - def __init__(self, proxy, definition, log=defaultdict(str)) -> None: - self._job_id = None - self.proxy = proxy - self.definition = definition - self.last_log_line = 0 - self.last_log_time = None - self._is_finished = False - self.log: dict[str, Any] = log - self.status = "not_submitted" - self.__exception: Optional[str] = None - - def heartbeat(self) -> None: - self.last_log_time: datetime = datetime.now() - self.status = "running" - - @property - def status(self) -> str: - return self._status - - @status.setter - def status(self, new_status: str) -> None: - self._status = new_status - 
self.log["status"] = self._status - - @property - def job_id(self) -> int: - return self._job_id - - @job_id.setter - def job_id(self, new_id: int) -> None: - self._job_id = new_id - self.log["lava_job_id"] = self._job_id - - @property - def is_finished(self) -> bool: - return self._is_finished - - @property - def exception(self) -> str: - return self.__exception - - @exception.setter - def exception(self, exception: Exception) -> None: - self.__exception = repr(exception) - self.log["dut_job_fail_reason"] = self.__exception - - def validate(self) -> Optional[dict]: - """Returns a dict with errors, if the validation fails. - - Returns: - Optional[dict]: a dict with the validation errors, if any - """ - return call_proxy(self.proxy.scheduler.jobs.validate, self.definition, True) - - def show(self) -> dict[str, str]: - return call_proxy(self.proxy.scheduler.jobs.show, self._job_id) - - def get_lava_time(self, key, data) -> Optional[str]: - return data[key].value if data[key] else None - - def refresh_log(self) -> None: - details = self.show() - self.log["dut_start_time"] = self.get_lava_time("start_time", details) - self.log["dut_submit_time"] = self.get_lava_time("submit_time", details) - self.log["dut_end_time"] = self.get_lava_time("end_time", details) - self.log["dut_name"] = details.get("device") - self.log["dut_state"] = details.get("state") - - def submit(self) -> bool: - try: - self.job_id = call_proxy(self.proxy.scheduler.jobs.submit, self.definition) - self.status = "submitted" - self.refresh_log() - except MesaCIException: - return False - return True - - def lava_state(self) -> str: - job_state: dict[str, str] = call_proxy( - self.proxy.scheduler.job_state, self._job_id - ) - return job_state["job_state"] - - def cancel(self): - if self._job_id: - self.proxy.scheduler.jobs.cancel(self._job_id) - # If we don't have yet set another job's status, let's update it - # with canceled one - if self.status == "running": - self.status = "canceled" - - def 
is_started(self) -> bool: - waiting_states = ("Submitted", "Scheduling", "Scheduled") - return self.lava_state() not in waiting_states - - def is_post_processed(self) -> bool: - return self.lava_state() != "Running" - - def _load_log_from_data(self, data) -> list[str]: - lines = [] - if isinstance(data, xmlrpc.client.Binary): - # We are dealing with xmlrpc.client.Binary - # Let's extract the data - data = data.data - # When there is no new log data, the YAML is empty - if loaded_lines := lava_yaml.load(data): - lines: list[str] = loaded_lines - self.last_log_line += len(lines) - return lines - - def get_logs(self) -> list[str]: - try: - (finished, data) = call_proxy( - self.proxy.scheduler.jobs.logs, self._job_id, self.last_log_line - ) - self._is_finished = finished - return self._load_log_from_data(data) - - except Exception as mesa_ci_err: - raise MesaCIParseException( - f"Could not get LAVA job logs. Reason: {mesa_ci_err}" - ) from mesa_ci_err - - def parse_job_result_from_log( - self, lava_lines: list[dict[str, str]] - ) -> list[dict[str, str]]: - """Use the console log to catch if the job has completed successfully or - not. Returns the list of log lines until the result line.""" - - last_line = None # Print all lines. lines[:None] == lines[:] - - for idx, line in enumerate(lava_lines): - if result := re.search(r"hwci: mesa: (pass|fail)", line): - self._is_finished = True - self.status = result[1] - - last_line = idx + 1 - # We reached the log end here. hwci script has finished. 
- break - return lava_lines[:last_line] - - def handle_exception(self, exception: Exception): - print_log(exception) - self.cancel() - self.exception = exception - - # Give more accurate status depending on exception - if isinstance(exception, MesaCIKnownIssueException): - self.status = "canceled" - elif isinstance(exception, MesaCITimeoutError): - self.status = "hung" - elif isinstance(exception, MesaCIException): - self.status = "failed" - elif isinstance(exception, KeyboardInterrupt): - self.status = "interrupted" - print_log("LAVA job submitter was interrupted. Cancelling the job.") - raise - else: - self.status = "job_submitter_error" diff --git a/.gitlab-ci/lava/utils/lava_job_definition.py b/.gitlab-ci/lava/utils/lava_job_definition.py deleted file mode 100644 index c7b43658cb5..00000000000 --- a/.gitlab-ci/lava/utils/lava_job_definition.py +++ /dev/null @@ -1,150 +0,0 @@ -from io import StringIO -from typing import TYPE_CHECKING, Any - -import re -from lava.utils.lava_farm import LavaFarm, get_lava_farm -from ruamel.yaml.scalarstring import LiteralScalarString -from ruamel.yaml import YAML -from os import getenv - -if TYPE_CHECKING: - from lava.lava_job_submitter import LAVAJobSubmitter - -# How many attempts should be made when a timeout happen during LAVA device boot. -NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", 3)) - -# Supports any integers in [0, 100]. -# The scheduler considers the job priority when ordering the queue -# to consider which job should run next. -JOB_PRIORITY = int(getenv("JOB_PRIORITY", 75)) - - -def has_ssh_support(job_submitter: "LAVAJobSubmitter") -> bool: - force_uart = bool(getenv("LAVA_FORCE_UART", False)) - - if force_uart: - return False - - # Only Collabora's farm supports to run docker container as a LAVA actions, - # which is required to follow the job in a SSH section - current_farm = get_lava_farm() - - # SSH job definition still needs to add support for fastboot. 
- job_uses_fastboot: bool = job_submitter.boot_method == "fastboot" - - return current_farm == LavaFarm.COLLABORA and not job_uses_fastboot - - -def generate_lava_yaml_payload(job_submitter: "LAVAJobSubmitter") -> dict[str, Any]: - """ - Bridge function to use the supported job definition depending on some Mesa - CI job characteristics. - - The strategy here, is to use LAVA with a containerized SSH session to follow - the job output, escaping from dumping data to the UART, which proves to be - error prone in some devices. - """ - from lava.utils.ssh_job_definition import ( - generate_lava_yaml_payload as ssh_lava_yaml, - ) - from lava.utils.uart_job_definition import ( - generate_lava_yaml_payload as uart_lava_yaml, - ) - - if has_ssh_support(job_submitter): - return ssh_lava_yaml(job_submitter) - - return uart_lava_yaml(job_submitter) - - -def generate_lava_job_definition(job_submitter: "LAVAJobSubmitter") -> str: - job_stream = StringIO() - yaml = YAML() - yaml.width = 4096 - yaml.dump(generate_lava_yaml_payload(job_submitter), job_stream) - return job_stream.getvalue() - - -def to_yaml_block(steps_array: list[str], escape_vars=[]) -> LiteralScalarString: - def escape_envvar(match): - return "\\" + match.group(0) - - filtered_array = [s for s in steps_array if s.strip() and not s.startswith("#")] - final_str = "\n".join(filtered_array) - - for escape_var in escape_vars: - # Find env vars and add '\\' before them - final_str = re.sub(rf"\${escape_var}*", escape_envvar, final_str) - return LiteralScalarString(final_str) - - -def generate_metadata(args) -> dict[str, Any]: - # General metadata and permissions - values = { - "job_name": f"mesa: {args.pipeline_info}", - "device_type": args.device_type, - "visibility": {"group": [args.visibility_group]}, - "priority": JOB_PRIORITY, - "context": { - "extra_nfsroot_args": " init=/init rootwait usbcore.quirks=0bda:8153:k" - }, - "timeouts": { - "job": {"minutes": args.job_timeout_min}, - "actions": { - "depthcharge-retry": 
{ - # Could take between 1 and 1.5 min in slower boots - "minutes": 4 - }, - "depthcharge-start": { - # Should take less than 1 min. - "minutes": 1, - }, - "depthcharge-action": { - # This timeout englobes the entire depthcharge timing, - # including retries - "minutes": 5 - * NUMBER_OF_ATTEMPTS_LAVA_BOOT, - }, - }, - }, - } - - if args.lava_tags: - values["tags"] = args.lava_tags.split(",") - - return values - - -def artifact_download_steps(args): - """ - This function is responsible for setting up the SSH server in the DUT and to - export the first boot environment to a file. - """ - # Putting JWT pre-processing and mesa download, within init-stage1.sh file, - # as we do with non-SSH version. - download_steps = [ - "set -ex", - "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 " - f"{args.job_rootfs_overlay_url} | tar -xz -C /", - f"mkdir -p {args.ci_project_dir}", - f"curl -L --retry 4 -f --retry-all-errors --retry-delay 60 {args.build_url} | " - f"tar --zstd -x -C {args.ci_project_dir}", - ] - - # If the JWT file is provided, we will use it to authenticate with the cloud - # storage provider and will hide it from the job output in Gitlab. 
- if args.jwt_file: - with open(args.jwt_file) as jwt_file: - download_steps += [ - "set +x # HIDE_START", - f'echo -n "{jwt_file.read()}" > "{args.jwt_file}"', - "set -x # HIDE_END", - f'echo "export CI_JOB_JWT_FILE={args.jwt_file}" >> /set-job-env-vars.sh', - ] - else: - download_steps += [ - "echo Could not find jwt file, disabling S3 requests...", - "sed -i '/S3_RESULTS_UPLOAD/d' /set-job-env-vars.sh", - ] - - return download_steps diff --git a/.gitlab-ci/lava/utils/lava_log_hints.py b/.gitlab-ci/lava/utils/lava_log_hints.py deleted file mode 100644 index b147a8747ea..00000000000 --- a/.gitlab-ci/lava/utils/lava_log_hints.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import annotations - -import re -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any - -if TYPE_CHECKING: - from lava.utils import LogFollower - -from lava.exceptions import MesaCIKnownIssueException -from lava.utils.console_format import CONSOLE_LOG -from lava.utils.log_section import LogSectionType - - -@dataclass -class LAVALogHints: - log_follower: LogFollower - has_r8152_issue_history: bool = field(default=False, init=False) - - def detect_failure(self, new_lines: list[dict[str, Any]]): - for line in new_lines: - self.detect_r8152_issue(line) - - def detect_r8152_issue(self, line): - if ( - self.log_follower.phase == LogSectionType.TEST_CASE - and line["lvl"] == "target" - ): - if re.search(r"r8152 \S+ eth0: Tx status -71", line["msg"]): - self.has_r8152_issue_history = True - return - - if self.has_r8152_issue_history and re.search( - r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying", - line["msg"], - ): - raise MesaCIKnownIssueException( - f"{CONSOLE_LOG['FG_MAGENTA']}" - "Probable network issue failure encountered, retrying the job" - f"{CONSOLE_LOG['RESET']}" - ) - - self.has_r8152_issue_history = False diff --git a/.gitlab-ci/lava/utils/lava_proxy.py b/.gitlab-ci/lava/utils/lava_proxy.py deleted file mode 100644 index 
581ec46038e..00000000000 --- a/.gitlab-ci/lava/utils/lava_proxy.py +++ /dev/null @@ -1,44 +0,0 @@ -import time -import traceback -import urllib -import urllib.parse -import xmlrpc -import xmlrpc.client - -import lavacli - -from .log_follower import fatal_err, print_log - - -def setup_lava_proxy(): - config = lavacli.load_config("default") - uri, usr, tok = (config.get(key) for key in ("uri", "username", "token")) - uri_obj = urllib.parse.urlparse(uri) - uri_str = f"{uri_obj.scheme}://{usr}:{tok}@{uri_obj.netloc}{uri_obj.path}" - transport = lavacli.RequestsTransport( - uri_obj.scheme, - config.get("proxy"), - config.get("timeout", 120.0), - config.get("verify_ssl_cert", True), - ) - proxy = xmlrpc.client.ServerProxy(uri_str, allow_none=True, transport=transport) - - print_log(f'Proxy for {config["uri"]} created.') - - return proxy - - -def call_proxy(fn, *args): - retries = 60 - for n in range(1, retries + 1): - try: - return fn(*args) - except xmlrpc.client.ProtocolError as err: - if n == retries: - traceback.print_exc() - fatal_err(f"A protocol error occurred (Err {err.errcode} {err.errmsg})") - else: - time.sleep(15) - except xmlrpc.client.Fault as err: - traceback.print_exc() - fatal_err(f"FATAL: Fault: {err.faultString} (code: {err.faultCode})", err) diff --git a/.gitlab-ci/lava/utils/log_follower.py b/.gitlab-ci/lava/utils/log_follower.py deleted file mode 100644 index 1fdf490bcb8..00000000000 --- a/.gitlab-ci/lava/utils/log_follower.py +++ /dev/null @@ -1,310 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (C) 2022 Collabora Limited -# Author: Guilherme Gallo <guilherme.gallo@collabora.com> -# -# SPDX-License-Identifier: MIT - -""" -Some utilities to analyse logs, create gitlab sections and other quality of life -improvements -""" - -import logging -import re -import sys -from dataclasses import dataclass, field -from datetime import datetime, timedelta -from typing import Optional, Union - -from lava.exceptions import MesaCITimeoutError -from 
lava.utils.console_format import CONSOLE_LOG -from lava.utils.gitlab_section import GitlabSection -from lava.utils.lava_farm import LavaFarm, get_lava_farm -from lava.utils.lava_log_hints import LAVALogHints -from lava.utils.log_section import ( - DEFAULT_GITLAB_SECTION_TIMEOUTS, - FALLBACK_GITLAB_SECTION_TIMEOUT, - LOG_SECTIONS, - LogSectionType, -) - - -@dataclass -class LogFollower: - starting_section: Optional[GitlabSection] = None - _current_section: Optional[GitlabSection] = None - section_history: list[GitlabSection] = field(default_factory=list, init=False) - timeout_durations: dict[LogSectionType, timedelta] = field( - default_factory=lambda: DEFAULT_GITLAB_SECTION_TIMEOUTS, - ) - fallback_timeout: timedelta = FALLBACK_GITLAB_SECTION_TIMEOUT - _buffer: list[str] = field(default_factory=list, init=False) - log_hints: LAVALogHints = field(init=False) - lava_farm: LavaFarm = field(init=False, default=get_lava_farm()) - _merge_next_line: str = field(default_factory=str, init=False) - - def __post_init__(self): - # Make it trigger current_section setter to populate section history - self.current_section = self.starting_section - section_is_created = bool(self._current_section) - section_has_started = bool( - self._current_section and self._current_section.has_started - ) - self.log_hints = LAVALogHints(self) - assert ( - section_is_created == section_has_started - ), "Can't follow logs beginning from uninitialized GitLab sections." 
- - # Initialize fix_lava_gitlab_section_log generator - self.gl_section_fix_gen = fix_lava_gitlab_section_log() - next(self.gl_section_fix_gen) - - @property - def current_section(self): - return self._current_section - - @current_section.setter - def current_section(self, new_section: GitlabSection) -> None: - if old_section := self._current_section: - self.section_history.append(old_section) - self._current_section = new_section - - @property - def phase(self) -> LogSectionType: - return ( - self._current_section.type - if self._current_section - else LogSectionType.UNKNOWN - ) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """Cleanup existing buffer if this object gets out from the context""" - self.clear_current_section() - last_lines = self.flush() - for line in last_lines: - print(line) - - def watchdog(self): - if not self._current_section: - return - - timeout_duration = self.timeout_durations.get( - self._current_section.type, self.fallback_timeout - ) - - if self._current_section.delta_time() > timeout_duration: - raise MesaCITimeoutError( - f"Gitlab Section {self._current_section} has timed out", - timeout_duration=timeout_duration, - ) - - def clear_current_section(self): - if self._current_section and not self._current_section.has_finished: - self._buffer.append(self._current_section.end()) - self.current_section = None - - def update_section(self, new_section: GitlabSection): - # Sections can have redundant regex to find them to mitigate LAVA - # interleaving kmsg and stderr/stdout issue. - if self.current_section and self.current_section.id == new_section.id: - return - self.clear_current_section() - self.current_section = new_section - self._buffer.append(new_section.start()) - - def manage_gl_sections(self, line): - if isinstance(line["msg"], list): - logging.debug("Ignoring messages as list. 
Kernel dumps.") - return - - for log_section in LOG_SECTIONS: - if new_section := log_section.from_log_line_to_section(line): - self.update_section(new_section) - break - - def detect_kernel_dump_line(self, line: dict[str, Union[str, list]]) -> bool: - # line["msg"] can be a list[str] when there is a kernel dump - if isinstance(line["msg"], list): - return line["lvl"] == "debug" - - # result level has dict line["msg"] - if not isinstance(line["msg"], str): - return False - - # we have a line, check if it is a kernel message - if re.search(r"\[[\d\s]{5}\.[\d\s]{6}\] +\S{2,}", line["msg"]): - print_log(f"{CONSOLE_LOG['BOLD']}{line['msg']}{CONSOLE_LOG['RESET']}") - return True - - return False - - def remove_trailing_whitespace(self, line: dict[str, str]) -> None: - """ - Removes trailing whitespace from the end of the `msg` value in the log line dictionary. - - Args: - line: A dictionary representing a single log line. - - Note: - LAVA treats carriage return characters as a line break, so each carriage return in an output console - is mapped to a console line in LAVA. This method removes trailing `\r\n` characters from log lines. - """ - msg: Optional[str] = line.get("msg") - if not msg: - return False - - messages = [msg] if isinstance(msg, str) else msg - - for message in messages: - # LAVA logs brings raw messages, which includes newlines characters as \r\n. - line["msg"]: str = re.sub(r"\r\n$", "", message) - - def merge_carriage_return_lines(self, line: dict[str, str]) -> bool: - """ - Merges lines that end with a carriage return character into a single line. - - Args: - line: A dictionary representing a single log line. - - Returns: - A boolean indicating whether the current line has been merged with the next line. - - Note: - LAVA treats carriage return characters as a line break, so each carriage return in an output console - is mapped to a console line in LAVA. 
- """ - if line["msg"].endswith("\r"): - self._merge_next_line += line["msg"] - return True - - if self._merge_next_line: - line["msg"] = self._merge_next_line + line["msg"] - self._merge_next_line = "" - - return False - - - def feed(self, new_lines: list[dict[str, str]]) -> bool: - """Input data to be processed by LogFollower instance - Returns true if the DUT (device under test) seems to be alive. - """ - - self.watchdog() - - # No signal of job health in the log - is_job_healthy = False - - for line in new_lines: - self.remove_trailing_whitespace(line) - - if self.detect_kernel_dump_line(line): - continue - - if self.merge_carriage_return_lines(line): - continue - - # At least we are fed with a non-kernel dump log, it seems that the - # job is progressing - is_job_healthy = True - self.manage_gl_sections(line) - if parsed_line := self.parse_lava_line(line): - self._buffer.append(parsed_line) - - self.log_hints.detect_failure(new_lines) - - return is_job_healthy - - def flush(self) -> list[str]: - buffer = self._buffer - self._buffer = [] - return buffer - - def parse_lava_line(self, line) -> Optional[str]: - prefix = "" - suffix = "" - - if line["lvl"] in ["results", "feedback", "debug"]: - return - elif line["lvl"] in ["warning", "error"]: - prefix = CONSOLE_LOG["FG_RED"] - suffix = CONSOLE_LOG["RESET"] - elif line["lvl"] == "input": - prefix = "$ " - suffix = "" - elif line["lvl"] == "target" and self.lava_farm != LavaFarm.COLLABORA: - # gl_section_fix_gen will output the stored line if it can't find a - # match for the first split line - # So we can recover it and put it back to the buffer - if recovered_first_line := self.gl_section_fix_gen.send(line): - self._buffer.append(recovered_first_line) - - return f'{prefix}{line["msg"]}{suffix}' - -def fix_lava_gitlab_section_log(): - """This function is a temporary solution for the Gitlab section markers - splitting problem. 
Gitlab parses the following lines to define a collapsible - gitlab section in their log: - - \x1b[0Ksection_start:timestamp:section_id[collapsible=true/false]\r\x1b[0Ksection_header - - \x1b[0Ksection_end:timestamp:section_id\r\x1b[0K - There is some problem in message passing between the LAVA dispatcher and the - device under test (DUT), that replaces \r control characters into \n. When - this problem is fixed on the LAVA side, one should remove this function. - """ - while True: - line = yield False - first_line = None - split_line_pattern = re.compile(r"\x1b\[0K(section_\w+):(\d+):([^\s\r]+)$") - second_line_pattern = re.compile(r"\x1b\[0K([\S ]+)?") - - if not re.search(split_line_pattern, line["msg"]): - continue - - first_line = line["msg"] - # Delete the current line and hold this log line stream to be able to - # possibly merge it with the next line. - line["msg"] = "" - line = yield False - - # This code reached when we detect a possible first split line - if re.search(second_line_pattern, line["msg"]): - assert first_line - line["msg"] = f"{first_line}\r{line['msg']}" - else: - # The current line doesn't match with the previous one, send back the - # latter to give the user the chance to recover it. 
- yield first_line - - - -def print_log(msg: str, *args) -> None: - # Reset color from timestamp, since `msg` can tint the terminal color - print(f"{CONSOLE_LOG['RESET']}{datetime.now()}: {msg}", *args) - - -def fatal_err(msg, exception=None): - colored_msg = f"{CONSOLE_LOG['FG_RED']}" - print_log(colored_msg, f"{msg}", f"{CONSOLE_LOG['RESET']}") - if exception: - raise exception - sys.exit(1) - - -def hide_sensitive_data(yaml_data: str, start_hide: str = "HIDE_START", end_hide: str = "HIDE_END") -> str: - skip_line = False - dump_data: list[str] = [] - for line in yaml_data.splitlines(True): - if start_hide in line: - skip_line = True - elif end_hide in line: - skip_line = False - - if skip_line: - continue - - dump_data.append(line) - - return "".join(dump_data) diff --git a/.gitlab-ci/lava/utils/log_section.py b/.gitlab-ci/lava/utils/log_section.py deleted file mode 100644 index 25620a6155b..00000000000 --- a/.gitlab-ci/lava/utils/log_section.py +++ /dev/null @@ -1,113 +0,0 @@ -import re -from dataclasses import dataclass -from datetime import timedelta -from enum import Enum, auto -from os import getenv -from typing import Optional, Pattern, Union - -from lava.utils.gitlab_section import GitlabSection - - -class LogSectionType(Enum): - UNKNOWN = auto() - LAVA_BOOT = auto() - TEST_DUT_SUITE = auto() - TEST_SUITE = auto() - TEST_CASE = auto() - LAVA_POST_PROCESSING = auto() - - -# Empirically, successful device boot in LAVA time takes less than 3 -# minutes. -# LAVA itself is configured to attempt thrice to boot the device, -# summing up to 9 minutes. -# It is better to retry the boot than cancel the job and re-submit to avoid -# the enqueue delay. -LAVA_BOOT_TIMEOUT = int(getenv("LAVA_BOOT_TIMEOUT", 9)) - -# Test DUT suite phase is where the initialization happens in DUT, not on docker. -# The device will be listening to SSH session until the end of the job. 
-LAVA_TEST_DUT_SUITE_TIMEOUT = int(getenv("JOB_TIMEOUT", 60)) - -# Test suite phase is where the initialization happens on docker. -LAVA_TEST_SUITE_TIMEOUT = int(getenv("LAVA_TEST_SUITE_TIMEOUT", 5)) - -# Test cases may take a long time, this script has no right to interrupt -# them. But if the test case takes almost 1h, it will never succeed due to -# Gitlab job timeout. -LAVA_TEST_CASE_TIMEOUT = int(getenv("JOB_TIMEOUT", 60)) - -# LAVA post processing may refer to a test suite teardown, or the -# adjustments to start the next test_case -LAVA_POST_PROCESSING_TIMEOUT = int(getenv("LAVA_POST_PROCESSING_TIMEOUT", 5)) - -FALLBACK_GITLAB_SECTION_TIMEOUT = timedelta(minutes=10) -DEFAULT_GITLAB_SECTION_TIMEOUTS = { - LogSectionType.LAVA_BOOT: timedelta(minutes=LAVA_BOOT_TIMEOUT), - LogSectionType.TEST_DUT_SUITE: timedelta(minutes=LAVA_TEST_DUT_SUITE_TIMEOUT), - LogSectionType.TEST_SUITE: timedelta(minutes=LAVA_TEST_SUITE_TIMEOUT), - LogSectionType.TEST_CASE: timedelta(minutes=LAVA_TEST_CASE_TIMEOUT), - LogSectionType.LAVA_POST_PROCESSING: timedelta( - minutes=LAVA_POST_PROCESSING_TIMEOUT - ), -} - - -@dataclass(frozen=True) -class LogSection: - regex: Union[Pattern, str] - levels: tuple[str] - section_id: str - section_header: str - section_type: LogSectionType - collapsed: bool = False - - def from_log_line_to_section( - self, lava_log_line: dict[str, str] - ) -> Optional[GitlabSection]: - if lava_log_line["lvl"] not in self.levels: - return - - if match := re.search(self.regex, lava_log_line["msg"]): - section_id = self.section_id.format(*match.groups()) - section_header = self.section_header.format(*match.groups()) - timeout = DEFAULT_GITLAB_SECTION_TIMEOUTS[self.section_type] - return GitlabSection( - id=section_id, - header=f"{section_header} - Timeout: {timeout}", - type=self.section_type, - start_collapsed=self.collapsed, - ) - - -LOG_SECTIONS = ( - LogSection( - regex=re.compile(r"<?STARTTC>? 
([^>]*)"), - levels=("target", "debug"), - section_id="{}", - section_header="test_case {}", - section_type=LogSectionType.TEST_CASE, - ), - LogSection( - regex=re.compile(r"<?STARTRUN>? ([^>]*ssh.*server.*)"), - levels=("debug"), - section_id="{}", - section_header="[dut] test_suite {}", - section_type=LogSectionType.TEST_DUT_SUITE, - ), - LogSection( - regex=re.compile(r"<?STARTRUN>? ([^>]*)"), - levels=("debug"), - section_id="{}", - section_header="[docker] test_suite {}", - section_type=LogSectionType.TEST_SUITE, - ), - LogSection( - regex=re.compile(r"ENDTC>? ([^>]+)"), - levels=("target", "debug"), - section_id="post-{}", - section_header="Post test_case {}", - collapsed=True, - section_type=LogSectionType.LAVA_POST_PROCESSING, - ), -) diff --git a/.gitlab-ci/lava/utils/ssh_job_definition.py b/.gitlab-ci/lava/utils/ssh_job_definition.py deleted file mode 100644 index 1308e5ca92a..00000000000 --- a/.gitlab-ci/lava/utils/ssh_job_definition.py +++ /dev/null @@ -1,208 +0,0 @@ -""" -In a few words: some devices in Mesa CI has problematic serial connection, they -may hang (become silent) intermittently. Every time it hangs for minutes, the -job is retried, causing delays in the overall pipeline executing, ultimately -blocking legit MRs to merge. - -To reduce reliance on UART, we explored LAVA features, such as running docker -containers as a test alongside the DUT one, to be able to create an SSH server -in the DUT the earliest possible and an SSH client in a docker container, to -establish a SSH session between both, allowing the console output to be passed -via SSH pseudo terminal, instead of relying in the error-prone UART. - -In more detail, we aim to use "export -p" to share the initial boot environment -with SSH LAVA test-cases. -The "init-stage1.sh" script handles tasks such as system mounting and network -setup, which are necessary for allocating a pseudo-terminal under "/dev/pts". 
-Although these chores are not required for establishing an SSH session, they are -essential for proper functionality to the target script given by HWCI_SCRIPT -environment variable. - -Therefore, we have divided the job definition into four parts: - -1. [DUT] Logging in to DUT and run the SSH server with root access. -2. [DUT] Running the "init-stage1.sh" script for the first SSH test case. -3. [DUT] Export the first boot environment to `/dut-env-vars.sh` file. -4. [SSH] Enabling the pseudo-terminal for colors and running the "init-stage2.sh" -script after sourcing "dut-env-vars.sh" again for the second SSH test case. -""" - - -from pathlib import Path -from typing import Any - -from .lava_job_definition import ( - NUMBER_OF_ATTEMPTS_LAVA_BOOT, - artifact_download_steps, - generate_metadata, - to_yaml_block, -) - -# Very early SSH server setup. Uses /dut_ready file to flag it is done. -SSH_SERVER_COMMANDS = { - "auto_login": { - "login_commands": [ - "dropbear -R -B", - "touch /dut_ready", - ], - "login_prompt": "ogin:", - # To login as root, the username should be empty - "username": "", - } -} - -# TODO: Extract this inline script to a shell file, like we do with -# init-stage[12].sh -# The current way is difficult to maintain because one has to deal with escaping -# characters for both Python and the resulting job definition YAML. -# Plus, it always good to lint bash scripts with shellcheck. -DOCKER_COMMANDS = [ - """set -ex -timeout 1m bash << EOF -while [ -z "$(lava-target-ip)" ]; do - echo Waiting for DUT to join LAN; - sleep 1; -done -EOF - -ping -c 5 -w 60 $(lava-target-ip) - -lava_ssh_test_case() { - set -x - local test_case="${1}" - shift - lava-test-case \"${test_case}\" --shell \\ - ssh ${SSH_PTY_ARGS:--T} \\ - -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \\ - root@$(lava-target-ip) \"${@}\" -}""", -] - - -def generate_dut_test(args): - # Commands executed on DUT. 
- # Trying to execute the minimal number of commands, because the console data is - # retrieved via UART, which is hang-prone in some devices. - - first_stage_steps: list[str] = Path(args.first_stage_init).read_text().splitlines() - return { - "namespace": "dut", - "definitions": [ - { - "from": "inline", - "name": "setup-ssh-server", - "path": "inline-setup-ssh-server", - "repository": { - "metadata": { - "format": "Lava-Test Test Definition 1.0", - "name": "dut-env-export", - }, - "run": { - "steps": [ - to_yaml_block(first_stage_steps), - "export -p > /dut-env-vars.sh", # Exporting the first boot environment - ], - }, - }, - } - ], - } - - -def generate_docker_test(args): - # This is a growing list of commands that will be executed by the docker - # guest, which will be the SSH client. - docker_commands = [] - - # LAVA test wrapping Mesa CI job in a SSH session. - init_stages_test = { - "namespace": "container", - "timeout": {"minutes": args.job_timeout_min}, - "failure_retry": 3, - "definitions": [ - { - "name": "docker_ssh_client", - "from": "inline", - "path": "inline/docker_ssh_client.yaml", - "repository": { - "metadata": { - "name": "mesa", - "description": "Mesa test plan", - "format": "Lava-Test Test Definition 1.0", - }, - "run": {"steps": docker_commands}, - }, - } - ], - "docker": { - "image": args.ssh_client_image, - }, - } - - docker_commands += [ - to_yaml_block(DOCKER_COMMANDS, escape_vars=["LAVA_TARGET_IP"]), - "lava_ssh_test_case 'wait_for_dut_login' << EOF", - "while [ ! 
-e /dut_ready ]; do sleep 1; done;", - "EOF", - to_yaml_block( - ( - "lava_ssh_test_case 'artifact_download' 'bash --' << EOF", - "source /dut-env-vars.sh", - *artifact_download_steps(args), - "EOF", - ) - ), - "export SSH_PTY_ARGS=-tt", - # Putting CI_JOB name as the testcase name, it may help LAVA farm - # maintainers with monitoring - f"lava_ssh_test_case 'mesa-ci_{args.mesa_job_name}' " - # Changing directory to /, as the HWCI_SCRIPT expects that - "'\"cd / && /init-stage2.sh\"'", - ] - - return init_stages_test - - -def generate_lava_yaml_payload(args) -> dict[str, Any]: - values = generate_metadata(args) - - # URLs to our kernel rootfs to boot from, both generated by the base - # container build - deploy = { - "namespace": "dut", - "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT, - "timeout": {"minutes": 10}, - "timeouts": {"http-download": {"minutes": 2}}, - "to": "tftp", - "os": "oe", - "kernel": {"url": f"{args.kernel_url_prefix}/{args.kernel_image_name}"}, - "nfsrootfs": { - "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst", - "compression": "zstd", - }, - } - if args.kernel_image_type: - deploy["kernel"]["type"] = args.kernel_image_type - if args.dtb_filename: - deploy["dtb"] = {"url": f"{args.kernel_url_prefix}/{args.dtb_filename}.dtb"} - - # always boot over NFS - boot = { - "namespace": "dut", - "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT, - "method": args.boot_method, - "commands": "nfs", - "prompts": ["lava-shell:"], - **SSH_SERVER_COMMANDS, - } - - # only declaring each job as a single 'test' since LAVA's test parsing is - # not useful to us - values["actions"] = [ - {"deploy": deploy}, - {"boot": boot}, - {"test": generate_dut_test(args)}, - {"test": generate_docker_test(args)}, - ] - - return values diff --git a/.gitlab-ci/lava/utils/uart_job_definition.py b/.gitlab-ci/lava/utils/uart_job_definition.py deleted file mode 100644 index cd239c3215f..00000000000 --- a/.gitlab-ci/lava/utils/uart_job_definition.py +++ /dev/null @@ -1,171 +0,0 @@ 
from typing import Any

from .lava_job_definition import (
    NUMBER_OF_ATTEMPTS_LAVA_BOOT,
    artifact_download_steps,
    generate_metadata,
)

# Docker image in which the fastboot-related LAVA actions (boot image
# post-processing, fastboot deploy and fastboot boot) are run.
_HEALTH_CHECK_DOCKER_IMAGE = "registry.gitlab.collabora.com/lava/health-check-docker"


def _nfsrootfs(args) -> dict[str, Any]:
    """Describe the zstd-compressed rootfs tarball used as the NFS root."""
    return {
        "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
        "compression": "zstd",
    }


def _fastboot_actions(args) -> list[dict[str, Any]]:
    """Build the three deploy actions and the boot action for fastboot devices."""
    deploy_nfs = {
        "timeout": {"minutes": 10},
        "to": "nfs",
        "nfsrootfs": _nfsrootfs(args),
    }

    # Download the kernel (and DTB), then glue them into an Android boot image
    # inside the docker container.
    deploy_prepare = {
        "timeout": {"minutes": 5},
        "to": "downloads",
        "os": "oe",
        "images": {
            "kernel": {
                "url": f"{args.kernel_url_prefix}/{args.kernel_image_name}",
            },
        },
        "postprocess": {
            "docker": {
                "image": _HEALTH_CHECK_DOCKER_IMAGE,
                "steps": [
                    f"cat Image.gz {args.dtb_filename}.dtb > Image.gz+dtb",
                    "mkbootimg --kernel Image.gz+dtb"
                    + ' --cmdline "root=/dev/nfs rw nfsroot=$NFS_SERVER_IP:$NFS_ROOTFS,tcp,hard rootwait ip=dhcp init=/init"'
                    + " --pagesize 4096 --base 0x80000000 -o boot.img",
                ],
            }
        },
    }
    if args.kernel_image_type:
        deploy_prepare["images"]["kernel"]["type"] = args.kernel_image_type
    if args.dtb_filename:
        deploy_prepare["images"]["dtb"] = {
            "url": f"{args.kernel_url_prefix}/{args.dtb_filename}.dtb"
        }

    # NOTE: every "docker" mapping below is a distinct dict literal on purpose;
    # sharing one dict object between actions could make a YAML dumper emit
    # anchors/aliases instead of plain mappings.
    deploy = {
        "timeout": {"minutes": 2},
        "to": "fastboot",
        "docker": {
            "image": _HEALTH_CHECK_DOCKER_IMAGE,
        },
        "images": {
            "boot": {"url": "downloads://boot.img"},
        },
    }

    boot = {
        "timeout": {"minutes": 2},
        "docker": {"image": _HEALTH_CHECK_DOCKER_IMAGE},
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "method": args.boot_method,
        "prompts": ["lava-shell:"],
        "commands": ["set_active a"],
    }

    return [
        {"deploy": deploy_nfs},
        {"deploy": deploy_prepare},
        {"deploy": deploy},
        {"boot": boot},
    ]


def _tftp_actions(args) -> list[dict[str, Any]]:
    """Build the deploy action and the boot action for TFTP/NFS devices."""
    deploy = {
        "timeout": {"minutes": 5},
        "to": "tftp",
        "os": "oe",
        "kernel": {
            "url": f"{args.kernel_url_prefix}/{args.kernel_image_name}",
        },
        "nfsrootfs": _nfsrootfs(args),
    }
    if args.kernel_image_type:
        deploy["kernel"]["type"] = args.kernel_image_type
    if args.dtb_filename:
        deploy["dtb"] = {"url": f"{args.kernel_url_prefix}/{args.dtb_filename}.dtb"}

    boot = {
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "method": args.boot_method,
        "prompts": ["lava-shell:"],
        "commands": "nfs",
    }

    return [{"deploy": deploy}, {"boot": boot}]


def _run_steps(args) -> list[str]:
    """Assemble the shell steps that the inline test definition runs.

    job execution script:
      - inline .gitlab-ci/common/init-stage1.sh
      - fetch and unpack per-pipeline build artifacts from build job
      - fetch and unpack per-job environment from lava-submit.sh
      - exec .gitlab-ci/common/init-stage2.sh
    """
    # Inline the first-stage init script, dropping blank lines and lines that
    # start with "#".
    with open(args.first_stage_init, "r") as init_sh:
        steps = [
            line.rstrip()
            for line in init_sh
            if not line.startswith("#") and line.rstrip()
        ]

    # We cannot distribute the Adreno 660 shader firmware inside rootfs,
    # since the license isn't bundled inside the repository
    if args.device_type == "sm8350-hdk":
        steps.append(
            "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
            + "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
            + '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
        )

    steps += artifact_download_steps(args)

    steps += [
        f"mkdir -p {args.ci_project_dir}",
        f"curl {args.build_url} | tar --zstd -x -C {args.ci_project_dir}",
        # Sleep a bit to give time for bash to dump shell xtrace messages into
        # console which may cause interleaving with LAVA_SIGNAL_STARTTC in some
        # devices like a618.
        "sleep 1",
        # Putting CI_JOB name as the testcase name, it may help LAVA farm
        # maintainers with monitoring
        f"lava-test-case 'mesa-ci_{args.mesa_job_name}' --shell /init-stage2.sh",
    ]
    return steps


def _test_action(args) -> dict[str, Any]:
    """Build the single inline LAVA test that wraps the whole Mesa CI job."""
    # skeleton test definition: only declaring each job as a single 'test'
    # since LAVA's test parsing is not useful to us
    return {
        "timeout": {"minutes": args.job_timeout_min},
        "failure_retry": 1,
        "definitions": [
            {
                "name": "mesa",
                "from": "inline",
                "lava-signal": "kmsg",
                "path": "inline/mesa.yaml",
                "repository": {
                    "metadata": {
                        "name": "mesa",
                        "description": "Mesa test plan",
                        "os": ["oe"],
                        "scope": ["functional"],
                        "format": "Lava-Test Test Definition 1.0",
                    },
                    "run": {"steps": _run_steps(args)},
                },
            }
        ],
    }


def generate_lava_yaml_payload(args) -> dict[str, Any]:
    """Build the LAVA job definition for a job whose console goes over UART.

    The result of ``generate_metadata(args)`` is extended with an ``actions``
    list: the deploy/boot actions matching ``args.boot_method`` followed by one
    inline test action.

    Args:
        args: Job settings object. The attributes read here are
            ``rootfs_url_prefix``, ``kernel_url_prefix``,
            ``kernel_image_name``, ``kernel_image_type``, ``dtb_filename``,
            ``boot_method``, ``job_timeout_min``, ``first_stage_init``,
            ``device_type``, ``ci_project_dir``, ``build_url`` and
            ``mesa_job_name``.

    Returns:
        The job definition as a plain dictionary.

    Raises:
        OSError: If the file at ``args.first_stage_init`` cannot be opened.
    """
    values = generate_metadata(args)

    # URLs to our kernel rootfs to boot from, both generated by the base
    # container build
    if args.boot_method == "fastboot":
        boot_actions = _fastboot_actions(args)
    else:  # tftp
        boot_actions = _tftp_actions(args)

    values["actions"] = [*boot_actions, {"test": _test_action(args)}]

    return values