summary | refs | log | tree | commit | diff
path: root/.gitlab-ci/lava
diff options
context:
space:
mode:
author SooChan Lim <sc1.lim@samsung.com> 2023-10-10 15:05:42 +0900
committer Xuelian Bai <xuelian.bai@samsung.com> 2024-01-18 09:31:56 +0800
commit 4e643fe7bf7c1e37078e69bc81a07176e4ff5780 (patch)
tree 8593646c29ce51e4615161e57e8b4fe9bec2a46c /.gitlab-ci/lava
parent 7a9577385e4bd0a54ec0cb12a509e0c47886294e (diff)
download mesa-4e643fe7bf7c1e37078e69bc81a07176e4ff5780.tar.gz
download mesa-4e643fe7bf7c1e37078e69bc81a07176e4ff5780.tar.bz2
download mesa-4e643fe7bf7c1e37078e69bc81a07176e4ff5780.zip
remove gitlab stuffs

These are useless.

Change-Id: I33405f08ccd0b3cccc5a737a099aa4adc2d011ce
Diffstat (limited to '.gitlab-ci/lava')
-rw-r--r-- .gitlab-ci/lava/__init__.py 0
-rw-r--r-- .gitlab-ci/lava/exceptions.py 29
-rwxr-xr-x .gitlab-ci/lava/lava-gitlab-ci.yml 157
-rwxr-xr-x .gitlab-ci/lava/lava-pytest.sh 22
-rwxr-xr-x .gitlab-ci/lava/lava-submit.sh 61
-rwxr-xr-x .gitlab-ci/lava/lava_job_submitter.py 537
-rw-r--r-- .gitlab-ci/lava/requirements-test.txt 6
-rw-r--r-- .gitlab-ci/lava/requirements.txt 2
-rw-r--r-- .gitlab-ci/lava/utils/__init__.py 18
-rw-r--r-- .gitlab-ci/lava/utils/console_format.py 10
-rw-r--r-- .gitlab-ci/lava/utils/gitlab_section.py 103
-rw-r--r-- .gitlab-ci/lava/utils/lava_farm.py 35
-rw-r--r-- .gitlab-ci/lava/utils/lava_job.py 186
-rw-r--r-- .gitlab-ci/lava/utils/lava_job_definition.py 150
-rw-r--r-- .gitlab-ci/lava/utils/lava_log_hints.py 43
-rw-r--r-- .gitlab-ci/lava/utils/lava_proxy.py 44
-rw-r--r-- .gitlab-ci/lava/utils/log_follower.py 310
-rw-r--r-- .gitlab-ci/lava/utils/log_section.py 113
-rw-r--r-- .gitlab-ci/lava/utils/ssh_job_definition.py 208
-rw-r--r-- .gitlab-ci/lava/utils/uart_job_definition.py 171
20 files changed, 0 insertions, 2205 deletions
diff --git a/.gitlab-ci/lava/__init__.py b/.gitlab-ci/lava/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
--- a/.gitlab-ci/lava/__init__.py
+++ /dev/null
diff --git a/.gitlab-ci/lava/exceptions.py b/.gitlab-ci/lava/exceptions.py
deleted file mode 100644
index f877b024510..00000000000
--- a/.gitlab-ci/lava/exceptions.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from datetime import timedelta
-
-
-class MesaCIException(Exception):
- pass
-
-
-class MesaCITimeoutError(MesaCIException):
- def __init__(self, *args, timeout_duration: timedelta) -> None:
- super().__init__(*args)
- self.timeout_duration = timeout_duration
-
-
-class MesaCIRetryError(MesaCIException):
- def __init__(self, *args, retry_count: int, last_job: None) -> None:
- super().__init__(*args)
- self.retry_count = retry_count
- self.last_job = last_job
-
-
-class MesaCIParseException(MesaCIException):
- pass
-
-
-class MesaCIKnownIssueException(MesaCIException):
- """Exception raised when the Mesa CI script finds something in the logs that
- is known to cause the LAVA job to eventually fail"""
-
- pass
diff --git a/.gitlab-ci/lava/lava-gitlab-ci.yml b/.gitlab-ci/lava/lava-gitlab-ci.yml
deleted file mode 100755
index de589595a99..00000000000
--- a/.gitlab-ci/lava/lava-gitlab-ci.yml
+++ /dev/null
@@ -1,157 +0,0 @@
-variables:
- LAVA_SSH_CLIENT_IMAGE: "${CI_REGISTRY_IMAGE}/alpine/x86_64_lava_ssh_client:${ALPINE_X86_64_LAVA_SSH_TAG}--${MESA_TEMPLATES_COMMIT}"
-
-
-.lava-test:
- # Cancel job if a newer commit is pushed to the same branch
- interruptible: true
- variables:
- GIT_STRATEGY: none # testing doesn't build anything from source
- FDO_CI_CONCURRENT: 6 # should be replaced by per-machine definitions
- # proxy used to cache data locally
- FDO_HTTP_CACHE_URI: "http://caching-proxy/cache/?uri="
- # base system generated by the container build job, shared between many pipelines
- BASE_SYSTEM_HOST_PREFIX: "${S3_HOST}/mesa-lava"
- BASE_SYSTEM_MAINLINE_HOST_PATH: "${BASE_SYSTEM_HOST_PREFIX}/${FDO_UPSTREAM_REPO}/${DISTRIBUTION_TAG}/${DEBIAN_ARCH}"
- BASE_SYSTEM_FORK_HOST_PATH: "${BASE_SYSTEM_HOST_PREFIX}/${CI_PROJECT_PATH}/${DISTRIBUTION_TAG}/${DEBIAN_ARCH}"
- # per-job build artifacts
- JOB_ROOTFS_OVERLAY_PATH: "${JOB_ARTIFACTS_BASE}/job-rootfs-overlay.tar.gz"
- JOB_RESULTS_PATH: "${JOB_ARTIFACTS_BASE}/results.tar.zst"
- S3_ARTIFACT_NAME: "mesa-${ARCH}-default-debugoptimized"
- S3_RESULTS_UPLOAD: "${JOB_ARTIFACTS_BASE}"
- PIGLIT_NO_WINDOW: 1
- VISIBILITY_GROUP: "Collabora+fdo"
- script:
- - ./artifacts/lava/lava-submit.sh
- artifacts:
- name: "${CI_PROJECT_NAME}_${CI_JOB_NAME}"
- when: always
- paths:
- - results/
- exclude:
- - results/*.shader_cache
- reports:
- junit: results/junit.xml
- tags:
- - $RUNNER_TAG
- after_script:
- - curl -L --retry 4 -f --retry-all-errors --retry-delay 60 -s "https://${JOB_RESULTS_PATH}" | tar --zstd -x
- needs:
- - alpine/x86_64_lava_ssh_client
- - !reference [.required-for-hardware-jobs, needs]
-
-.lava-test:arm32:
- variables:
- ARCH: arm32
- DEBIAN_ARCH: armhf
- KERNEL_IMAGE_NAME: zImage
- KERNEL_IMAGE_TYPE: "zimage"
- BOOT_METHOD: u-boot
- extends:
- - .use-debian/arm64_build # for same $MESA_ARTIFACTS_TAG as in kernel+rootfs_arm32
- - .use-debian/x86_64_build
- - .lava-test
- - .use-kernel+rootfs-arm
- needs:
- - !reference [.lava-test, needs]
- - kernel+rootfs_arm32
- - debian/x86_64_build
- - debian-arm32
-
-.lava-test-deqp:arm32:
- extends:
- - .lava-test:arm32
- variables:
- HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
-
-.lava-test:arm64:
- variables:
- ARCH: arm64
- DEBIAN_ARCH: arm64
- KERNEL_IMAGE_NAME: Image
- KERNEL_IMAGE_TYPE: "image"
- BOOT_METHOD: u-boot
- extends:
- - .use-debian/arm64_build # for same $MESA_ARTIFACTS_TAG as in kernel+rootfs_arm64
- - .use-debian/x86_64_build
- - .lava-test
- - .use-kernel+rootfs-arm
- dependencies:
- - debian-arm64
- needs:
- - !reference [.lava-test, needs]
- - kernel+rootfs_arm64
- - debian/x86_64_build
- - debian-arm64
-
-.lava-test-deqp:arm64:
- variables:
- HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
- extends:
- - .lava-test:arm64
-
-.lava-test:x86_64:
- variables:
- ARCH: x86_64
- DEBIAN_ARCH: amd64
- KERNEL_IMAGE_NAME: bzImage
- KERNEL_IMAGE_TYPE: "zimage"
- BOOT_METHOD: u-boot
- extends:
- - .use-debian/x86_64_build-base # for same $MESA_ARTIFACTS_BASE_TAG as in kernel+rootfs_x86_64
- - .use-debian/x86_64_build
- - .lava-test
- - .use-kernel+rootfs-x86_64
- needs:
- - !reference [.lava-test, needs]
- - kernel+rootfs_x86_64
- - debian-testing
-
-.lava-test-deqp:x86_64:
- variables:
- HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
- extends:
- - .lava-test:x86_64
-
-.lava-traces-base:
- variables:
- HWCI_TEST_SCRIPT: "/install/piglit/piglit-traces.sh"
- # until we overcome Infrastructure issues, give traces extra 5 min before timeout
- DEVICE_HANGING_TIMEOUT_SEC: 600
- artifacts:
- reports:
- junit: results/junit.xml
-
-.lava-piglit:
- variables:
- PIGLIT_REPLAY_DEVICE_NAME: "gl-${GPU_VERSION}"
- PIGLIT_RESULTS: "${GPU_VERSION}-${PIGLIT_PROFILES}"
- HWCI_TEST_SCRIPT: "/install/piglit/piglit-runner.sh"
-
-.lava-piglit-traces:x86_64:
- extends:
- - .lava-test:x86_64
- - .lava-piglit
- - .lava-traces-base
-
-.lava-piglit-traces:arm32:
- extends:
- - .lava-test:arm32
- - .lava-piglit
- - .lava-traces-base
-
-.lava-piglit-traces:arm64:
- extends:
- - .lava-test:arm64
- - .lava-piglit
- - .lava-traces-base
-
-.lava-piglit:x86_64:
- extends:
- - .lava-test:x86_64
- - .lava-piglit
-
-.lava-piglit:arm64:
- extends:
- - .lava-test:arm64
- - .lava-piglit
diff --git a/.gitlab-ci/lava/lava-pytest.sh b/.gitlab-ci/lava/lava-pytest.sh
deleted file mode 100755
index 786a669b917..00000000000
--- a/.gitlab-ci/lava/lava-pytest.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env bash
-# SPDX-License-Identifier: MIT
-# © Collabora Limited
-# Author: Guilherme Gallo <guilherme.gallo@collabora.com>
-
-# This script runs unit/integration tests related with LAVA CI tools
-# shellcheck disable=SC1091 # The relative paths in this file only become valid at runtime.
-
-set -ex
-
-# Use this script in a python virtualenv for isolation
-python3 -m venv .venv
-. .venv/bin/activate
-python3 -m pip install --break-system-packages -r "${CI_PROJECT_DIR}/.gitlab-ci/lava/requirements-test.txt"
-
-TEST_DIR=${CI_PROJECT_DIR}/.gitlab-ci/tests
-
-PYTHONPATH="${TEST_DIR}:${PYTHONPATH}" python3 -m \
- pytest "${TEST_DIR}" \
- -W ignore::DeprecationWarning \
- --junitxml=artifacts/ci_scripts_report.xml \
- -m 'not slow'
diff --git a/.gitlab-ci/lava/lava-submit.sh b/.gitlab-ci/lava/lava-submit.sh
deleted file mode 100755
index e02bcb24cba..00000000000
--- a/.gitlab-ci/lava/lava-submit.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env bash
-# shellcheck disable=SC2086 # we want word splitting
-
-set -ex
-
-# If we run in the fork (not from mesa or Marge-bot), reuse mainline kernel and rootfs, if exist.
-BASE_SYSTEM_HOST_PATH="${BASE_SYSTEM_MAINLINE_HOST_PATH}"
-if [ "$CI_PROJECT_PATH" != "$FDO_UPSTREAM_REPO" ]; then
- if ! curl -s -X HEAD -L --retry 4 -f --retry-delay 60 \
- "https://${BASE_SYSTEM_MAINLINE_HOST_PATH}/done"; then
- echo "Using kernel and rootfs from the fork, cached from mainline is unavailable."
- BASE_SYSTEM_HOST_PATH="${BASE_SYSTEM_FORK_HOST_PATH}"
- else
- echo "Using the cached mainline kernel and rootfs."
- fi
-fi
-
-rm -rf results
-mkdir -p results/job-rootfs-overlay/
-
-cp artifacts/ci-common/capture-devcoredump.sh results/job-rootfs-overlay/
-cp artifacts/ci-common/init-*.sh results/job-rootfs-overlay/
-cp artifacts/ci-common/intel-gpu-freq.sh results/job-rootfs-overlay/
-cp artifacts/ci-common/kdl.sh results/job-rootfs-overlay/
-cp "$SCRIPTS_DIR"/setup-test-env.sh results/job-rootfs-overlay/
-
-# Prepare env vars for upload.
-section_start variables "Variables passed through:"
-artifacts/ci-common/generate-env.sh | tee results/job-rootfs-overlay/set-job-env-vars.sh
-section_end variables
-
-tar zcf job-rootfs-overlay.tar.gz -C results/job-rootfs-overlay/ .
-ci-fairy s3cp --token-file "${CI_JOB_JWT_FILE}" job-rootfs-overlay.tar.gz "https://${JOB_ROOTFS_OVERLAY_PATH}"
-
-ARTIFACT_URL="${FDO_HTTP_CACHE_URI:-}https://${PIPELINE_ARTIFACTS_BASE}/${S3_ARTIFACT_NAME:?}.tar.zst"
-
-touch results/lava.log
-tail -f results/lava.log &
-PYTHONPATH=artifacts/ artifacts/lava/lava_job_submitter.py \
- submit \
- --dump-yaml \
- --pipeline-info "$CI_JOB_NAME: $CI_PIPELINE_URL on $CI_COMMIT_REF_NAME ${CI_NODE_INDEX}/${CI_NODE_TOTAL}" \
- --rootfs-url-prefix "https://${BASE_SYSTEM_HOST_PATH}" \
- --kernel-url-prefix "${KERNEL_IMAGE_BASE}/${DEBIAN_ARCH}" \
- --build-url "${ARTIFACT_URL}" \
- --job-rootfs-overlay-url "${FDO_HTTP_CACHE_URI:-}https://${JOB_ROOTFS_OVERLAY_PATH}" \
- --job-timeout-min ${JOB_TIMEOUT:-30} \
- --first-stage-init artifacts/ci-common/init-stage1.sh \
- --ci-project-dir "${CI_PROJECT_DIR}" \
- --device-type "${DEVICE_TYPE}" \
- --dtb-filename "${DTB}" \
- --jwt-file "${CI_JOB_JWT_FILE}" \
- --kernel-image-name "${KERNEL_IMAGE_NAME}" \
- --kernel-image-type "${KERNEL_IMAGE_TYPE}" \
- --boot-method "${BOOT_METHOD}" \
- --visibility-group "${VISIBILITY_GROUP}" \
- --lava-tags "${LAVA_TAGS}" \
- --mesa-job-name "$CI_JOB_NAME" \
- --structured-log-file "results/lava_job_detail.json" \
- --ssh-client-image "${LAVA_SSH_CLIENT_IMAGE}" \
- >> results/lava.log
diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py
deleted file mode 100755
index b2d8e5306e7..00000000000
--- a/.gitlab-ci/lava/lava_job_submitter.py
+++ /dev/null
@@ -1,537 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (C) 2020 - 2023 Collabora Limited
-# Authors:
-# Gustavo Padovan <gustavo.padovan@collabora.com>
-# Guilherme Gallo <guilherme.gallo@collabora.com>
-#
-# SPDX-License-Identifier: MIT
-
-"""Send a job to LAVA, track it and collect log back"""
-
-import contextlib
-import json
-import pathlib
-import sys
-import time
-from collections import defaultdict
-from dataclasses import dataclass, fields
-from datetime import datetime, timedelta
-from io import StringIO
-from os import environ, getenv, path
-from typing import Any, Optional
-
-import fire
-from lava.exceptions import (
- MesaCIException,
- MesaCIParseException,
- MesaCIRetryError,
- MesaCITimeoutError,
-)
-from lava.utils import CONSOLE_LOG
-from lava.utils import DEFAULT_GITLAB_SECTION_TIMEOUTS as GL_SECTION_TIMEOUTS
-from lava.utils import (
- GitlabSection,
- LAVAJob,
- LogFollower,
- LogSectionType,
- call_proxy,
- fatal_err,
- generate_lava_job_definition,
- hide_sensitive_data,
- print_log,
- setup_lava_proxy,
-)
-from lavacli.utils import flow_yaml as lava_yaml
-
-# Initialize structural logging with a defaultdict, it can be changed for more
-# sophisticated dict-like data abstractions.
-STRUCTURAL_LOG = defaultdict(list)
-
-try:
- from ci.structured_logger import StructuredLogger
-except ImportError as e:
- print_log(
- f"Could not import StructuredLogger library: {e}. "
- "Falling back to defaultdict based structured logger."
- )
-
-# Timeout in seconds to decide if the device from the dispatched LAVA job has
-# hung or not due to the lack of new log output.
-DEVICE_HANGING_TIMEOUT_SEC = int(getenv("DEVICE_HANGING_TIMEOUT_SEC", 5*60))
-
-# How many seconds the script should wait before try a new polling iteration to
-# check if the dispatched LAVA job is running or waiting in the job queue.
-WAIT_FOR_DEVICE_POLLING_TIME_SEC = int(
- getenv("LAVA_WAIT_FOR_DEVICE_POLLING_TIME_SEC", 1)
-)
-
-# How many seconds the script will wait to let LAVA finalize the job and give
-# the final details.
-WAIT_FOR_LAVA_POST_PROCESSING_SEC = int(getenv("LAVA_WAIT_LAVA_POST_PROCESSING_SEC", 5))
-WAIT_FOR_LAVA_POST_PROCESSING_RETRIES = int(
- getenv("LAVA_WAIT_LAVA_POST_PROCESSING_RETRIES", 6)
-)
-
-# How many seconds to wait between log output LAVA RPC calls.
-LOG_POLLING_TIME_SEC = int(getenv("LAVA_LOG_POLLING_TIME_SEC", 5))
-
-# How many retries should be made when a timeout happen.
-NUMBER_OF_RETRIES_TIMEOUT_DETECTION = int(
- getenv("LAVA_NUMBER_OF_RETRIES_TIMEOUT_DETECTION", 2)
-)
-
-
-def raise_exception_from_metadata(metadata: dict, job_id: int) -> None:
- """
- Investigate infrastructure errors from the job metadata.
- If it finds an error, raise it as MesaCIException.
- """
- if "result" not in metadata or metadata["result"] != "fail":
- return
- if "error_type" in metadata:
- error_type = metadata["error_type"]
- if error_type == "Infrastructure":
- raise MesaCIException(
- f"LAVA job {job_id} failed with Infrastructure Error. Retry."
- )
- if error_type == "Job":
- # This happens when LAVA assumes that the job cannot terminate or
- # with mal-formed job definitions. As we are always validating the
- # jobs, only the former is probable to happen. E.g.: When some LAVA
- # action timed out more times than expected in job definition.
- raise MesaCIException(
- f"LAVA job {job_id} failed with JobError "
- "(possible LAVA timeout misconfiguration/bug). Retry."
- )
- if "case" in metadata and metadata["case"] == "validate":
- raise MesaCIException(
- f"LAVA job {job_id} failed validation (possible download error). Retry."
- )
-
-
-def raise_lava_error(job) -> None:
- # Look for infrastructure errors, raise them, and retry if we see them.
- results_yaml = call_proxy(job.proxy.results.get_testjob_results_yaml, job.job_id)
- results = lava_yaml.load(results_yaml)
- for res in results:
- metadata = res["metadata"]
- raise_exception_from_metadata(metadata, job.job_id)
-
- # If we reach this far, it means that the job ended without hwci script
- # result and no LAVA infrastructure problem was found
- job.status = "fail"
-
-
-def show_final_job_data(job, colour=f"{CONSOLE_LOG['BOLD']}{CONSOLE_LOG['FG_GREEN']}"):
- with GitlabSection(
- "job_data",
- "LAVA job info",
- type=LogSectionType.LAVA_POST_PROCESSING,
- start_collapsed=True,
- colour=colour,
- ):
- wait_post_processing_retries: int = WAIT_FOR_LAVA_POST_PROCESSING_RETRIES
- while not job.is_post_processed() and wait_post_processing_retries > 0:
- # Wait a little until LAVA finishes processing metadata
- time.sleep(WAIT_FOR_LAVA_POST_PROCESSING_SEC)
- wait_post_processing_retries -= 1
-
- if not job.is_post_processed():
- waited_for_sec: int = (
- WAIT_FOR_LAVA_POST_PROCESSING_RETRIES
- * WAIT_FOR_LAVA_POST_PROCESSING_SEC
- )
- print_log(
- f"Waited for {waited_for_sec} seconds "
- "for LAVA to post-process the job, it haven't finished yet. "
- "Dumping it's info anyway"
- )
-
- details: dict[str, str] = job.show()
- for field, value in details.items():
- print(f"{field:<15}: {value}")
- job.refresh_log()
-
-
-def fetch_logs(job, max_idle_time, log_follower) -> None:
- is_job_hanging(job, max_idle_time)
-
- time.sleep(LOG_POLLING_TIME_SEC)
- new_log_lines = fetch_new_log_lines(job)
- parsed_lines = parse_log_lines(job, log_follower, new_log_lines)
-
- for line in parsed_lines:
- print_log(line)
-
-
-def is_job_hanging(job, max_idle_time):
- # Poll to check for new logs, assuming that a prolonged period of
- # silence means that the device has died and we should try it again
- if datetime.now() - job.last_log_time > max_idle_time:
- max_idle_time_min = max_idle_time.total_seconds() / 60
-
- raise MesaCITimeoutError(
- f"{CONSOLE_LOG['BOLD']}"
- f"{CONSOLE_LOG['FG_YELLOW']}"
- f"LAVA job {job.job_id} does not respond for {max_idle_time_min} "
- "minutes. Retry."
- f"{CONSOLE_LOG['RESET']}",
- timeout_duration=max_idle_time,
- )
-
-
-def parse_log_lines(job, log_follower, new_log_lines):
-
- if log_follower.feed(new_log_lines):
- # If we had non-empty log data, we can assure that the device is alive.
- job.heartbeat()
- parsed_lines = log_follower.flush()
-
- # Only parse job results when the script reaches the end of the logs.
- # Depending on how much payload the RPC scheduler.jobs.logs get, it may
- # reach the LAVA_POST_PROCESSING phase.
- if log_follower.current_section.type in (
- LogSectionType.TEST_CASE,
- LogSectionType.LAVA_POST_PROCESSING,
- ):
- parsed_lines = job.parse_job_result_from_log(parsed_lines)
- return parsed_lines
-
-
-def fetch_new_log_lines(job):
-
- # The XMLRPC binary packet may be corrupted, causing a YAML scanner error.
- # Retry the log fetching several times before exposing the error.
- for _ in range(5):
- with contextlib.suppress(MesaCIParseException):
- new_log_lines = job.get_logs()
- break
- else:
- raise MesaCIParseException
- return new_log_lines
-
-
-def submit_job(job):
- try:
- job.submit()
- except Exception as mesa_ci_err:
- raise MesaCIException(
- f"Could not submit LAVA job. Reason: {mesa_ci_err}"
- ) from mesa_ci_err
-
-
-def wait_for_job_get_started(job):
- print_log(f"Waiting for job {job.job_id} to start.")
- while not job.is_started():
- time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
- job.refresh_log()
- print_log(f"Job {job.job_id} started.")
-
-
-def bootstrap_log_follower() -> LogFollower:
- gl = GitlabSection(
- id="lava_boot",
- header="LAVA boot",
- type=LogSectionType.LAVA_BOOT,
- start_collapsed=True,
- )
- print(gl.start())
- return LogFollower(starting_section=gl)
-
-
-def follow_job_execution(job, log_follower):
- with log_follower:
- max_idle_time = timedelta(seconds=DEVICE_HANGING_TIMEOUT_SEC)
- # Start to check job's health
- job.heartbeat()
- while not job.is_finished:
- fetch_logs(job, max_idle_time, log_follower)
- structural_log_phases(job, log_follower)
-
- # Mesa Developers expect to have a simple pass/fail job result.
- # If this does not happen, it probably means a LAVA infrastructure error
- # happened.
- if job.status not in ["pass", "fail"]:
- raise_lava_error(job)
-
- # LogFollower does some cleanup after the early exit (trigger by
- # `hwci: pass|fail` regex), let's update the phases after the cleanup.
- structural_log_phases(job, log_follower)
-
-
-def structural_log_phases(job, log_follower):
- phases: dict[str, Any] = {
- s.header.split(" - ")[0]: {
- k: str(getattr(s, k)) for k in ("start_time", "end_time")
- }
- for s in log_follower.section_history
- }
- job.log["dut_job_phases"] = phases
-
-
-def print_job_final_status(job):
- if job.status == "running":
- job.status = "hung"
-
- color = LAVAJob.COLOR_STATUS_MAP.get(job.status, CONSOLE_LOG["FG_RED"])
- print_log(
- f"{color}"
- f"LAVA Job finished with status: {job.status}"
- f"{CONSOLE_LOG['RESET']}"
- )
-
- job.refresh_log()
- show_final_job_data(job, colour=f"{CONSOLE_LOG['BOLD']}{color}")
-
-
-def execute_job_with_retries(
- proxy, job_definition, retry_count, jobs_log
-) -> Optional[LAVAJob]:
- last_failed_job = None
- for attempt_no in range(1, retry_count + 2):
- # Need to get the logger value from its object to enable autosave
- # features, if AutoSaveDict is enabled from StructuredLogging module
- jobs_log.append({})
- job_log = jobs_log[-1]
- job = LAVAJob(proxy, job_definition, job_log)
- STRUCTURAL_LOG["dut_attempt_counter"] = attempt_no
- try:
- job_log["submitter_start_time"] = datetime.now().isoformat()
- submit_job(job)
- wait_for_job_get_started(job)
- log_follower: LogFollower = bootstrap_log_follower()
- follow_job_execution(job, log_follower)
- return job
-
- except (MesaCIException, KeyboardInterrupt) as exception:
- job.handle_exception(exception)
-
- finally:
- print_job_final_status(job)
- # If LAVA takes too long to post process the job, the submitter
- # gives up and proceeds.
- job_log["submitter_end_time"] = datetime.now().isoformat()
- last_failed_job = job
- print_log(
- f"{CONSOLE_LOG['BOLD']}"
- f"Finished executing LAVA job in the attempt #{attempt_no}"
- f"{CONSOLE_LOG['RESET']}"
- )
-
- return last_failed_job
-
-
-def retriable_follow_job(proxy, job_definition) -> LAVAJob:
- number_of_retries = NUMBER_OF_RETRIES_TIMEOUT_DETECTION
-
- last_attempted_job = execute_job_with_retries(
- proxy, job_definition, number_of_retries, STRUCTURAL_LOG["dut_jobs"]
- )
-
- if last_attempted_job.exception is not None:
- # Infra failed in all attempts
- raise MesaCIRetryError(
- f"{CONSOLE_LOG['BOLD']}"
- f"{CONSOLE_LOG['FG_RED']}"
- "Job failed after it exceeded the number of "
- f"{number_of_retries} retries."
- f"{CONSOLE_LOG['RESET']}",
- retry_count=number_of_retries,
- last_job=last_attempted_job,
- )
-
- return last_attempted_job
-
-
-@dataclass
-class PathResolver:
- def __post_init__(self):
- for field in fields(self):
- value = getattr(self, field.name)
- if not value:
- continue
- if field.type == pathlib.Path:
- value = pathlib.Path(value)
- setattr(self, field.name, value.resolve())
-
-
-@dataclass
-class LAVAJobSubmitter(PathResolver):
- boot_method: str
- ci_project_dir: str
- device_type: str
- job_timeout_min: int # The job timeout in minutes
- build_url: str = None
- dtb_filename: str = None
- dump_yaml: bool = False # Whether to dump the YAML payload to stdout
- first_stage_init: str = None
- jwt_file: pathlib.Path = None
- kernel_image_name: str = None
- kernel_image_type: str = ""
- kernel_url_prefix: str = None
- lava_tags: str = "" # Comma-separated LAVA tags for the job
- mesa_job_name: str = "mesa_ci_job"
- pipeline_info: str = ""
- rootfs_url_prefix: str = None
- validate_only: bool = False # Whether to only validate the job, not execute it
- visibility_group: str = None # Only affects LAVA farm maintainers
- job_rootfs_overlay_url: str = None
- structured_log_file: pathlib.Path = None # Log file path with structured LAVA log
- ssh_client_image: str = None # x86_64 SSH client image to follow the job's output
- __structured_log_context = contextlib.nullcontext() # Structured Logger context
-
- def __post_init__(self) -> None:
- super().__post_init__()
- # Remove mesa job names with spaces, which breaks the lava-test-case command
- self.mesa_job_name = self.mesa_job_name.split(" ")[0]
-
- if not self.structured_log_file:
- return
-
- self.__structured_log_context = StructuredLoggerWrapper(self).logger_context()
- self.proxy = setup_lava_proxy()
-
- def __prepare_submission(self) -> str:
- # Overwrite the timeout for the testcases with the value offered by the
- # user. The testcase running time should be at least 4 times greater than
- # the other sections (boot and setup), so we can safely ignore them.
- # If LAVA fails to stop the job at this stage, it will fall back to the
- # script section timeout with a reasonable delay.
- GL_SECTION_TIMEOUTS[LogSectionType.TEST_CASE] = timedelta(
- minutes=self.job_timeout_min
- )
-
- job_definition = generate_lava_job_definition(self)
-
- if self.dump_yaml:
- self.dump_job_definition(job_definition)
-
- validation_job = LAVAJob(self.proxy, job_definition)
- if errors := validation_job.validate():
- fatal_err(f"Error in LAVA job definition: {errors}")
- print_log("LAVA job definition validated successfully")
-
- return job_definition
-
- @classmethod
- def is_under_ci(cls):
- ci_envvar: str = getenv("CI", "false")
- return ci_envvar.lower() == "true"
-
- def dump_job_definition(self, job_definition) -> None:
- with GitlabSection(
- "yaml_dump",
- "LAVA job definition (YAML)",
- type=LogSectionType.LAVA_BOOT,
- start_collapsed=True,
- ):
- print(hide_sensitive_data(job_definition))
-
- def submit(self) -> None:
- """
- Prepares and submits the LAVA job.
- If `validate_only` is True, it validates the job without submitting it.
- If the job finishes with a non-pass status or encounters an exception,
- the program exits with a non-zero return code.
- """
- job_definition: str = self.__prepare_submission()
-
- if self.validate_only:
- return
-
- with self.__structured_log_context:
- last_attempt_job = None
- try:
- last_attempt_job = retriable_follow_job(self.proxy, job_definition)
-
- except MesaCIRetryError as retry_exception:
- last_attempt_job = retry_exception.last_job
-
- except Exception as exception:
- STRUCTURAL_LOG["job_combined_fail_reason"] = str(exception)
- raise exception
-
- finally:
- self.finish_script(last_attempt_job)
-
- def print_log_artifact_url(self):
- base_url = "https://$CI_PROJECT_ROOT_NAMESPACE.pages.freedesktop.org/"
- artifacts_path = "-/$CI_PROJECT_NAME/-/jobs/$CI_JOB_ID/artifacts/"
- relative_log_path = self.structured_log_file.relative_to(pathlib.Path.cwd())
- full_path = f"{base_url}{artifacts_path}{relative_log_path}"
- artifact_url = path.expandvars(full_path)
-
- print_log(f"Structural Logging data available at: {artifact_url}")
-
- def finish_script(self, last_attempt_job):
- if self.is_under_ci() and self.structured_log_file:
- self.print_log_artifact_url()
-
- if not last_attempt_job:
- # No job was run, something bad happened
- STRUCTURAL_LOG["job_combined_status"] = "script_crash"
- current_exception = str(sys.exc_info()[0])
- STRUCTURAL_LOG["job_combined_fail_reason"] = current_exception
- raise SystemExit(1)
-
- STRUCTURAL_LOG["job_combined_status"] = last_attempt_job.status
-
- if last_attempt_job.status != "pass":
- raise SystemExit(1)
-
-
-class StructuredLoggerWrapper:
- def __init__(self, submitter: LAVAJobSubmitter) -> None:
- self.__submitter: LAVAJobSubmitter = submitter
-
- def _init_logger(self):
- STRUCTURAL_LOG["fixed_tags"] = self.__submitter.lava_tags
- STRUCTURAL_LOG["dut_job_type"] = self.__submitter.device_type
- STRUCTURAL_LOG["job_combined_fail_reason"] = None
- STRUCTURAL_LOG["job_combined_status"] = "not_submitted"
- STRUCTURAL_LOG["dut_attempt_counter"] = 0
-
- # Initialize dut_jobs list to enable appends
- STRUCTURAL_LOG["dut_jobs"] = []
-
- @contextlib.contextmanager
- def _simple_logger_context(self):
- log_file = pathlib.Path(self.__submitter.structured_log_file)
- log_file.parent.mkdir(parents=True, exist_ok=True)
- try:
- # Truncate the file
- log_file.write_text("")
- yield
- finally:
- log_file.write_text(json.dumps(STRUCTURAL_LOG, indent=2))
-
- def logger_context(self):
- context = contextlib.nullcontext()
- try:
-
- global STRUCTURAL_LOG
- STRUCTURAL_LOG = StructuredLogger(
- self.__submitter.structured_log_file, truncate=True
- ).data
- except NameError:
- context = self._simple_logger_context()
-
- self._init_logger()
- return context
-
-
-if __name__ == "__main__":
- # given that we proxy from DUT -> LAVA dispatcher -> LAVA primary -> us ->
- # GitLab runner -> GitLab primary -> user, safe to say we don't need any
- # more buffering
- sys.stdout.reconfigure(line_buffering=True)
- sys.stderr.reconfigure(line_buffering=True)
- # LAVA farm is giving datetime in UTC timezone, let's set it locally for the
- # script run.
- # Setting environ here will not affect the system time, as the os.environ
- # lifetime follows the script one.
- environ["TZ"] = "UTC"
- time.tzset()
-
- fire.Fire(LAVAJobSubmitter)
diff --git a/.gitlab-ci/lava/requirements-test.txt b/.gitlab-ci/lava/requirements-test.txt
deleted file mode 100644
index 0ff561db901..00000000000
--- a/.gitlab-ci/lava/requirements-test.txt
+++ /dev/null
@@ -1,6 +0,0 @@
--r requirements.txt
-freezegun==1.1.0
-hypothesis==6.67.1
-pytest==7.2.1
-pytest-cov==3.0.0
-PyYAML==5.3.1
diff --git a/.gitlab-ci/lava/requirements.txt b/.gitlab-ci/lava/requirements.txt
deleted file mode 100644
index e89021f3fd5..00000000000
--- a/.gitlab-ci/lava/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-lavacli==1.5.2
-fire==0.5.0
diff --git a/.gitlab-ci/lava/utils/__init__.py b/.gitlab-ci/lava/utils/__init__.py
deleted file mode 100644
index 349d2b32561..00000000000
--- a/.gitlab-ci/lava/utils/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from .console_format import CONSOLE_LOG
-from .gitlab_section import GitlabSection
-from .lava_job import LAVAJob
-from .lava_job_definition import generate_lava_job_definition
-from .lava_proxy import call_proxy, setup_lava_proxy
-from .log_follower import (
- LogFollower,
- fatal_err,
- fix_lava_gitlab_section_log,
- hide_sensitive_data,
- print_log,
-)
-from .log_section import (
- DEFAULT_GITLAB_SECTION_TIMEOUTS,
- FALLBACK_GITLAB_SECTION_TIMEOUT,
- LogSection,
- LogSectionType,
-)
diff --git a/.gitlab-ci/lava/utils/console_format.py b/.gitlab-ci/lava/utils/console_format.py
deleted file mode 100644
index 3ad7600591b..00000000000
--- a/.gitlab-ci/lava/utils/console_format.py
+++ /dev/null
@@ -1,10 +0,0 @@
-CONSOLE_LOG = {
- "FG_GREEN": "\x1b[1;32;5;197m",
- "FG_RED": "\x1b[1;38;5;197m",
- "FG_YELLOW": "\x1b[1;33;5;197m",
- "FG_MAGENTA": "\x1b[1;35;5;197m",
- "RESET": "\x1b[0m",
- "UNDERLINED": "\x1b[3m",
- "BOLD": "\x1b[1m",
- "DIM": "\x1b[2m",
-}
diff --git a/.gitlab-ci/lava/utils/gitlab_section.py b/.gitlab-ci/lava/utils/gitlab_section.py
deleted file mode 100644
index 034afb4eb33..00000000000
--- a/.gitlab-ci/lava/utils/gitlab_section.py
+++ /dev/null
@@ -1,103 +0,0 @@
-from __future__ import annotations
-
-import re
-from dataclasses import dataclass, field
-from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Optional
-
-from lava.utils.console_format import CONSOLE_LOG
-
-if TYPE_CHECKING:
- from lava.utils.log_section import LogSectionType
-
-
-# TODO: Add section final status to assist with monitoring
-@dataclass
-class GitlabSection:
- id: str
- header: str
- type: LogSectionType
- start_collapsed: bool = False
- escape: str = "\x1b[0K"
- colour: str = f"{CONSOLE_LOG['BOLD']}{CONSOLE_LOG['FG_GREEN']}"
- __start_time: Optional[datetime] = field(default=None, init=False)
- __end_time: Optional[datetime] = field(default=None, init=False)
-
- @classmethod
- def section_id_filter(cls, value) -> str:
- return str(re.sub(r"[^\w_-]+", "-", value))
-
- def __post_init__(self):
- self.id = self.section_id_filter(self.id)
-
- @property
- def has_started(self) -> bool:
- return self.__start_time is not None
-
- @property
- def has_finished(self) -> bool:
- return self.__end_time is not None
-
- @property
- def start_time(self) -> datetime:
- return self.__start_time
-
- @property
- def end_time(self) -> Optional[datetime]:
- return self.__end_time
-
- def get_timestamp(self, time: datetime) -> str:
- unix_ts = datetime.timestamp(time)
- return str(int(unix_ts))
-
- def section(self, marker: str, header: str, time: datetime) -> str:
- preamble = f"{self.escape}section_{marker}"
- collapse = marker == "start" and self.start_collapsed
- collapsed = "[collapsed=true]" if collapse else ""
- section_id = f"{self.id}{collapsed}"
-
- timestamp = self.get_timestamp(time)
- before_header = ":".join([preamble, timestamp, section_id])
- colored_header = f"{self.colour}{header}\x1b[0m" if header else ""
- header_wrapper = "\r" + f"{self.escape}{colored_header}"
-
- return f"{before_header}{header_wrapper}"
-
- def __str__(self) -> str:
- status = "NS" if not self.has_started else "F" if self.has_finished else "IP"
- delta = self.delta_time()
- elapsed_time = "N/A" if delta is None else str(delta)
- return (
- f"GitlabSection({self.id}, {self.header}, {self.type}, "
- f"SC={self.start_collapsed}, S={status}, ST={self.start_time}, "
- f"ET={self.end_time}, ET={elapsed_time})"
- )
-
- def __enter__(self):
- print(self.start())
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- print(self.end())
-
- def start(self) -> str:
- assert not self.has_finished, "Starting an already finished section"
- self.__start_time = datetime.now()
- return self.section(marker="start", header=self.header, time=self.__start_time)
-
- def end(self) -> str:
- assert self.has_started, "Ending an uninitialized section"
- self.__end_time = datetime.now()
- assert (
- self.__end_time >= self.__start_time
- ), "Section execution time will be negative"
- return self.section(marker="end", header="", time=self.__end_time)
-
- def delta_time(self) -> Optional[timedelta]:
- if self.__start_time and self.__end_time:
- return self.__end_time - self.__start_time
-
- if self.has_started:
- return datetime.now() - self.__start_time
-
- return None
diff --git a/.gitlab-ci/lava/utils/lava_farm.py b/.gitlab-ci/lava/utils/lava_farm.py
deleted file mode 100644
index dfd51ab9b92..00000000000
--- a/.gitlab-ci/lava/utils/lava_farm.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import os
-import re
-from enum import Enum
-
-
class LavaFarm(Enum):
    """Identifiers for the LAVA farms a CI runner can belong to."""

    LIMA = 1
    COLLABORA = 2
    # Returned for runner tags that match neither farm-specific pattern.
    UNKNOWN = 3
-
-
# Runner-tag regex per farm. Insertion order matters: lookups iterate this
# dict and stop at the first match, so the Lima pattern (which shares its
# prefix with the Collabora one) must come first, and the catch-all UNKNOWN
# pattern must come last.
LAVA_FARM_RUNNER_PATTERNS: dict[LavaFarm, str] = {
    LavaFarm.LIMA: r"^mesa-ci-[\x01-\x7F]+-lava-lima$",
    LavaFarm.COLLABORA: r"^mesa-ci-[\x01-\x7F]+-lava-[\x01-\x7F]+$",
    LavaFarm.UNKNOWN: r"^[\x01-\x7F]+",
}
-
-
def get_lava_farm() -> LavaFarm:
    """
    Map the RUNNER_TAG environment variable to a LAVA farm.

    The tag defaults to ``"unknown"`` when the variable is unset. Patterns
    are tried in the order they appear in LAVA_FARM_RUNNER_PATTERNS.

    :return: The first farm whose pattern matches the runner tag
    :raises ValueError: if no pattern matches the tag
    """
    runner_tag: str = os.getenv("RUNNER_TAG", "unknown")

    candidates = (
        farm
        for farm, pattern in LAVA_FARM_RUNNER_PATTERNS.items()
        if re.match(pattern, runner_tag)
    )
    farm = next(candidates, None)
    if farm is None:
        raise ValueError(f"Unknown LAVA runner tag: {runner_tag}")
    return farm
diff --git a/.gitlab-ci/lava/utils/lava_job.py b/.gitlab-ci/lava/utils/lava_job.py
deleted file mode 100644
index b69f8b9fbb7..00000000000
--- a/.gitlab-ci/lava/utils/lava_job.py
+++ /dev/null
@@ -1,186 +0,0 @@
-import re
-import xmlrpc
-from collections import defaultdict
-from datetime import datetime
-from typing import Any, Optional
-
-from lava.exceptions import (
- MesaCIException,
- MesaCIKnownIssueException,
- MesaCIParseException,
- MesaCITimeoutError,
-)
-from lava.utils import CONSOLE_LOG
-from lava.utils.log_follower import print_log
-from lavacli.utils import flow_yaml as lava_yaml
-
-from .lava_proxy import call_proxy
-
-
class LAVAJob:
    """Handle for one LAVA job: submission, state polling, log retrieval and
    final status bookkeeping.

    Status, job id and failure reason are mirrored into ``self.log`` each
    time they are assigned.
    """

    # Console colour associated with each job status name.
    COLOR_STATUS_MAP: dict[str, str] = {
        "pass": CONSOLE_LOG["FG_GREEN"],
        "hung": CONSOLE_LOG["FG_YELLOW"],
        "fail": CONSOLE_LOG["FG_RED"],
        "canceled": CONSOLE_LOG["FG_MAGENTA"],
    }

    def __init__(
        self, proxy, definition, log: Optional[dict[str, Any]] = None
    ) -> None:
        """
        Args:
            proxy: XML-RPC proxy used for every ``scheduler`` call.
            definition: Job definition passed to validate/submit.
            log: Mapping that receives the structured job data. A fresh
                ``defaultdict(str)`` is created per instance when omitted, so
                jobs never share one log by accident.
        """
        self._job_id = None
        self.proxy = proxy
        self.definition = definition
        self.last_log_line = 0
        self.last_log_time = None
        self._is_finished = False
        # Must be set before `status`, whose setter writes into the log.
        self.log: dict[str, Any] = defaultdict(str) if log is None else log
        self.status = "not_submitted"
        self.__exception: Optional[str] = None

    def heartbeat(self) -> None:
        """Record that the job produced output just now and mark it running."""
        self.last_log_time = datetime.now()
        self.status = "running"

    @property
    def status(self) -> str:
        return self._status

    @status.setter
    def status(self, new_status: str) -> None:
        self._status = new_status
        self.log["status"] = self._status

    @property
    def job_id(self) -> Optional[int]:
        """LAVA job id, or ``None`` until one has been assigned."""
        return self._job_id

    @job_id.setter
    def job_id(self, new_id: int) -> None:
        self._job_id = new_id
        self.log["lava_job_id"] = self._job_id

    @property
    def is_finished(self) -> bool:
        return self._is_finished

    @property
    def exception(self) -> Optional[str]:
        """``repr()`` of the last recorded exception, or ``None``."""
        return self.__exception

    @exception.setter
    def exception(self, exception: Exception) -> None:
        self.__exception = repr(exception)
        self.log["dut_job_fail_reason"] = self.__exception

    def validate(self) -> Optional[dict]:
        """Returns a dict with errors, if the validation fails.

        Returns:
            Optional[dict]: a dict with the validation errors, if any
        """
        return call_proxy(self.proxy.scheduler.jobs.validate, self.definition, True)

    def show(self) -> dict[str, str]:
        """Return the scheduler's ``jobs.show`` result for this job."""
        return call_proxy(self.proxy.scheduler.jobs.show, self._job_id)

    def get_lava_time(self, key, data) -> Optional[str]:
        """Return ``data[key].value``, or ``None`` when the entry is falsy."""
        return data[key].value if data[key] else None

    def refresh_log(self) -> None:
        """Copy timing, device and state details from ``show()`` into the log."""
        details = self.show()
        self.log["dut_start_time"] = self.get_lava_time("start_time", details)
        self.log["dut_submit_time"] = self.get_lava_time("submit_time", details)
        self.log["dut_end_time"] = self.get_lava_time("end_time", details)
        self.log["dut_name"] = details.get("device")
        self.log["dut_state"] = details.get("state")

    def submit(self) -> bool:
        """Submit the job definition.

        Returns:
            ``True`` on success, ``False`` when a MesaCIException was raised
            while submitting or refreshing the log.
        """
        try:
            self.job_id = call_proxy(self.proxy.scheduler.jobs.submit, self.definition)
            self.status = "submitted"
            self.refresh_log()
        except MesaCIException:
            return False
        return True

    def lava_state(self) -> str:
        """Return the ``job_state`` field reported by the scheduler."""
        job_state: dict[str, str] = call_proxy(
            self.proxy.scheduler.job_state, self._job_id
        )
        return job_state["job_state"]

    def cancel(self):
        """Cancel the job on the scheduler, if it was ever submitted."""
        # NOTE(review): nesting reconstructed from a flattened source; the
        # status update is assumed to belong inside the job-id check.
        if self._job_id:
            self.proxy.scheduler.jobs.cancel(self._job_id)
            # If we don't have yet set another job's status, let's update it
            # with canceled one
            if self.status == "running":
                self.status = "canceled"

    def is_started(self) -> bool:
        """True once the scheduler state has left the waiting states."""
        waiting_states = ("Submitted", "Scheduling", "Scheduled")
        return self.lava_state() not in waiting_states

    def is_post_processed(self) -> bool:
        """True when the scheduler no longer reports the job as "Running"."""
        return self.lava_state() != "Running"

    def _load_log_from_data(self, data) -> list[str]:
        """Decode a log payload and advance ``last_log_line`` by its length."""
        lines: list[str] = []
        if isinstance(data, xmlrpc.client.Binary):
            # We are dealing with xmlrpc.client.Binary
            # Let's extract the data
            data = data.data
        # When there is no new log data, the YAML is empty
        if loaded_lines := lava_yaml.load(data):
            lines = loaded_lines
            self.last_log_line += len(lines)
        return lines

    def get_logs(self) -> list[str]:
        """Fetch log lines produced since ``last_log_line``.

        Raises:
            MesaCIParseException: wrapping any error raised while fetching or
                decoding the logs.
        """
        try:
            (finished, data) = call_proxy(
                self.proxy.scheduler.jobs.logs, self._job_id, self.last_log_line
            )
            self._is_finished = finished
            return self._load_log_from_data(data)

        except Exception as mesa_ci_err:
            raise MesaCIParseException(
                f"Could not get LAVA job logs. Reason: {mesa_ci_err}"
            ) from mesa_ci_err

    def parse_job_result_from_log(self, lava_lines: list[str]) -> list[str]:
        """Use the console log to catch if the job has completed successfully or
        not. Returns the list of log lines until the result line."""

        last_line = None  # Print all lines. lines[:None] == lines[:]

        for idx, line in enumerate(lava_lines):
            if result := re.search(r"hwci: mesa: (pass|fail)", line):
                self._is_finished = True
                self.status = result[1]

                last_line = idx + 1
                # We reached the log end here. hwci script has finished.
                break
        return lava_lines[:last_line]

    def handle_exception(self, exception: Exception):
        """Log the exception, cancel the job and derive a status from it.

        A KeyboardInterrupt is re-raised after being recorded.
        """
        print_log(exception)
        self.cancel()
        self.exception = exception

        # Give more accurate status depending on exception
        if isinstance(exception, MesaCIKnownIssueException):
            self.status = "canceled"
        elif isinstance(exception, MesaCITimeoutError):
            self.status = "hung"
        elif isinstance(exception, MesaCIException):
            self.status = "failed"
        elif isinstance(exception, KeyboardInterrupt):
            self.status = "interrupted"
            print_log("LAVA job submitter was interrupted. Cancelling the job.")
            # Raise the object explicitly so this also works when the method
            # is called outside an `except` block.
            raise exception
        else:
            self.status = "job_submitter_error"
diff --git a/.gitlab-ci/lava/utils/lava_job_definition.py b/.gitlab-ci/lava/utils/lava_job_definition.py
deleted file mode 100644
index c7b43658cb5..00000000000
--- a/.gitlab-ci/lava/utils/lava_job_definition.py
+++ /dev/null
@@ -1,150 +0,0 @@
-from io import StringIO
-from typing import TYPE_CHECKING, Any
-
-import re
-from lava.utils.lava_farm import LavaFarm, get_lava_farm
-from ruamel.yaml.scalarstring import LiteralScalarString
-from ruamel.yaml import YAML
-from os import getenv
-
-if TYPE_CHECKING:
- from lava.lava_job_submitter import LAVAJobSubmitter
-
# How many attempts should be made when a timeout happens during LAVA device
# boot. Overridable through LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT.
NUMBER_OF_ATTEMPTS_LAVA_BOOT = int(
    getenv("LAVA_NUMBER_OF_ATTEMPTS_LAVA_BOOT", "3")
)

# Supports any integers in [0, 100].
# The scheduler considers the job priority when ordering the queue
# to consider which job should run next.
JOB_PRIORITY = int(getenv("JOB_PRIORITY", "75"))
-
-
def has_ssh_support(job_submitter: "LAVAJobSubmitter") -> bool:
    """Tell whether the job can be followed through an SSH session.

    Any non-empty LAVA_FORCE_UART value disables SSH (the variable is only
    checked for truthiness as a string). Otherwise SSH is used when the
    current farm is Collabora and the boot method is not fastboot.
    """
    if bool(getenv("LAVA_FORCE_UART", False)):
        return False

    # Only Collabora's farm supports to run docker container as a LAVA actions,
    # which is required to follow the job in a SSH section
    on_collabora_farm = get_lava_farm() == LavaFarm.COLLABORA

    # SSH job definition still needs to add support for fastboot.
    uses_fastboot: bool = job_submitter.boot_method == "fastboot"

    return on_collabora_farm and not uses_fastboot
-
-
def generate_lava_yaml_payload(job_submitter: "LAVAJobSubmitter") -> dict[str, Any]:
    """
    Dispatch to the SSH or UART job definition builder for this job.

    The strategy here, is to use LAVA with a containerized SSH session to follow
    the job output, escaping from dumping data to the UART, which proves to be
    error prone in some devices. The UART builder is used whenever
    `has_ssh_support` says SSH is not available.
    """
    # Imported here rather than at module level.
    from lava.utils.ssh_job_definition import (
        generate_lava_yaml_payload as ssh_lava_yaml,
    )
    from lava.utils.uart_job_definition import (
        generate_lava_yaml_payload as uart_lava_yaml,
    )

    build_payload = ssh_lava_yaml if has_ssh_support(job_submitter) else uart_lava_yaml
    return build_payload(job_submitter)
-
-
def generate_lava_job_definition(job_submitter: "LAVAJobSubmitter") -> str:
    """Serialize the job's YAML payload and return it as a string."""
    serializer = YAML()
    # Very wide line limit for the emitted YAML.
    serializer.width = 4096

    buffer = StringIO()
    serializer.dump(generate_lava_yaml_payload(job_submitter), buffer)
    return buffer.getvalue()
-
-
def to_yaml_block(steps_array: list[str], escape_vars=()) -> LiteralScalarString:
    """Join shell steps into one YAML literal block.

    Blank steps and steps starting with ``#`` are dropped; the rest are
    joined with newlines.

    Args:
        steps_array: Shell lines to emit.
        escape_vars: Names of environment variables whose references must be
            protected with a leading backslash. Both ``$NAME`` and
            ``${NAME}`` are handled; a longer name that merely starts with
            NAME (e.g. ``$NAME_SUFFIX``) is left untouched.

    Returns:
        The joined text wrapped in ``LiteralScalarString``.
    """

    def escape_envvar(match):
        return "\\" + match.group(0)

    kept_steps = [s for s in steps_array if s.strip() and not s.startswith("#")]
    final_str = "\n".join(kept_steps)

    for escape_var in escape_vars:
        # Escape the name so regex metacharacters in it are taken literally,
        # and match the whole identifier rather than a prefix of it.
        name = re.escape(escape_var)
        pattern = r"\$(?:" + name + r"(?![A-Za-z0-9_])|\{" + name + r"\})"
        final_str = re.sub(pattern, escape_envvar, final_str)
    return LiteralScalarString(final_str)
-
-
def generate_metadata(args) -> dict[str, Any]:
    """Build the general metadata, permissions and timeouts of a LAVA job.

    Reads ``pipeline_info``, ``device_type``, ``visibility_group``,
    ``job_timeout_min`` and ``lava_tags`` from ``args``. A ``tags`` entry is
    added only when ``args.lava_tags`` is non-empty.
    """
    action_timeouts = {
        # Could take between 1 and 1.5 min in slower boots
        "depthcharge-retry": {"minutes": 4},
        # Should take less than 1 min.
        "depthcharge-start": {"minutes": 1},
        # This timeout englobes the entire depthcharge timing,
        # including retries
        "depthcharge-action": {"minutes": 5 * NUMBER_OF_ATTEMPTS_LAVA_BOOT},
    }

    metadata = {
        "job_name": f"mesa: {args.pipeline_info}",
        "device_type": args.device_type,
        "visibility": {"group": [args.visibility_group]},
        "priority": JOB_PRIORITY,
        "context": {
            "extra_nfsroot_args": " init=/init rootwait usbcore.quirks=0bda:8153:k"
        },
        "timeouts": {
            "job": {"minutes": args.job_timeout_min},
            "actions": action_timeouts,
        },
    }

    tags = args.lava_tags
    if tags:
        metadata["tags"] = tags.split(",")

    return metadata
-
-
-def artifact_download_steps(args):
- """
- This function is responsible for setting up the SSH server in the DUT and to
- export the first boot environment to a file.
- """
- # Putting JWT pre-processing and mesa download, within init-stage1.sh file,
- # as we do with non-SSH version.
- download_steps = [
- "set -ex",
- "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
- f"{args.job_rootfs_overlay_url} | tar -xz -C /",
- f"mkdir -p {args.ci_project_dir}",
- f"curl -L --retry 4 -f --retry-all-errors --retry-delay 60 {args.build_url} | "
- f"tar --zstd -x -C {args.ci_project_dir}",
- ]
-
- # If the JWT file is provided, we will use it to authenticate with the cloud
- # storage provider and will hide it from the job output in Gitlab.
- if args.jwt_file:
- with open(args.jwt_file) as jwt_file:
- download_steps += [
- "set +x # HIDE_START",
- f'echo -n "{jwt_file.read()}" > "{args.jwt_file}"',
- "set -x # HIDE_END",
- f'echo "export CI_JOB_JWT_FILE={args.jwt_file}" >> /set-job-env-vars.sh',
- ]
- else:
- download_steps += [
- "echo Could not find jwt file, disabling S3 requests...",
- "sed -i '/S3_RESULTS_UPLOAD/d' /set-job-env-vars.sh",
- ]
-
- return download_steps
diff --git a/.gitlab-ci/lava/utils/lava_log_hints.py b/.gitlab-ci/lava/utils/lava_log_hints.py
deleted file mode 100644
index b147a8747ea..00000000000
--- a/.gitlab-ci/lava/utils/lava_log_hints.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from __future__ import annotations
-
-import re
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
- from lava.utils import LogFollower
-
-from lava.exceptions import MesaCIKnownIssueException
-from lava.utils.console_format import CONSOLE_LOG
-from lava.utils.log_section import LogSectionType
-
-
@dataclass
class LAVALogHints:
    """Scan incoming log lines for known failure signatures."""

    log_follower: LogFollower
    # True when the previously inspected line was an r8152 "Tx status -71" message.
    has_r8152_issue_history: bool = field(default=False, init=False)

    def detect_failure(self, new_lines: list[dict[str, Any]]):
        """Run every known-issue detector over each new line."""
        for log_line in new_lines:
            self.detect_r8152_issue(log_line)

    def detect_r8152_issue(self, line):
        """Raise MesaCIKnownIssueException when an r8152 Tx error is followed
        by an "nfs: server ... not responding" message.

        Only "target" level lines seen during a test case are inspected.
        """
        in_test_case = self.log_follower.phase == LogSectionType.TEST_CASE
        if in_test_case and line["lvl"] == "target":
            message = line["msg"]

            if re.search(r"r8152 \S+ eth0: Tx status -71", message):
                self.has_r8152_issue_history = True
                return

            nfs_stalled = self.has_r8152_issue_history and re.search(
                r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying",
                message,
            )
            if nfs_stalled:
                raise MesaCIKnownIssueException(
                    f"{CONSOLE_LOG['FG_MAGENTA']}"
                    "Probable network issue failure encountered, retrying the job"
                    f"{CONSOLE_LOG['RESET']}"
                )

        # NOTE(review): nesting reconstructed from a flattened source; the
        # reset is assumed to run for every line that did not return or raise.
        self.has_r8152_issue_history = False
diff --git a/.gitlab-ci/lava/utils/lava_proxy.py b/.gitlab-ci/lava/utils/lava_proxy.py
deleted file mode 100644
index 581ec46038e..00000000000
--- a/.gitlab-ci/lava/utils/lava_proxy.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import time
-import traceback
-import urllib
-import urllib.parse
-import xmlrpc
-import xmlrpc.client
-
-import lavacli
-
-from .log_follower import fatal_err, print_log
-
-
def setup_lava_proxy():
    """Create an XML-RPC proxy from the "default" lavacli configuration.

    The username and token are embedded in the server URL; proxy, timeout
    and SSL verification settings are passed to the lavacli transport.
    """
    config = lavacli.load_config("default")
    uri = config.get("uri")
    usr = config.get("username")
    tok = config.get("token")

    parsed = urllib.parse.urlparse(uri)
    authenticated_uri = f"{parsed.scheme}://{usr}:{tok}@{parsed.netloc}{parsed.path}"

    transport = lavacli.RequestsTransport(
        parsed.scheme,
        config.get("proxy"),
        config.get("timeout", 120.0),
        config.get("verify_ssl_cert", True),
    )
    proxy = xmlrpc.client.ServerProxy(
        authenticated_uri, allow_none=True, transport=transport
    )

    # Log the configured URI, not the one carrying the credentials.
    print_log(f'Proxy for {config["uri"]} created.')

    return proxy
-
-
def call_proxy(fn, *args, retries: int = 60, retry_delay: float = 15):
    """Call ``fn(*args)``, retrying on XML-RPC protocol errors.

    Args:
        fn: Callable to invoke, typically a bound XML-RPC proxy method.
        *args: Positional arguments forwarded to ``fn``.
        retries: Maximum number of attempts (keyword-only, default 60).
        retry_delay: Seconds to sleep between attempts (keyword-only,
            default 15).

    Returns:
        Whatever ``fn`` returns on the first successful attempt.

    A ProtocolError on the last attempt, or any Fault, prints the traceback
    and goes through ``fatal_err``; the Fault itself is handed to it as the
    exception.
    """
    for attempt in range(1, retries + 1):
        try:
            return fn(*args)
        except xmlrpc.client.ProtocolError as err:
            if attempt == retries:
                traceback.print_exc()
                fatal_err(f"A protocol error occurred (Err {err.errcode} {err.errmsg})")
            else:
                time.sleep(retry_delay)
        except xmlrpc.client.Fault as err:
            # Faults are not retried.
            traceback.print_exc()
            fatal_err(f"FATAL: Fault: {err.faultString} (code: {err.faultCode})", err)
diff --git a/.gitlab-ci/lava/utils/log_follower.py b/.gitlab-ci/lava/utils/log_follower.py
deleted file mode 100644
index 1fdf490bcb8..00000000000
--- a/.gitlab-ci/lava/utils/log_follower.py
+++ /dev/null
@@ -1,310 +0,0 @@
-#!/usr/bin/env python3
-#
-# Copyright (C) 2022 Collabora Limited
-# Author: Guilherme Gallo <guilherme.gallo@collabora.com>
-#
-# SPDX-License-Identifier: MIT
-
-"""
-Some utilities to analyse logs, create gitlab sections and other quality of life
-improvements
-"""
-
-import logging
-import re
-import sys
-from dataclasses import dataclass, field
-from datetime import datetime, timedelta
-from typing import Optional, Union
-
-from lava.exceptions import MesaCITimeoutError
-from lava.utils.console_format import CONSOLE_LOG
-from lava.utils.gitlab_section import GitlabSection
-from lava.utils.lava_farm import LavaFarm, get_lava_farm
-from lava.utils.lava_log_hints import LAVALogHints
-from lava.utils.log_section import (
- DEFAULT_GITLAB_SECTION_TIMEOUTS,
- FALLBACK_GITLAB_SECTION_TIMEOUT,
- LOG_SECTIONS,
- LogSectionType,
-)
-
-
@dataclass
class LogFollower:
    """Consume LAVA log lines, track GitLab sections and buffer printable output."""

    starting_section: Optional[GitlabSection] = None
    _current_section: Optional[GitlabSection] = None
    section_history: list[GitlabSection] = field(default_factory=list, init=False)
    timeout_durations: dict[LogSectionType, timedelta] = field(
        default_factory=lambda: DEFAULT_GITLAB_SECTION_TIMEOUTS,
    )
    fallback_timeout: timedelta = FALLBACK_GITLAB_SECTION_TIMEOUT
    _buffer: list[str] = field(default_factory=list, init=False)
    log_hints: LAVALogHints = field(init=False)
    # Evaluated once, when the class body is executed.
    lava_farm: LavaFarm = field(init=False, default=get_lava_farm())
    _merge_next_line: str = field(default_factory=str, init=False)

    def __post_init__(self):
        # Go through the property setter so the section history is populated.
        self.current_section = self.starting_section
        current = self._current_section
        section_is_created = bool(current)
        section_has_started = bool(current and current.has_started)
        self.log_hints = LAVALogHints(self)
        assert (
            section_is_created == section_has_started
        ), "Can't follow logs beginning from uninitialized GitLab sections."

        # Prime the fix_lava_gitlab_section_log generator so it accepts send().
        self.gl_section_fix_gen = fix_lava_gitlab_section_log()
        next(self.gl_section_fix_gen)

    @property
    def current_section(self):
        return self._current_section

    @current_section.setter
    def current_section(self, new_section: GitlabSection) -> None:
        # The section being replaced, if any, is archived first.
        previous = self._current_section
        if previous:
            self.section_history.append(previous)
        self._current_section = new_section

    @property
    def phase(self) -> LogSectionType:
        """Type of the current section, or UNKNOWN when there is none."""
        if self._current_section:
            return self._current_section.type
        return LogSectionType.UNKNOWN

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Cleanup existing buffer if this object gets out from the context"""
        self.clear_current_section()
        for pending in self.flush():
            print(pending)

    def watchdog(self):
        """Raise MesaCITimeoutError when the current section exceeded its timeout."""
        section = self._current_section
        if not section:
            return

        timeout_duration = self.timeout_durations.get(
            section.type, self.fallback_timeout
        )

        if section.delta_time() > timeout_duration:
            raise MesaCITimeoutError(
                f"Gitlab Section {section} has timed out",
                timeout_duration=timeout_duration,
            )

    def clear_current_section(self):
        """End the current section, if it is still open, and buffer its end marker."""
        # NOTE(review): nesting reconstructed from a flattened source; the
        # reset to None is assumed to happen only when a section was ended.
        section = self._current_section
        if section and not section.has_finished:
            self._buffer.append(section.end())
            self.current_section = None

    def update_section(self, new_section: GitlabSection):
        """Switch to ``new_section`` unless a section with that id is already current."""
        # Sections can have redundant regex to find them to mitigate LAVA
        # interleaving kmsg and stderr/stdout issue.
        active = self.current_section
        if active and active.id == new_section.id:
            return
        self.clear_current_section()
        self.current_section = new_section
        self._buffer.append(new_section.start())

    def manage_gl_sections(self, line):
        """Open the first section in LOG_SECTIONS whose pattern matches the line."""
        if isinstance(line["msg"], list):
            logging.debug("Ignoring messages as list. Kernel dumps.")
            return

        for log_section in LOG_SECTIONS:
            new_section = log_section.from_log_line_to_section(line)
            if new_section:
                self.update_section(new_section)
                break

    def detect_kernel_dump_line(self, line: dict[str, Union[str, list]]) -> bool:
        """Tell whether the line is a kernel message; string ones are printed in bold."""
        message = line["msg"]

        # line["msg"] can be a list[str] when there is a kernel dump
        if isinstance(message, list):
            return line["lvl"] == "debug"

        # result level has dict line["msg"]
        if not isinstance(message, str):
            return False

        # we have a line, check if it is a kernel message
        if re.search(r"\[[\d\s]{5}\.[\d\s]{6}\] +\S{2,}", message):
            print_log(f"{CONSOLE_LOG['BOLD']}{message}{CONSOLE_LOG['RESET']}")
            return True

        return False

    def remove_trailing_whitespace(self, line: dict[str, str]) -> None:
        """
        Removes a trailing `\\r\\n` from the `msg` value in the log line dictionary.

        Args:
            line: A dictionary representing a single log line.

        Note:
            LAVA treats carriage return characters as a line break, so each carriage return in an output console
            is mapped to a console line in LAVA. This method removes trailing `\\r\\n` characters from log lines.
        """
        msg: Optional[str] = line.get("msg")
        if not msg:
            return False

        if isinstance(msg, str):
            messages = [msg]
        else:
            messages = msg

        # NOTE(review): for a non-string `msg`, each iteration overwrites
        # line["msg"], so only the last element survives. Kept as in the
        # original because the later processing steps rely on it.
        for message in messages:
            # LAVA logs brings raw messages, which includes newlines characters as \r\n.
            line["msg"] = re.sub(r"\r\n$", "", message)

    def merge_carriage_return_lines(self, line: dict[str, str]) -> bool:
        """
        Merges lines that end with a carriage return character into a single line.

        Args:
            line: A dictionary representing a single log line.

        Returns:
            A boolean indicating whether the current line has been merged with the next line.

        Note:
            LAVA treats carriage return characters as a line break, so each carriage return in an output console
            is mapped to a console line in LAVA.
        """
        text = line["msg"]
        if text.endswith("\r"):
            # Hold this fragment until a line without a trailing "\r" arrives.
            self._merge_next_line += text
            return True

        pending = self._merge_next_line
        if pending:
            line["msg"] = pending + text
            self._merge_next_line = ""

        return False

    def feed(self, new_lines: list[dict[str, str]]) -> bool:
        """Input data to be processed by LogFollower instance
        Returns true if the DUT (device under test) seems to be alive.
        """

        self.watchdog()

        # No signal of job health in the log
        is_job_healthy = False

        for line in new_lines:
            self.remove_trailing_whitespace(line)

            skip_line = self.detect_kernel_dump_line(
                line
            ) or self.merge_carriage_return_lines(line)
            if skip_line:
                continue

            # At least we are fed with a non-kernel dump log, it seems that the
            # job is progressing
            is_job_healthy = True
            self.manage_gl_sections(line)
            parsed_line = self.parse_lava_line(line)
            if parsed_line:
                self._buffer.append(parsed_line)

        self.log_hints.detect_failure(new_lines)

        return is_job_healthy

    def flush(self) -> list[str]:
        """Return the buffered output lines and start a fresh buffer."""
        drained, self._buffer = self._buffer, []
        return drained

    def parse_lava_line(self, line) -> Optional[str]:
        """Format one log line for printing; returns None for skipped levels."""
        prefix = ""
        suffix = ""
        level = line["lvl"]

        if level in ("results", "feedback", "debug"):
            return
        elif level in ("warning", "error"):
            prefix = CONSOLE_LOG["FG_RED"]
            suffix = CONSOLE_LOG["RESET"]
        elif level == "input":
            prefix = "$ "
            suffix = ""
        elif level == "target" and self.lava_farm != LavaFarm.COLLABORA:
            # gl_section_fix_gen will output the stored line if it can't find a
            # match for the first split line
            # So we can recover it and put it back to the buffer
            recovered_first_line = self.gl_section_fix_gen.send(line)
            if recovered_first_line:
                self._buffer.append(recovered_first_line)

        return f'{prefix}{line["msg"]}{suffix}'
-
def fix_lava_gitlab_section_log():
    """This function is a temporary solution for the Gitlab section markers
    splitting problem. Gitlab parses the following lines to define a collapsible
    gitlab section in their log:
    - \x1b[0Ksection_start:timestamp:section_id[collapsible=true/false]\r\x1b[0Ksection_header
    - \x1b[0Ksection_end:timestamp:section_id\r\x1b[0K
    There is some problem in message passing between the LAVA dispatcher and the
    device under test (DUT), that replaces \r control characters into \n. When
    this problem is fixed on the LAVA side, one should remove this function.

    This is a generator driven with send(): each sent value is a log line
    dict whose "msg" may be edited in place. It yields False, except when a
    held first half could not be merged, in which case it yields that text.
    """
    first_half = re.compile(r"\x1b\[0K(section_\w+):(\d+):([^\s\r]+)$")
    second_half = re.compile(r"\x1b\[0K([\S ]+)?")

    while True:
        line = yield False

        if not first_half.search(line["msg"]):
            continue

        held_text = line["msg"]
        # Delete the current line and hold this log line stream to be able to
        # possibly merge it with the next line.
        line["msg"] = ""
        line = yield False

        # This code reached when we detect a possible first split line
        if second_half.search(line["msg"]):
            assert held_text
            line["msg"] = f"{held_text}\r{line['msg']}"
        else:
            # The current line doesn't match with the previous one, send back the
            # latter to give the user the chance to recover it.
            yield held_text
-
-
-
def print_log(msg: str, *args) -> None:
    """Print ``msg`` prefixed with the current time; extra args go to print()."""
    # Reset color from timestamp, since `msg` can tint the terminal color
    stamped = f"{CONSOLE_LOG['RESET']}{datetime.now()}: {msg}"
    print(stamped, *args)
-
-
def fatal_err(msg, exception=None):
    """Log ``msg`` in red, then raise ``exception`` if given, else exit with status 1."""
    red = f"{CONSOLE_LOG['FG_RED']}"
    reset = f"{CONSOLE_LOG['RESET']}"
    print_log(red, f"{msg}", reset)
    if exception:
        raise exception
    sys.exit(1)
-
-
def hide_sensitive_data(yaml_data: str, start_hide: str = "HIDE_START", end_hide: str = "HIDE_END") -> str:
    """Drop the lines between a start marker and an end marker.

    The line containing ``start_hide`` and every following line are removed
    until a line containing ``end_hide`` is found; that end line itself is
    kept. Line endings are preserved.
    """
    visible: list[str] = []
    hiding = False
    for text_line in yaml_data.splitlines(True):
        if start_hide in text_line:
            hiding = True
        elif end_hide in text_line:
            hiding = False

        if not hiding:
            visible.append(text_line)

    return "".join(visible)
diff --git a/.gitlab-ci/lava/utils/log_section.py b/.gitlab-ci/lava/utils/log_section.py
deleted file mode 100644
index 25620a6155b..00000000000
--- a/.gitlab-ci/lava/utils/log_section.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import re
-from dataclasses import dataclass
-from datetime import timedelta
-from enum import Enum, auto
-from os import getenv
-from typing import Optional, Pattern, Union
-
-from lava.utils.gitlab_section import GitlabSection
-
-
class LogSectionType(Enum):
    """Kinds of sections a job log can be in; values are auto-assigned in order."""

    UNKNOWN = auto()
    LAVA_BOOT = auto()
    TEST_DUT_SUITE = auto()
    TEST_SUITE = auto()
    TEST_CASE = auto()
    LAVA_POST_PROCESSING = auto()
-
-
# All values below are in minutes and can be overridden through the
# environment variable of the same name, unless noted otherwise.

# Empirically, successful device boot in LAVA time takes less than 3
# minutes.
# LAVA itself is configured to attempt thrice to boot the device,
# summing up to 9 minutes.
# It is better to retry the boot than cancel the job and re-submit to avoid
# the enqueue delay.
LAVA_BOOT_TIMEOUT = int(getenv("LAVA_BOOT_TIMEOUT", "9"))

# Test DUT suite phase is where the initialization happens in DUT, not on docker.
# The device will be listening to SSH session until the end of the job.
# Taken from JOB_TIMEOUT.
LAVA_TEST_DUT_SUITE_TIMEOUT = int(getenv("JOB_TIMEOUT", "60"))

# Test suite phase is where the initialization happens on docker.
LAVA_TEST_SUITE_TIMEOUT = int(getenv("LAVA_TEST_SUITE_TIMEOUT", "5"))

# Test cases may take a long time, this script has no right to interrupt
# them. But if the test case takes almost 1h, it will never succeed due to
# Gitlab job timeout.
# Taken from JOB_TIMEOUT.
LAVA_TEST_CASE_TIMEOUT = int(getenv("JOB_TIMEOUT", "60"))

# LAVA post processing may refer to a test suite teardown, or the
# adjustments to start the next test_case
LAVA_POST_PROCESSING_TIMEOUT = int(getenv("LAVA_POST_PROCESSING_TIMEOUT", "5"))

FALLBACK_GITLAB_SECTION_TIMEOUT = timedelta(minutes=10)

# Timeout per section type; UNKNOWN has no entry.
DEFAULT_GITLAB_SECTION_TIMEOUTS = {
    section_type: timedelta(minutes=minutes)
    for section_type, minutes in (
        (LogSectionType.LAVA_BOOT, LAVA_BOOT_TIMEOUT),
        (LogSectionType.TEST_DUT_SUITE, LAVA_TEST_DUT_SUITE_TIMEOUT),
        (LogSectionType.TEST_SUITE, LAVA_TEST_SUITE_TIMEOUT),
        (LogSectionType.TEST_CASE, LAVA_TEST_CASE_TIMEOUT),
        (LogSectionType.LAVA_POST_PROCESSING, LAVA_POST_PROCESSING_TIMEOUT),
    )
}
-
-
@dataclass(frozen=True)
class LogSection:
    """Rule that turns a matching log line into a GitlabSection."""

    regex: Union[Pattern, str]
    # Log levels ("lvl" values) this rule applies to.
    levels: tuple[str, ...]
    # Format templates filled with the regex match groups.
    section_id: str
    section_header: str
    section_type: LogSectionType
    collapsed: bool = False

    def __post_init__(self) -> None:
        # `("debug")` is just the string "debug"; without this, membership
        # tests would silently become substring tests.
        if isinstance(self.levels, str):
            object.__setattr__(self, "levels", (self.levels,))

    def from_log_line_to_section(
        self, lava_log_line: dict[str, str]
    ) -> Optional[GitlabSection]:
        """Build the section announced by ``lava_log_line``, if any.

        Returns:
            A new GitlabSection when the line's level is one of ``levels``
            and its message matches ``regex``; otherwise ``None``.
        """
        if lava_log_line["lvl"] not in self.levels:
            return None

        match = re.search(self.regex, lava_log_line["msg"])
        if not match:
            return None

        groups = match.groups()
        section_id = self.section_id.format(*groups)
        section_header = self.section_header.format(*groups)
        # Section types without a configured timeout use the fallback.
        timeout = DEFAULT_GITLAB_SECTION_TIMEOUTS.get(
            self.section_type, FALLBACK_GITLAB_SECTION_TIMEOUT
        )
        return GitlabSection(
            id=section_id,
            header=f"{section_header} - Timeout: {timeout}",
            type=self.section_type,
            start_collapsed=self.collapsed,
        )
-
-
# Section rules, tried in order; the first rule that matches a line wins.
# The ssh-server STARTRUN rule therefore has to precede the generic one.
LOG_SECTIONS = (
    LogSection(
        regex=re.compile(r"<?STARTTC>? ([^>]*)"),
        levels=("target", "debug"),
        section_id="{}",
        section_header="test_case {}",
        section_type=LogSectionType.TEST_CASE,
    ),
    LogSection(
        regex=re.compile(r"<?STARTRUN>? ([^>]*ssh.*server.*)"),
        # One-element tuple: the trailing comma is required.
        levels=("debug",),
        section_id="{}",
        section_header="[dut] test_suite {}",
        section_type=LogSectionType.TEST_DUT_SUITE,
    ),
    LogSection(
        regex=re.compile(r"<?STARTRUN>? ([^>]*)"),
        levels=("debug",),
        section_id="{}",
        section_header="[docker] test_suite {}",
        section_type=LogSectionType.TEST_SUITE,
    ),
    LogSection(
        regex=re.compile(r"ENDTC>? ([^>]+)"),
        levels=("target", "debug"),
        section_id="post-{}",
        section_header="Post test_case {}",
        collapsed=True,
        section_type=LogSectionType.LAVA_POST_PROCESSING,
    ),
)
diff --git a/.gitlab-ci/lava/utils/ssh_job_definition.py b/.gitlab-ci/lava/utils/ssh_job_definition.py
deleted file mode 100644
index 1308e5ca92a..00000000000
--- a/.gitlab-ci/lava/utils/ssh_job_definition.py
+++ /dev/null
@@ -1,208 +0,0 @@
-"""
-In a few words: some devices in Mesa CI have problematic serial connections; they
-may hang (become silent) intermittently. Every time one hangs for minutes, the
-job is retried, causing delays in the overall pipeline execution and ultimately
-blocking legitimate MRs from merging.
-
-To reduce reliance on UART, we explored LAVA features, such as running docker
-containers as a test alongside the DUT one, to be able to create an SSH server
-in the DUT as early as possible and an SSH client in a docker container, to
-establish an SSH session between both, allowing the console output to be passed
-via an SSH pseudo-terminal instead of relying on the error-prone UART.
-
-In more detail, we aim to use "export -p" to share the initial boot environment
-with SSH LAVA test-cases.
-The "init-stage1.sh" script handles tasks such as system mounting and network
-setup, which are necessary for allocating a pseudo-terminal under "/dev/pts".
-Although these chores are not required for establishing an SSH session, they are
-essential for proper functionality to the target script given by HWCI_SCRIPT
-environment variable.
-
-Therefore, we have divided the job definition into four parts:
-
-1. [DUT] Logging in to DUT and run the SSH server with root access.
-2. [DUT] Running the "init-stage1.sh" script for the first SSH test case.
-3. [DUT] Export the first boot environment to `/dut-env-vars.sh` file.
-4. [SSH] Enabling the pseudo-terminal for colors and running the "init-stage2.sh"
-script after sourcing "dut-env-vars.sh" again for the second SSH test case.
-"""
-
-
-from pathlib import Path
-from typing import Any
-
-from .lava_job_definition import (
- NUMBER_OF_ATTEMPTS_LAVA_BOOT,
- artifact_download_steps,
- generate_metadata,
- to_yaml_block,
-)
-
# Very early SSH server setup. Uses /dut_ready file to flag it is done.
# Merged into the boot action of the job definition.
SSH_SERVER_COMMANDS = {
    "auto_login": {
        "login_commands": ["dropbear -R -B", "touch /dut_ready"],
        "login_prompt": "ogin:",
        # To login as root, the username should be empty
        "username": "",
    },
}
-
# TODO: Extract this inline script to a shell file, like we do with
# init-stage[12].sh
# The current way is difficult to maintain because one has to deal with escaping
# characters for both Python and the resulting job definition YAML.
# Plus, it always good to lint bash scripts with shellcheck.
#
# Shell preamble run by the docker SSH client: waits for the DUT to get an
# IP, pings it, and defines the `lava_ssh_test_case` helper.
# NOTE(review): leading whitespace inside the script was reconstructed from a
# flattened source; confirm against the original file.
DOCKER_COMMANDS = [
    """set -ex
timeout 1m bash << EOF
while [ -z "$(lava-target-ip)" ]; do
    echo Waiting for DUT to join LAN;
    sleep 1;
done
EOF

ping -c 5 -w 60 $(lava-target-ip)

lava_ssh_test_case() {
    set -x
    local test_case="${1}"
    shift
    lava-test-case \"${test_case}\" --shell \\
        ssh ${SSH_PTY_ARGS:--T} \\
        -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \\
        root@$(lava-target-ip) \"${@}\"
}""",
]
-
-
def generate_dut_test(args):
    """Build the LAVA test action that runs on the DUT itself.

    The steps are the lines of the ``args.first_stage_init`` file followed
    by an export of the resulting environment to /dut-env-vars.sh.
    """
    # Commands executed on DUT.
    # Trying to execute the minimal number of commands, because the console data is
    # retrieved via UART, which is hang-prone in some devices.
    first_stage_steps: list[str] = Path(args.first_stage_init).read_text().splitlines()

    run_steps = [
        to_yaml_block(first_stage_steps),
        "export -p > /dut-env-vars.sh",  # Exporting the first boot environment
    ]
    definition = {
        "from": "inline",
        "name": "setup-ssh-server",
        "path": "inline-setup-ssh-server",
        "repository": {
            "metadata": {
                "format": "Lava-Test Test Definition 1.0",
                "name": "dut-env-export",
            },
            "run": {"steps": run_steps},
        },
    }
    return {"namespace": "dut", "definitions": [definition]}
-
-
def generate_docker_test(args):
    """Build the LAVA test action executed by the docker guest (the SSH client).

    The container waits for the DUT login flag, downloads the artifacts over
    SSH and finally runs /init-stage2.sh in a session with a pseudo-terminal.
    """
    # Commands executed by the docker guest, which will be the SSH client.
    docker_commands = [
        to_yaml_block(DOCKER_COMMANDS, escape_vars=["LAVA_TARGET_IP"]),
        "lava_ssh_test_case 'wait_for_dut_login' << EOF",
        "while [ ! -e /dut_ready ]; do sleep 1; done;",
        "EOF",
        to_yaml_block(
            (
                "lava_ssh_test_case 'artifact_download' 'bash --' << EOF",
                "source /dut-env-vars.sh",
                *artifact_download_steps(args),
                "EOF",
            )
        ),
        "export SSH_PTY_ARGS=-tt",
        # Putting CI_JOB name as the testcase name, it may help LAVA farm
        # maintainers with monitoring
        f"lava_ssh_test_case 'mesa-ci_{args.mesa_job_name}' "
        # Changing directory to /, as the HWCI_SCRIPT expects that
        "'\"cd / && /init-stage2.sh\"'",
    ]

    # LAVA test wrapping Mesa CI job in a SSH session.
    ssh_client_definition = {
        "name": "docker_ssh_client",
        "from": "inline",
        "path": "inline/docker_ssh_client.yaml",
        "repository": {
            "metadata": {
                "name": "mesa",
                "description": "Mesa test plan",
                "format": "Lava-Test Test Definition 1.0",
            },
            "run": {"steps": docker_commands},
        },
    }

    return {
        "namespace": "container",
        "timeout": {"minutes": args.job_timeout_min},
        "failure_retry": 3,
        "definitions": [ssh_client_definition],
        "docker": {
            "image": args.ssh_client_image,
        },
    }
-
-
def generate_lava_yaml_payload(args) -> dict[str, Any]:
    """Assemble the SSH-based LAVA job: metadata plus deploy, boot and two test actions."""
    values = generate_metadata(args)

    # URLs to our kernel rootfs to boot from, both generated by the base
    # container build
    kernel = {"url": f"{args.kernel_url_prefix}/{args.kernel_image_name}"}
    if args.kernel_image_type:
        kernel["type"] = args.kernel_image_type

    deploy = {
        "namespace": "dut",
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "timeout": {"minutes": 10},
        "timeouts": {"http-download": {"minutes": 2}},
        "to": "tftp",
        "os": "oe",
        "kernel": kernel,
        "nfsrootfs": {
            "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
            "compression": "zstd",
        },
    }
    if args.dtb_filename:
        deploy["dtb"] = {"url": f"{args.kernel_url_prefix}/{args.dtb_filename}.dtb"}

    # always boot over NFS
    boot = {
        "namespace": "dut",
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "method": args.boot_method,
        "commands": "nfs",
        "prompts": ["lava-shell:"],
    }
    # Adds the auto-login commands that start the SSH server.
    boot.update(SSH_SERVER_COMMANDS)

    # only declaring each job as a single 'test' since LAVA's test parsing is
    # not useful to us
    dut_test = generate_dut_test(args)
    docker_test = generate_docker_test(args)
    values["actions"] = [
        {"deploy": deploy},
        {"boot": boot},
        {"test": dut_test},
        {"test": docker_test},
    ]

    return values
diff --git a/.gitlab-ci/lava/utils/uart_job_definition.py b/.gitlab-ci/lava/utils/uart_job_definition.py
deleted file mode 100644
index cd239c3215f..00000000000
--- a/.gitlab-ci/lava/utils/uart_job_definition.py
+++ /dev/null
@@ -1,171 +0,0 @@
-from typing import Any
-from .lava_job_definition import (
- generate_metadata,
- NUMBER_OF_ATTEMPTS_LAVA_BOOT,
- artifact_download_steps,
-)
-
-
# Docker image used by the fastboot postprocess, deploy and boot actions.
_HEALTH_CHECK_DOCKER_IMAGE = "registry.gitlab.collabora.com/lava/health-check-docker"


def _nfs_rootfs(args) -> dict[str, str]:
    """Return the NFS rootfs entry (URL and compression) for a deploy action."""
    return {
        "url": f"{args.rootfs_url_prefix}/lava-rootfs.tar.zst",
        "compression": "zstd",
    }


def _kernel_image(args) -> dict[str, Any]:
    """Return the kernel image entry, with its type when one is configured."""
    kernel = {"url": f"{args.kernel_url_prefix}/{args.kernel_image_name}"}
    if args.kernel_image_type:
        kernel["type"] = args.kernel_image_type
    return kernel


def _dtb_image(args) -> dict[str, str]:
    """Return the device tree blob entry built from ``args.dtb_filename``."""
    return {"url": f"{args.kernel_url_prefix}/{args.dtb_filename}.dtb"}


def _fastboot_actions(args) -> list[dict[str, Any]]:
    """Return the three deploy actions and the boot action of the fastboot flow."""
    deploy_nfs = {
        "timeout": {"minutes": 10},
        "to": "nfs",
        "nfsrootfs": _nfs_rootfs(args),
    }

    images = {"kernel": _kernel_image(args)}
    if args.dtb_filename:
        images["dtb"] = _dtb_image(args)

    deploy_prepare = {
        "timeout": {"minutes": 5},
        "to": "downloads",
        "os": "oe",
        "images": images,
        "postprocess": {
            "docker": {
                "image": _HEALTH_CHECK_DOCKER_IMAGE,
                "steps": [
                    # NOTE(review): this step names "Image.gz" and
                    # "<dtb_filename>.dtb" unconditionally, even though the
                    # dtb download above is optional -- confirm fastboot
                    # jobs always provide a dtb and an Image.gz kernel.
                    f"cat Image.gz {args.dtb_filename}.dtb > Image.gz+dtb",
                    "mkbootimg --kernel Image.gz+dtb"
                    ' --cmdline "root=/dev/nfs rw nfsroot=$NFS_SERVER_IP:$NFS_ROOTFS,tcp,hard rootwait ip=dhcp init=/init"'
                    " --pagesize 4096 --base 0x80000000 -o boot.img",
                ],
            }
        },
    }

    deploy_flash = {
        "timeout": {"minutes": 2},
        "to": "fastboot",
        "docker": {"image": _HEALTH_CHECK_DOCKER_IMAGE},
        "images": {
            "boot": {"url": "downloads://boot.img"},
        },
    }

    boot = {
        "timeout": {"minutes": 2},
        "docker": {"image": _HEALTH_CHECK_DOCKER_IMAGE},
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "method": args.boot_method,
        "prompts": ["lava-shell:"],
        "commands": ["set_active a"],
    }

    return [
        {"deploy": deploy_nfs},
        {"deploy": deploy_prepare},
        {"deploy": deploy_flash},
        {"boot": boot},
    ]


def _tftp_actions(args) -> list[dict[str, Any]]:
    """Return the deploy action and the boot action of the tftp flow."""
    deploy = {
        "timeout": {"minutes": 5},
        "to": "tftp",
        "os": "oe",
        "kernel": _kernel_image(args),
        "nfsrootfs": _nfs_rootfs(args),
    }
    if args.dtb_filename:
        deploy["dtb"] = _dtb_image(args)

    boot = {
        "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
        "method": args.boot_method,
        "prompts": ["lava-shell:"],
        "commands": "nfs",
    }

    return [{"deploy": deploy}, {"boot": boot}]


def _run_steps(args) -> list[str]:
    """Return the shell lines executed by the inline test definition.

    The script is, in order:
    - the inlined first-stage init script (``args.first_stage_init``)
    - fetch and unpack per-pipeline build artifacts from the build job
    - fetch and unpack the per-job environment
    - exec /init-stage2.sh
    """
    # Inline the init script, dropping blank lines and lines that start with
    # "#". The encoding is explicit so the result does not depend on the
    # locale of the machine submitting the job.
    with open(args.first_stage_init, "r", encoding="utf-8") as init_sh:
        steps = [
            line.rstrip()
            for line in init_sh
            if not line.startswith("#") and line.rstrip()
        ]

    # We cannot distribute the Adreno 660 shader firmware inside rootfs,
    # since the license isn't bundled inside the repository
    if args.device_type == "sm8350-hdk":
        steps.append(
            "curl -L --retry 4 -f --retry-all-errors --retry-delay 60 "
            "https://github.com/allahjasif1990/hdk888-firmware/raw/main/a660_zap.mbn "
            '-o "/lib/firmware/qcom/sm8350/a660_zap.mbn"'
        )

    steps += artifact_download_steps(args)

    steps += [
        f"mkdir -p {args.ci_project_dir}",
        f"curl {args.build_url} | tar --zstd -x -C {args.ci_project_dir}",
        # Sleep a bit to give time for bash to dump shell xtrace messages into
        # console which may cause interleaving with LAVA_SIGNAL_STARTTC in some
        # devices like a618.
        "sleep 1",
        # Putting CI_JOB name as the testcase name, it may help LAVA farm
        # maintainers with monitoring
        f"lava-test-case 'mesa-ci_{args.mesa_job_name}' --shell /init-stage2.sh",
    ]
    return steps


def _test_action(args, run_steps: list[str]) -> dict[str, Any]:
    """Return the skeleton test action wrapping ``run_steps`` as one inline test."""
    return {
        "timeout": {"minutes": args.job_timeout_min},
        "failure_retry": 1,
        "definitions": [
            {
                "name": "mesa",
                "from": "inline",
                "lava-signal": "kmsg",
                "path": "inline/mesa.yaml",
                "repository": {
                    "metadata": {
                        "name": "mesa",
                        "description": "Mesa test plan",
                        "os": ["oe"],
                        "scope": ["functional"],
                        "format": "Lava-Test Test Definition 1.0",
                    },
                    "run": {"steps": run_steps},
                },
            }
        ],
    }


def generate_lava_yaml_payload(args) -> dict[str, Any]:
    """Assemble the LAVA job payload for the fastboot or tftp boot flow.

    Starts from the dictionary returned by ``generate_metadata(args)`` and
    stores the list of deploy/boot/test actions under its ``"actions"`` key.

    Args:
        args: object exposing ``rootfs_url_prefix``, ``kernel_url_prefix``,
            ``kernel_image_name``, ``kernel_image_type``, ``dtb_filename``,
            ``boot_method``, ``job_timeout_min``, ``first_stage_init``,
            ``device_type``, ``ci_project_dir``, ``build_url`` and
            ``mesa_job_name``; it is also forwarded to ``generate_metadata``
            and ``artifact_download_steps``.

    Returns:
        The metadata dictionary, extended with the ``"actions"`` list.

    Raises:
        OSError: if ``args.first_stage_init`` cannot be opened.
    """
    values = generate_metadata(args)

    # Only declaring each job as a single 'test' since LAVA's test parsing is
    # not useful to us.
    test = _test_action(args, _run_steps(args))

    # Any boot method other than "fastboot" is treated as tftp.
    if args.boot_method == "fastboot":
        boot_flow = _fastboot_actions(args)
    else:  # tftp
        boot_flow = _tftp_actions(args)

    values["actions"] = [*boot_flow, {"test": test}]
    return values