summaryrefslogtreecommitdiff
path: root/.gitlab-ci/lava/lava_job_submitter.py
diff options
context:
space:
mode:
Diffstat (limited to '.gitlab-ci/lava/lava_job_submitter.py')
-rwxr-xr-x.gitlab-ci/lava/lava_job_submitter.py127
1 files changed, 91 insertions, 36 deletions
diff --git a/.gitlab-ci/lava/lava_job_submitter.py b/.gitlab-ci/lava/lava_job_submitter.py
index 39d07deac40..064219de782 100755
--- a/.gitlab-ci/lava/lava_job_submitter.py
+++ b/.gitlab-ci/lava/lava_job_submitter.py
@@ -18,7 +18,7 @@ from collections import defaultdict
from dataclasses import dataclass, fields
from datetime import datetime, timedelta
from io import StringIO
-from os import getenv
+from os import environ, getenv, path
from typing import Any, Optional
import fire
@@ -282,6 +282,7 @@ def print_job_final_status(job):
def execute_job_with_retries(
proxy, job_definition, retry_count, jobs_log
) -> Optional[LAVAJob]:
+ last_failed_job = None
for attempt_no in range(1, retry_count + 2):
# Need to get the logger value from its object to enable autosave
# features, if AutoSaveDict is enabled from StructuredLogging module
@@ -290,41 +291,52 @@ def execute_job_with_retries(
job = LAVAJob(proxy, job_definition, job_log)
STRUCTURAL_LOG["dut_attempt_counter"] = attempt_no
try:
+ job_log["submitter_start_time"] = datetime.now().isoformat()
submit_job(job)
wait_for_job_get_started(job)
log_follower: LogFollower = bootstrap_log_follower()
follow_job_execution(job, log_follower)
return job
+
except (MesaCIException, KeyboardInterrupt) as exception:
job.handle_exception(exception)
+
+ finally:
+ print_job_final_status(job)
+ # If LAVA takes too long to post process the job, the submitter
+ # gives up and proceeds.
+ job_log["submitter_end_time"] = datetime.now().isoformat()
+ last_failed_job = job
print_log(
f"{CONSOLE_LOG['BOLD']}"
f"Finished executing LAVA job in the attempt #{attempt_no}"
f"{CONSOLE_LOG['RESET']}"
)
- finally:
- job_log["finished_time"] = datetime.now().isoformat()
- print_job_final_status(job)
+
+ return last_failed_job
def retriable_follow_job(proxy, job_definition) -> LAVAJob:
number_of_retries = NUMBER_OF_RETRIES_TIMEOUT_DETECTION
- if finished_job := execute_job_with_retries(
+ last_attempted_job = execute_job_with_retries(
proxy, job_definition, number_of_retries, STRUCTURAL_LOG["dut_jobs"]
- ):
- return finished_job
-
- # Job failed in all attempts
- raise MesaCIRetryError(
- f"{CONSOLE_LOG['BOLD']}"
- f"{CONSOLE_LOG['FG_RED']}"
- "Job failed after it exceeded the number of "
- f"{number_of_retries} retries."
- f"{CONSOLE_LOG['RESET']}",
- retry_count=number_of_retries,
)
+ if last_attempted_job.exception is not None:
+ # Infra failed in all attempts
+ raise MesaCIRetryError(
+ f"{CONSOLE_LOG['BOLD']}"
+ f"{CONSOLE_LOG['FG_RED']}"
+ "Job failed after it exceeded the number of "
+ f"{number_of_retries} retries."
+ f"{CONSOLE_LOG['RESET']}",
+ retry_count=number_of_retries,
+ last_job=last_attempted_job,
+ )
+
+ return last_attempted_job
+
@dataclass
class PathResolver:
@@ -371,20 +383,9 @@ class LAVAJobSubmitter(PathResolver):
return
self.__structured_log_context = StructuredLoggerWrapper(self).logger_context()
+ self.proxy = setup_lava_proxy()
- def dump(self, job_definition):
- if self.dump_yaml:
- with GitlabSection(
- "yaml_dump",
- "LAVA job definition (YAML)",
- type=LogSectionType.LAVA_BOOT,
- start_collapsed=True,
- ):
- print(hide_sensitive_data(job_definition))
-
- def submit(self):
- proxy = setup_lava_proxy()
-
+ def __prepare_submission(self) -> str:
# Overwrite the timeout for the testcases with the value offered by the
# user. The testcase running time should be at least 4 times greater than
# the other sections (boot and setup), so we can safely ignore them.
@@ -398,27 +399,81 @@ class LAVAJobSubmitter(PathResolver):
lava_yaml.dump(generate_lava_yaml_payload(self), job_definition_stream)
job_definition = job_definition_stream.getvalue()
- self.dump(job_definition)
+ if self.dump_yaml:
+ self.dump_job_definition(job_definition)
- job = LAVAJob(proxy, job_definition)
- if errors := job.validate():
+ validation_job = LAVAJob(self.proxy, job_definition)
+ if errors := validation_job.validate():
fatal_err(f"Error in LAVA job definition: {errors}")
print_log("LAVA job definition validated successfully")
+ return job_definition
+
+ @classmethod
+ def is_under_ci(cls):
+ ci_envvar: str = getenv("CI", "false")
+ return ci_envvar.lower() == "true"
+
+ def dump_job_definition(self, job_definition) -> None:
+ with GitlabSection(
+ "yaml_dump",
+ "LAVA job definition (YAML)",
+ type=LogSectionType.LAVA_BOOT,
+ start_collapsed=True,
+ ):
+ print(hide_sensitive_data(job_definition))
+
+ def submit(self) -> None:
+ """
+ Prepares and submits the LAVA job.
+ If `validate_only` is True, it validates the job without submitting it.
+ If the job finishes with a non-pass status or encounters an exception,
+ the program exits with a non-zero return code.
+ """
+ job_definition: str = self.__prepare_submission()
+
if self.validate_only:
return
with self.__structured_log_context:
+ last_attempt_job = None
try:
- finished_job = retriable_follow_job(proxy, job_definition)
- exit_code = 0 if finished_job.status == "pass" else 1
- STRUCTURAL_LOG["job_combined_status"] = job.status
- sys.exit(exit_code)
+ last_attempt_job = retriable_follow_job(self.proxy, job_definition)
+
+ except MesaCIRetryError as retry_exception:
+ last_attempt_job = retry_exception.last_job
except Exception as exception:
STRUCTURAL_LOG["job_combined_fail_reason"] = str(exception)
raise exception
+ finally:
+ self.finish_script(last_attempt_job)
+
+ def print_log_artifact_url(self):
+ base_url = "https://$CI_PROJECT_ROOT_NAMESPACE.pages.freedesktop.org/"
+ artifacts_path = "-/$CI_PROJECT_NAME/-/jobs/$CI_JOB_ID/artifacts/"
+ relative_log_path = self.structured_log_file.relative_to(pathlib.Path.cwd())
+ full_path = f"{base_url}{artifacts_path}{relative_log_path}"
+ artifact_url = path.expandvars(full_path)
+
+ print_log(f"Structural Logging data available at: {artifact_url}")
+
+ def finish_script(self, last_attempt_job):
+ if self.is_under_ci() and self.structured_log_file:
+ self.print_log_artifact_url()
+
+ if not last_attempt_job:
+ # No job was run, something bad happened
+ STRUCTURAL_LOG["job_combined_status"] = "script_crash"
+ current_exception = str(sys.exc_info()[0])
+ STRUCTURAL_LOG["job_combined_fail_reason"] = current_exception
+ raise SystemExit(1)
+
+ STRUCTURAL_LOG["job_combined_status"] = last_attempt_job.status
+
+ if last_attempt_job.status != "pass":
+ raise SystemExit(1)
class StructuredLoggerWrapper:
def __init__(self, submitter: LAVAJobSubmitter) -> None: