load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")
load("@aspect_bazel_lib//lib:tar.bzl", "tar")

# Bash preamble prepended to every generated wrapper script. It locates the
# runfiles tree next to the script (when RUNFILES_DIR is not already set),
# then sources the standard Bazel Bash runfiles library so `rlocation` is
# available to the rest of the script.
RUNFILES_PREFIX = """
# ================= BEGIN RUNFILES INIT =================
# TODO should this be extracted to shared init script
# Get the directory where the script is located
if [[ -z "${RUNFILES_DIR:-}" ]]; then
    SCRIPT_DIR="$(readlink -f "${BASH_SOURCE[0]}")"
    # Set RUNFILES_DIR relative to the script location
    export RUNFILES_DIR="${SCRIPT_DIR}.runfiles"
fi

# --- begin runfiles.bash initialization v3 ---
# Copy-pasted from the Bazel Bash runfiles library v3.
set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
  source $f || \
  source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
  source "$0.runfiles/$f" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
# --- end runfiles.bash initialization v3 ---
# ================== END RUNFILES INIT ==================
"""

def databuild_job(
        name,
        configure,
        execute,
        visibility = None):
    """Creates a DataBuild job target with configuration and execution capabilities.

    Expands into three targets:
      * `<name>.cfg`: wrapper that runs the configure step
      * `<name>.exec`: wrapper that runs the execute step
      * `<name>`: pipes the configure output into the execute step

    Args:
        name: Name of the job target
        configure: Target that implements the configuration logic
        execute: Target that implements the execution logic
        visibility: Visibility specification
    """
    _databuild_job_cfg_rule(
        name = name + ".cfg",
        configure = configure,
        visibility = visibility,
    )

    # Create the main rule that serves as a provider for other targets
    _databuild_job_exec_rule(
        name = name + ".exec",
        execute = execute,
        visibility = visibility,
    )

    # Create a job target that configures then executes
    _databuild_job_rule(
        name = name,
        configure = ":%s.cfg" % name,
        execute = ":%s.exec" % name,
        visibility = visibility,
    )

def _databuild_job_cfg_impl(ctx):
    """Wraps the configure executable in a runfiles-aware launcher script."""
    configure_file = ctx.executable.configure
    configure_path = ctx.attr.configure.files_to_run.executable.path

    script = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": configure_path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": "",
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [configure_file],
    ).merge(ctx.attr.configure.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_job_cfg_rule = rule(
    implementation = _databuild_job_cfg_impl,
    attrs = {
        "configure": attr.label(
            doc = "Target that implements the configuration logic",
            executable = True,
            cfg = "target",
            mandatory = True,
        ),
        "_template": attr.label(
            default = "@databuild//runtime:simple_executable_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)

def _databuild_job_exec_impl(ctx):
    """Wraps the execute executable (plus jq) in a runfiles-aware launcher script."""
    execute_file = ctx.executable.execute
    jq_file = ctx.executable._jq

    script = ctx.actions.declare_file(ctx.label.name)

    # Get the correct runfiles paths
    jq_path = ctx.attr._jq.files_to_run.executable.path
    execute_path = ctx.attr.execute.files_to_run.executable.path

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{JQ_PATH}": jq_path,
            "%{EXECUTE_PATH}": execute_path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [jq_file, execute_file],
    ).merge(ctx.attr.execute.default_runfiles).merge(ctx.attr._jq.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

# Define the provider
DataBuildJobInfo = provider(
    doc = "Information about a DataBuild job",
    fields = {
        "configure": "Target that implements the configuration logic",
        "execute": "Target that implements the execution logic",
        "deps": "List of dependencies (other DataBuildJobInfo providers)",
    },
)

_databuild_job_exec_rule = rule(
    implementation = _databuild_job_exec_impl,
    attrs = {
        "execute": attr.label(
            doc = "Target that implements the execution logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "_template": attr.label(
            default = "@databuild//job:execute_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_jq": attr.label(
            default = "@databuild//runtime:jq",
            executable = True,
            cfg = "target",
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)

def _databuild_job_impl(ctx):
    """Wraps the configure and execute targets in a shell script."""
    script = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.write(
        output = script,
        is_executable = True,
        # FIX: added the "#!/bin/bash" shebang (matching the script written by
        # _databuild_graph_build_impl) and quoted "$@" so arguments containing
        # whitespace are forwarded to the configure binary intact.
        content = "#!/bin/bash\n\n" + RUNFILES_PREFIX + """
$(rlocation _main/{configure_path}) "$@" | $(rlocation _main/{execute_path})
""".format(
            configure_path = ctx.attr.configure.files_to_run.executable.short_path,
            execute_path = ctx.attr.execute.files_to_run.executable.short_path,
        ),
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable.execute, ctx.executable.configure],
    ).merge(ctx.attr.execute.default_runfiles).merge(ctx.attr.configure.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
        DataBuildJobInfo(
            configure = ctx.attr.configure,
            # NOTE: `execute` carries the generated wrapper *file* (not a
            # Target); _databuild_graph_exec_impl stages it directly into
            # runfiles as a File.
            execute = script,
        ),
    ]

_databuild_job_rule = rule(
    implementation = _databuild_job_impl,
    attrs = {
        "configure": attr.label(
            doc = "Target that implements the configuration logic",
            mandatory = True,
            executable = True,
            # TODO all these cfg=execs are probably a problem for deployment
            cfg = "target",
        ),
        "execute": attr.label(
            doc = "Target that implements the execution logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

def databuild_graph(name, jobs, lookup, visibility = None):
    """Creates a databuild graph target.

    Expands into lookup/analyze/mermaid/exec/build wrapper targets plus a
    tar + OCI image + image-load target for containerized deployment.

    Args:
        name: Base name for all generated targets
        jobs: List of databuild_job targets that are candidates for this graph
        lookup: Target that maps desired partition refs to jobs
        visibility: Visibility specification
    """
    _databuild_graph_lookup(
        name = "%s.lookup" % name,
        lookup = lookup,
        visibility = visibility,
    )
    _databuild_graph_analyze(
        name = "%s.analyze" % name,
        lookup = "%s.lookup" % name,
        jobs = jobs,
        visibility = visibility,
    )
    _databuild_graph_mermaid(
        name = "%s.mermaid" % name,
        lookup = "%s.lookup" % name,
        jobs = jobs,
        visibility = visibility,
    )
    _databuild_graph_exec(
        name = "%s.exec" % name,
        jobs = jobs,
        visibility = visibility,
    )
    _databuild_graph_build(
        name = "%s.build" % name,
        analyze = "%s.analyze" % name,
        exec = "%s.exec" % name,
        jobs = jobs,
        visibility = visibility,
    )
    tar(
        name = "%s.tar" % name,
        srcs = [":%s.build" % name],
        visibility = visibility,
    )
    oci_image(
        name = "%s.image" % name,
        base = "@debian",
        cmd = ["/%s.build" % name],
        tars = [":%s.tar" % name],
        visibility = visibility,
    )
    oci_load(
        name = "%s.load" % name,
        image = ":%s.image" % name,
        visibility = visibility,
        repo_tags = ["databuild_%s:latest" % name],
    )

# TODO there feels like a lot of boilerplate around wrapping a target with a script - can this be simplified?
def _databuild_graph_lookup_impl(ctx):
    """Wraps the lookup executable in a runfiles-aware launcher script."""
    script = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": "",
            "%{EXECUTABLE_PATH}": ctx.attr.lookup.files_to_run.executable.path,
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup],
    ).merge(ctx.attr.lookup.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_lookup = rule(
    implementation = _databuild_graph_lookup_impl,
    attrs = {
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "_template": attr.label(
            default = "@databuild//runtime:simple_executable_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)

def _graph_planner_impl(ctx, mode):
    """Shared implementation for the analyze and mermaid wrappers.

    The two previous implementations were near-identical copies differing only
    in DATABUILD_MODE; this helper deduplicates them.

    Args:
        ctx: Rule context (expects `lookup`, `jobs`, `_analyze`, `_template`,
            `_bash_runfiles` attributes).
        mode: Value for the DATABUILD_MODE env var ("plan" or "mermaid").

    Returns:
        A list with a DefaultInfo for the generated wrapper script.
    """
    script = ctx.actions.declare_file(ctx.label.name)

    # Map each candidate job's label to the runtime location of its configure
    # executable, serialized as an escaped JSON object for the wrapper script.
    config_paths = {
        "//" + job.label.package + ":" + job.label.name: "$(rlocation _main/" + job[DataBuildJobInfo].configure.files_to_run.executable.short_path + ")"
        for job in ctx.attr.jobs
    }
    config_paths_str = "{" + ",".join(['\\"%s\\":\\"%s\\"' % (k, v) for k, v in config_paths.items()]) + "}"

    # NOTE(fix): a comma-separated list of job labels used to be built here as
    # well, but it was never used — DATABUILD_CANDIDATE_JOBS has always carried
    # the JSON map of label -> configure path. The dead computation is removed;
    # the exported value is unchanged.
    env_setup = """
export DATABUILD_CANDIDATE_JOBS="{candidate_job_env_var}"
export DATABUILD_MODE={mode}
export DATABUILD_JOB_LOOKUP_PATH=$(rlocation _main/{lookup_path})
""".format(
        candidate_job_env_var = config_paths_str,
        mode = mode,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
    )

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": ctx.attr._analyze.files_to_run.executable.path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": env_setup,
        },
        is_executable = True,
    )

    # Gather the configure executables
    configure_executables = [
        job[DataBuildJobInfo].configure.files_to_run.executable
        for job in ctx.attr.jobs
    ]

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup, ctx.executable._analyze] + configure_executables,
    ).merge(ctx.attr.lookup.default_runfiles).merge(ctx.attr._analyze.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all configure targets
    for job in ctx.attr.jobs:
        runfiles = runfiles.merge(job[DataBuildJobInfo].configure.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

def _databuild_graph_analyze_impl(ctx):
    """Generates the graph-analysis ("plan" mode) wrapper script."""
    return _graph_planner_impl(ctx, "plan")

def _databuild_graph_mermaid_impl(ctx):
    """Generates the mermaid-diagram wrapper script."""
    return _graph_planner_impl(ctx, "mermaid")

# Attributes shared by the analyze and mermaid rules (both drive the same
# analyzer binary, selecting behavior via DATABUILD_MODE).
_GRAPH_PLANNER_ATTRS = {
    "lookup": attr.label(
        doc = "Target that implements job lookup for desired partition refs",
        mandatory = True,
        executable = True,
        cfg = "target",
    ),
    "jobs": attr.label_list(
        doc = "The list of jobs that are candidates for building partitions in this databuild graph",
        allow_empty = False,
    ),
    "_template": attr.label(
        default = "@databuild//graph:go_analyze_wrapper.sh.tpl",
        allow_single_file = True,
    ),
    "_bash_runfiles": attr.label(
        default = Label("@bazel_tools//tools/bash/runfiles"),
        allow_files = True,
    ),
    "_analyze": attr.label(
        default = "@databuild//graph:analyze",
        executable = True,
        cfg = "target",
    ),
}

_databuild_graph_analyze = rule(
    implementation = _databuild_graph_analyze_impl,
    attrs = dict(_GRAPH_PLANNER_ATTRS),
    executable = True,
)

_databuild_graph_mermaid = rule(
    implementation = _databuild_graph_mermaid_impl,
    attrs = dict(_GRAPH_PLANNER_ATTRS),
    executable = True,
)

def _databuild_graph_exec_impl(ctx):
    """Wraps the graph execute binary, staging every job's execute wrapper."""
    script = ctx.actions.declare_file(ctx.label.name)

    # Each job's DataBuildJobInfo.execute is the generated wrapper *file*
    # produced by _databuild_job_impl, so it is staged directly as a File.
    execute_executables = [
        job[DataBuildJobInfo].execute
        for job in ctx.attr.jobs
    ]

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": ctx.attr._execute.files_to_run.executable.path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": "",
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable._execute] + execute_executables,
    ).merge(ctx.attr._execute.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all execute targets. This is a no-op when `execute`
    # is a plain File (Files have no default_runfiles); the hasattr guard keeps
    # compatibility with providers that carry a Target instead.
    for job in ctx.attr.jobs:
        execute_target = job[DataBuildJobInfo].execute
        if hasattr(execute_target, "default_runfiles"):
            runfiles = runfiles.merge(execute_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_exec = rule(
    implementation = _databuild_graph_exec_impl,
    attrs = {
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "_template": attr.label(
            default = "@databuild//graph:go_exec_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_execute": attr.label(
            default = "@databuild//graph:execute",
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

DataBuildGraphInfo = provider(
    doc = "Information about a DataBuild graph",
    fields = {
        "analyze": "Target that implements the graph analysis logic",
        "exec": "Target that implements the graph execution logic",
        "jobs": "List of jobs that are candidates for building partitions in this databuild graph",
    },
)

def _databuild_graph_build_impl(ctx):
    """Wraps the analyze and execute targets in a shell script."""
    script = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.write(
        output = script,
        is_executable = True,
        # FIX: "$@" is quoted so arguments with whitespace reach the analyze
        # binary intact (previously unquoted and subject to word splitting).
        content = "#!/bin/bash\n\n" + RUNFILES_PREFIX + """
# TODO these rlocation commands resolve to nothing - does it expect to be
$(rlocation _main/{analyze_path}) "$@" | $(rlocation _main/{exec_path})
""".format(
            analyze_path = ctx.attr.analyze.files_to_run.executable.short_path,
            exec_path = ctx.attr.exec.files_to_run.executable.short_path,
        ),
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable.analyze, ctx.executable.exec],
    ).merge(ctx.attr.analyze.default_runfiles).merge(ctx.attr.exec.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
        DataBuildGraphInfo(
            analyze = ctx.attr.analyze,
            exec = ctx.attr.exec,
            jobs = ctx.attr.jobs,
        ),
    ]

_databuild_graph_build = rule(
    implementation = _databuild_graph_build_impl,
    attrs = {
        "analyze": attr.label(
            doc = "Target that implements the graph analysis logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "exec": attr.label(
            doc = "Target that implements the graph execution logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
    },
    executable = True,
)