load("@aspect_bazel_lib//lib:tar.bzl", "tar")
|
|
load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")
|
|
|
|
RUNFILES_PREFIX = """
|
|
# ================= BEGIN RUNFILES INIT =================
|
|
|
|
SCRIPT_PATH="$(realpath "$0")"
|
|
|
|
# TODO should this be extracted to shared init script
|
|
# Get the directory where the script is located
|
|
if [[ -z "${RUNFILES_DIR:-}" ]]; then
|
|
SCRIPT_DIR="$(readlink -f "${BASH_SOURCE[0]}")"
|
|
# Set RUNFILES_DIR relative to the script location
|
|
export RUNFILES_DIR="${SCRIPT_DIR}.runfiles"
|
|
fi
|
|
|
|
# --- begin runfiles.bash initialization v3 ---
|
|
# Copy-pasted from the Bazel Bash runfiles library v3.
|
|
set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash
|
|
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
|
|
source $f || \
|
|
source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
|
|
source "$0.runfiles/$f" 2>/dev/null || \
|
|
source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
|
|
source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
|
|
{ echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
|
|
# --- end runfiles.bash initialization v3 ---
|
|
|
|
# ================== END RUNFILES INIT ==================
|
|
|
|
"""
|
|
|
|
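# Every generated wrapper below starts with RUNFILES_PREFIX and then resolves its
# binaries through `rlocation`. A minimal sketch of the resulting script shape
# (illustrative only; the binary path is hypothetical):
#
#     <RUNFILES_PREFIX>
#     exec "$(rlocation _main/path/to/some_binary)" "$@"
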
def databuild_job(
        name,
        binary,
        visibility = None):
    """Creates a DataBuild job target with configuration and execution capabilities.

    Args:
        name: Name of the job target
        binary: Single binary target that handles both config and exec via subcommands
        visibility: Visibility specification
    """

    # Single binary approach - use subcommands
    _databuild_job_cfg_rule(
        name = name + ".cfg",
        configure = binary,
        visibility = visibility,
    )

    _databuild_job_exec_rule(
        name = name + ".exec",
        execute = binary,
        visibility = visibility,
    )

    # Create a job target that configures then executes
    _databuild_job_rule(
        name = name,
        configure = ":%s.cfg" % name,
        execute = ":%s.exec" % name,
        visibility = visibility,
    )

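# Example BUILD usage (illustrative; ":my_job_bin" is a hypothetical binary that
# understands the "config" and "exec" subcommands):
#
#     databuild_job(
#         name = "my_job",
#         binary = ":my_job_bin",
#     )
#
# This declares ":my_job.cfg", ":my_job.exec", and the combined ":my_job" target
# that pipes the configure output into the execute wrapper.
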
def _databuild_job_cfg_impl(ctx):
    configure_file = ctx.executable.configure
    configure_path = ctx.attr.configure.files_to_run.executable.path
    script = ctx.actions.declare_file(ctx.label.name)

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": configure_path,
            "%{EXECUTABLE_SHORT_PATH}": ctx.attr.configure.files_to_run.executable.short_path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": "EXECUTABLE_SUBCOMMAND=\"config\"\n",
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [configure_file],
    ).merge(ctx.attr.configure.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_job_cfg_rule = rule(
    implementation = _databuild_job_cfg_impl,
    attrs = {
        "configure": attr.label(
            doc = "Target that implements the configuration logic",
            executable = True,
            cfg = "target",
            mandatory = True,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)

def _databuild_job_exec_impl(ctx):
    execute_file = ctx.executable.execute
    wrapper_file = ctx.executable._job_wrapper

    script = ctx.actions.declare_file(ctx.label.name)

    # Get the correct runfiles paths
    wrapper_path = ctx.attr._job_wrapper.files_to_run.executable.path
    execute_path = ctx.attr.execute.files_to_run.executable.path

    # Create a simple script that calls the job wrapper with the original binary
    script_content = RUNFILES_PREFIX + """
export DATABUILD_JOB_BINARY="$(rlocation _main/{execute_path})"
"$(rlocation _main/{wrapper_path})" exec "$@"
""".format(
        execute_path = ctx.attr.execute.files_to_run.executable.short_path,
        wrapper_path = ctx.attr._job_wrapper.files_to_run.executable.short_path,
    )

    ctx.actions.write(
        output = script,
        content = script_content,
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [wrapper_file, execute_file],
    ).merge(ctx.attr.execute.default_runfiles).merge(ctx.attr._job_wrapper.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

# Define the provider
DataBuildJobInfo = provider(
    doc = "Information about a DataBuild job",
    fields = {
        "configure": "Target that implements the configuration logic",
        "execute": "Target that implements the execution logic",
        "deps": "List of dependencies (other DataBuildJobInfo providers)",
    },
)

_databuild_job_exec_rule = rule(
    implementation = _databuild_job_exec_impl,
    attrs = {
        "execute": attr.label(
            doc = "Target that implements the execution logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "_job_wrapper": attr.label(
            default = "@databuild//databuild/job:job_wrapper",
            executable = True,
            cfg = "target",
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)

def _databuild_job_impl(ctx):
    """Wraps the configure and execute targets in a shell script."""
    script = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.write(
        output = script,
        is_executable = True,
        content = RUNFILES_PREFIX + """
"$(rlocation _main/{configure_path})" "$@" | "$(rlocation _main/{execute_path})"
""".format(
            configure_path = ctx.attr.configure.files_to_run.executable.short_path,
            execute_path = ctx.attr.execute.files_to_run.executable.short_path,
        ),
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable.execute, ctx.executable.configure],
    ).merge(ctx.attr.execute.default_runfiles).merge(ctx.attr.configure.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
        DataBuildJobInfo(
            configure = ctx.attr.configure,
            execute = script,
        ),
    ]

_databuild_job_rule = rule(
    implementation = _databuild_job_impl,
    attrs = {
        "configure": attr.label(
            doc = "Target that implements the configuration logic",
            mandatory = True,
            executable = True,
            # TODO all these cfg settings are probably a problem for deployment
            cfg = "target",
        ),
        "execute": attr.label(
            doc = "Target that implements the execution logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

def databuild_graph(name, jobs, lookup, visibility = None):
    """Creates a databuild graph target.

    Args:
        name: Name of the graph target
        jobs: List of job targets
        lookup: Job lookup binary
        visibility: Visibility specification

    Note: Build event logging is configured via the DATABUILD_BUILD_EVENT_LOG environment variable.
    """
    _databuild_graph_lookup(
        name = "%s.lookup" % name,
        lookup = lookup,
        visibility = visibility,
    )
    _databuild_graph_analyze(
        name = "%s.analyze" % name,
        lookup = "%s.lookup" % name,
        jobs = jobs,
        visibility = visibility,
    )
    _databuild_graph_mermaid(
        name = "%s.mermaid" % name,
        lookup = "%s.lookup" % name,
        jobs = jobs,
        visibility = visibility,
    )
    _databuild_graph_exec(
        name = "%s.exec" % name,
        jobs = jobs,
        visibility = visibility,
    )
    _databuild_graph_build(
        name = "%s.build" % name,
        cli_wrapper = "@databuild//databuild/cli:databuild_cli",
        jobs = jobs,
        lookup = "%s.lookup" % name,
        graph_label = "//%s:%s" % (native.package_name(), name),
        visibility = visibility,
    )

    # Build deployment targets (renamed for hierarchical namespacing)
    tar(
        name = "%s.build.tar" % name,
        srcs = [":%s.build" % name],
        visibility = visibility,
    )
    oci_image(
        name = "%s.build.image" % name,
        base = "@debian",
        cmd = ["/%s.build" % name],
        tars = [":%s.build.tar" % name],
        visibility = visibility,
    )
    oci_load(
        name = "%s.build.image_load" % name,
        image = ":%s.build.image" % name,
        visibility = visibility,
        repo_tags = ["databuild_%s_build:latest" % name],
    )

    # Service targets
    _databuild_graph_service(
        name = "%s.service" % name,
        lookup = "%s.lookup" % name,
        analyze = "%s.analyze" % name,
        exec = "%s.exec" % name,
        jobs = jobs,
        graph_label = "//%s:%s" % (native.package_name(), name),
        visibility = visibility,
    )
    tar(
        name = "%s.service.tar" % name,
        srcs = [":%s.service" % name],
        visibility = visibility,
    )
    oci_image(
        name = "%s.service.image" % name,
        base = "@debian",
        cmd = ["/%s.service" % name],
        tars = [":%s.service.tar" % name],
        visibility = visibility,
    )
    oci_load(
        name = "%s.service.image_load" % name,
        image = ":%s.service.image" % name,
        visibility = visibility,
        repo_tags = ["databuild_%s_service:latest" % name],
    )

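# Example BUILD usage (illustrative; the job and lookup targets are hypothetical):
#
#     databuild_graph(
#         name = "my_graph",
#         jobs = [":my_job"],
#         lookup = ":my_lookup_bin",
#     )
#
# This declares ":my_graph.lookup", ":my_graph.analyze", ":my_graph.mermaid",
# ":my_graph.exec", ":my_graph.build" (plus .build.tar/.build.image/.build.image_load),
# and ":my_graph.service" (plus .service.tar/.service.image/.service.image_load).
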
# TODO there feels like a lot of boilerplate around wrapping a target with a script - can this be simplified?
def _databuild_graph_lookup_impl(ctx):
    script = ctx.actions.declare_file(ctx.label.name)

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": "",
            "%{EXECUTABLE_PATH}": ctx.attr.lookup.files_to_run.executable.path,
            "%{EXECUTABLE_SHORT_PATH}": ctx.attr.lookup.files_to_run.executable.short_path,
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup],
    ).merge(ctx.attr.lookup.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_lookup = rule(
    implementation = _databuild_graph_lookup_impl,
    attrs = {
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "_template": attr.label(
            default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)

def _databuild_graph_analyze_impl(ctx):
    script = ctx.actions.declare_file(ctx.label.name)

    config_paths = {
        "//" + job.label.package + ":" + job.label.name: "$(rlocation _main/" + job[DataBuildJobInfo].configure.files_to_run.executable.short_path + ")"
        for job in ctx.attr.jobs
    }
    config_paths_str = "{" + ",".join(['\\"%s\\":\\"%s\\"' % (k, v) for k, v in config_paths.items()]) + "}"

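    # Illustrative only: for a hypothetical job //pipelines:daily, config_paths_str
    # ends up shaped like
    #     {\"//pipelines:daily\":\"$(rlocation _main/pipelines/daily.cfg)\"}
    # The escaped quotes and the $(rlocation ...) call are resolved later, when the
    # generated wrapper script runs.
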
    candidate_job_env_var = "'" + ",".join([
        "//" + target.label.package + ":" + target.label.name
        for target in ctx.attr.jobs
    ]) + "'"

    env_setup = """
export DATABUILD_CANDIDATE_JOBS_CFG="{candidate_job_env_var}"
export DATABUILD_MODE=plan
export DATABUILD_JOB_LOOKUP_PATH=$(rlocation _main/{lookup_path})
""".format(
        candidate_job_env_var = config_paths_str,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
    )

    script_prefix = env_setup

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": ctx.attr._analyze.files_to_run.executable.path,
            "%{EXECUTABLE_SHORT_PATH}": ctx.attr._analyze.files_to_run.executable.short_path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": script_prefix,
        },
        is_executable = True,
    )

    # Gather the configure executables
    configure_executables = [
        job[DataBuildJobInfo].configure.files_to_run.executable
        for job in ctx.attr.jobs
    ]

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup, ctx.executable._analyze] + configure_executables,
    ).merge(ctx.attr.lookup.default_runfiles).merge(ctx.attr._analyze.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all configure targets
    for job in ctx.attr.jobs:
        configure_target = job[DataBuildJobInfo].configure
        runfiles = runfiles.merge(configure_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_analyze = rule(
    implementation = _databuild_graph_analyze_impl,
    attrs = {
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/graph:rust_analyze_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_analyze": attr.label(
            default = "@databuild//databuild/graph:analyze",
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

def _databuild_graph_mermaid_impl(ctx):
    script = ctx.actions.declare_file(ctx.label.name)

    config_paths = {
        "//" + job.label.package + ":" + job.label.name: "$(rlocation _main/" + job[DataBuildJobInfo].configure.files_to_run.executable.short_path + ")"
        for job in ctx.attr.jobs
    }
    config_paths_str = "{" + ",".join(['\\"%s\\":\\"%s\\"' % (k, v) for k, v in config_paths.items()]) + "}"

    candidate_job_env_var = "'" + ",".join([
        "//" + target.label.package + ":" + target.label.name
        for target in ctx.attr.jobs
    ]) + "'"

    env_setup = """
export DATABUILD_CANDIDATE_JOBS_CFG="{candidate_job_env_var}"
export DATABUILD_MODE=mermaid
export DATABUILD_JOB_LOOKUP_PATH=$(rlocation _main/{lookup_path})
""".format(
        candidate_job_env_var = config_paths_str,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
    )

    script_prefix = env_setup

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": ctx.attr._analyze.files_to_run.executable.path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": script_prefix,
        },
        is_executable = True,
    )

    # Gather the configure executables
    configure_executables = [
        job[DataBuildJobInfo].configure.files_to_run.executable
        for job in ctx.attr.jobs
    ]

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup, ctx.executable._analyze] + configure_executables,
    ).merge(ctx.attr.lookup.default_runfiles).merge(ctx.attr._analyze.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all configure targets
    for job in ctx.attr.jobs:
        configure_target = job[DataBuildJobInfo].configure
        runfiles = runfiles.merge(configure_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_mermaid = rule(
    implementation = _databuild_graph_mermaid_impl,
    attrs = {
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/graph:rust_analyze_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_analyze": attr.label(
            default = "@databuild//databuild/graph:analyze",
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

def _databuild_graph_exec_impl(ctx):
    script = ctx.actions.declare_file(ctx.label.name)

    # Gather the execute executables
    execute_executables = [
        job[DataBuildJobInfo].execute
        for job in ctx.attr.jobs
    ]

    prefix_setup = """
"""

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": ctx.attr._execute.files_to_run.executable.path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": prefix_setup,
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable._execute] + execute_executables,
    ).merge(ctx.attr._execute.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all execute targets
    for job in ctx.attr.jobs:
        execute_target = job[DataBuildJobInfo].execute
        if hasattr(execute_target, "default_runfiles"):
            runfiles = runfiles.merge(execute_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_exec = rule(
    implementation = _databuild_graph_exec_impl,
    attrs = {
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/graph:rust_execute_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_execute": attr.label(
            default = "@databuild//databuild/graph:execute",
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

DataBuildGraphInfo = provider(
    doc = "Information about a DataBuild graph",
    fields = {
        "analyze": "Target that implements the graph analysis logic",
        "exec": "Target that implements the graph execution logic",
        "jobs": "List of jobs that are candidates for building partitions in this databuild graph",
    },
)

def _databuild_graph_build_impl(ctx):
    """Wraps the DataBuild CLI wrapper in a shell script."""
    script = ctx.actions.declare_file(ctx.label.name)

    # Build DATABUILD_CANDIDATE_JOBS_CFG JSON string with runtime rlocation resolution
    candidate_jobs_cfg_script_lines = ["CANDIDATE_JOBS_JSON_CFG=\"{\""]
    for i, job in enumerate(ctx.attr.jobs):
        job_label = "//" + job.label.package + ":" + job.label.name
        configure_path = job[DataBuildJobInfo].configure.files_to_run.executable.short_path
        separator = "," if i < len(ctx.attr.jobs) - 1 else ""
        candidate_jobs_cfg_script_lines.append(
            'CANDIDATE_JOBS_JSON_CFG="${CANDIDATE_JOBS_JSON_CFG}\\"%s\\":\\"$(rlocation _main/%s)\\"%s"' % (
                job_label,
                configure_path,
                separator,
            ),
        )
    candidate_jobs_cfg_script_lines.append('CANDIDATE_JOBS_JSON_CFG="${CANDIDATE_JOBS_JSON_CFG}}"')
    candidate_jobs_cfg_script = "\n".join(candidate_jobs_cfg_script_lines)

    # Build DATABUILD_CANDIDATE_JOBS_EXEC JSON string with runtime rlocation resolution
    candidate_jobs_exec_script_lines = ["CANDIDATE_JOBS_JSON_EXEC=\"{\""]
    for i, job in enumerate(ctx.attr.jobs):
        job_label = "//" + job.label.package + ":" + job.label.name
        execute_path = job[DataBuildJobInfo].execute.short_path
        separator = "," if i < len(ctx.attr.jobs) - 1 else ""
        candidate_jobs_exec_script_lines.append(
            'CANDIDATE_JOBS_JSON_EXEC="${CANDIDATE_JOBS_JSON_EXEC}\\"%s\\":\\"$(rlocation _main/%s.exec)\\"%s"' % (
                job_label,
                execute_path,
                separator,
            ),
        )
    candidate_jobs_exec_script_lines.append('CANDIDATE_JOBS_JSON_EXEC="${CANDIDATE_JOBS_JSON_EXEC}}"')
    candidate_jobs_exec_script = "\n".join(candidate_jobs_exec_script_lines)

    script_content = RUNFILES_PREFIX + """
# Build DATABUILD_CANDIDATE_JOBS_CFG dynamically with proper rlocation resolution
{candidate_jobs_cfg_script}
{candidate_jobs_exec_script}

export DATABUILD_CANDIDATE_JOBS_CFG="$CANDIDATE_JOBS_JSON_CFG"
export DATABUILD_CANDIDATE_JOBS_EXEC="$CANDIDATE_JOBS_JSON_EXEC"

# Resolve binary paths with error checking
DATABUILD_JOB_LOOKUP_PATH="$(rlocation _main/{lookup_path})"
if [[ -z "$DATABUILD_JOB_LOOKUP_PATH" || ! -f "$DATABUILD_JOB_LOOKUP_PATH" ]]; then
    echo "ERROR: Could not find job lookup binary at _main/{lookup_path}" >&2
    exit 1
fi
export DATABUILD_JOB_LOOKUP_PATH

DATABUILD_ANALYZE_BINARY="$(rlocation _main/{analyze_path})"
if [[ -z "$DATABUILD_ANALYZE_BINARY" || ! -f "$DATABUILD_ANALYZE_BINARY" ]]; then
    echo "ERROR: Could not find analyze binary at _main/{analyze_path}" >&2
    exit 1
fi
export DATABUILD_ANALYZE_BINARY

DATABUILD_EXECUTE_BINARY="$(rlocation _main/{execute_path})"
if [[ -z "$DATABUILD_EXECUTE_BINARY" || ! -f "$DATABUILD_EXECUTE_BINARY" ]]; then
    echo "ERROR: Could not find execute binary at _main/{execute_path}" >&2
    exit 1
fi
export DATABUILD_EXECUTE_BINARY

export DATABUILD_GRAPH_LABEL="{graph_label}"

# Generate a single build request ID for the entire CLI operation
export DATABUILD_BUILD_REQUEST_ID=$(python3 -c "import uuid; print(uuid.uuid4())")

# Run unified DataBuild CLI wrapper using the provided cli_wrapper attribute
CLI_BINARY="$(rlocation _main/{cli_path})"
if [[ -z "$CLI_BINARY" || ! -f "$CLI_BINARY" ]]; then
    echo "ERROR: Could not find CLI binary at _main/{cli_path}" >&2
    exit 1
fi

"$CLI_BINARY" "$@"
""".format(
        candidate_jobs_cfg_script = candidate_jobs_cfg_script,
        candidate_jobs_exec_script = candidate_jobs_exec_script,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
        analyze_path = ctx.attr._analyze.files_to_run.executable.short_path,
        execute_path = ctx.attr._execute.files_to_run.executable.short_path,
        graph_label = ctx.attr.graph_label,
        cli_path = ctx.attr.cli_wrapper.files_to_run.executable.short_path,
    )

    ctx.actions.write(
        output = script,
        is_executable = True,
        content = script_content,
    )

    # Gather the configure executables
    configure_executables = [
        job[DataBuildJobInfo].configure.files_to_run.executable
        for job in ctx.attr.jobs
    ]

    # Gather the exec executables
    exec_executables = [
        job[DataBuildJobInfo].execute
        for job in ctx.attr.jobs
    ]

    # Create runfiles including the CLI binary, analyze/execute binaries and all dependencies
    runfiles = ctx.runfiles(
        files = [ctx.executable.cli_wrapper, ctx.executable.lookup, ctx.executable._analyze, ctx.executable._execute] + configure_executables + exec_executables,
    ).merge(ctx.attr.cli_wrapper.default_runfiles).merge(ctx.attr.lookup.default_runfiles).merge(
        ctx.attr._analyze.default_runfiles,
    ).merge(ctx.attr._execute.default_runfiles).merge(ctx.attr._bash_runfiles.default_runfiles).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all configure targets
    for job in ctx.attr.jobs:
        configure_target = job[DataBuildJobInfo].configure
        runfiles = runfiles.merge(configure_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_build = rule(
    implementation = _databuild_graph_build_impl,
    attrs = {
        "cli_wrapper": attr.label(
            doc = "Target that implements the unified DataBuild CLI",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "graph_label": attr.string(
            doc = "The label of this graph for identification",
            mandatory = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_analyze": attr.label(
            default = "@databuild//databuild/graph:analyze",
            executable = True,
            cfg = "target",
        ),
        "_execute": attr.label(
            default = "@databuild//databuild/graph:execute",
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

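# Example (illustrative; the target and arguments are hypothetical): the ".build"
# target exports the DATABUILD_* environment above and then forwards its
# command-line arguments to the DataBuild CLI:
#
#     bazel run //my/package:my_graph.build -- <args for the DataBuild CLI>
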
def _databuild_graph_service_impl(ctx):
    """Implementation of the service target that runs the Build Graph Service."""
    script = ctx.actions.declare_file(ctx.label.name)

    # Build job configurations mapping for DATABUILD_CANDIDATE_JOBS_CFG
    config_paths = {
        "//" + job.label.package + ":" + job.label.name: "$(rlocation _main/" + job[DataBuildJobInfo].configure.files_to_run.executable.short_path + ")"
        for job in ctx.attr.jobs
    }
    config_paths_str = "{" + ",".join(['\\"%s\\":\\"%s\\"' % (k, v) for k, v in config_paths.items()]) + "}"

    # Default service configuration
    default_port = "8080"
    default_db = "sqlite:///tmp/%s.db" % ctx.label.name.replace(".", "_")

    env_setup = """
export DATABUILD_CANDIDATE_JOBS_CFG="{candidate_jobs}"
export DATABUILD_JOB_LOOKUP_PATH=$(rlocation _main/{lookup_path})
export DATABUILD_ANALYZE_BINARY=$(rlocation _main/{analyze_path})
export DATABUILD_EXECUTE_BINARY=$(rlocation _main/{exec_path})
export DATABUILD_SERVICE_BINARY=$(rlocation _main/{service_path})
""".format(
        candidate_jobs = config_paths_str,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
        analyze_path = ctx.attr.analyze.files_to_run.executable.short_path,
        exec_path = ctx.attr.exec.files_to_run.executable.short_path,
        service_path = ctx.attr._service.files_to_run.executable.short_path,
    )

    # Generate a custom script instead of using the template to handle the external binary correctly
    script_content = RUNFILES_PREFIX + env_setup + """
# Always pass graph-specific configuration, allow user args to override defaults like port/host
# Graph-specific args that should always be set:
GRAPH_ARGS=(
    "--graph-label" "{graph_label}"
    "--job-lookup-path" "$(rlocation _main/{lookup_path})"
    "--event-log" "{db}"
)

# Add default port if no port specified in user args
if [[ ! "$*" =~ --port ]]; then
    GRAPH_ARGS+=("--port" "{port}")
fi

# Add default host if no host specified in user args
if [[ ! "$*" =~ --host ]]; then
    GRAPH_ARGS+=("--host" "0.0.0.0")
fi

# Run the service with graph-specific args + user args
if [[ -n "${{EXECUTABLE_SUBCOMMAND:-}}" ]]; then
    exec "${{DATABUILD_SERVICE_BINARY}}" "${{EXECUTABLE_SUBCOMMAND}}" "${{GRAPH_ARGS[@]}}" "$@"
else
    exec "${{DATABUILD_SERVICE_BINARY}}" "${{GRAPH_ARGS[@]}}" "$@"
fi
""".format(
        graph_label = ctx.attr.graph_label,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
        db = default_db,
        port = default_port,
    )

    ctx.actions.write(
        output = script,
        content = script_content,
        is_executable = True,
    )

    # Gather all dependencies for runfiles
    configure_executables = [
        job[DataBuildJobInfo].configure.files_to_run.executable
        for job in ctx.attr.jobs
    ]

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup, ctx.executable._service, ctx.executable.analyze, ctx.executable.exec] + configure_executables + ctx.files._dashboard,
    ).merge(ctx.attr.lookup.default_runfiles).merge(ctx.attr._service.default_runfiles).merge(
        ctx.attr.analyze.default_runfiles,
    ).merge(ctx.attr.exec.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all configure targets
    for job in ctx.attr.jobs:
        configure_target = job[DataBuildJobInfo].configure
        runfiles = runfiles.merge(configure_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_service = rule(
    implementation = _databuild_graph_service_impl,
    attrs = {
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "analyze": attr.label(
            doc = "Target that implements the graph analysis logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "exec": attr.label(
            doc = "Target that implements the graph execution logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "graph_label": attr.string(
            doc = "The label of this graph for service identification",
            mandatory = True,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_service": attr.label(
            default = "@databuild//databuild:build_graph_service",
            executable = True,
            cfg = "target",
        ),
        "_dashboard": attr.label(
            default = "@databuild//databuild/dashboard:dist",
            allow_files = True,
        ),
    },
    executable = True,
)

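# Example (illustrative; the target name is hypothetical): run the graph service
# locally. --port and --host are only defaulted when absent from the user args:
#
#     bazel run //my/package:my_graph.service -- --port 9000 --host 127.0.0.1
#
# Without overrides the service listens on 0.0.0.0:8080 and writes its event log
# to a SQLite database under /tmp.
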
def databuild_dsl_generator(
        name,
        graph_file,
        graph_attr = "graph",
        output_package = None,
        deps = [],
        visibility = None):
    """Creates a DataBuild DSL code generator that can generate BUILD.bazel and job binaries.

    Args:
        name: Name of the generator target (typically ends with .generate)
        graph_file: Python file containing the DSL graph definition
        graph_attr: Name of the graph attribute in the module (default: "graph")
        output_package: Target package for generated files (default: current package)
        deps: Dependencies needed to load the graph
        visibility: Visibility specification
    """
    if not output_package:
        output_package = "//" + native.package_name()

    _databuild_dsl_generator_rule(
        name = name,
        graph_file = graph_file,
        graph_attr = graph_attr,
        output_package = output_package,
        deps = deps,
        visibility = visibility,
    )

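# Example BUILD usage (illustrative; the file and dep names are hypothetical):
#
#     databuild_dsl_generator(
#         name = "pipeline.generate",
#         graph_file = "pipeline_graph.py",
#         graph_attr = "graph",
#         deps = [":pipeline_graph_lib"],
#     )
#
# `bazel run //my/package:pipeline.generate` then writes the generated BUILD.bazel
# and job binaries into the source tree via BUILD_WORKSPACE_DIRECTORY; under
# `bazel build` the files land in bazel-bin instead.
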
def _generate_custom_generator_script(module_path, graph_attr, package_path, deps):
    """Generate the custom generator script content with embedded parameters."""
    return """#!/usr/bin/env python3
import os
import sys

# Setup runfiles for proper module resolution
# Try to find the runfiles directory relative to this script
script_path = os.path.abspath(__file__)
runfiles_dir = script_path + '.runfiles'

# Debug: Runfiles path setup for cross-workspace usage
# Setting up runfiles paths for cross-workspace usage

if os.path.exists(runfiles_dir):
    # Found runfiles directory, add _main to Python path
    main_runfiles_path = os.path.join(runfiles_dir, '_main')
    if os.path.exists(main_runfiles_path):
        sys.path.insert(0, main_runfiles_path)
        # Successfully added main runfiles path

        # Check what other directories exist in runfiles for cross-workspace usage
        # All runfiles directories available
    else:
        # _main directory not found in runfiles
        pass

    # Add external repository runfiles (like databuild+) for cross-workspace usage
    for entry in os.listdir(runfiles_dir):
        if entry.endswith('+') and os.path.isdir(os.path.join(runfiles_dir, entry)):
            external_path = os.path.join(runfiles_dir, entry)
            sys.path.insert(0, external_path)
            # Added external repository path

    # Also add pip package runfiles to Python path
    for entry in os.listdir(runfiles_dir):
        if entry.startswith('rules_python++pip+') and os.path.isdir(os.path.join(runfiles_dir, entry)):
            pip_site_packages = os.path.join(runfiles_dir, entry, 'site-packages')
            if os.path.exists(pip_site_packages):
                sys.path.insert(0, pip_site_packages)
                # Added pip package path
else:
    # Runfiles directory not found, falling back to workspace root
    # If runfiles not available, we're probably running in development
    # Add the workspace root to the path
    workspace_root = os.environ.get('BUILD_WORKSPACE_DIRECTORY')
    if workspace_root:
        sys.path.insert(0, workspace_root)
        # Successfully added workspace root as fallback

from databuild.dsl.python.generator_lib import generate_dsl_package

def main():
    # Determine output directory
    workspace_root = os.environ.get('BUILD_WORKSPACE_DIRECTORY')
    if workspace_root:
        # Running with bazel run - write to source tree
        output_dir = os.path.join(workspace_root, '{package_path}')
    else:
        # Running with bazel build - write to current directory (bazel-bin)
        output_dir = '.'

    print(f"Generating DataBuild DSL code to {{output_dir}}")

    try:
        generate_dsl_package('{module_path}', '{graph_attr}', output_dir, {deps})
    except Exception as e:
        print(f"ERROR: {{e}}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)

if __name__ == "__main__":
    main()
""".format(
        module_path = module_path,
        graph_attr = graph_attr,
        package_path = package_path,
        deps = deps,
    )

def _databuild_dsl_generator_impl(ctx):
    """Implementation of the DSL generator rule."""
    # Create custom generator script
    custom_generator = ctx.actions.declare_file(ctx.label.name + "_generator.py")

    # Get the module path from the graph file
    graph_file_path = ctx.file.graph_file.short_path
    if graph_file_path.endswith(".py"):
        graph_file_path = graph_file_path[:-3]
    module_path = graph_file_path.replace("/", ".")

    # Get the package path for output
    package_path = ctx.attr.output_package.strip("//").replace(":", "/")

    # Generate script content with embedded parameters
    # Convert deps to list of strings
    dep_labels = [str(dep.label) for dep in ctx.attr.deps] if ctx.attr.deps else []

    script_content = _generate_custom_generator_script(
        module_path = module_path,
        graph_attr = ctx.attr.graph_attr,
        package_path = package_path,
        deps = dep_labels,
    )

    ctx.actions.write(
        output = custom_generator,
        content = script_content,
        is_executable = True,
    )

    # Create runfiles with all dependencies
    runfiles = ctx.runfiles(files = [custom_generator, ctx.file.graph_file])

    # Merge runfiles from all user-specified dependencies
    for dep in ctx.attr.deps:
        if hasattr(dep, "default_runfiles"):
            runfiles = runfiles.merge(dep.default_runfiles)

    # Include generator_lib and its dependencies
    if hasattr(ctx.attr._generator_lib, "default_runfiles"):
        runfiles = runfiles.merge(ctx.attr._generator_lib.default_runfiles)

    # Explicitly include the py_proto dependencies that generator_lib needs at runtime
    for py_proto_dep in [ctx.attr._py_proto]:
        if hasattr(py_proto_dep, "default_runfiles"):
            runfiles = runfiles.merge(py_proto_dep.default_runfiles)

    # Add Python runfiles for proper module resolution
    if hasattr(ctx.attr._python_runfiles, "default_runfiles"):
        runfiles = runfiles.merge(ctx.attr._python_runfiles.default_runfiles)

    return [DefaultInfo(
        executable = custom_generator,
        runfiles = runfiles,
    )]

_databuild_dsl_generator_rule = rule(
    implementation = _databuild_dsl_generator_impl,
    attrs = {
        "graph_file": attr.label(
            doc = "Python file containing the DSL graph definition",
            allow_single_file = [".py"],
            mandatory = True,
        ),
        "graph_attr": attr.string(
            doc = "Name of the graph attribute in the module",
            default = "graph",
        ),
        "output_package": attr.string(
            doc = "Target package for generated files",
            mandatory = True,
        ),
        "deps": attr.label_list(
            doc = "Dependencies needed to load the graph",
            allow_empty = True,
        ),
        "_python_runfiles": attr.label(
            default = "@rules_python//python/runfiles",
            allow_files = True,
        ),
        "_generator_lib": attr.label(
            default = "@databuild//databuild/dsl/python:generator_lib",
        ),
        "_py_proto": attr.label(
            default = "//databuild:py_proto",
        ),
    },
    executable = True,
)