# databuild/databuild/rules.bzl
load("@aspect_bazel_lib//lib:tar.bzl", "tar")
load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load")

RUNFILES_PREFIX = """
# ================= BEGIN RUNFILES INIT =================
SCRIPT_PATH="$(realpath "$0")"
# TODO: should this be extracted to a shared init script?
if [[ -z "${RUNFILES_DIR:-}" ]]; then
    # Resolve the script's real path and derive RUNFILES_DIR from it
    SCRIPT_DIR="$(readlink -f "${BASH_SOURCE[0]}")"
    export RUNFILES_DIR="${SCRIPT_DIR}.runfiles"
fi

# --- begin runfiles.bash initialization v3 ---
# Copy-pasted from the Bazel Bash runfiles library v3.
set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
  source "$f" || \
  source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
  source "$0.runfiles/$f" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
# --- end runfiles.bash initialization v3 ---
# ================== END RUNFILES INIT ==================
"""

def databuild_job(
        name,
        binary,
        visibility = None):
    """Creates a DataBuild job target with configuration and execution capabilities.

    Args:
        name: Name of the job target
        binary: Single binary target that handles both config and exec via subcommands
        visibility: Visibility specification
    """

    # Single-binary approach: the binary dispatches on its "config" and "exec" subcommands
    _databuild_job_cfg_rule(
        name = name + ".cfg",
        configure = binary,
        visibility = visibility,
    )

    _databuild_job_exec_rule(
        name = name + ".exec",
        execute = binary,
        visibility = visibility,
    )

    # Create a job target that configures, then executes
    _databuild_job_rule(
        name = name,
        configure = ":%s.cfg" % name,
        execute = ":%s.exec" % name,
        visibility = visibility,
    )
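
# Example usage in a BUILD file (a sketch; ":events_bin" is a hypothetical
# binary that implements the "config" and "exec" subcommands):
#
#   databuild_job(
#       name = "events",
#       binary = ":events_bin",
#   )
#
# This defines :events.cfg, :events.exec, and :events (configure piped into
# execute).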

def _databuild_job_cfg_impl(ctx):
    configure_file = ctx.executable.configure
    configure_path = ctx.attr.configure.files_to_run.executable.path
    script = ctx.actions.declare_file(ctx.label.name)

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": configure_path,
            "%{EXECUTABLE_SHORT_PATH}": ctx.attr.configure.files_to_run.executable.short_path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": "EXECUTABLE_SUBCOMMAND=\"config\"\n",
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [configure_file],
    ).merge(ctx.attr.configure.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_job_cfg_rule = rule(
    implementation = _databuild_job_cfg_impl,
    attrs = {
        "configure": attr.label(
            doc = "Target that implements the configuration logic",
            executable = True,
            cfg = "target",
            mandatory = True,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)

def _databuild_job_exec_impl(ctx):
    execute_file = ctx.executable.execute
    wrapper_file = ctx.executable._job_wrapper
    script = ctx.actions.declare_file(ctx.label.name)

    # Create a simple script that calls the job wrapper with the original binary
    script_content = RUNFILES_PREFIX + """
export DATABUILD_JOB_BINARY="$(rlocation _main/{execute_path})"
exec "$(rlocation databuild+/databuild/job/job_wrapper)" exec "$@"
""".format(
        execute_path = ctx.attr.execute.files_to_run.executable.short_path,
    )

    ctx.actions.write(
        output = script,
        content = script_content,
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [wrapper_file, execute_file],
    ).merge(ctx.attr.execute.default_runfiles).merge(ctx.attr._job_wrapper.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

# Define the provider
DataBuildJobInfo = provider(
    doc = "Information about a DataBuild job",
    fields = {
        "configure": "Target that implements the configuration logic",
        "execute": "Target that implements the execution logic",
        "deps": "List of dependencies (other DataBuildJobInfo providers)",
    },
)
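
# Consuming the provider from another rule implementation (a minimal sketch;
# "jobs" is assumed to be a label_list attr whose targets are databuild jobs):
#
#   def _impl(ctx):
#       for job in ctx.attr.jobs:
#           cfg = job[DataBuildJobInfo].configure
#           path = cfg.files_to_run.executable.short_path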

_databuild_job_exec_rule = rule(
    implementation = _databuild_job_exec_impl,
    attrs = {
        "execute": attr.label(
            doc = "Target that implements the execution logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "_job_wrapper": attr.label(
            default = "@databuild//databuild/job:job_wrapper",
            executable = True,
            cfg = "target",
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)

def _databuild_job_impl(ctx):
    """Wraps the configure and execute targets in a shell script."""
    script = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.write(
        output = script,
        is_executable = True,
        content = RUNFILES_PREFIX + """
$(rlocation _main/{configure_path}) "$@" | $(rlocation _main/{execute_path})
""".format(
            configure_path = ctx.attr.configure.files_to_run.executable.short_path,
            execute_path = ctx.attr.execute.files_to_run.executable.short_path,
        ),
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable.execute, ctx.executable.configure],
    ).merge(ctx.attr.execute.default_runfiles).merge(ctx.attr.configure.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
        DataBuildJobInfo(
            configure = ctx.attr.configure,
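            # Note: unlike "configure" (a Target), "execute" is set to the
            # generated wrapper script File; downstream consumers guard for
            # this (see the hasattr check in _databuild_graph_exec_impl).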
            execute = script,
        ),
    ]

_databuild_job_rule = rule(
    implementation = _databuild_job_impl,
    attrs = {
        "configure": attr.label(
            doc = "Target that implements the configuration logic",
            mandatory = True,
            executable = True,
            # TODO all these cfg=execs are probably a problem for deployment
            cfg = "target",
        ),
        "execute": attr.label(
            doc = "Target that implements the execution logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

def databuild_graph(name, jobs, lookup, visibility = None):
    """Creates a databuild graph target.

    Args:
        name: Name of the graph target
        jobs: List of job targets
        lookup: Job lookup binary
        visibility: Visibility specification

    Note: Build event logging is configured via the DATABUILD_BUILD_EVENT_LOG environment variable.
    """
    _databuild_graph_lookup(
        name = "%s.lookup" % name,
        lookup = lookup,
        visibility = visibility,
    )

    _databuild_graph_analyze(
        name = "%s.analyze" % name,
        lookup = "%s.lookup" % name,
        jobs = jobs,
        visibility = visibility,
    )

    _databuild_graph_mermaid(
        name = "%s.mermaid" % name,
        lookup = "%s.lookup" % name,
        jobs = jobs,
        visibility = visibility,
    )

    _databuild_graph_exec(
        name = "%s.exec" % name,
        jobs = jobs,
        visibility = visibility,
    )

    _databuild_graph_build(
        name = "%s.build" % name,
        cli_wrapper = "@databuild//databuild/cli:databuild_cli",
        jobs = jobs,
        lookup = "%s.lookup" % name,
        graph_label = "//%s:%s" % (native.package_name(), name),
        visibility = visibility,
    )

    # Build deployment targets (renamed for hierarchical namespacing)
    tar(
        name = "%s.build.tar" % name,
        srcs = [":%s.build" % name],
        visibility = visibility,
    )

    oci_image(
        name = "%s.build.image" % name,
        base = "@debian",
        cmd = ["/%s.build" % name],
        tars = [":%s.build.tar" % name],
        visibility = visibility,
    )

    oci_load(
        name = "%s.build.image_load" % name,
        image = ":%s.build.image" % name,
        visibility = visibility,
        repo_tags = ["databuild_%s_build:latest" % name],
    )

    # Service targets
    _databuild_graph_service(
        name = "%s.service" % name,
        lookup = "%s.lookup" % name,
        analyze = "%s.analyze" % name,
        exec = "%s.exec" % name,
        jobs = jobs,
        graph_label = "//%s:%s" % (native.package_name(), name),
        visibility = visibility,
    )

    tar(
        name = "%s.service.tar" % name,
        srcs = [":%s.service" % name],
        visibility = visibility,
    )

    oci_image(
        name = "%s.service.image" % name,
        base = "@debian",
        cmd = ["/%s.service" % name],
        tars = [":%s.service.tar" % name],
        visibility = visibility,
    )

    oci_load(
        name = "%s.service.image_load" % name,
        image = ":%s.service.image" % name,
        visibility = visibility,
        repo_tags = ["databuild_%s_service:latest" % name],
    )
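
# Example usage in a BUILD file (a sketch; the job and lookup targets are
# hypothetical):
#
#   databuild_graph(
#       name = "my_graph",
#       jobs = [":events", ":users"],
#       lookup = ":lookup_bin",
#   )
#
# This defines :my_graph.lookup, .analyze, .mermaid, .exec, and .build, plus
# .build.tar/.build.image/.build.image_load and the equivalent .service
# targets for deployment.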

# TODO there is a lot of boilerplate around wrapping a target with a script - can this be simplified?
def _databuild_graph_lookup_impl(ctx):
    script = ctx.actions.declare_file(ctx.label.name)
    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": "",
            "%{EXECUTABLE_PATH}": ctx.attr.lookup.files_to_run.executable.path,
            "%{EXECUTABLE_SHORT_PATH}": ctx.attr.lookup.files_to_run.executable.short_path,
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup],
    ).merge(ctx.attr.lookup.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_lookup = rule(
    implementation = _databuild_graph_lookup_impl,
    attrs = {
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "_template": attr.label(
            default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)

def _databuild_graph_analyze_impl(ctx):
    script = ctx.actions.declare_file(ctx.label.name)

    # Map each job label to the runtime location of its configure binary
    config_paths = {
        "//" + job.label.package + ":" + job.label.name: "$(rlocation _main/" + job[DataBuildJobInfo].configure.files_to_run.executable.short_path + ")"
        for job in ctx.attr.jobs
    }
    config_paths_str = "{" + ",".join(['\\"%s\\":\\"%s\\"' % (k, v) for k, v in config_paths.items()]) + "}"

    env_setup = """
export DATABUILD_CANDIDATE_JOBS="{candidate_jobs}"
export DATABUILD_MODE=plan
export DATABUILD_JOB_LOOKUP_PATH=$(rlocation _main/{lookup_path})
""".format(
        candidate_jobs = config_paths_str,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
    )

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": ctx.attr._analyze.files_to_run.executable.path,
            "%{EXECUTABLE_SHORT_PATH}": ctx.attr._analyze.files_to_run.executable.short_path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": env_setup,
        },
        is_executable = True,
    )

    # Gather the configure executables
    configure_executables = [
        job[DataBuildJobInfo].configure.files_to_run.executable
        for job in ctx.attr.jobs
    ]

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup, ctx.executable._analyze] + configure_executables,
    ).merge(ctx.attr.lookup.default_runfiles).merge(ctx.attr._analyze.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all configure targets
    for job in ctx.attr.jobs:
        configure_target = job[DataBuildJobInfo].configure
        runfiles = runfiles.merge(configure_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_analyze = rule(
    implementation = _databuild_graph_analyze_impl,
    attrs = {
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/graph:rust_analyze_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_analyze": attr.label(
            default = "@databuild//databuild/graph:analyze",
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)
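
# At runtime, DATABUILD_CANDIDATE_JOBS holds a JSON map from job label to the
# rlocation-resolved configure binary, along the lines of (hypothetical labels
# and paths):
#
#   {"//pipelines:events":"/.../events.cfg","//pipelines:users":"/.../users.cfg"}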

def _databuild_graph_mermaid_impl(ctx):
    script = ctx.actions.declare_file(ctx.label.name)

    # Map each job label to the runtime location of its configure binary
    config_paths = {
        "//" + job.label.package + ":" + job.label.name: "$(rlocation _main/" + job[DataBuildJobInfo].configure.files_to_run.executable.short_path + ")"
        for job in ctx.attr.jobs
    }
    config_paths_str = "{" + ",".join(['\\"%s\\":\\"%s\\"' % (k, v) for k, v in config_paths.items()]) + "}"

    env_setup = """
export DATABUILD_CANDIDATE_JOBS="{candidate_jobs}"
export DATABUILD_MODE=mermaid
export DATABUILD_JOB_LOOKUP_PATH=$(rlocation _main/{lookup_path})
""".format(
        candidate_jobs = config_paths_str,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
    )

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": ctx.attr._analyze.files_to_run.executable.path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": env_setup,
        },
        is_executable = True,
    )

    # Gather the configure executables
    configure_executables = [
        job[DataBuildJobInfo].configure.files_to_run.executable
        for job in ctx.attr.jobs
    ]

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup, ctx.executable._analyze] + configure_executables,
    ).merge(ctx.attr.lookup.default_runfiles).merge(ctx.attr._analyze.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all configure targets
    for job in ctx.attr.jobs:
        configure_target = job[DataBuildJobInfo].configure
        runfiles = runfiles.merge(configure_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_mermaid = rule(
    implementation = _databuild_graph_mermaid_impl,
    attrs = {
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/graph:rust_analyze_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_analyze": attr.label(
            default = "@databuild//databuild/graph:analyze",
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

def _databuild_graph_exec_impl(ctx):
    script = ctx.actions.declare_file(ctx.label.name)

    # Gather the execute executables
    execute_executables = [
        job[DataBuildJobInfo].execute
        for job in ctx.attr.jobs
    ]

    prefix_setup = "\n"

    ctx.actions.expand_template(
        template = ctx.file._template,
        output = script,
        substitutions = {
            "%{EXECUTABLE_PATH}": ctx.attr._execute.files_to_run.executable.path,
            "%{RUNFILES_PREFIX}": RUNFILES_PREFIX,
            "%{PREFIX}": prefix_setup,
        },
        is_executable = True,
    )

    runfiles = ctx.runfiles(
        files = [ctx.executable._execute] + execute_executables,
    ).merge(ctx.attr._execute.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all execute targets (guarded: "execute" may be a
    # generated script File, which has no default_runfiles)
    for job in ctx.attr.jobs:
        execute_target = job[DataBuildJobInfo].execute
        if hasattr(execute_target, "default_runfiles"):
            runfiles = runfiles.merge(execute_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_exec = rule(
    implementation = _databuild_graph_exec_impl,
    attrs = {
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/graph:rust_execute_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_execute": attr.label(
            default = "@databuild//databuild/graph:execute",
            executable = True,
            cfg = "target",
        ),
    },
    executable = True,
)

DataBuildGraphInfo = provider(
    doc = "Information about a DataBuild graph",
    fields = {
        "analyze": "Target that implements the graph analysis logic",
        "exec": "Target that implements the graph execution logic",
        "jobs": "List of jobs that are candidates for building partitions in this databuild graph",
    },
)

def _databuild_graph_build_impl(ctx):
    """Wraps the DataBuild CLI wrapper in a shell script."""
    script = ctx.actions.declare_file(ctx.label.name)

    # Build the DATABUILD_CANDIDATE_JOBS JSON string with runtime rlocation resolution
    candidate_jobs_script_lines = ["CANDIDATE_JOBS_JSON=\"{\""]
    for i, job in enumerate(ctx.attr.jobs):
        job_label = "//" + job.label.package + ":" + job.label.name
        configure_path = job[DataBuildJobInfo].configure.files_to_run.executable.short_path
        separator = "," if i < len(ctx.attr.jobs) - 1 else ""
        candidate_jobs_script_lines.append(
            'CANDIDATE_JOBS_JSON="${CANDIDATE_JOBS_JSON}\\"%s\\":\\"$(rlocation _main/%s)\\"%s"' % (
                job_label,
                configure_path,
                separator,
            ),
        )
    candidate_jobs_script_lines.append('CANDIDATE_JOBS_JSON="${CANDIDATE_JOBS_JSON}}"')
    candidate_jobs_script = "\n".join(candidate_jobs_script_lines)

    script_content = RUNFILES_PREFIX + """
# Build DATABUILD_CANDIDATE_JOBS dynamically with proper rlocation resolution
%s
export DATABUILD_CANDIDATE_JOBS="$CANDIDATE_JOBS_JSON"
export DATABUILD_JOB_LOOKUP_PATH="$(rlocation _main/%s)"
export DATABUILD_GRAPH_LABEL="%s"

# Generate a single build request ID for the entire CLI operation
export DATABUILD_BUILD_REQUEST_ID=$(python3 -c "import uuid; print(uuid.uuid4())")

# Run unified DataBuild CLI wrapper
"$(rlocation databuild+/databuild/cli/databuild_cli)" "$@"
""" % (
        candidate_jobs_script,
        ctx.attr.lookup.files_to_run.executable.short_path,
        ctx.attr.graph_label,
    )

    ctx.actions.write(
        output = script,
        is_executable = True,
        content = script_content,
    )

    # Gather the configure and execute executables
    configure_executables = [
        job[DataBuildJobInfo].configure.files_to_run.executable
        for job in ctx.attr.jobs
    ]

    # Get the execute targets - these are the .exec files that need to be in runfiles
    execute_executables = []
    for job in ctx.attr.jobs:
        # The job target itself contains references to both configure and execute.
        # We need to find the .exec target for each job (best-effort heuristic).
        job_name = job.label.name
        exec_target_name = job_name + ".exec"

        # Find the .exec target in the same package
        for attr_name in dir(job):
            if attr_name.endswith("_exec") or exec_target_name in attr_name:
                exec_target = getattr(job, attr_name, None)
                if exec_target and hasattr(exec_target, "files_to_run"):
                    execute_executables.append(exec_target.files_to_run.executable)
                    break

    # Also check if we can access exec targets directly from job dependencies
    all_job_files = []
    for job in ctx.attr.jobs:
        if hasattr(job, "default_runfiles") and job.default_runfiles:
            all_job_files.extend(job.default_runfiles.files.to_list())

    runfiles = ctx.runfiles(
        files = [ctx.executable.cli_wrapper, ctx.executable.lookup] + configure_executables + execute_executables + all_job_files,
    ).merge(ctx.attr.cli_wrapper.default_runfiles).merge(ctx.attr.lookup.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    )

    # Merge runfiles from all configure targets and job targets
    for job in ctx.attr.jobs:
        configure_target = job[DataBuildJobInfo].configure
        runfiles = runfiles.merge(configure_target.default_runfiles)

        # Also merge the job's own runfiles, which should include the .exec target
        runfiles = runfiles.merge(job.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
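        # The unified CLI wrapper currently stands in for both the analyze and
        # exec fields of DataBuildGraphInfo.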
        DataBuildGraphInfo(
            analyze = ctx.attr.cli_wrapper,
            exec = ctx.attr.cli_wrapper,
            jobs = ctx.attr.jobs,
        ),
    ]

_databuild_graph_build = rule(
    implementation = _databuild_graph_build_impl,
    attrs = {
        "cli_wrapper": attr.label(
            doc = "Target that implements the unified DataBuild CLI",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "graph_label": attr.string(
            doc = "The label of this graph for identification",
            mandatory = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
    },
    executable = True,
)
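
# Example invocation of the generated build target (hypothetical package;
# everything after "--" is forwarded to the DataBuild CLI):
#
#   bazel run //pipelines:my_graph.build -- ...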

def _databuild_graph_service_impl(ctx):
    """Implementation of the service target that runs the Build Graph Service."""
    script = ctx.actions.declare_file(ctx.label.name)

    # Build the job-configuration mapping for DATABUILD_CANDIDATE_JOBS
    config_paths = {
        "//" + job.label.package + ":" + job.label.name: "$(rlocation _main/" + job[DataBuildJobInfo].configure.files_to_run.executable.short_path + ")"
        for job in ctx.attr.jobs
    }
    config_paths_str = "{" + ",".join(['\\"%s\\":\\"%s\\"' % (k, v) for k, v in config_paths.items()]) + "}"

    # Default service configuration
    default_port = "8080"
    default_db = "sqlite:///tmp/%s.db" % ctx.label.name.replace(".", "_")

    env_setup = """
export DATABUILD_CANDIDATE_JOBS="{candidate_jobs}"
export DATABUILD_JOB_LOOKUP_PATH=$(rlocation _main/{lookup_path})
export DATABUILD_ANALYZE_BINARY=$(rlocation _main/{analyze_path})
export DATABUILD_EXECUTE_BINARY=$(rlocation _main/{exec_path})
""".format(
        candidate_jobs = config_paths_str,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
        analyze_path = ctx.attr.analyze.files_to_run.executable.short_path,
        exec_path = ctx.attr.exec.files_to_run.executable.short_path,
    )

    # Generate a custom script instead of using the template to handle the external binary correctly
    script_content = RUNFILES_PREFIX + env_setup + """
EXECUTABLE_BINARY="$(rlocation "databuild+/databuild/build_graph_service")"

# Always pass graph-specific configuration; allow user args to override defaults like port/host.
# Graph-specific args that should always be set:
GRAPH_ARGS=(
    "--graph-label" "{graph_label}"
    "--job-lookup-path" "$(rlocation _main/{lookup_path})"
    "--event-log" "{db}"
)

# Add default port if no port specified in user args
if [[ ! "$*" =~ --port ]]; then
    GRAPH_ARGS+=("--port" "{port}")
fi

# Add default host if no host specified in user args
if [[ ! "$*" =~ --host ]]; then
    GRAPH_ARGS+=("--host" "0.0.0.0")
fi

# Run the service with graph-specific args + user args
if [[ -n "${{EXECUTABLE_SUBCOMMAND:-}}" ]]; then
    exec "${{EXECUTABLE_BINARY}}" "${{EXECUTABLE_SUBCOMMAND}}" "${{GRAPH_ARGS[@]}}" "$@"
else
    exec "${{EXECUTABLE_BINARY}}" "${{GRAPH_ARGS[@]}}" "$@"
fi
""".format(
        graph_label = ctx.attr.graph_label,
        lookup_path = ctx.attr.lookup.files_to_run.executable.short_path,
        db = default_db,
        port = default_port,
    )

    ctx.actions.write(
        output = script,
        content = script_content,
        is_executable = True,
    )

    # Gather all dependencies for runfiles
    configure_executables = [
        job[DataBuildJobInfo].configure.files_to_run.executable
        for job in ctx.attr.jobs
    ]

    runfiles = ctx.runfiles(
        files = [ctx.executable.lookup, ctx.executable._service, ctx.executable.analyze, ctx.executable.exec] + configure_executables + ctx.files._dashboard,
    ).merge(ctx.attr.lookup.default_runfiles).merge(ctx.attr._service.default_runfiles).merge(
        ctx.attr.analyze.default_runfiles,
    ).merge(ctx.attr.exec.default_runfiles).merge(
        ctx.attr._bash_runfiles.default_runfiles,
    ).merge_all([job.default_runfiles for job in ctx.attr.jobs])

    # Merge runfiles from all configure targets
    for job in ctx.attr.jobs:
        configure_target = job[DataBuildJobInfo].configure
        runfiles = runfiles.merge(configure_target.default_runfiles)

    return [
        DefaultInfo(
            executable = script,
            runfiles = runfiles,
        ),
    ]

_databuild_graph_service = rule(
    implementation = _databuild_graph_service_impl,
    attrs = {
        "lookup": attr.label(
            doc = "Target that implements job lookup for desired partition refs",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "jobs": attr.label_list(
            doc = "The list of jobs that are candidates for building partitions in this databuild graph",
            allow_empty = False,
        ),
        "analyze": attr.label(
            doc = "Target that implements the graph analysis logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "exec": attr.label(
            doc = "Target that implements the graph execution logic",
            mandatory = True,
            executable = True,
            cfg = "target",
        ),
        "graph_label": attr.string(
            doc = "The label of this graph for service identification",
            mandatory = True,
        ),
        "_template": attr.label(
            default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
            allow_single_file = True,
        ),
        "_bash_runfiles": attr.label(
            default = Label("@bazel_tools//tools/bash/runfiles"),
            allow_files = True,
        ),
        "_service": attr.label(
            default = "@databuild//databuild:build_graph_service",
            executable = True,
            cfg = "target",
        ),
        "_dashboard": attr.label(
            default = "@databuild//databuild/dashboard:dist",
            allow_files = True,
        ),
    },
    executable = True,
)
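
# Example invocation of the generated service target (hypothetical package;
# --port/--host override the defaults of 8080 and 0.0.0.0 baked into the
# generated script):
#
#   bazel run //pipelines:my_graph.service -- --port 9090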