Reorganize repo

parent f5eeeeb3dd
commit f2567f7567

25 changed files with 113 additions and 125 deletions
BUILD.bazel (20 lines changed)

@@ -1,24 +1,6 @@
 filegroup(
     name = "jq",
-    srcs = ["//runtime:jq"],
-    visibility = ["//visibility:public"],
-)
-
-filegroup(
-    name = "proto",
-    srcs = ["databuild.proto"],
-    visibility = ["//visibility:public"],
-)
-
-# Expose Cargo.toml for crate_universe
-exports_files(
-    ["Cargo.toml"],
-    visibility = ["//visibility:public"],
-)
-
-# Create an empty Cargo.lock file that will be generated by Bazel
-exports_files(
-    ["Cargo.lock"],
+    srcs = ["//databuild/runtime:jq"],
     visibility = ["//visibility:public"],
 )
File diff suppressed because one or more lines are too long
@@ -37,14 +37,14 @@ In DataBuild, `Job`s are the atomic unit of data processing, representing the ma
 Jobs are assumed to be idempotent and independent, such that two jobs configured to produce separate partitions can run without interaction. These assumptions allow jobs to state only their immediate upstream and output data dependencies (the partitions they consume and produce), and in a graph leave no ambiguity about what must be done to produce a desired partition.
 
-Jobs are implemented via the [`databuild_job`](./rules.bzl) bazel rule. An extremely basic job definition can be found in the [basic_job example](./examples/basic_job/).
+Jobs are implemented via the [`databuild_job`](databuild/rules.bzl) bazel rule. An extremely basic job definition can be found in the [basic_job example](./examples/basic_job/).
 
 ## Graphs
 A `Graph` is the composition of jobs and partitions via their data dependencies. Graphs answer "what partitions does a job require to produce its outputs?", and "what job must be run to produce a given partition?" Defining a graph relies only on the list of involved jobs, and a lookup executable that transforms desired partitions into the job(s) that produce them.
 
-Graphs expose two entrypoints: `graph.analyze`, which produces the literal `JobGraph` specifying the structure of the build graph to be executed to build a specific set of partitions (enabling visualization, planning, precondition checking, etc.); and `graph.build`, which runs the build process for a set of requested partitions (relying on `graph.analyze` to plan). Other entrypoints are described in the [graph README](./graph/README.md).
+Graphs expose two entrypoints: `graph.analyze`, which produces the literal `JobGraph` specifying the structure of the build graph to be executed to build a specific set of partitions (enabling visualization, planning, precondition checking, etc.); and `graph.build`, which runs the build process for a set of requested partitions (relying on `graph.analyze` to plan). Other entrypoints are described in the [graph README](databuild/graph/README.md).
 
-Graphs are implemented via the [`databuild_graph`](./rules.bzl) bazel rule. A basic graph definition can be found in the [basic_graph example](./examples/basic_graph/).
+Graphs are implemented via the [`databuild_graph`](databuild/rules.bzl) bazel rule. A basic graph definition can be found in the [basic_graph example](./examples/basic_graph/).
 
 ### Implementing a Graph
 To make a fully described graph, engineers must define:
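For orientation, `graph.analyze` emits JSON matching the `JobGraph` shape that this commit moves into `databuild/structs.rs` (shown as a new file below). The following is a minimal, hypothetical Rust consumer of that output, not code from this commit; it assumes the `structs` library target and serde_json as dependencies:

    // Hypothetical sketch: read `graph.analyze` JSON from stdin and walk the
    // resulting JobGraph. Names here are illustrative only.
    use std::io::Read;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let mut buf = String::new();
        std::io::stdin().read_to_string(&mut buf)?;
        let graph: structs::JobGraph = serde_json::from_str(&buf)?;
        for task in &graph.nodes {
            // Print each job and the partitions it produces.
            println!("{} -> {:?}", task.job_label, task.config.outputs);
        }
        Ok(())
    }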
databuild/BUILD.bazel (new file, 20 lines)

@@ -0,0 +1,20 @@
+load("@rules_rust//rust:defs.bzl", "rust_library")
+
+filegroup(
+    name = "proto",
+    srcs = ["databuild.proto"],
+    visibility = ["//visibility:public"],
+)
+
+rust_library(
+    name = "structs",
+    srcs = [
+        "structs.rs",
+    ],
+    deps = [
+        "@crates//:serde",
+    ],
+    edition = "2021",
+    visibility = ["//visibility:public"],
+)
@@ -18,6 +18,7 @@ rust_binary(
     srcs = ["analyze.rs"],
     edition = "2021",
     deps = [
+        "//databuild:structs",
         "@crates//:serde",
         "@crates//:serde_json",
         "@crates//:log",
@@ -3,67 +3,9 @@ use std::env;
 use std::process::{Command, exit};
 use std::sync::{Arc, Mutex};
 use std::thread;
 use serde::{Deserialize, Serialize};
 use serde_json::{self, json};
 use log::{info, error};
 use simple_logger::SimpleLogger;
-use std::str::FromStr;
-
-// Data structures that mirror the Go implementation
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-enum DataDepType {
-    Query,
-    Materialize,
-}
-
-impl FromStr for DataDepType {
-    type Err = String;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s.to_lowercase().as_str() {
-            "query" => Ok(DataDepType::Query),
-            "materialize" => Ok(DataDepType::Materialize),
-            _ => Err(format!("Unknown DataDepType: {}", s)),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-struct DataDep {
-    #[serde(rename = "depType")]
-    dep_type: DataDepType,
-    #[serde(rename = "ref")]
-    reference: String,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-struct JobConfig {
-    inputs: Vec<DataDep>,
-    outputs: Vec<String>,
-    args: Vec<String>,
-    env: HashMap<String, String>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-struct Task {
-    #[serde(rename = "jobLabel")]
-    job_label: String,
-    config: JobConfig,
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-struct JobGraph {
-    outputs: Vec<String>,
-    nodes: Vec<Task>,
-}
-
-// Function to convert a job label to a configuration path
-fn job_label_to_cfg_path(job_label: &str) -> String {
-    let without_prefix = job_label.replace("//", "");
-    let with_slash = without_prefix.replace(":", "/");
-    format!(".{}.cfg", with_slash)
-}
+use structs::*;
 
 // Configure a job to produce the desired outputs
 fn configure(job_label: &str, output_refs: &[String]) -> Result<Vec<Task>, String> {
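The `job_label_to_cfg_path` helper removed above is small enough to check by hand. A quick hypothetical usage (the label is made up for illustration):

    // Mirror of the helper shown in the removed block above, with a worked example.
    fn job_label_to_cfg_path(job_label: &str) -> String {
        let without_prefix = job_label.replace("//", "");
        let with_slash = without_prefix.replace(":", "/");
        format!(".{}.cfg", with_slash)
    }

    fn main() {
        // "//examples/basic_job:test_job" becomes ".examples/basic_job/test_job.cfg"
        assert_eq!(
            job_label_to_cfg_path("//examples/basic_job:test_job"),
            ".examples/basic_job/test_job.cfg"
        );
    }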
@@ -5,7 +5,7 @@ set -e
 %{PREFIX}
 
-EXECUTABLE_BINARY="$(rlocation "databuild+/graph/$(basename "%{EXECUTABLE_PATH}")_")/execute"
+EXECUTABLE_BINARY="$(rlocation "databuild+/databuild/graph/$(basename "%{EXECUTABLE_PATH}")_")/execute"
 
 # Run the execution
 exec "${EXECUTABLE_BINARY}" "$@"
@@ -7,7 +7,7 @@ set -e
 # Locate the Rust binary using its standard runfiles path
 # Assumes workspace name is 'databuild'
-EXECUTABLE_BINARY="$(rlocation "databuild/graph/analyze")"
+EXECUTABLE_BINARY="$(rlocation "databuild/databuild/graph/analyze")"
 
 # Run the analysis
 exec "${EXECUTABLE_BINARY}" "$@"
@@ -1,5 +1,5 @@
 sh_test(
     name = "analyze_test",
     srcs = ["analyze_test.sh"],
-    data = ["//graph:analyze"],
+    data = ["//databuild/graph:analyze"],
 )
@@ -1,3 +1,3 @@
 #!/usr/bin/env bash
 
-DATABUILD_MODE=import_test DATABUILD_JOB_LOOKUP_PATH=foo DATABUILD_CANDIDATE_JOBS=bar graph/analyze
+DATABUILD_MODE=import_test DATABUILD_JOB_LOOKUP_PATH=foo DATABUILD_CANDIDATE_JOBS=bar databuild/graph/analyze
@@ -4,7 +4,7 @@ set -e
 %{RUNFILES_PREFIX}
 
 EXECUTE_BINARY="$(rlocation "_main/$(basename "%{EXECUTE_PATH}")")"
-JQ="$(rlocation "databuild+/runtime/$(basename "%{JQ_PATH}")")"
+JQ="$(rlocation "databuild+/databuild/runtime/$(basename "%{JQ_PATH}")")"
 
 # First argument should be the path to a config file
 CONFIG_FILE=${1:-}
@@ -104,7 +104,7 @@ _databuild_job_cfg_rule = rule(
         mandatory = True,
     ),
     "_template": attr.label(
-        default = "@databuild//runtime:simple_executable_wrapper.sh.tpl",
+        default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
         allow_single_file = True,
     ),
     "_bash_runfiles": attr.label(
@@ -169,11 +169,11 @@ _databuild_job_exec_rule = rule(
         cfg = "target",
     ),
     "_template": attr.label(
-        default = "@databuild//job:execute_wrapper.sh.tpl",
+        default = "@databuild//databuild/job:execute_wrapper.sh.tpl",
         allow_single_file = True,
     ),
     "_jq": attr.label(
-        default = "@databuild//runtime:jq",
+        default = "@databuild//databuild/runtime:jq",
         executable = True,
         cfg = "target",
     ),
@@ -323,7 +323,7 @@ _databuild_graph_lookup = rule(
         cfg = "target",
     ),
     "_template": attr.label(
-        default = "@databuild//runtime:simple_executable_wrapper.sh.tpl",
+        default = "@databuild//databuild/runtime:simple_executable_wrapper.sh.tpl",
         allow_single_file = True,
     ),
     "_bash_runfiles": attr.label(
@@ -410,7 +410,7 @@ _databuild_graph_analyze = rule(
         allow_empty = False,
     ),
     "_template": attr.label(
-        default = "@databuild//graph:rust_analyze_wrapper.sh.tpl",
+        default = "@databuild//databuild/graph:rust_analyze_wrapper.sh.tpl",
         allow_single_file = True,
     ),
     "_bash_runfiles": attr.label(
@@ -418,7 +418,7 @@ _databuild_graph_analyze = rule(
         allow_files = True,
     ),
     "_analyze": attr.label(
-        default = "@databuild//graph:analyze",
+        default = "@databuild//databuild/graph:analyze",
         executable = True,
         cfg = "target",
     ),
@@ -501,7 +501,7 @@ _databuild_graph_mermaid = rule(
         allow_empty = False,
     ),
     "_template": attr.label(
-        default = "@databuild//graph:go_analyze_wrapper.sh.tpl",
+        default = "@databuild//databuild/graph:rust_analyze_wrapper.sh.tpl",
         allow_single_file = True,
     ),
     "_bash_runfiles": attr.label(
@@ -509,7 +509,7 @@ _databuild_graph_mermaid = rule(
         allow_files = True,
     ),
     "_analyze": attr.label(
-        default = "@databuild//graph:analyze",
+        default = "@databuild//databuild/graph:analyze",
         executable = True,
         cfg = "target",
     ),
@@ -564,7 +564,7 @@ _databuild_graph_exec = rule(
         allow_empty = False,
     ),
     "_template": attr.label(
-        default = "@databuild//graph:go_exec_wrapper.sh.tpl",
+        default = "@databuild//databuild/graph:go_exec_wrapper.sh.tpl",
         allow_single_file = True,
     ),
     "_bash_runfiles": attr.label(
@@ -572,7 +572,7 @@ _databuild_graph_exec = rule(
         allow_files = True,
     ),
     "_execute": attr.label(
-        default = "@databuild//graph:execute",
+        default = "@databuild//databuild/graph:execute",
         executable = True,
         cfg = "target",
     )
databuild/structs.rs (new file, 52 lines)

@@ -0,0 +1,52 @@
+use std::collections::HashMap;
+use serde::{Deserialize, Serialize};
+use std::str::FromStr;
+
+// Data structures that mirror the Go implementation
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum DataDepType {
+    Query,
+    Materialize,
+}
+
+impl FromStr for DataDepType {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.to_lowercase().as_str() {
+            "query" => Ok(DataDepType::Query),
+            "materialize" => Ok(DataDepType::Materialize),
+            _ => Err(format!("Unknown DataDepType: {}", s)),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DataDep {
+    #[serde(rename = "depType")]
+    pub dep_type: DataDepType,
+    #[serde(rename = "ref")]
+    pub reference: String,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct JobConfig {
+    pub inputs: Vec<DataDep>,
+    pub outputs: Vec<String>,
+    pub args: Vec<String>,
+    pub env: HashMap<String, String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Task {
+    #[serde(rename = "jobLabel")]
+    pub job_label: String,
+    pub config: JobConfig,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct JobGraph {
+    pub outputs: Vec<String>,
+    pub nodes: Vec<Task>,
+}
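A quick, hypothetical check of the serde attributes in the new module (the sample label and refs are invented; assumes serde_json is available, as in the graph BUILD file):

    use structs::{DataDepType, Task};

    fn main() {
        // `rename_all = "lowercase"` plus the field renames mean the wire format
        // uses "query"/"materialize", "depType", "ref", and "jobLabel".
        let task: Task = serde_json::from_str(
            r#"{
                "jobLabel": "//examples/basic_job:test_job",
                "config": {
                    "inputs": [{"depType": "query", "ref": "some/partition"}],
                    "outputs": ["another/partition"],
                    "args": [],
                    "env": {}
                }
            }"#,
        )
        .unwrap();
        assert_eq!(task.config.inputs[0].dep_type, DataDepType::Query);

        // FromStr lowercases its input first, so parsing is case-insensitive.
        assert_eq!(
            "MATERIALIZE".parse::<DataDepType>(),
            Ok(DataDepType::Materialize)
        );
    }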
@@ -1,4 +1,4 @@
-load("@databuild//:rules.bzl", "databuild_graph", "databuild_job")
+load("@databuild//databuild:rules.bzl", "databuild_graph", "databuild_job")
 load("@rules_java//java:defs.bzl", "java_binary")
 
 platform(
@@ -2,7 +2,9 @@
 set -e
 
 # Test the .exec rule
+echo exec
 basic_graph.exec < <(basic_graph.analyze /tmp/databuild_test/examples/basic_graph/generated_number/pippin_salem_sadie)
 
 # Test the .build rule
+echo build
 basic_graph.build /tmp/databuild_test/examples/basic_graph/generated_number/pippin_salem_sadie
@@ -1,4 +1,4 @@
-load("@databuild//:rules.bzl", "databuild_job")
+load("@databuild//databuild:rules.bzl", "databuild_job")
 
 databuild_job(
     name = "test_job",
@@ -1,5 +1,5 @@
 load("//:py_repl.bzl", "py_repl")
-load("@databuild//:rules.bzl", "databuild_job")
+load("@databuild//databuild:rules.bzl", "databuild_job")
 load("@rules_python//python:pip.bzl", "compile_pip_requirements")
 load("@pypi//:requirements.bzl", "requirement")
@@ -1,11 +0,0 @@
-#!/bin/bash
-set -e
-
-%{RUNFILES_PREFIX}
-
-%{PREFIX}
-
-EXECUTABLE_BINARY="$(rlocation "databuild+/graph/$(basename "%{EXECUTABLE_PATH}")_")/analyze"
-
-# Run the configuration
-exec "${EXECUTABLE_BINARY}" "$@"