Implement prost struct generation

This commit is contained in:
Stuart Axelbrooke 2025-07-03 22:36:12 -07:00
parent 9c5e57ac81
commit 6fc49006ad
6 changed files with 847 additions and 140 deletions

View file

@ -9,16 +9,8 @@ proto_library(
visibility = ["//visibility:public"],
)
# Step 1: Use protoc to generate file descriptor
genrule(
name = "simple_descriptor",
srcs = ["simple.proto"],
outs = ["simple.desc"],
tools = ["@com_google_protobuf//:protoc"],
cmd = "$(location @com_google_protobuf//:protoc) --descriptor_set_out=$@ --include_source_info --include_imports $(SRCS)",
)
# Step 2: Use a Rust binary to generate prost code from the descriptor
# Prost generator binary for converting proto files to Rust code
rust_binary(
name = "prost_generator",
srcs = ["prost_generator.rs"],
@ -26,31 +18,23 @@ rust_binary(
"@crates//:prost",
"@crates//:prost-build",
"@crates//:tempfile",
"@crates//:serde",
],
edition = "2021",
)
# Step 3: Generate Rust code using our prost generator
# Generate Rust code for simple proto using prost generator
genrule(
name = "generate_simple_rust",
srcs = [
"simple.proto",
":simple_descriptor",
],
srcs = ["simple.proto"],
outs = ["simple.rs"],
tools = [":prost_generator"],
cmd = "$(location :prost_generator) $(location simple.proto) $(location :simple_descriptor) $@",
tools = [
":prost_generator",
"@com_google_protobuf//:protoc",
],
cmd = "PROTOC=$(location @com_google_protobuf//:protoc) $(location :prost_generator) $(location simple.proto) /dev/null $@",
)
# Test library using simple generated code
rust_library(
name = "simple_lib",
srcs = [":generate_simple_rust"],
deps = [
"@crates//:prost",
],
edition = "2021",
)
# Test the simple generation
rust_test(
@ -61,6 +45,51 @@ rust_test(
],
deps = [
"@crates//:prost",
"@crates//:serde",
"@crates//:serde_json",
],
edition = "2021",
)
# Generate Rust code for databuild proto
genrule(
# Runs the :prost_generator binary over databuild.proto and writes the
# result to databuild.rs.
name = "generate_databuild_rust",
srcs = [
"databuild.proto",
],
outs = ["databuild.rs"],
# protoc is listed as a tool so its location can be handed to the generator
# through the PROTOC environment variable in `cmd`.
tools = [
":prost_generator",
"@com_google_protobuf//:protoc",
],
# Generator arguments, in order: proto file, descriptor file, output file.
# The generator currently ignores the descriptor argument, so /dev/null is
# passed as a placeholder.
cmd = "PROTOC=$(location @com_google_protobuf//:protoc) $(location :prost_generator) $(location databuild.proto) /dev/null $@",
)
# DataBuild library using generated prost code
rust_library(
# Publicly visible Rust library whose only source is the file produced by
# :generate_databuild_rust.
name = "databuild",
srcs = [":generate_databuild_rust"],
# serde is needed because the generator attaches serde::Serialize and
# serde::Deserialize derives to every generated type.
# NOTE(review): databuild.proto no longer imports google/protobuf well-known
# types, so prost-types may be unnecessary here; confirm before removing.
deps = [
"@crates//:prost",
"@crates//:prost-types",
"@crates//:serde",
],
edition = "2021",
visibility = ["//visibility:public"],
)
# Test the databuild generation
rust_test(
# Unit tests for the generated databuild types.
name = "databuild_test",
# databuild_test.rs pulls the generated code in with include!("databuild.rs"),
# so the generated file is listed as a source next to the test file.
srcs = [
"databuild_test.rs",
":generate_databuild_rust",
],
# prost covers the encode/decode round-trip test; serde and serde_json cover
# the JSON round-trip test.
deps = [
"@crates//:prost",
"@crates//:serde",
"@crates//:serde_json",
],
edition = "2021",
)
@ -72,15 +101,3 @@ filegroup(
visibility = ["//visibility:public"],
)
# Legacy structs library (deprecated - use :databuild instead)
rust_library(
name = "structs",
srcs = [
"structs.rs",
],
deps = [
"@crates//:serde",
],
edition = "2021",
visibility = ["//visibility:public"],
)

View file

@ -1,8 +1,5 @@
syntax = "proto3";
import "google/protobuf/timestamp.proto";
import "google/protobuf/struct.proto";
package databuild.v1;
message PartitionRef {
@ -68,11 +65,11 @@ message PartitionManifest {
// Input partition manifests
repeated PartitionManifest inputs = 2;
// Start time of job execution (Unix timestamp)
google.protobuf.Timestamp start_time = 3;
// Start time of job execution (Unix timestamp seconds)
int64 start_time = 3;
// End time of job execution (Unix timestamp)
google.protobuf.Timestamp end_time = 4;
// End time of job execution (Unix timestamp seconds)
int64 end_time = 4;
// The configuration used to run the job
Task task = 5;
@ -193,7 +190,7 @@ enum PartitionStatus {
message PartitionEvent {
// Event identity
string partition_event_id = 1;
google.protobuf.Timestamp timestamp = 2;
int64 timestamp = 2;
// Partition identification
PartitionRef partition_ref = 3;
@ -233,8 +230,8 @@ message PartitionEventQuery {
}
message TimeRange {
google.protobuf.Timestamp start = 1;
google.protobuf.Timestamp end = 2;
int64 start = 1;
int64 end = 2;
}
message OrderBy {
@ -246,7 +243,7 @@ message OrderBy {
message EventStreamFilter {
repeated PartitionRef partition_refs = 1;
repeated PartitionStatus statuses = 2;
google.protobuf.Timestamp since = 3;
int64 since = 3;
}
// Coordination support
@ -255,7 +252,7 @@ message ActiveBuild {
string job_graph_run_id = 2;
PartitionStatus status = 3;
repeated string requesting_clients = 4;
google.protobuf.Timestamp started_at = 5;
int64 started_at = 5;
}
message DependencyGraph {
@ -267,7 +264,7 @@ message DependencyGraph {
message DependencyNode {
PartitionRef partition_ref = 1;
PartitionStatus status = 2;
google.protobuf.Timestamp last_updated = 3;
int64 last_updated = 3;
}
message DependencyEdge {
@ -341,12 +338,12 @@ message PartitionLocation {
message DelegationToken {
string job_graph_run_id = 1;
google.protobuf.Timestamp estimated_completion = 2;
int64 estimated_completion = 2;
}
message BuildToken {
string job_graph_run_id = 1;
google.protobuf.Timestamp started_at = 2;
int64 started_at = 2;
}
message GetActiveBuildStatusRequest {
@ -462,7 +459,7 @@ message GraphEvent {
// The sequence of events that completely describes progress of the job graph build
message JobGraphRunEvent {
string job_graph_run_event_id = 1;
google.protobuf.Timestamp timestamp = 2;
int64 timestamp = 2;
// Sum type for potential events
JobEvent job_event = 10;

View file

@ -0,0 +1,79 @@
// Include the generated protobuf code
include!("databuild.rs");
#[cfg(test)]
mod tests {
    use super::*;
    use prost::Message;

    /// Builds a `PartitionRef` whose `str` field is set to `name`.
    fn make_ref(name: &str) -> PartitionRef {
        PartitionRef { str: name.to_string() }
    }

    /// A `PartitionRef` keeps the string it was constructed with.
    #[test]
    fn test_partition_ref_creation() {
        let created = make_ref("test-partition");
        assert_eq!(created.str, "test-partition");
    }

    /// A default `JobConfig` accepts pushed outputs and args and exposes them in order.
    #[test]
    fn test_job_config_creation() {
        let mut config = JobConfig::default();
        config.outputs.push(make_ref("output-partition"));
        config.args.push("arg1".to_string());

        assert_eq!(config.outputs.len(), 1);
        assert_eq!(config.args.len(), 1);
        assert_eq!(config.outputs[0].str, "output-partition");
        assert_eq!(config.args[0], "arg1");
    }

    /// Encoding with prost and decoding the bytes again yields the same `str` value.
    #[test]
    fn test_prost_serialization() {
        let original = make_ref("test-partition");

        // Protobuf wire-format bytes produced by prost.
        let mut wire_bytes: Vec<u8> = Vec::new();
        original.encode(&mut wire_bytes).expect("Failed to encode");

        let round_tripped = PartitionRef::decode(wire_bytes.as_slice()).expect("Failed to decode");
        assert_eq!(original.str, round_tripped.str);
    }

    /// Serializing to JSON with serde and parsing it back yields the same `str` value.
    #[test]
    fn test_serde_serialization() {
        let original = make_ref("test-partition");

        let as_json = serde_json::to_string(&original).expect("Failed to serialize to JSON");
        let round_tripped: PartitionRef =
            serde_json::from_str(&as_json).expect("Failed to deserialize from JSON");

        assert_eq!(original.str, round_tripped.str);
    }

    /// A `JobGraph` can carry a label and a list of output partitions.
    #[test]
    fn test_job_graph_creation() {
        // Constructed only to check that `JobLabel` exists with a `label` field.
        let _job_label = JobLabel { label: "//my:job".to_string() };

        let mut graph = JobGraph::default();
        graph.label = Some(GraphLabel { label: "//my:graph".to_string() });
        graph.outputs.push(make_ref("output"));

        assert!(graph.label.is_some());
        assert_eq!(graph.label.unwrap().label, "//my:graph");
        assert_eq!(graph.outputs.len(), 1);
    }

    /// `DepType` variants map to the expected integer discriminants.
    #[test]
    fn test_dep_type_enum() {
        assert_eq!(DepType::Query as i32, 0);
        assert_eq!(DepType::Materialize as i32, 1);
    }
}

View file

@ -10,90 +10,83 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
let proto_file = &args[1];
let _descriptor_file = &args[2]; // We'll use this when we get more sophisticated
let _descriptor_file = &args[2]; // For future use if needed
let output_file = &args[3];
// Read the proto file and generate Rust code directly
let proto_content = fs::read_to_string(proto_file)?;
// Generate Rust code from the proto content
let rust_code = generate_rust_from_proto(&proto_content);
fs::write(output_file, rust_code)?;
// Generate Rust code using proper prost-build
generate_prost_code(proto_file, output_file)?;
Ok(())
}
fn generate_rust_from_proto(proto_content: &str) -> String {
// Simple parser to extract message names and fields
let mut rust_code = String::from("// Generated by hermetic prost generator\n\n");
fn generate_prost_code(proto_file: &str, output_file: &str) -> Result<(), Box<dyn std::error::Error>> {
// Create a temporary directory for prost-build output
let temp_dir = tempfile::tempdir()?;
let temp_path = temp_dir.path();
// Look for message definitions
for line in proto_content.lines() {
let line = line.trim();
if line.starts_with("message ") {
if let Some(message_name) = line.strip_prefix("message ").and_then(|s| s.split_whitespace().next()) {
rust_code.push_str(&format!("#[derive(Clone, PartialEq, Debug)]\npub struct {} {{\n", message_name));
rust_code.push_str(" // Fields will be added here\n");
rust_code.push_str("}\n\n");
rust_code.push_str(&format!("impl {} {{\n", message_name));
rust_code.push_str(" pub fn new() -> Self {\n");
rust_code.push_str(" Self {\n");
rust_code.push_str(" // Default values will be added here\n");
rust_code.push_str(" }\n");
rust_code.push_str(" }\n");
rust_code.push_str("}\n\n");
// Configure prost-build
let mut config = prost_build::Config::new();
// Set output directory
config.out_dir(temp_path);
// Configure derive traits - prost::Message provides Debug automatically
config.type_attribute(".", "#[derive(serde::Serialize, serde::Deserialize)]");
// Try to find protoc in the environment (Bazel should provide this)
if let Ok(protoc_path) = env::var("PROTOC") {
config.protoc_executable(&protoc_path);
}
// Get the directory containing the proto file for include path
let proto_path = Path::new(proto_file);
let include_dir = proto_path.parent().unwrap_or(Path::new("."));
// Try to compile the protos
match config.compile_protos(&[proto_file], &[include_dir]) {
Ok(_) => {
// Find and read the generated files
let generated_content = find_and_read_generated_files(temp_path)?;
fs::write(output_file, generated_content)?;
}
Err(e) => {
eprintln!("prost-build failed: {}", e);
eprintln!("Available environment variables:");
for (key, value) in env::vars() {
if key.contains("PROTOC") || key.contains("PATH") {
eprintln!(" {}: {}", key, value);
}
}
// This error should be surfaced to help debug the protoc issue
return Err(format!("prost-build compilation failed: {}. This indicates protoc is not available in the Bazel sandbox. Consider passing protoc path via environment or simplifying the proto dependencies.", e).into());
}
}
// Add some basic implementations for our simple test case
if proto_content.contains("message Person") {
rust_code = r#"// Generated by hermetic prost generator
#[derive(Clone, PartialEq, Debug)]
pub struct Person {
pub name: String,
pub age: i32,
pub email: String,
Ok(())
}
#[derive(Clone, PartialEq, Debug)]
pub struct GetPersonRequest {
pub person_id: String,
}
#[derive(Clone, PartialEq, Debug)]
pub struct GetPersonResponse {
pub person: Option<Person>,
}
impl Person {
pub fn new() -> Self {
Self {
name: String::new(),
age: 0,
email: String::new(),
fn find_and_read_generated_files(dir: &Path) -> Result<String, Box<dyn std::error::Error>> {
let mut content = String::from("// Generated by prost-build\n\n");
let mut found_files = false;
for entry in fs::read_dir(dir)? {
let entry = entry?;
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("rs") {
let file_content = fs::read_to_string(&path)?;
content.push_str(&file_content);
content.push('\n');
found_files = true;
}
}
}
impl GetPersonRequest {
pub fn new() -> Self {
Self {
person_id: String::new(),
}
}
}
impl GetPersonResponse {
pub fn new() -> Self {
Self {
person: None,
}
}
}
"#.to_string();
}
rust_code
}
if !found_files {
return Err("No generated Rust files found from prost-build".into());
}
Ok(content)
}

View file

@ -7,7 +7,7 @@ mod tests {
#[test]
fn test_person_creation() {
let person = Person::new();
let person = Person::default();
assert_eq!(person.name, "");
assert_eq!(person.age, 0);
assert_eq!(person.email, "");
@ -27,7 +27,7 @@ mod tests {
#[test]
fn test_get_person_request() {
let request = GetPersonRequest::new();
let request = GetPersonRequest::default();
assert_eq!(request.person_id, "");
let request_with_id = GetPersonRequest {
@ -38,7 +38,7 @@ mod tests {
#[test]
fn test_get_person_response() {
let response = GetPersonResponse::new();
let response = GetPersonResponse::default();
assert!(response.person.is_none());
let person = Person {
@ -54,4 +54,47 @@ mod tests {
assert!(response_with_person.person.is_some());
assert_eq!(response_with_person.person.unwrap().name, "Bob");
}
#[test]
fn test_prost_serialization() {
    use prost::Message;

    // Round-trip a fully populated Person through prost's binary encoding.
    let original = Person {
        name: "Alice".to_string(),
        age: 30,
        email: "alice@example.com".to_string(),
    };

    // Protobuf wire-format bytes produced by prost.
    let mut wire_bytes: Vec<u8> = Vec::new();
    original.encode(&mut wire_bytes).expect("Failed to encode");

    // Parse the bytes back into a Person and compare field by field.
    let round_tripped = Person::decode(wire_bytes.as_slice()).expect("Failed to decode");
    assert_eq!(original.name, round_tripped.name);
    assert_eq!(original.age, round_tripped.age);
    assert_eq!(original.email, round_tripped.email);
}
#[test]
fn test_serde_serialization() {
    // Round-trip a fully populated Person through JSON using the serde derives.
    let original = Person {
        name: "Charlie".to_string(),
        age: 28,
        email: "charlie@example.com".to_string(),
    };

    let as_json = serde_json::to_string(&original).expect("Failed to serialize to JSON");
    let round_tripped: Person =
        serde_json::from_str(&as_json).expect("Failed to deserialize from JSON");

    // Every field must survive the JSON round trip unchanged.
    assert_eq!(original.name, round_tripped.name);
    assert_eq!(original.age, round_tripped.age);
    assert_eq!(original.email, round_tripped.email);
}
}

File diff suppressed because one or more lines are too long