databuild/examples/podcast_reviews/BUILD.bazel
2025-07-02 21:49:15 -07:00

222 lines
4.7 KiB
Python

load("@databuild//databuild:rules.bzl", "databuild_graph", "databuild_job")
load("@pypi//:requirements.bzl", "requirement")
load("@rules_python//python:pip.bzl", "compile_pip_requirements")
load("//:py_repl.bzl", "py_repl")
# Dependency pinning for this example: `requirements.in` is the input file
# and `requirements_lock.txt` is the compiled lock file.
compile_pip_requirements(
    name = "py_requirements",
    src = "requirements.in",
    requirements_txt = "requirements_lock.txt",
)
# Platform: Linux on an arm64 CPU.
# NOTE(review): nothing in this file references the platform; presumably it
# is selected via --platforms on the command line — confirm.
platform(
    name = "linux_arm",
    constraint_values = [
        "@platforms//os:linux",
        "@platforms//cpu:arm64",
    ],
)
# Platform: Linux on an x86_64 CPU.
# NOTE(review): nothing in this file references the platform; presumably it
# is selected via --platforms on the command line — confirm.
platform(
    name = "linux_x86",
    constraint_values = [
        "@platforms//os:linux",
        "@platforms//cpu:x86_64",
    ],
)
# Podcast Reviews Graph
#
# Groups the six podcast-review jobs declared in this package into a single
# databuild graph and wires `:job_lookup` in as the graph's `lookup` target.
# The graph is publicly visible.
databuild_graph(
    name = "podcast_reviews_graph",
    jobs = [
        ":extract_reviews_job",
        ":extract_podcasts_job",
        ":categorize_reviews_job",
        ":phrase_modeling_job",
        ":phrase_stats_job",
        ":daily_summary_job",
    ],
    lookup = ":job_lookup",
    visibility = ["//visibility:public"],
)
# Python binary built from `job_lookup.py`; it is the target passed as
# `lookup` to `podcast_reviews_graph`. It declares no pip dependencies.
py_binary(
    name = "job_lookup",
    srcs = ["job_lookup.py"],
    main = "job_lookup.py",
)
# Extract Reviews Job
#
# Publicly visible databuild job backed by `:extract_reviews_binary`.
databuild_job(
    name = "extract_reviews_job",
    binary = ":extract_reviews_binary",
    visibility = ["//visibility:public"],
)

# Executable for the job above: entry point is `extract_reviews_job.py`,
# bundled with the `duckdb_utils.py` module.
py_binary(
    name = "extract_reviews_binary",
    srcs = [
        "duckdb_utils.py",
        "extract_reviews_job.py",
    ],
    main = "extract_reviews_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
# Extract Podcasts Job
#
# Publicly visible databuild job backed by `:extract_podcasts_binary`.
databuild_job(
    name = "extract_podcasts_job",
    binary = ":extract_podcasts_binary",
    visibility = ["//visibility:public"],
)

# Executable for the job above: entry point is `extract_podcasts_job.py`,
# bundled with the `duckdb_utils.py` module.
py_binary(
    name = "extract_podcasts_binary",
    srcs = [
        "duckdb_utils.py",
        "extract_podcasts_job.py",
    ],
    main = "extract_podcasts_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
# Categorize Reviews Job
#
# Publicly visible databuild job backed by `:categorize_reviews_binary`.
databuild_job(
    name = "categorize_reviews_job",
    binary = ":categorize_reviews_binary",
    visibility = ["//visibility:public"],
)

# Executable for the job above: entry point is `categorize_reviews_job.py`,
# bundled with the `duckdb_utils.py` module.
py_binary(
    name = "categorize_reviews_binary",
    srcs = [
        "categorize_reviews_job.py",
        "duckdb_utils.py",
    ],
    main = "categorize_reviews_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
# Phrase Modeling Job
#
# Publicly visible databuild job backed by `:phrase_modeling_binary`.
databuild_job(
    name = "phrase_modeling_job",
    binary = ":phrase_modeling_binary",
    visibility = ["//visibility:public"],
)

# Executable for the job above: entry point is `phrase_modeling_job.py`,
# bundled with the `duckdb_utils.py` module.
py_binary(
    name = "phrase_modeling_binary",
    srcs = [
        "duckdb_utils.py",
        "phrase_modeling_job.py",
    ],
    main = "phrase_modeling_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
# Phrase Stats Job
#
# Publicly visible databuild job backed by `:phrase_stats_binary`.
databuild_job(
    name = "phrase_stats_job",
    binary = ":phrase_stats_binary",
    visibility = ["//visibility:public"],
)

# Executable for the job above: entry point is `phrase_stats_job.py`,
# bundled with the `duckdb_utils.py` module.
py_binary(
    name = "phrase_stats_binary",
    srcs = [
        "duckdb_utils.py",
        "phrase_stats_job.py",
    ],
    main = "phrase_stats_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
# Daily Summary Job
#
# Publicly visible databuild job backed by `:daily_summary_binary`.
databuild_job(
    name = "daily_summary_job",
    binary = ":daily_summary_binary",
    visibility = ["//visibility:public"],
)

# Executable for the job above: entry point is `daily_summary_job.py`,
# bundled with the `duckdb_utils.py` module.
py_binary(
    name = "daily_summary_binary",
    srcs = [
        "daily_summary_job.py",
        "duckdb_utils.py",
    ],
    main = "daily_summary_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
# Legacy test job (kept for compatibility).
#
# Not listed in the `jobs` of `podcast_reviews_graph`, and it sets no
# `visibility`, so the package default applies.
databuild_job(
    name = "test_job",
    binary = ":test_job_binary",
)

# Executable for the legacy job: runs `unified_job.py` with no pip
# dependencies declared.
py_binary(
    name = "test_job_binary",
    srcs = ["unified_job.py"],
    main = "unified_job.py",
)
# Test target
#
# Runs `test_jobs.py` with the job modules, `job_lookup.py` and
# `duckdb_utils.py` included as sources of the same binary.
# NOTE(review): this is declared as `py_binary`, not `py_test`, so it is
# invoked with `bazel run` and is not executed by `bazel test` — confirm
# that this is intentional.
py_binary(
    name = "test_jobs",
    srcs = [
        "categorize_reviews_job.py",
        "daily_summary_job.py",
        "duckdb_utils.py",
        "extract_podcasts_job.py",
        "extract_reviews_job.py",
        "job_lookup.py",
        "phrase_modeling_job.py",
        "phrase_stats_job.py",
        "test_jobs.py",
    ],
    main = "test_jobs.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
# REPL target created by the `py_repl` macro from `//:py_repl.bzl`, with
# duckdb and pydantic as dependencies.
# NOTE(review): unlike the job binaries, pandas and pyarrow are not listed
# here — confirm whether the REPL is expected to import them.
py_repl(
    name = "repl",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
    ],
)