load("@databuild//databuild:rules.bzl", "databuild_graph", "databuild_job")
load("@pypi//:requirements.bzl", "requirement")
load("@rules_python//python:pip.bzl", "compile_pip_requirements")
load("//:py_repl.bzl", "py_repl")

# Third-party packages needed by every job binary and by the test runner.
# Declared once so the targets below cannot drift apart.
PIPELINE_DEPS = [
    requirement("duckdb"),
    requirement("pydantic"),
    requirement("pandas"),
    requirement("pyarrow"),
]

# Pip requirements: requirements.in is the source, requirements_lock.txt the
# locked output.
compile_pip_requirements(
    name = "py_requirements",
    src = "requirements.in",
    requirements_txt = "requirements_lock.txt",
)

# Target platforms: Linux on arm64 and Linux on x86_64.
platform(
    name = "linux_arm",
    constraint_values = [
        "@platforms//os:linux",
        "@platforms//cpu:arm64",
    ],
)

platform(
    name = "linux_x86",
    constraint_values = [
        "@platforms//os:linux",
        "@platforms//cpu:x86_64",
    ],
)

# ---------------------------------------------------------------------------
# Podcast reviews graph: the six jobs below plus the lookup binary.
# ---------------------------------------------------------------------------
databuild_graph(
    name = "podcast_reviews_graph",
    jobs = [
        ":extract_reviews_job",
        ":extract_podcasts_job",
        ":categorize_reviews_job",
        ":phrase_modeling_job",
        ":phrase_stats_job",
        ":daily_summary_job",
    ],
    lookup = ":job_lookup",
    visibility = ["//visibility:public"],
)

# Binary referenced by the graph's `lookup` attribute.
py_binary(
    name = "job_lookup",
    srcs = ["job_lookup.py"],
    main = "job_lookup.py",
)

# ---------------------------------------------------------------------------
# Jobs. Each job is a databuild_job wrapping a py_binary whose sources are the
# job's own module plus the shared duckdb_utils.py helper.
# ---------------------------------------------------------------------------

# Job: extract reviews
databuild_job(
    name = "extract_reviews_job",
    binary = ":extract_reviews_binary",
    visibility = ["//visibility:public"],
)

py_binary(
    name = "extract_reviews_binary",
    srcs = [
        "duckdb_utils.py",
        "extract_reviews_job.py",
    ],
    main = "extract_reviews_job.py",
    deps = PIPELINE_DEPS,
)

# Job: extract podcasts
databuild_job(
    name = "extract_podcasts_job",
    binary = ":extract_podcasts_binary",
    visibility = ["//visibility:public"],
)

py_binary(
    name = "extract_podcasts_binary",
    srcs = [
        "duckdb_utils.py",
        "extract_podcasts_job.py",
    ],
    main = "extract_podcasts_job.py",
    deps = PIPELINE_DEPS,
)

# Job: categorize reviews
databuild_job(
    name = "categorize_reviews_job",
    binary = ":categorize_reviews_binary",
    visibility = ["//visibility:public"],
)

py_binary(
    name = "categorize_reviews_binary",
    srcs = [
        "categorize_reviews_job.py",
        "duckdb_utils.py",
    ],
    main = "categorize_reviews_job.py",
    deps = PIPELINE_DEPS,
)

# Job: phrase modeling
databuild_job(
    name = "phrase_modeling_job",
    binary = ":phrase_modeling_binary",
    visibility = ["//visibility:public"],
)

py_binary(
    name = "phrase_modeling_binary",
    srcs = [
        "duckdb_utils.py",
        "phrase_modeling_job.py",
    ],
    main = "phrase_modeling_job.py",
    deps = PIPELINE_DEPS,
)

# Job: phrase stats
databuild_job(
    name = "phrase_stats_job",
    binary = ":phrase_stats_binary",
    visibility = ["//visibility:public"],
)

py_binary(
    name = "phrase_stats_binary",
    srcs = [
        "duckdb_utils.py",
        "phrase_stats_job.py",
    ],
    main = "phrase_stats_job.py",
    deps = PIPELINE_DEPS,
)

# Job: daily summary
databuild_job(
    name = "daily_summary_job",
    binary = ":daily_summary_binary",
    visibility = ["//visibility:public"],
)

py_binary(
    name = "daily_summary_binary",
    srcs = [
        "daily_summary_job.py",
        "duckdb_utils.py",
    ],
    main = "daily_summary_job.py",
    deps = PIPELINE_DEPS,
)

# ---------------------------------------------------------------------------
# Test runner: bundles every job module, the lookup module and the shared
# helper behind test_jobs.py.
# NOTE(review): declared as py_binary, so it is not picked up by `bazel test`;
# confirm whether py_test was intended.
# ---------------------------------------------------------------------------
py_binary(
    name = "test_jobs",
    srcs = [
        "categorize_reviews_job.py",
        "daily_summary_job.py",
        "duckdb_utils.py",
        "extract_podcasts_job.py",
        "extract_reviews_job.py",
        "job_lookup.py",
        "phrase_modeling_job.py",
        "phrase_stats_job.py",
        "test_jobs.py",
    ],
    main = "test_jobs.py",
    deps = PIPELINE_DEPS,
)

# REPL target; it is given only duckdb and pydantic, not the full
# PIPELINE_DEPS set.
py_repl(
    name = "repl",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
    ],
)