210 lines
4.4 KiB
Python
210 lines
4.4 KiB
Python
load("@databuild//databuild:rules.bzl", "databuild_graph", "databuild_job")
|
|
load("@pypi//:requirements.bzl", "requirement")
|
|
load("@rules_python//python:pip.bzl", "compile_pip_requirements")
|
|
load("//:py_repl.bzl", "py_repl")
|
|
|
|
# Dependency pinning for this package: `requirements.in` is the hand-edited
# input and `requirements_lock.txt` is the generated lock file.
# NOTE(review): the `@pypi` hub loaded above presumably consumes this lock
# file -- confirm in the workspace/module setup.
compile_pip_requirements(
    name = "py_requirements",
    src = "requirements.in",
    requirements_txt = "requirements_lock.txt",
)
|
|
|
|
# Target platform description: Linux on 64-bit ARM.
platform(
    name = "linux_arm",
    constraint_values = [
        "@platforms//os:linux",
        "@platforms//cpu:arm64",
    ],
)
|
|
|
|
# Target platform description: Linux on 64-bit x86.
platform(
    name = "linux_x86",
    constraint_values = [
        "@platforms//os:linux",
        "@platforms//cpu:x86_64",
    ],
)
|
|
|
|
# Podcast Reviews Graph
#
# Groups the six databuild jobs declared in this package into one graph.
# `lookup` points at the :job_lookup py_binary defined in this package.
# NOTE(review): presumably the lookup binary maps requested outputs to the
# job that produces them -- confirm against the databuild rule docs.
databuild_graph(
    name = "podcast_reviews_graph",
    jobs = [
        ":extract_reviews_job",
        ":extract_podcasts_job",
        ":categorize_reviews_job",
        ":phrase_modeling_job",
        ":phrase_stats_job",
        ":daily_summary_job",
    ],
    lookup = ":job_lookup",
    visibility = ["//visibility:public"],
)
|
|
|
|
# Executable referenced by the `lookup` attribute of :podcast_reviews_graph.
# It declares no deps, so job_lookup.py must not import any pip packages.
py_binary(
    name = "job_lookup",
    srcs = ["job_lookup.py"],
    main = "job_lookup.py",
)
|
|
|
|
# Extract Reviews Job
#
# Exposes :extract_reviews_binary as a databuild job; listed in the `jobs`
# of :podcast_reviews_graph.
databuild_job(
    name = "extract_reviews_job",
    binary = ":extract_reviews_binary",
    visibility = ["//visibility:public"],
)
|
|
|
|
# Executable behind :extract_reviews_job. Entry point is
# extract_reviews_job.py; duckdb_utils.py is compiled in as a shared helper
# module (the same file is listed by every job binary in this package).
py_binary(
    name = "extract_reviews_binary",
    srcs = [
        "duckdb_utils.py",
        "extract_reviews_job.py",
    ],
    main = "extract_reviews_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
|
|
|
|
# Extract Podcasts Job
#
# Exposes :extract_podcasts_binary as a databuild job; listed in the `jobs`
# of :podcast_reviews_graph.
databuild_job(
    name = "extract_podcasts_job",
    binary = ":extract_podcasts_binary",
    visibility = ["//visibility:public"],
)
|
|
|
|
# Executable behind :extract_podcasts_job. Entry point is
# extract_podcasts_job.py; duckdb_utils.py is compiled in as a shared helper
# module (the same file is listed by every job binary in this package).
py_binary(
    name = "extract_podcasts_binary",
    srcs = [
        "duckdb_utils.py",
        "extract_podcasts_job.py",
    ],
    main = "extract_podcasts_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
|
|
|
|
# Categorize Reviews Job
#
# Exposes :categorize_reviews_binary as a databuild job; listed in the
# `jobs` of :podcast_reviews_graph.
databuild_job(
    name = "categorize_reviews_job",
    binary = ":categorize_reviews_binary",
    visibility = ["//visibility:public"],
)
|
|
|
|
# Executable behind :categorize_reviews_job. Entry point is
# categorize_reviews_job.py; duckdb_utils.py is compiled in as a shared
# helper module (the same file is listed by every job binary in this package).
py_binary(
    name = "categorize_reviews_binary",
    srcs = [
        "categorize_reviews_job.py",
        "duckdb_utils.py",
    ],
    main = "categorize_reviews_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
|
|
|
|
# Phrase Modeling Job
#
# Exposes :phrase_modeling_binary as a databuild job; listed in the `jobs`
# of :podcast_reviews_graph.
databuild_job(
    name = "phrase_modeling_job",
    binary = ":phrase_modeling_binary",
    visibility = ["//visibility:public"],
)
|
|
|
|
# Executable behind :phrase_modeling_job. Entry point is
# phrase_modeling_job.py; duckdb_utils.py is compiled in as a shared helper
# module (the same file is listed by every job binary in this package).
py_binary(
    name = "phrase_modeling_binary",
    srcs = [
        "duckdb_utils.py",
        "phrase_modeling_job.py",
    ],
    main = "phrase_modeling_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
|
|
|
|
# Phrase Stats Job
#
# Exposes :phrase_stats_binary as a databuild job; listed in the `jobs` of
# :podcast_reviews_graph.
databuild_job(
    name = "phrase_stats_job",
    binary = ":phrase_stats_binary",
    visibility = ["//visibility:public"],
)
|
|
|
|
# Executable behind :phrase_stats_job. Entry point is phrase_stats_job.py;
# duckdb_utils.py is compiled in as a shared helper module (the same file is
# listed by every job binary in this package).
py_binary(
    name = "phrase_stats_binary",
    srcs = [
        "duckdb_utils.py",
        "phrase_stats_job.py",
    ],
    main = "phrase_stats_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
|
|
|
|
# Daily Summary Job
#
# Exposes :daily_summary_binary as a databuild job; listed in the `jobs` of
# :podcast_reviews_graph.
databuild_job(
    name = "daily_summary_job",
    binary = ":daily_summary_binary",
    visibility = ["//visibility:public"],
)
|
|
|
|
# Executable behind :daily_summary_job. Entry point is daily_summary_job.py;
# duckdb_utils.py is compiled in as a shared helper module (the same file is
# listed by every job binary in this package).
py_binary(
    name = "daily_summary_binary",
    srcs = [
        "daily_summary_job.py",
        "duckdb_utils.py",
    ],
    main = "daily_summary_job.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
|
|
|
|
# Test target
#
# Declared as py_test (not py_binary) so that `bazel test //...` actually
# executes it; `bazel run` on this label keeps working as before.
# The job modules and job_lookup.py are listed in srcs so test_jobs.py can
# import them directly; the pip deps mirror those of the job binaries.
# NOTE(review): assumes test_jobs.py exits non-zero on failure and needs no
# network or out-of-sandbox data -- confirm, or add `tags`/`data` as needed.
py_test(
    name = "test_jobs",
    srcs = [
        "categorize_reviews_job.py",
        "daily_summary_job.py",
        "duckdb_utils.py",
        "extract_podcasts_job.py",
        "extract_reviews_job.py",
        "job_lookup.py",
        "phrase_modeling_job.py",
        "phrase_stats_job.py",
        "test_jobs.py",
    ],
    main = "test_jobs.py",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
        requirement("pandas"),
        requirement("pyarrow"),
    ],
)
|
|
|
|
# Interactive target built with the py_repl macro loaded from //:py_repl.bzl.
# Only duckdb and pydantic are made available here; pandas and pyarrow, which
# the job binaries depend on, are not listed.
py_repl(
    name = "repl",
    deps = [
        requirement("duckdb"),
        requirement("pydantic"),
    ],
)
|