Integrating clickhouse for metrics.
This commit is contained in:
@@ -94,7 +94,7 @@ config :spark,
|
|||||||
]
|
]
|
||||||
|
|
||||||
config :mixer,
|
config :mixer,
|
||||||
ecto_repos: [Mixer.Repo],
|
ecto_repos: [Mixer.Repo, Mixer.ClickhouseRepo],
|
||||||
generators: [timestamp_type: :utc_datetime],
|
generators: [timestamp_type: :utc_datetime],
|
||||||
ash_domains: [Mixer.Accounts, Mixer.Posts],
|
ash_domains: [Mixer.Accounts, Mixer.Posts],
|
||||||
ash_authentication: [return_error_on_invalid_magic_link_token?: true]
|
ash_authentication: [return_error_on_invalid_magic_link_token?: true]
|
||||||
@@ -158,6 +158,11 @@ config :logger, :default_formatter,
|
|||||||
# Use Jason for JSON parsing in Phoenix
|
# Use Jason for JSON parsing in Phoenix
|
||||||
config :phoenix, :json_library, Jason
|
config :phoenix, :json_library, Jason
|
||||||
|
|
||||||
|
# ClickHouse repo — migrations live in priv/clickhouse/migrations
|
||||||
|
config :mixer, Mixer.ClickhouseRepo,
|
||||||
|
priv: "priv/clickhouse",
|
||||||
|
migration_source: "ch_schema_migrations"
|
||||||
|
|
||||||
# Import environment specific config. This must remain at the bottom
|
# Import environment specific config. This must remain at the bottom
|
||||||
# of this file so it overrides the configuration defined above.
|
# of this file so it overrides the configuration defined above.
|
||||||
import_config "#{config_env()}.exs"
|
import_config "#{config_env()}.exs"
|
||||||
|
|||||||
@@ -106,3 +106,12 @@ config :ex_aws, :s3,
|
|||||||
config :waffle,
|
config :waffle,
|
||||||
bucket: "mixer-bucket",
|
bucket: "mixer-bucket",
|
||||||
asset_host: "http://localhost:9000"
|
asset_host: "http://localhost:9000"
|
||||||
|
|
||||||
|
# ClickHouse (default local install)
|
||||||
|
config :mixer, Mixer.ClickhouseRepo,
|
||||||
|
scheme: "http",
|
||||||
|
hostname: "localhost",
|
||||||
|
port: 8123,
|
||||||
|
database: "mixer_metrics",
|
||||||
|
username: "default",
|
||||||
|
password: ""
|
||||||
|
|||||||
@@ -22,6 +22,11 @@ end
|
|||||||
|
|
||||||
config :mixer, MixerWeb.Endpoint, http: [port: String.to_integer(System.get_env("PORT", "4000"))]
|
config :mixer, MixerWeb.Endpoint, http: [port: String.to_integer(System.get_env("PORT", "4000"))]
|
||||||
|
|
||||||
|
# ClickHouse is available in all environments via env vars when set
|
||||||
|
if clickhouse_url = System.get_env("CLICKHOUSE_URL") do
|
||||||
|
config :mixer, Mixer.ClickhouseRepo, url: clickhouse_url
|
||||||
|
end
|
||||||
|
|
||||||
if config_env() == :prod do
|
if config_env() == :prod do
|
||||||
database_url =
|
database_url =
|
||||||
System.get_env("DATABASE_URL") ||
|
System.get_env("DATABASE_URL") ||
|
||||||
@@ -40,6 +45,19 @@ if config_env() == :prod do
|
|||||||
# pool_count: 4,
|
# pool_count: 4,
|
||||||
socket_options: maybe_ipv6
|
socket_options: maybe_ipv6
|
||||||
|
|
||||||
|
# ClickHouse — configure via CLICKHOUSE_URL or individual vars
|
||||||
|
unless System.get_env("CLICKHOUSE_URL") do
|
||||||
|
config :mixer, Mixer.ClickhouseRepo,
|
||||||
|
scheme: System.get_env("CLICKHOUSE_SCHEME", "http"),
|
||||||
|
hostname:
|
||||||
|
System.get_env("CLICKHOUSE_HOST") ||
|
||||||
|
raise("Missing environment variable `CLICKHOUSE_HOST`!"),
|
||||||
|
port: String.to_integer(System.get_env("CLICKHOUSE_PORT", "8123")),
|
||||||
|
database: System.get_env("CLICKHOUSE_DATABASE", "mixer_metrics"),
|
||||||
|
username: System.get_env("CLICKHOUSE_USERNAME", "default"),
|
||||||
|
password: System.get_env("CLICKHOUSE_PASSWORD", "")
|
||||||
|
end
|
||||||
|
|
||||||
# The secret key base is used to sign/encrypt cookies and other secrets.
|
# The secret key base is used to sign/encrypt cookies and other secrets.
|
||||||
# A default value is used in config/dev.exs and config/test.exs but you
|
# A default value is used in config/dev.exs and config/test.exs but you
|
||||||
# want to use a different value for prod and you most likely don't want
|
# want to use a different value for prod and you most likely don't want
|
||||||
|
|||||||
@@ -42,3 +42,12 @@ config :phoenix_live_view,
|
|||||||
# Sort query params output of verified routes for robust url comparisons
|
# Sort query params output of verified routes for robust url comparisons
|
||||||
config :phoenix,
|
config :phoenix,
|
||||||
sort_verified_routes_query_params: true
|
sort_verified_routes_query_params: true
|
||||||
|
|
||||||
|
# ClickHouse — point at a dedicated test database
|
||||||
|
config :mixer, Mixer.ClickhouseRepo,
|
||||||
|
scheme: "http",
|
||||||
|
hostname: "localhost",
|
||||||
|
port: 8123,
|
||||||
|
database: "mixer_metrics_test",
|
||||||
|
username: "default",
|
||||||
|
password: ""
|
||||||
|
|||||||
@@ -10,6 +10,10 @@ defmodule Mixer.Application do
|
|||||||
children = [
|
children = [
|
||||||
MixerWeb.Telemetry,
|
MixerWeb.Telemetry,
|
||||||
Mixer.Repo,
|
Mixer.Repo,
|
||||||
|
# ClickHouse repo for analytics — started before the metrics buffer
|
||||||
|
Mixer.ClickhouseRepo,
|
||||||
|
# In-memory event buffer that batches writes to ClickHouse
|
||||||
|
Mixer.Metrics.Buffer,
|
||||||
{DNSCluster, query: Application.get_env(:mixer, :dns_cluster_query) || :ignore},
|
{DNSCluster, query: Application.get_env(:mixer, :dns_cluster_query) || :ignore},
|
||||||
{Phoenix.PubSub, name: Mixer.PubSub},
|
{Phoenix.PubSub, name: Mixer.PubSub},
|
||||||
# Start a worker by calling: Mixer.Worker.start_link(arg)
|
# Start a worker by calling: Mixer.Worker.start_link(arg)
|
||||||
|
|||||||
13
lib/mixer/clickhouse_repo.ex
Normal file
13
lib/mixer/clickhouse_repo.ex
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
defmodule Mixer.ClickhouseRepo do
|
||||||
|
@moduledoc """
|
||||||
|
Ecto repository for ClickHouse, backed by the `ecto_ch` / `Ch` adapter.
|
||||||
|
|
||||||
|
Used exclusively for analytics writes (via `Mixer.Metrics.Buffer`) and
|
||||||
|
read queries (via `Mixer.Metrics`). It is **not** an Ash repo and must
|
||||||
|
never be used for transactional application data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
use Ecto.Repo,
|
||||||
|
otp_app: :mixer,
|
||||||
|
adapter: Ecto.Adapters.ClickHouse
|
||||||
|
end
|
||||||
263
lib/mixer/metrics.ex
Normal file
263
lib/mixer/metrics.ex
Normal file
@@ -0,0 +1,263 @@
|
|||||||
|
defmodule Mixer.Metrics do
|
||||||
|
@moduledoc """
|
||||||
|
Public API for tracking and querying post (tweet) metrics via ClickHouse.
|
||||||
|
|
||||||
|
## Tracking events
|
||||||
|
|
||||||
|
Tracking calls are non-blocking — events are handed off to the in-memory
|
||||||
|
`Mixer.Metrics.Buffer` GenServer and written to ClickHouse in batches.
|
||||||
|
|
||||||
|
# Record a tweet view (anonymous)
|
||||||
|
Mixer.Metrics.track_view(tweet_id)
|
||||||
|
|
||||||
|
# Record a view with a logged-in user and their IP
|
||||||
|
Mixer.Metrics.track_view(tweet_id, user_id: user.id, ip_address: conn.remote_ip)
|
||||||
|
|
||||||
|
## Querying metrics
|
||||||
|
|
||||||
|
Query functions execute synchronous ClickHouse SQL and return plain maps.
|
||||||
|
|
||||||
|
{:ok, summary} = Mixer.Metrics.get_summary(tweet_id)
|
||||||
|
# => %{views: 42, likes: 7, unlikes: 1, comments: 3, shares: 0}
|
||||||
|
|
||||||
|
{:ok, rows} = Mixer.Metrics.get_top_posts(10)
|
||||||
|
# => [%{tweet_id: "...", views: 99}, ...]
|
||||||
|
"""
|
||||||
|
|
||||||
|
require Logger
|
||||||
|
|
||||||
|
alias Mixer.ClickhouseRepo
|
||||||
|
alias Mixer.Metrics.Buffer
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Event types
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@type event_type :: :view | :like | :unlike | :comment | :share
|
||||||
|
|
||||||
|
@type track_opt ::
|
||||||
|
{:user_id, binary() | nil}
|
||||||
|
| {:ip_address, binary() | :inet.ip_address() | nil}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tracking helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Track a tweet view event.
|
||||||
|
|
||||||
|
## Options
|
||||||
|
|
||||||
|
* `:user_id` — UUID of the viewing user (nil for anonymous)
|
||||||
|
* `:ip_address` — originating IP; accepts a string or an `:inet` tuple
|
||||||
|
"""
|
||||||
|
@spec track_view(binary(), [track_opt()]) :: :ok
|
||||||
|
def track_view(tweet_id, opts \\ []), do: enqueue("view", tweet_id, opts)
|
||||||
|
|
||||||
|
@doc "Track a tweet like event."
|
||||||
|
@spec track_like(binary(), [track_opt()]) :: :ok
|
||||||
|
def track_like(tweet_id, opts \\ []), do: enqueue("like", tweet_id, opts)
|
||||||
|
|
||||||
|
@doc "Track a tweet unlike event."
|
||||||
|
@spec track_unlike(binary(), [track_opt()]) :: :ok
|
||||||
|
def track_unlike(tweet_id, opts \\ []), do: enqueue("unlike", tweet_id, opts)
|
||||||
|
|
||||||
|
@doc "Track a comment (reply) event on a tweet."
|
||||||
|
@spec track_comment(binary(), [track_opt()]) :: :ok
|
||||||
|
def track_comment(tweet_id, opts \\ []), do: enqueue("comment", tweet_id, opts)
|
||||||
|
|
||||||
|
@doc "Track a tweet share / repost event."
|
||||||
|
@spec track_share(binary(), [track_opt()]) :: :ok
|
||||||
|
def track_share(tweet_id, opts \\ []), do: enqueue("share", tweet_id, opts)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Query helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Return a summary of all event counts for a single tweet.
|
||||||
|
|
||||||
|
Returns `{:ok, map}` on success or `{:error, reason}` on failure.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
{:ok, %{views: 12, likes: 3, unlikes: 0, comments: 5, shares: 1}} =
|
||||||
|
Mixer.Metrics.get_summary(tweet_id)
|
||||||
|
"""
|
||||||
|
@spec get_summary(binary()) :: {:ok, map()} | {:error, term()}
|
||||||
|
def get_summary(tweet_id) do
|
||||||
|
sql = """
|
||||||
|
SELECT
|
||||||
|
countIf(event_type = 'view') AS views,
|
||||||
|
countIf(event_type = 'like') AS likes,
|
||||||
|
countIf(event_type = 'unlike') AS unlikes,
|
||||||
|
countIf(event_type = 'comment') AS comments,
|
||||||
|
countIf(event_type = 'share') AS shares
|
||||||
|
FROM post_events
|
||||||
|
WHERE tweet_id = {tweet_id:String}
|
||||||
|
"""
|
||||||
|
|
||||||
|
case ClickhouseRepo.query(sql, %{"tweet_id" => tweet_id}) do
|
||||||
|
{:ok, result} ->
|
||||||
|
{:ok, row_to_summary(result)}
|
||||||
|
|
||||||
|
{:error, reason} ->
|
||||||
|
Logger.error("[Mixer.Metrics] get_summary failed for #{tweet_id}: #{inspect(reason)}")
|
||||||
|
{:error, reason}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Return view counts bucketed by UTC hour for the past `hours` hours.
|
||||||
|
|
||||||
|
Useful for rendering a sparkline on a tweet detail page.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
{:ok, rows} = Mixer.Metrics.get_hourly_views(tweet_id, 24)
|
||||||
|
# => [%{hour: ~N[2026-04-07 00:00:00], views: 5}, ...]
|
||||||
|
"""
|
||||||
|
@spec get_hourly_views(binary(), pos_integer()) :: {:ok, [map()]} | {:error, term()}
|
||||||
|
def get_hourly_views(tweet_id, hours \\ 24) when is_integer(hours) and hours > 0 do
|
||||||
|
sql = """
|
||||||
|
SELECT
|
||||||
|
toStartOfHour(occurred_at) AS hour,
|
||||||
|
count() AS views
|
||||||
|
FROM post_events
|
||||||
|
WHERE
|
||||||
|
tweet_id = {tweet_id:String}
|
||||||
|
AND event_type = 'view'
|
||||||
|
AND occurred_at >= now() - toIntervalHour({hours:UInt32})
|
||||||
|
GROUP BY hour
|
||||||
|
ORDER BY hour ASC
|
||||||
|
"""
|
||||||
|
|
||||||
|
case ClickhouseRepo.query(sql, %{"tweet_id" => tweet_id, "hours" => hours}) do
|
||||||
|
{:ok, %{rows: rows}} ->
|
||||||
|
{:ok, Enum.map(rows, fn [hour, views] -> %{hour: hour, views: views} end)}
|
||||||
|
|
||||||
|
{:error, reason} ->
|
||||||
|
Logger.error("[Mixer.Metrics] get_hourly_views failed: #{inspect(reason)}")
|
||||||
|
{:error, reason}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Return the top `limit` tweets ordered by total view count across all time.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
{:ok, rows} = Mixer.Metrics.get_top_posts(10)
|
||||||
|
# => [%{tweet_id: "...", views: 99}, %{tweet_id: "...", views: 72}, ...]
|
||||||
|
"""
|
||||||
|
@spec get_top_posts(pos_integer()) :: {:ok, [map()]} | {:error, term()}
|
||||||
|
def get_top_posts(limit \\ 10) when is_integer(limit) and limit > 0 do
|
||||||
|
sql = """
|
||||||
|
SELECT
|
||||||
|
tweet_id,
|
||||||
|
countIf(event_type = 'view') AS views
|
||||||
|
FROM post_events
|
||||||
|
GROUP BY tweet_id
|
||||||
|
ORDER BY views DESC
|
||||||
|
LIMIT {limit:UInt32}
|
||||||
|
"""
|
||||||
|
|
||||||
|
case ClickhouseRepo.query(sql, %{"limit" => limit}) do
|
||||||
|
{:ok, %{rows: rows}} ->
|
||||||
|
{:ok, Enum.map(rows, fn [tweet_id, views] -> %{tweet_id: tweet_id, views: views} end)}
|
||||||
|
|
||||||
|
{:error, reason} ->
|
||||||
|
Logger.error("[Mixer.Metrics] get_top_posts failed: #{inspect(reason)}")
|
||||||
|
{:error, reason}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Return per-event-type counts for a list of tweet IDs in a single query.
|
||||||
|
|
||||||
|
Handy for batch-enriching a feed with metrics without N+1 queries.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
{:ok, map} = Mixer.Metrics.get_bulk_summaries(tweet_ids)
|
||||||
|
# => %{"<uuid>" => %{views: 5, likes: 2, ...}, ...}
|
||||||
|
"""
|
||||||
|
@spec get_bulk_summaries([binary()]) :: {:ok, %{binary() => map()}} | {:error, term()}
|
||||||
|
def get_bulk_summaries([]), do: {:ok, %{}}
|
||||||
|
|
||||||
|
def get_bulk_summaries(tweet_ids) when is_list(tweet_ids) do
|
||||||
|
# ecto_ch supports passing arrays as query parameters
|
||||||
|
sql = """
|
||||||
|
SELECT
|
||||||
|
tweet_id,
|
||||||
|
countIf(event_type = 'view') AS views,
|
||||||
|
countIf(event_type = 'like') AS likes,
|
||||||
|
countIf(event_type = 'unlike') AS unlikes,
|
||||||
|
countIf(event_type = 'comment') AS comments,
|
||||||
|
countIf(event_type = 'share') AS shares
|
||||||
|
FROM post_events
|
||||||
|
WHERE tweet_id IN {tweet_ids:Array(String)}
|
||||||
|
GROUP BY tweet_id
|
||||||
|
"""
|
||||||
|
|
||||||
|
case ClickhouseRepo.query(sql, %{"tweet_ids" => tweet_ids}) do
|
||||||
|
{:ok, %{rows: rows}} ->
|
||||||
|
summaries =
|
||||||
|
Map.new(rows, fn [tweet_id, views, likes, unlikes, comments, shares] ->
|
||||||
|
{tweet_id,
|
||||||
|
%{
|
||||||
|
views: views,
|
||||||
|
likes: likes,
|
||||||
|
unlikes: unlikes,
|
||||||
|
comments: comments,
|
||||||
|
shares: shares
|
||||||
|
}}
|
||||||
|
end)
|
||||||
|
|
||||||
|
{:ok, summaries}
|
||||||
|
|
||||||
|
{:error, reason} ->
|
||||||
|
Logger.error("[Mixer.Metrics] get_bulk_summaries failed: #{inspect(reason)}")
|
||||||
|
{:error, reason}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Private helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
defp enqueue(event_type, tweet_id, opts) do
|
||||||
|
event = %{
|
||||||
|
event_type: event_type,
|
||||||
|
tweet_id: tweet_id,
|
||||||
|
user_id: Keyword.get(opts, :user_id),
|
||||||
|
occurred_at: DateTime.utc_now() |> DateTime.truncate(:second),
|
||||||
|
ip_address: opts |> Keyword.get(:ip_address) |> format_ip()
|
||||||
|
}
|
||||||
|
|
||||||
|
Buffer.track(event)
|
||||||
|
end
|
||||||
|
|
||||||
|
defp format_ip(nil), do: nil
|
||||||
|
defp format_ip(ip) when is_binary(ip), do: ip
|
||||||
|
|
||||||
|
defp format_ip({a, b, c, d}), do: "#{a}.#{b}.#{c}.#{d}"
|
||||||
|
|
||||||
|
defp format_ip({a, b, c, d, e, f, g, h}) do
|
||||||
|
[a, b, c, d, e, f, g, h]
|
||||||
|
|> Enum.map_join(":", &Integer.to_string(&1, 16))
|
||||||
|
end
|
||||||
|
|
||||||
|
defp row_to_summary(%{rows: [[views, likes, unlikes, comments, shares] | _]}) do
|
||||||
|
%{
|
||||||
|
views: views,
|
||||||
|
likes: likes,
|
||||||
|
unlikes: unlikes,
|
||||||
|
comments: comments,
|
||||||
|
shares: shares
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
# ClickHouse returns no rows when the tweet has zero events — default to 0
|
||||||
|
defp row_to_summary(_), do: %{views: 0, likes: 0, unlikes: 0, comments: 0, shares: 0}
|
||||||
|
end
|
||||||
147
lib/mixer/metrics/buffer.ex
Normal file
147
lib/mixer/metrics/buffer.ex
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
defmodule Mixer.Metrics.Buffer do
|
||||||
|
@moduledoc """
|
||||||
|
GenServer that accumulates post metric events in memory and flushes them
|
||||||
|
to ClickHouse in batches.
|
||||||
|
|
||||||
|
Two conditions trigger a flush:
|
||||||
|
|
||||||
|
1. **Timer** — every `@flush_interval` milliseconds (default 10 s).
|
||||||
|
2. **Threshold** — whenever the in-memory buffer reaches `@max_buffer_size`
|
||||||
|
rows (default 500).
|
||||||
|
|
||||||
|
If ClickHouse is unavailable the error is logged and the buffered events
|
||||||
|
are discarded rather than retried indefinitely, preventing unbounded memory
|
||||||
|
growth. For production deployments that require durability, consider adding
|
||||||
|
a persistent queue in front of this buffer.
|
||||||
|
"""
|
||||||
|
|
||||||
|
use GenServer
|
||||||
|
|
||||||
|
require Logger
|
||||||
|
|
||||||
|
alias Mixer.Metrics.PostEvent
|
||||||
|
|
||||||
|
@flush_interval :timer.seconds(10)
|
||||||
|
@max_buffer_size 500
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Start the buffer process and link it to the calling process.
|
||||||
|
|
||||||
|
Accepts an optional keyword list of overrides:
|
||||||
|
|
||||||
|
* `:flush_interval` — milliseconds between scheduled flushes
|
||||||
|
* `:max_buffer_size` — row count that triggers an immediate flush
|
||||||
|
"""
|
||||||
|
@spec start_link(keyword()) :: GenServer.on_start()
|
||||||
|
def start_link(opts \\ []) do
|
||||||
|
GenServer.start_link(__MODULE__, opts, name: __MODULE__)
|
||||||
|
end
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Enqueue a single analytics event map for buffered insertion into ClickHouse.
|
||||||
|
|
||||||
|
The map must contain at minimum the fields required by `Mixer.Metrics.PostEvent`:
|
||||||
|
`:event_type`, `:tweet_id`, `:occurred_at`. Other fields are optional.
|
||||||
|
|
||||||
|
This call is asynchronous (cast) and returns `:ok` immediately.
|
||||||
|
"""
|
||||||
|
@spec track(map()) :: :ok
|
||||||
|
def track(event) when is_map(event) do
|
||||||
|
GenServer.cast(__MODULE__, {:track, event})
|
||||||
|
end
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Force an immediate flush of all buffered events to ClickHouse, regardless
|
||||||
|
of the timer or threshold. Returns `:ok` after the flush completes.
|
||||||
|
|
||||||
|
Primarily useful in tests.
|
||||||
|
"""
|
||||||
|
@spec flush() :: :ok
|
||||||
|
def flush do
|
||||||
|
GenServer.call(__MODULE__, :flush)
|
||||||
|
end
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# GenServer callbacks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@impl GenServer
|
||||||
|
def init(opts) do
|
||||||
|
flush_interval = Keyword.get(opts, :flush_interval, @flush_interval)
|
||||||
|
max_buffer_size = Keyword.get(opts, :max_buffer_size, @max_buffer_size)
|
||||||
|
|
||||||
|
schedule_flush(flush_interval)
|
||||||
|
|
||||||
|
state = %{
|
||||||
|
events: [],
|
||||||
|
count: 0,
|
||||||
|
flush_interval: flush_interval,
|
||||||
|
max_buffer_size: max_buffer_size
|
||||||
|
}
|
||||||
|
|
||||||
|
{:ok, state}
|
||||||
|
end
|
||||||
|
|
||||||
|
@impl GenServer
|
||||||
|
def handle_cast({:track, event}, state) do
|
||||||
|
new_count = state.count + 1
|
||||||
|
new_events = [event | state.events]
|
||||||
|
|
||||||
|
if new_count >= state.max_buffer_size do
|
||||||
|
do_flush(new_events)
|
||||||
|
{:noreply, %{state | events: [], count: 0}}
|
||||||
|
else
|
||||||
|
{:noreply, %{state | events: new_events, count: new_count}}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@impl GenServer
|
||||||
|
def handle_call(:flush, _from, state) do
|
||||||
|
do_flush(state.events)
|
||||||
|
{:reply, :ok, %{state | events: [], count: 0}}
|
||||||
|
end
|
||||||
|
|
||||||
|
@impl GenServer
|
||||||
|
def handle_info(:flush, state) do
|
||||||
|
do_flush(state.events)
|
||||||
|
schedule_flush(state.flush_interval)
|
||||||
|
{:noreply, %{state | events: [], count: 0}}
|
||||||
|
end
|
||||||
|
|
||||||
|
@impl GenServer
|
||||||
|
def terminate(_reason, state) do
|
||||||
|
# Best-effort flush on shutdown so we don't lose buffered events during
|
||||||
|
# graceful stops (e.g., deploys).
|
||||||
|
do_flush(state.events)
|
||||||
|
:ok
|
||||||
|
end
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Private helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
defp do_flush([]), do: :ok
|
||||||
|
|
||||||
|
defp do_flush(events) do
|
||||||
|
rows = Enum.reverse(events)
|
||||||
|
|
||||||
|
try do
|
||||||
|
{count, _} = Mixer.ClickhouseRepo.insert_all(PostEvent, rows)
|
||||||
|
Logger.debug("[Mixer.Metrics.Buffer] Flushed #{count} event(s) to ClickHouse")
|
||||||
|
rescue
|
||||||
|
error ->
|
||||||
|
Logger.error(
|
||||||
|
"[Mixer.Metrics.Buffer] Failed to flush #{length(rows)} event(s) to ClickHouse: " <>
|
||||||
|
Exception.message(error)
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
defp schedule_flush(interval) do
|
||||||
|
Process.send_after(self(), :flush, interval)
|
||||||
|
end
|
||||||
|
end
|
||||||
40
lib/mixer/metrics/post_event.ex
Normal file
40
lib/mixer/metrics/post_event.ex
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
defmodule Mixer.Metrics.PostEvent do
|
||||||
|
@moduledoc """
|
||||||
|
Ecto schema that maps to the `post_events` table in ClickHouse.
|
||||||
|
|
||||||
|
Each row represents a single analytics event tied to a tweet (post).
|
||||||
|
The table uses a MergeTree engine ordered by `(occurred_at, event_type,
|
||||||
|
tweet_id)` for efficient time-range scans and per-tweet aggregations.
|
||||||
|
|
||||||
|
## Event types
|
||||||
|
|
||||||
|
| event_type | Description |
|
||||||
|
|-------------|------------------------------------------|
|
||||||
|
| `"view"` | A tweet was displayed to a user |
|
||||||
|
| `"like"` | A user liked a tweet |
|
||||||
|
| `"unlike"` | A user removed their like from a tweet |
|
||||||
|
| `"comment"` | A user replied to a tweet |
|
||||||
|
| `"share"` | A user shared / reposted a tweet |
|
||||||
|
"""
|
||||||
|
|
||||||
|
use Ecto.Schema
|
||||||
|
|
||||||
|
@primary_key false
|
||||||
|
|
||||||
|
schema "post_events" do
|
||||||
|
# LowCardinality(String) in ClickHouse — keep values in the set above
|
||||||
|
field :event_type, :string
|
||||||
|
|
||||||
|
# The tweet that the event relates to
|
||||||
|
field :tweet_id, Ecto.UUID
|
||||||
|
|
||||||
|
# The acting user; may be nil for anonymous views
|
||||||
|
field :user_id, Ecto.UUID
|
||||||
|
|
||||||
|
# Wall-clock time of the event (UTC, second precision)
|
||||||
|
field :occurred_at, :utc_datetime
|
||||||
|
|
||||||
|
# Optional originating IP, useful for deduplicating anonymous views
|
||||||
|
field :ip_address, :string
|
||||||
|
end
|
||||||
|
end
|
||||||
3
mix.exs
3
mix.exs
@@ -91,7 +91,8 @@ defmodule Mixer.MixProject do
|
|||||||
{:ex_aws, "~> 2.1.2"},
|
{:ex_aws, "~> 2.1.2"},
|
||||||
{:ex_aws_s3, "~> 2.0"},
|
{:ex_aws_s3, "~> 2.0"},
|
||||||
{:hackney, "~> 1.9"},
|
{:hackney, "~> 1.9"},
|
||||||
{:sweet_xml, "~> 0.6"}
|
{:sweet_xml, "~> 0.6"},
|
||||||
|
{:ecto_ch, "~> 0.3"}
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
2
mix.lock
2
mix.lock
@@ -20,6 +20,7 @@
|
|||||||
"castore": {:hex, :castore, "1.0.18", "5e43ef0ec7d31195dfa5a65a86e6131db999d074179d2ba5a8de11fe14570f55", [:mix], [], "hexpm", "f393e4fe6317829b158fb74d86eb681f737d2fe326aa61ccf6293c4104957e34"},
|
"castore": {:hex, :castore, "1.0.18", "5e43ef0ec7d31195dfa5a65a86e6131db999d074179d2ba5a8de11fe14570f55", [:mix], [], "hexpm", "f393e4fe6317829b158fb74d86eb681f737d2fe326aa61ccf6293c4104957e34"},
|
||||||
"cc_precompiler": {:hex, :cc_precompiler, "0.1.11", "8c844d0b9fb98a3edea067f94f616b3f6b29b959b6b3bf25fee94ffe34364768", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "3427232caf0835f94680e5bcf082408a70b48ad68a5f5c0b02a3bea9f3a075b9"},
|
"cc_precompiler": {:hex, :cc_precompiler, "0.1.11", "8c844d0b9fb98a3edea067f94f616b3f6b29b959b6b3bf25fee94ffe34364768", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "3427232caf0835f94680e5bcf082408a70b48ad68a5f5c0b02a3bea9f3a075b9"},
|
||||||
"certifi": {:hex, :certifi, "2.15.0", "0e6e882fcdaaa0a5a9f2b3db55b1394dba07e8d6d9bcad08318fb604c6839712", [:rebar3], [], "hexpm", "b147ed22ce71d72eafdad94f055165c1c182f61a2ff49df28bcc71d1d5b94a60"},
|
"certifi": {:hex, :certifi, "2.15.0", "0e6e882fcdaaa0a5a9f2b3db55b1394dba07e8d6d9bcad08318fb604c6839712", [:rebar3], [], "hexpm", "b147ed22ce71d72eafdad94f055165c1c182f61a2ff49df28bcc71d1d5b94a60"},
|
||||||
|
"ch": {:hex, :ch, "0.7.1", "116c08094b30d095c3bd6a8fe4ebe19fdaaf3dce84e2413cfdd6af157baf6303", [:mix], [{:db_connection, "~> 2.9.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: false]}], "hexpm", "3c1c900291ff9c4c077cd1dc0c265051a3f1d26320d58b37ed9e91b33d41a868"},
|
||||||
"cinder": {:hex, :cinder, "0.12.1", "02ae4988e025fb32c37e4e7f2e491586b952918c0dd99d856da13271cd680e16", [:mix], [{:ash, "~> 3.0", [hex: :ash, repo: "hexpm", optional: false]}, {:ash_phoenix, "~> 2.3", [hex: :ash_phoenix, repo: "hexpm", optional: false]}, {:gettext, "~> 1.0.0", [hex: :gettext, repo: "hexpm", optional: false]}, {:phoenix_live_view, "~> 1.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}, {:spark, "~> 2.0", [hex: :spark, repo: "hexpm", optional: false]}], "hexpm", "a48b5677c1f57619d9d7564fb2bd7928f93750a2e8c0b1b145852a30ecf2aa20"},
|
"cinder": {:hex, :cinder, "0.12.1", "02ae4988e025fb32c37e4e7f2e491586b952918c0dd99d856da13271cd680e16", [:mix], [{:ash, "~> 3.0", [hex: :ash, repo: "hexpm", optional: false]}, {:ash_phoenix, "~> 2.3", [hex: :ash_phoenix, repo: "hexpm", optional: false]}, {:gettext, "~> 1.0.0", [hex: :gettext, repo: "hexpm", optional: false]}, {:phoenix_live_view, "~> 1.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}, {:spark, "~> 2.0", [hex: :spark, repo: "hexpm", optional: false]}], "hexpm", "a48b5677c1f57619d9d7564fb2bd7928f93750a2e8c0b1b145852a30ecf2aa20"},
|
||||||
"comeonin": {:hex, :comeonin, "5.5.1", "5113e5f3800799787de08a6e0db307133850e635d34e9fab23c70b6501669510", [:mix], [], "hexpm", "65aac8f19938145377cee73973f192c5645873dcf550a8a6b18187d17c13ccdb"},
|
"comeonin": {:hex, :comeonin, "5.5.1", "5113e5f3800799787de08a6e0db307133850e635d34e9fab23c70b6501669510", [:mix], [], "hexpm", "65aac8f19938145377cee73973f192c5645873dcf550a8a6b18187d17c13ccdb"},
|
||||||
"conv_case": {:hex, :conv_case, "0.2.3", "c1455c27d3c1ffcdd5f17f1e91f40b8a0bc0a337805a6e8302f441af17118ed8", [:mix], [], "hexpm", "88f29a3d97d1742f9865f7e394ed3da011abb7c5e8cc104e676fdef6270d4b4a"},
|
"conv_case": {:hex, :conv_case, "0.2.3", "c1455c27d3c1ffcdd5f17f1e91f40b8a0bc0a337805a6e8302f441af17118ed8", [:mix], [], "hexpm", "88f29a3d97d1742f9865f7e394ed3da011abb7c5e8cc104e676fdef6270d4b4a"},
|
||||||
@@ -29,6 +30,7 @@
|
|||||||
"dns_cluster": {:hex, :dns_cluster, "0.2.0", "aa8eb46e3bd0326bd67b84790c561733b25c5ba2fe3c7e36f28e88f384ebcb33", [:mix], [], "hexpm", "ba6f1893411c69c01b9e8e8f772062535a4cf70f3f35bcc964a324078d8c8240"},
|
"dns_cluster": {:hex, :dns_cluster, "0.2.0", "aa8eb46e3bd0326bd67b84790c561733b25c5ba2fe3c7e36f28e88f384ebcb33", [:mix], [], "hexpm", "ba6f1893411c69c01b9e8e8f772062535a4cf70f3f35bcc964a324078d8c8240"},
|
||||||
"dotenvy": {:hex, :dotenvy, "1.1.1", "00e318f3c51de9fafc4b48598447e386f19204dc18ca69886905bb8f8b08b667", [:mix], [], "hexpm", "c8269471b5701e9e56dc86509c1199ded2b33dce088c3471afcfef7839766d8e"},
|
"dotenvy": {:hex, :dotenvy, "1.1.1", "00e318f3c51de9fafc4b48598447e386f19204dc18ca69886905bb8f8b08b667", [:mix], [], "hexpm", "c8269471b5701e9e56dc86509c1199ded2b33dce088c3471afcfef7839766d8e"},
|
||||||
"ecto": {:hex, :ecto, "3.13.5", "9d4a69700183f33bf97208294768e561f5c7f1ecf417e0fa1006e4a91713a834", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "df9efebf70cf94142739ba357499661ef5dbb559ef902b68ea1f3c1fabce36de"},
|
"ecto": {:hex, :ecto, "3.13.5", "9d4a69700183f33bf97208294768e561f5c7f1ecf417e0fa1006e4a91713a834", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "df9efebf70cf94142739ba357499661ef5dbb559ef902b68ea1f3c1fabce36de"},
|
||||||
|
"ecto_ch": {:hex, :ecto_ch, "0.8.6", "f31b507e86690c003f46e75d6e742e6b5d8ce34b6b10a86604b1c3aa785e0b56", [:mix], [{:ch, "~> 0.5.0 or ~> 0.6.0 or ~> 0.7.0", [hex: :ch, repo: "hexpm", optional: false]}, {:ecto_sql, "~> 3.13.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}], "hexpm", "6ca9f1cf9680452b1925c6a3a7b5e3d8b12e38ee134b03c6a45a8b26434fad97"},
|
||||||
"ecto_sql": {:hex, :ecto_sql, "3.13.5", "2f8282b2ad97bf0f0d3217ea0a6fff320ead9e2f8770f810141189d182dc304e", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "aa36751f4e6a2b56ae79efb0e088042e010ff4935fc8684e74c23b1f49e25fdc"},
|
"ecto_sql": {:hex, :ecto_sql, "3.13.5", "2f8282b2ad97bf0f0d3217ea0a6fff320ead9e2f8770f810141189d182dc304e", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "aa36751f4e6a2b56ae79efb0e088042e010ff4935fc8684e74c23b1f49e25fdc"},
|
||||||
"elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"},
|
"elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"},
|
||||||
"esbuild": {:hex, :esbuild, "0.10.0", "b0aa3388a1c23e727c5a3e7427c932d89ee791746b0081bbe56103e9ef3d291f", [:mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "468489cda427b974a7cc9f03ace55368a83e1a7be12fba7e30969af78e5f8c70"},
|
"esbuild": {:hex, :esbuild, "0.10.0", "b0aa3388a1c23e727c5a3e7427c932d89ee791746b0081bbe56103e9ef3d291f", [:mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "468489cda427b974a7cc9f03ace55368a83e1a7be12fba7e30969af78e5f8c70"},
|
||||||
|
|||||||
4
priv/clickhouse/migrations/.formatter.exs
Normal file
4
priv/clickhouse/migrations/.formatter.exs
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
[
|
||||||
|
import_deps: [:ecto_ch],
|
||||||
|
inputs: ["*.exs"]
|
||||||
|
]
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
defmodule Mixer.ClickhouseRepo.Migrations.CreatePostEvents do
|
||||||
|
use Ecto.Migration
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Creates the `post_events` table using a MergeTree engine.
|
||||||
|
|
||||||
|
Key design decisions:
|
||||||
|
|
||||||
|
* `LowCardinality(String)` for `event_type` — the cardinality is tiny
|
||||||
|
(5–10 values), so ClickHouse can store it as a dictionary, giving both
|
||||||
|
compression and faster filtering.
|
||||||
|
|
||||||
|
* `Nullable(UUID)` / `Nullable(String)` for optional columns — ClickHouse
|
||||||
|
handles NULLs differently from PostgreSQL; we make the nullable fields
|
||||||
|
explicit so the schema is unambiguous.
|
||||||
|
|
||||||
|
* `ORDER BY (occurred_at, event_type, tweet_id)` — optimises the two most
|
||||||
|
common query patterns:
|
||||||
|
1. Time-range scans (`WHERE occurred_at >= now() - interval 24 HOUR`)
|
||||||
|
2. Per-tweet aggregations (`WHERE tweet_id = ?`)
|
||||||
|
|
||||||
|
* `PARTITION BY toYYYYMM(occurred_at)` — monthly partitions make it cheap
|
||||||
|
to drop old data with `ALTER TABLE … DROP PARTITION`.
|
||||||
|
|
||||||
|
* `TTL occurred_at + INTERVAL 1 YEAR DELETE` — automatically reclaim disk
|
||||||
|
space after two years. Adjust as required.
|
||||||
|
"""
|
||||||
|
def up do
|
||||||
|
execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS post_events
|
||||||
|
(
|
||||||
|
event_type LowCardinality(String),
|
||||||
|
tweet_id UUID,
|
||||||
|
user_id Nullable(UUID),
|
||||||
|
occurred_at DateTime,
|
||||||
|
ip_address Nullable(String)
|
||||||
|
)
|
||||||
|
ENGINE = MergeTree()
|
||||||
|
PARTITION BY toYYYYMM(occurred_at)
|
||||||
|
ORDER BY (occurred_at, event_type, tweet_id)
|
||||||
|
TTL occurred_at + INTERVAL 1 YEAR DELETE
|
||||||
|
SETTINGS index_granularity = 8192
|
||||||
|
""")
|
||||||
|
end
|
||||||
|
|
||||||
|
def down do
|
||||||
|
execute("DROP TABLE IF EXISTS post_events")
|
||||||
|
end
|
||||||
|
end
|
||||||
11
priv/clickhouse/seeds.exs
Normal file
11
priv/clickhouse/seeds.exs
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Script for populating the database. You can run it as:
|
||||||
|
#
|
||||||
|
# mix run priv/clickhouse/seeds.exs
|
||||||
|
#
|
||||||
|
# Inside the script, you can read and write to any of your
|
||||||
|
# repositories directly:
|
||||||
|
#
|
||||||
|
# Mixer.ClickhouseRepo.insert!(%Mixer.SomeSchema{})
|
||||||
|
#
|
||||||
|
# We recommend using the bang functions (`insert!`, `update!`
|
||||||
|
# and so on) as they will fail if something goes wrong.
|
||||||
Reference in New Issue
Block a user