Integrating ClickHouse for metrics.
This commit is contained in:
@@ -10,6 +10,10 @@ defmodule Mixer.Application do
|
||||
children = [
|
||||
MixerWeb.Telemetry,
|
||||
Mixer.Repo,
|
||||
# ClickHouse repo for analytics — started before the metrics buffer
|
||||
Mixer.ClickhouseRepo,
|
||||
# In-memory event buffer that batches writes to ClickHouse
|
||||
Mixer.Metrics.Buffer,
|
||||
{DNSCluster, query: Application.get_env(:mixer, :dns_cluster_query) || :ignore},
|
||||
{Phoenix.PubSub, name: Mixer.PubSub},
|
||||
# Start a worker by calling: Mixer.Worker.start_link(arg)
|
||||
|
||||
13
lib/mixer/clickhouse_repo.ex
Normal file
13
lib/mixer/clickhouse_repo.ex
Normal file
@@ -0,0 +1,13 @@
|
||||
defmodule Mixer.ClickhouseRepo do
  @moduledoc """
  ClickHouse-backed Ecto repository (adapter: `Ecto.Adapters.ClickHouse`,
  provided by `ecto_ch` / `Ch`).

  This repo is reserved for analytics:

    * writes arrive through `Mixer.Metrics.Buffer`
    * reads are issued by `Mixer.Metrics`

  It is **not** an Ash repo. Transactional application data must never be
  stored or queried through it.
  """

  use Ecto.Repo, otp_app: :mixer, adapter: Ecto.Adapters.ClickHouse
end
|
||||
263
lib/mixer/metrics.ex
Normal file
263
lib/mixer/metrics.ex
Normal file
@@ -0,0 +1,263 @@
|
||||
defmodule Mixer.Metrics do
  @moduledoc """
  Public API for tracking and querying post (tweet) metrics via ClickHouse.

  ## Tracking events

  Tracking calls are non-blocking — events are handed off to the in-memory
  `Mixer.Metrics.Buffer` GenServer and written to ClickHouse in batches.

      # Record a tweet view (anonymous)
      Mixer.Metrics.track_view(tweet_id)

      # Record a view with a logged-in user and their IP
      Mixer.Metrics.track_view(tweet_id, user_id: user.id, ip_address: conn.remote_ip)

  ## Querying metrics

  Query functions execute synchronous ClickHouse SQL and return plain maps.

      {:ok, summary} = Mixer.Metrics.get_summary(tweet_id)
      # => %{views: 42, likes: 7, unlikes: 1, comments: 3, shares: 0}

      {:ok, rows} = Mixer.Metrics.get_top_posts(10)
      # => [%{tweet_id: "...", views: 99}, ...]
  """

  require Logger

  alias Mixer.ClickhouseRepo
  alias Mixer.Metrics.Buffer

  # ---------------------------------------------------------------------------
  # Event types
  # ---------------------------------------------------------------------------

  # Logical event kinds. Events are enqueued with the string form of these
  # names ("view", "like", ...), which is what the SQL below filters on.
  @type event_type :: :view | :like | :unlike | :comment | :share

  @type track_opt ::
          {:user_id, binary() | nil}
          | {:ip_address, binary() | :inet.ip_address() | nil}

  # ---------------------------------------------------------------------------
  # Tracking helpers
  # ---------------------------------------------------------------------------

  @doc """
  Track a tweet view event.

  ## Options

    * `:user_id` — UUID of the viewing user (nil for anonymous)
    * `:ip_address` — originating IP; accepts a string or an `:inet` tuple.
      Tuples are rendered with `:inet.ntoa/1`; a tuple that is not a valid
      address is recorded as `nil`.
  """
  @spec track_view(binary(), [track_opt()]) :: :ok
  def track_view(tweet_id, opts \\ []), do: enqueue("view", tweet_id, opts)

  @doc "Track a tweet like event. Accepts the same options as `track_view/2`."
  @spec track_like(binary(), [track_opt()]) :: :ok
  def track_like(tweet_id, opts \\ []), do: enqueue("like", tweet_id, opts)

  @doc "Track a tweet unlike event. Accepts the same options as `track_view/2`."
  @spec track_unlike(binary(), [track_opt()]) :: :ok
  def track_unlike(tweet_id, opts \\ []), do: enqueue("unlike", tweet_id, opts)

  @doc "Track a comment (reply) event on a tweet. Accepts the same options as `track_view/2`."
  @spec track_comment(binary(), [track_opt()]) :: :ok
  def track_comment(tweet_id, opts \\ []), do: enqueue("comment", tweet_id, opts)

  @doc "Track a tweet share / repost event. Accepts the same options as `track_view/2`."
  @spec track_share(binary(), [track_opt()]) :: :ok
  def track_share(tweet_id, opts \\ []), do: enqueue("share", tweet_id, opts)

  # ---------------------------------------------------------------------------
  # Query helpers
  # ---------------------------------------------------------------------------

  @doc """
  Return a summary of all event counts for a single tweet.

  Returns `{:ok, map}` on success or `{:error, reason}` on failure. Failures
  are also logged at error level.

  ## Example

      {:ok, %{views: 12, likes: 3, unlikes: 0, comments: 5, shares: 1}} =
        Mixer.Metrics.get_summary(tweet_id)
  """
  @spec get_summary(binary()) :: {:ok, map()} | {:error, term()}
  def get_summary(tweet_id) do
    sql = """
    SELECT
      countIf(event_type = 'view') AS views,
      countIf(event_type = 'like') AS likes,
      countIf(event_type = 'unlike') AS unlikes,
      countIf(event_type = 'comment') AS comments,
      countIf(event_type = 'share') AS shares
    FROM post_events
    WHERE tweet_id = {tweet_id:String}
    """

    case ClickhouseRepo.query(sql, %{"tweet_id" => tweet_id}) do
      {:ok, result} ->
        {:ok, row_to_summary(result)}

      {:error, reason} ->
        Logger.error("[Mixer.Metrics] get_summary failed for #{tweet_id}: #{inspect(reason)}")
        {:error, reason}
    end
  end

  @doc """
  Return view counts bucketed by UTC hour for the past `hours` hours.

  Useful for rendering a sparkline on a tweet detail page. `hours` must be a
  positive integer (default 24). Only hours that have at least one view
  produce a row, because the buckets come from a `GROUP BY` over stored events.

  ## Example

      {:ok, rows} = Mixer.Metrics.get_hourly_views(tweet_id, 24)
      # => [%{hour: ~N[2026-04-07 00:00:00], views: 5}, ...]
  """
  @spec get_hourly_views(binary(), pos_integer()) :: {:ok, [map()]} | {:error, term()}
  def get_hourly_views(tweet_id, hours \\ 24) when is_integer(hours) and hours > 0 do
    sql = """
    SELECT
      toStartOfHour(occurred_at) AS hour,
      count() AS views
    FROM post_events
    WHERE
      tweet_id = {tweet_id:String}
      AND event_type = 'view'
      AND occurred_at >= now() - toIntervalHour({hours:UInt32})
    GROUP BY hour
    ORDER BY hour ASC
    """

    case ClickhouseRepo.query(sql, %{"tweet_id" => tweet_id, "hours" => hours}) do
      {:ok, %{rows: rows}} ->
        {:ok, Enum.map(rows, fn [hour, views] -> %{hour: hour, views: views} end)}

      {:error, reason} ->
        Logger.error("[Mixer.Metrics] get_hourly_views failed: #{inspect(reason)}")
        {:error, reason}
    end
  end

  @doc """
  Return the top `limit` tweets ordered by total view count across all time.

  `limit` must be a positive integer (default 10).

  ## Example

      {:ok, rows} = Mixer.Metrics.get_top_posts(10)
      # => [%{tweet_id: "...", views: 99}, %{tweet_id: "...", views: 72}, ...]
  """
  @spec get_top_posts(pos_integer()) :: {:ok, [map()]} | {:error, term()}
  def get_top_posts(limit \\ 10) when is_integer(limit) and limit > 0 do
    sql = """
    SELECT
      tweet_id,
      countIf(event_type = 'view') AS views
    FROM post_events
    GROUP BY tweet_id
    ORDER BY views DESC
    LIMIT {limit:UInt32}
    """

    case ClickhouseRepo.query(sql, %{"limit" => limit}) do
      {:ok, %{rows: rows}} ->
        {:ok, Enum.map(rows, fn [tweet_id, views] -> %{tweet_id: tweet_id, views: views} end)}

      {:error, reason} ->
        Logger.error("[Mixer.Metrics] get_top_posts failed: #{inspect(reason)}")
        {:error, reason}
    end
  end

  @doc """
  Return per-event-type counts for a list of tweet IDs in a single query.

  Handy for batch-enriching a feed with metrics without N+1 queries. An empty
  list short-circuits to `{:ok, %{}}` without touching ClickHouse. The result
  map is built from the rows the query returns, so callers should treat a
  missing key as "no recorded events" for that tweet.

  ## Example

      {:ok, map} = Mixer.Metrics.get_bulk_summaries(tweet_ids)
      # => %{"<uuid>" => %{views: 5, likes: 2, ...}, ...}
  """
  @spec get_bulk_summaries([binary()]) :: {:ok, %{binary() => map()}} | {:error, term()}
  def get_bulk_summaries([]), do: {:ok, %{}}

  def get_bulk_summaries(tweet_ids) when is_list(tweet_ids) do
    # ecto_ch supports passing arrays as query parameters
    sql = """
    SELECT
      tweet_id,
      countIf(event_type = 'view') AS views,
      countIf(event_type = 'like') AS likes,
      countIf(event_type = 'unlike') AS unlikes,
      countIf(event_type = 'comment') AS comments,
      countIf(event_type = 'share') AS shares
    FROM post_events
    WHERE tweet_id IN {tweet_ids:Array(String)}
    GROUP BY tweet_id
    """

    case ClickhouseRepo.query(sql, %{"tweet_ids" => tweet_ids}) do
      {:ok, %{rows: rows}} ->
        # Each row is the tweet id followed by the five counters, in SELECT order.
        {:ok, Map.new(rows, fn [tweet_id | counts] -> {tweet_id, to_summary(counts)} end)}

      {:error, reason} ->
        Logger.error("[Mixer.Metrics] get_bulk_summaries failed: #{inspect(reason)}")
        {:error, reason}
    end
  end

  # ---------------------------------------------------------------------------
  # Private helpers
  # ---------------------------------------------------------------------------

  # Builds the event map and hands it to the buffer. The timestamp is taken
  # here (at call time, second precision), not when the batch is flushed.
  defp enqueue(event_type, tweet_id, opts) do
    event = %{
      event_type: event_type,
      tweet_id: tweet_id,
      user_id: Keyword.get(opts, :user_id),
      occurred_at: DateTime.utc_now() |> DateTime.truncate(:second),
      ip_address: opts |> Keyword.get(:ip_address) |> format_ip()
    }

    Buffer.track(event)
  end

  # Normalises the `:ip_address` option to a string (or nil).
  defp format_ip(nil), do: nil
  defp format_ip(ip) when is_binary(ip), do: ip

  # `:inet.ntoa/1` renders both IPv4 and IPv6 tuples in their conventional
  # textual form. Tracking is best-effort, so a tuple it rejects is recorded
  # as nil instead of crashing the caller.
  defp format_ip(ip) when is_tuple(ip) do
    case :inet.ntoa(ip) do
      {:error, :einval} -> nil
      formatted -> List.to_string(formatted)
    end
  end

  # Takes the first row of a summary query result.
  defp row_to_summary(%{rows: [[_, _, _, _, _] = counts | _]}), do: to_summary(counts)

  # Defensive fallback: any result without a usable first row is reported as
  # zero for every counter.
  defp row_to_summary(_), do: to_summary([0, 0, 0, 0, 0])

  # Maps the five counters (in SELECT order) onto the public summary shape.
  defp to_summary([views, likes, unlikes, comments, shares]) do
    %{
      views: views,
      likes: likes,
      unlikes: unlikes,
      comments: comments,
      shares: shares
    }
  end
end
|
||||
147
lib/mixer/metrics/buffer.ex
Normal file
147
lib/mixer/metrics/buffer.ex
Normal file
@@ -0,0 +1,147 @@
|
||||
defmodule Mixer.Metrics.Buffer do
  @moduledoc """
  GenServer that accumulates post metric events in memory and flushes them
  to ClickHouse in batches.

  Two conditions trigger a flush:

    1. **Timer** — every `@flush_interval` milliseconds (default 10 s).
    2. **Threshold** — whenever the in-memory buffer reaches `@max_buffer_size`
       rows (default 500).

  If ClickHouse is unavailable the error is logged and the buffered events
  are discarded rather than retried indefinitely, preventing unbounded memory
  growth. For production deployments that require durability, consider adding
  a persistent queue in front of this buffer.

  The process traps exits so that `terminate/2` runs, and the remaining
  events are flushed, when its supervisor shuts it down.
  """

  use GenServer

  require Logger

  alias Mixer.Metrics.PostEvent

  @flush_interval :timer.seconds(10)
  @max_buffer_size 500

  # ---------------------------------------------------------------------------
  # Public API
  # ---------------------------------------------------------------------------

  @doc """
  Start the buffer process and link it to the calling process.

  The process is registered under the module name, so only one buffer can run
  per node.

  Accepts an optional keyword list of overrides:

    * `:flush_interval` — milliseconds between scheduled flushes
    * `:max_buffer_size` — row count that triggers an immediate flush
  """
  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc """
  Enqueue a single analytics event map for buffered insertion into ClickHouse.

  The map must contain at minimum the fields required by `Mixer.Metrics.PostEvent`:
  `:event_type`, `:tweet_id`, `:occurred_at`. Other fields are optional.

  This call is asynchronous (cast) and returns `:ok` immediately.
  """
  @spec track(map()) :: :ok
  def track(event) when is_map(event) do
    GenServer.cast(__MODULE__, {:track, event})
  end

  @doc """
  Force an immediate flush of all buffered events to ClickHouse, regardless
  of the timer or threshold. Returns `:ok` after the flush attempt completes,
  whether or not the insert succeeded (failures are logged, not returned).

  Primarily useful in tests.
  """
  @spec flush() :: :ok
  def flush do
    GenServer.call(__MODULE__, :flush)
  end

  # ---------------------------------------------------------------------------
  # GenServer callbacks
  # ---------------------------------------------------------------------------

  @impl GenServer
  def init(opts) do
    # A GenServer only has terminate/2 invoked on a supervisor shutdown when it
    # traps exits; without this flag the shutdown flush would never run.
    Process.flag(:trap_exit, true)

    flush_interval = Keyword.get(opts, :flush_interval, @flush_interval)
    max_buffer_size = Keyword.get(opts, :max_buffer_size, @max_buffer_size)

    schedule_flush(flush_interval)

    state = %{
      # Newest event first; reversed once at flush time.
      events: [],
      # Tracked separately so the threshold check does not walk the list.
      count: 0,
      flush_interval: flush_interval,
      max_buffer_size: max_buffer_size
    }

    {:ok, state}
  end

  @impl GenServer
  def handle_cast({:track, event}, state) do
    new_count = state.count + 1
    new_events = [event | state.events]

    if new_count >= state.max_buffer_size do
      # Threshold reached: flush inline and start over with an empty buffer.
      # The periodic timer keeps its own schedule.
      do_flush(new_events)
      {:noreply, %{state | events: [], count: 0}}
    else
      {:noreply, %{state | events: new_events, count: new_count}}
    end
  end

  @impl GenServer
  def handle_call(:flush, _from, state) do
    do_flush(state.events)
    {:reply, :ok, %{state | events: [], count: 0}}
  end

  @impl GenServer
  def handle_info(:flush, state) do
    do_flush(state.events)
    schedule_flush(state.flush_interval)
    {:noreply, %{state | events: [], count: 0}}
  end

  # Defining handle_info/2 above replaces the default catch-all from
  # `use GenServer`, so without this clause any stray message (including
  # `{:EXIT, pid, reason}` tuples now that exits are trapped) would crash the
  # buffer and drop everything it holds.
  def handle_info(message, state) do
    Logger.debug("[Mixer.Metrics.Buffer] Ignoring unexpected message: #{inspect(message)}")
    {:noreply, state}
  end

  @impl GenServer
  def terminate(_reason, state) do
    # Best-effort flush on shutdown so we don't lose buffered events during
    # graceful stops (e.g., deploys).
    do_flush(state.events)
    :ok
  end

  # ---------------------------------------------------------------------------
  # Private helpers
  # ---------------------------------------------------------------------------

  # Writes the buffered events in arrival order. Never raises or exits: a
  # failed batch is logged and dropped, as described in the moduledoc.
  defp do_flush([]), do: :ok

  defp do_flush(events) do
    rows = Enum.reverse(events)

    try do
      {count, _} = Mixer.ClickhouseRepo.insert_all(PostEvent, rows)
      Logger.debug("[Mixer.Metrics.Buffer] Flushed #{count} event(s) to ClickHouse")
    rescue
      error ->
        Logger.error(
          "[Mixer.Metrics.Buffer] Failed to flush #{length(rows)} event(s) to ClickHouse: " <>
            Exception.message(error)
        )
    catch
      # An exit signalled while inserting (e.g. the repo process being gone)
      # is not an exception and would bypass `rescue`.
      :exit, reason ->
        Logger.error(
          "[Mixer.Metrics.Buffer] Failed to flush #{length(rows)} event(s) to ClickHouse: " <>
            inspect(reason)
        )
    end
  end

  defp schedule_flush(interval) do
    Process.send_after(self(), :flush, interval)
  end
end
|
||||
40
lib/mixer/metrics/post_event.ex
Normal file
40
lib/mixer/metrics/post_event.ex
Normal file
@@ -0,0 +1,40 @@
|
||||
defmodule Mixer.Metrics.PostEvent do
  @moduledoc """
  Ecto schema that maps to the `post_events` table in ClickHouse.

  Each row represents a single analytics event tied to a tweet (post).
  The table uses a MergeTree engine ordered by `(occurred_at, event_type,
  tweet_id)` for efficient time-range scans and per-tweet aggregations.

  ## Event types

  | event_type  | Description                              |
  |-------------|------------------------------------------|
  | `"view"`    | A tweet was displayed to a user          |
  | `"like"`    | A user liked a tweet                     |
  | `"unlike"`  | A user removed their like from a tweet   |
  | `"comment"` | A user replied to a tweet                |
  | `"share"`   | A user shared / reposted a tweet         |
  """

  use Ecto.Schema

  # The schema declares no primary key column.
  @primary_key false

  schema "post_events" do
    # Kind of event. LowCardinality(String) in ClickHouse, so values should
    # stay within the set listed in the moduledoc.
    field(:event_type, :string)

    # Tweet the event relates to.
    field(:tweet_id, Ecto.UUID)

    # Acting user; may be nil for anonymous views.
    field(:user_id, Ecto.UUID)

    # Wall-clock time of the event (UTC, second precision).
    field(:occurred_at, :utc_datetime)

    # Optional originating IP, useful for deduplicating anonymous views.
    field(:ip_address, :string)
  end
end
|
||||
Reference in New Issue
Block a user