Connection pool: Add client death tracking

While running this in production, I noticed a number of ghost worker
processes whose clients had all died before releasing the connection.
Monitor each client so that its death is logged and the dead client is
removed from the worker's list of clients.
This commit is contained in:
rinpatch 2020-05-08 18:18:59 +03:00
parent e94ba05e52
commit 1b15cb066c
2 changed files with 45 additions and 2 deletions

View File

@ -20,7 +20,10 @@ def init([key, uri, opts, client_pid]) do
end) end)
send(client_pid, {:conn_pid, conn_pid}) send(client_pid, {:conn_pid, conn_pid})
{:ok, %{key: key, timer: nil}, :hibernate}
{:ok,
%{key: key, timer: nil, client_monitors: %{client_pid => Process.monitor(client_pid)}},
:hibernate}
else else
err -> {:stop, err} err -> {:stop, err}
end end
@ -45,6 +48,9 @@ def handle_cast({:add_client, client_pid, send_pid_back}, %{key: key} = state) d
state state
end end
ref = Process.monitor(client_pid)
state = put_in(state.client_monitors[client_pid], ref)
{:noreply, state, :hibernate} {:noreply, state, :hibernate}
end end
@ -55,6 +61,9 @@ def handle_cast({:remove_client, client_pid}, %{key: key} = state) do
{conn_pid, List.delete(used_by, client_pid), crf, last_reference} {conn_pid, List.delete(used_by, client_pid), crf, last_reference}
end) end)
{ref, state} = pop_in(state.client_monitors[client_pid])
Process.demonitor(ref)
timer = timer =
if used_by == [] do if used_by == [] do
max_idle = Pleroma.Config.get([:connections_pool, :max_idle_time], 30_000) max_idle = Pleroma.Config.get([:connections_pool, :max_idle_time], 30_000)
@ -85,6 +94,26 @@ def handle_info({:gun_down, _pid, _protocol, _reason, _killed_streams} = down_me
{:stop, {:error, down_message}, state} {:stop, {:error, down_message}, state}
end end
@impl true
def handle_info({:DOWN, _ref, :process, client_pid, exit_reason}, state) do
  # A :DOWN message may still be delivered for a client that has already been
  # demonitored in :remove_client (it died just before that happened), so only
  # clients that are still present in client_monitors are reported and removed.
  tracked_ref = state.client_monitors[client_pid]

  if is_nil(tracked_ref) do
    {:noreply, state, :hibernate}
  else
    :telemetry.execute(
      [:pleroma, :connection_pool, :client_death],
      %{client_pid: client_pid, reason: exit_reason},
      %{key: state.key}
    )

    # Reuse the regular release path for the dead client.
    handle_cast({:remove_client, client_pid}, state)
  end
end
# LRFU policy: https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.1478 # LRFU policy: https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.55.1478
defp crf(time_delta, prev_crf) do defp crf(time_delta, prev_crf) do
1 + :math.pow(0.5, time_delta / 100) * prev_crf 1 + :math.pow(0.5, time_delta / 100) * prev_crf

View File

@ -6,7 +6,8 @@ defmodule Pleroma.Telemetry.Logger do
@events [ @events [
[:pleroma, :connection_pool, :reclaim, :start], [:pleroma, :connection_pool, :reclaim, :start],
[:pleroma, :connection_pool, :reclaim, :stop], [:pleroma, :connection_pool, :reclaim, :stop],
[:pleroma, :connection_pool, :provision_failure] [:pleroma, :connection_pool, :provision_failure],
[:pleroma, :connection_pool, :client_death]
] ]
def attach do def attach do
:telemetry.attach_many("pleroma-logger", @events, &handle_event/4, []) :telemetry.attach_many("pleroma-logger", @events, &handle_event/4, [])
@ -59,4 +60,17 @@ def handle_event(
"Connection pool had to refuse opening a connection to #{key} due to connection limit exhaustion" "Connection pool had to refuse opening a connection to #{key} due to connection limit exhaustion"
end) end)
end end
# Logs a warning when a pool worker reports that one of its clients died
# while still holding the connection.
def handle_event(
      [:pleroma, :connection_pool, :client_death],
      %{client_pid: pid, reason: exit_reason},
      %{key: pool_key},
      _config
    ) do
  # The message is built inside the function so it is only computed when the
  # warning is actually emitted.
  Logger.warn(fn ->
    "Pool worker for #{pool_key}: Client #{inspect(pid)} died before releasing the connection with #{inspect(exit_reason)}"
  end)
end
end end