Close TD-10 git command timeouts

This commit is contained in:
2026-06-12 12:31:23 +02:00
parent bad656924b
commit f088cfb77b
4 changed files with 300 additions and 26 deletions

View File

@@ -414,7 +414,18 @@ cleanup can trap exits locally. Keep the state bookkeeping identical
**Acceptance.** A test worker with `trap_exit` observes `:shutdown` and runs **Acceptance.** A test worker with `trap_exit` observes `:shutdown` and runs
its cleanup before dying; cancelled tasks still free a concurrency slot. its cleanup before dying; cancelled tasks still free a concurrency slot.
### TD-10: Timeouts for external `git` commands ### TD-10: Timeouts for external `git` commands ✅ DONE (2026-06-12)
**Status: implemented.** `BDS.Git` now applies config-driven timeout budgets
to every shell-out (`config :bds, :git` with `:local_timeout_ms` defaulting to
15s and `:network_timeout_ms` defaulting to 120s for fetch/pull/push). The
default runner no longer uses unbounded `System.cmd/3`; it launches the child
through `Port.open/2`, collects stdout/stderr, and on timeout closes the port
and explicitly terminates the OS process if it is still alive. Timeout results
surface as structured `%{kind: :timeout, operation:, timeout_ms:, message:}`
errors, while existing structured auth guidance is preserved. Acceptance proof
now includes a bounded runner-stub timeout test and a real-process cleanup test
that launches `sleep`, times out, and asserts the spawned PID is gone.
**Context.** `BDS.Git` shells out via `System.cmd`, which has **no timeout**. **Context.** `BDS.Git` shells out via `System.cmd`, which has **no timeout**.
`GIT_TERMINAL_PROMPT=0` and SSH BatchMode prevent interactive hangs, but a `GIT_TERMINAL_PROMPT=0` and SSH BatchMode prevent interactive hangs, but a

View File

@@ -78,6 +78,10 @@ config :bds, :chat,
stream_emit_interval_ms: 100, stream_emit_interval_ms: 100,
await_timeout_margin_ms: 5_000 await_timeout_margin_ms: 5_000
config :bds, :git,
local_timeout_ms: 15_000,
network_timeout_ms: 120_000
config :bds, :embeddings, config :bds, :embeddings,
backend: BDS.Embeddings.Backends.Neural, backend: BDS.Embeddings.Backends.Neural,
model_id: "Xenova/multilingual-e5-small", model_id: "Xenova/multilingual-e5-small",

View File

@@ -3,6 +3,9 @@ defmodule BDS.Git do
alias BDS.Projects alias BDS.Projects
@default_local_timeout_ms 15_000
@default_network_timeout_ms 120_000
@lfs_patterns [ @lfs_patterns [
"*.jpg", "*.jpg",
"*.jpeg", "*.jpeg",
@@ -94,9 +97,10 @@ defmodule BDS.Git do
when is_binary(project_id) and is_binary(file_path) and is_list(opts) do when is_binary(project_id) and is_binary(file_path) and is_list(opts) do
with {:ok, project_dir} <- project_dir(project_id) do with {:ok, project_dir} <- project_dir(project_id) do
runner = Keyword.get(opts, :runner, &system_runner/3) runner = Keyword.get(opts, :runner, &system_runner/3)
timeout_ms = git_timeout_ms(["show"], opts)
original = original =
case runner.("git", ["show", "HEAD:#{file_path}"], command_opts(project_dir)) do case runner.("git", ["show", "HEAD:#{file_path}"], command_opts(project_dir, timeout_ms)) do
{output, 0} -> output {output, 0} -> output
{_output, _status} -> "" {_output, _status} -> ""
end end
@@ -119,12 +123,8 @@ defmodule BDS.Git do
project_dir, project_dir,
["log", "--date=short", "--format=%H%x09%an%x09%ad%x09%s", branch], ["log", "--date=short", "--format=%H%x09%an%x09%ad%x09%s", branch],
opts opts
) do ),
remote_log = {:ok, remote_log} <- remote_history_log(project_dir, branch, opts) do
case run_git(project_dir, ["log", "--format=%H", "origin/#{branch}"], opts) do
{:ok, output} -> output
{:error, {:git_failed, _message}} -> ""
end
local_commits = parse_history_log(local_log) local_commits = parse_history_log(local_log)
remote_hashes = MapSet.new(parse_remote_history(remote_log)) remote_hashes = MapSet.new(parse_remote_history(remote_log))
@@ -166,8 +166,8 @@ defmodule BDS.Git do
{:ok, output} -> {:ok, output} ->
{:ok, %{updated: true, output: output}} {:ok, %{updated: true, output: output}}
{:error, {:git_failed, message}} -> {:error, reason} ->
structured_git_error(project_dir, :fetch, message, opts) structured_git_error(project_dir, :fetch, reason, opts)
end end
end end
end end
@@ -244,15 +244,21 @@ defmodule BDS.Git do
}} }}
{:ok, upstream_branch} -> {:ok, upstream_branch} ->
with {:ok, ahead} <- revision_count(project_dir, "#{upstream_branch}..HEAD", opts),
{:ok, behind} <- revision_count(project_dir, "HEAD..#{upstream_branch}", opts) do
{:ok, {:ok,
%{ %{
local_branch: local_branch, local_branch: local_branch,
upstream_branch: upstream_branch, upstream_branch: upstream_branch,
has_upstream: true, has_upstream: true,
ahead: revision_count(project_dir, "#{upstream_branch}..HEAD", opts), ahead: ahead,
behind: revision_count(project_dir, "HEAD..#{upstream_branch}", opts) behind: behind
}} }}
end end
{:error, _reason} = error ->
error
end
end end
end end
@@ -304,6 +310,15 @@ defmodule BDS.Git do
case run_git(project_dir, ["remote", "get-url", "origin"], opts) do case run_git(project_dir, ["remote", "get-url", "origin"], opts) do
{:ok, output} -> {:ok, blank_to_nil(output)} {:ok, output} -> {:ok, blank_to_nil(output)}
{:error, {:git_failed, _message}} -> {:ok, nil} {:error, {:git_failed, _message}} -> {:ok, nil}
{:error, _reason} = error -> error
end
end
defp remote_history_log(project_dir, branch, opts) do
case run_git(project_dir, ["log", "--format=%H", "origin/#{branch}"], opts) do
{:ok, output} -> {:ok, output}
{:error, {:git_failed, _message}} -> {:ok, ""}
{:error, _reason} = error -> error
end end
end end
@@ -311,6 +326,7 @@ defmodule BDS.Git do
case run_git(project_dir, ["lfs", "ls-files"], opts) do case run_git(project_dir, ["lfs", "ls-files"], opts) do
{:ok, _output} -> {:ok, true} {:ok, _output} -> {:ok, true}
{:error, {:git_failed, _message}} -> {:ok, false} {:error, {:git_failed, _message}} -> {:ok, false}
{:error, _reason} = error -> error
end end
end end
@@ -332,25 +348,53 @@ defmodule BDS.Git do
end end
defp run_git(project_dir, args, opts) do defp run_git(project_dir, args, opts) do
runner = Keyword.get(opts, :runner, &system_runner/3) timeout_ms = git_timeout_ms(args, opts)
command_opts = command_opts(project_dir, timeout_ms)
command = git_command(opts)
command_args = git_command_args(args, opts)
case runner.("git", args, command_opts(project_dir)) do result =
case Keyword.fetch(opts, :runner) do
{:ok, runner} -> run_runner_with_timeout(runner, command, command_args, command_opts)
:error -> system_runner(command, command_args, command_opts)
end
case result do
{:timeout, output} -> {:error, timeout_error(args, timeout_ms, output)}
{output, 0} -> {:ok, String.trim_trailing(output)} {output, 0} -> {:ok, String.trim_trailing(output)}
{output, _status} -> {:error, {:git_failed, String.trim(output)}} {output, _status} -> {:error, {:git_failed, String.trim(output)}}
end end
end end
defp system_runner(command, args, opts) do defp system_runner(command, args, opts) do
env = opts |> Keyword.get(:env, %{}) |> Enum.to_list()
cwd = Keyword.fetch!(opts, :cd) cwd = Keyword.fetch!(opts, :cd)
System.cmd(command, args, cd: cwd, env: env, stderr_to_stdout: true) timeout_ms = Keyword.fetch!(opts, :timeout_ms)
executable = System.find_executable(command) || raise "missing executable: #{command}"
port =
Port.open(
{:spawn_executable, executable},
[
:binary,
:stderr_to_stdout,
:exit_status,
:use_stdio,
:hide,
{:cd, cwd},
{:env, port_env(opts |> Keyword.get(:env, %{}))},
{:args, args}
]
)
receive_port_result(port, [], timeout_ms)
end end
defp command_opts(project_dir) do defp command_opts(project_dir, timeout_ms) do
ssh_command = "ssh -oBatchMode=yes" ssh_command = "ssh -oBatchMode=yes"
[ [
cd: project_dir, cd: project_dir,
timeout_ms: timeout_ms,
env: %{ env: %{
"GIT_TERMINAL_PROMPT" => "0", "GIT_TERMINAL_PROMPT" => "0",
"GCM_INTERACTIVE" => "never", "GCM_INTERACTIVE" => "never",
@@ -367,6 +411,7 @@ defmodule BDS.Git do
) do ) do
{:ok, output} -> {:ok, blank_to_nil(output)} {:ok, output} -> {:ok, blank_to_nil(output)}
{:error, {:git_failed, _message}} -> {:ok, nil} {:error, {:git_failed, _message}} -> {:ok, nil}
{:error, _reason} = error -> error
end end
end end
@@ -465,8 +510,151 @@ defmodule BDS.Git do
defp revision_count(project_dir, revision_range, opts) do defp revision_count(project_dir, revision_range, opts) do
case run_git(project_dir, ["rev-list", "--count", revision_range], opts) do case run_git(project_dir, ["rev-list", "--count", revision_range], opts) do
{:ok, output} -> parse_count(output) {:ok, output} -> {:ok, parse_count(output)}
{:error, {:git_failed, _message}} -> 0 {:error, {:git_failed, _message}} -> {:ok, 0}
{:error, _reason} = error -> error
end
end
defp run_runner_with_timeout(runner, command, args, opts) do
timeout_ms = Keyword.fetch!(opts, :timeout_ms)
task = Task.async(fn -> runner.(command, args, opts) end)
case Task.yield(task, timeout_ms) || Task.shutdown(task, :brutal_kill) do
{:ok, result} -> result
nil -> {:timeout, ""}
end
end
defp receive_port_result(port, output, timeout_ms) do
receive do
{^port, {:data, data}} ->
receive_port_result(port, [data | output], timeout_ms)
{^port, {:exit_status, status}} ->
{iodata_to_output(output), status}
after
timeout_ms ->
{:timeout, terminate_port(port, output)}
end
end
defp terminate_port(port, output) do
os_pid = port_os_pid(port)
safe_port_close(port)
output = drain_port_messages(port, output, 25)
maybe_kill_os_process(os_pid)
drain_port_messages(port, output, 25)
|> iodata_to_output()
end
defp drain_port_messages(port, output, timeout_ms) do
receive do
{^port, {:data, data}} -> drain_port_messages(port, [data | output], timeout_ms)
{^port, {:exit_status, _status}} -> output
after
timeout_ms -> output
end
end
defp safe_port_close(port) do
Port.close(port)
catch
:error, _reason -> :ok
end
defp port_os_pid(port) do
case Port.info(port, :os_pid) do
{:os_pid, os_pid} when is_integer(os_pid) -> os_pid
_other -> nil
end
end
defp maybe_kill_os_process(nil), do: :ok
defp maybe_kill_os_process(os_pid) when is_integer(os_pid) do
if os_process_alive?(os_pid) do
System.cmd("kill", ["-TERM", Integer.to_string(os_pid)], stderr_to_stdout: true)
if os_process_alive?(os_pid) do
System.cmd("kill", ["-KILL", Integer.to_string(os_pid)], stderr_to_stdout: true)
end
end
:ok
end
defp os_process_alive?(os_pid) when is_integer(os_pid) do
{_output, exit_code} =
System.cmd("kill", ["-0", Integer.to_string(os_pid)], stderr_to_stdout: true)
exit_code == 0
end
defp iodata_to_output(output) do
output
|> Enum.reverse()
|> IO.iodata_to_binary()
end
defp port_env(env) do
Enum.map(env, fn {key, value} ->
{String.to_charlist(to_string(key)), String.to_charlist(to_string(value))}
end)
end
defp git_timeout_ms(args, opts) do
Keyword.get(opts, :timeout_ms) ||
if(network_git_command?(args),
do: git_timeout_config(:network_timeout_ms, @default_network_timeout_ms),
else: git_timeout_config(:local_timeout_ms, @default_local_timeout_ms)
)
end
defp git_command(opts), do: Keyword.get(opts, :command, "git")
defp git_command_args(args, opts) do
Keyword.get(opts, :command_args, []) ++ args
end
defp git_timeout_config(key, default) do
:bds
|> Application.get_env(:git, [])
|> Keyword.get(key, default)
end
defp network_git_command?([operation | _rest]), do: operation in ["fetch", "pull", "push"]
defp timeout_error(args, timeout_ms, output) do
operation = git_operation(args)
%{
kind: :timeout,
operation: operation,
timeout_ms: timeout_ms,
message: "Git #{operation} timed out after #{timeout_ms}ms",
output: String.trim(output)
}
end
defp git_operation([operation | _rest]) do
case operation do
"add" -> :add
"commit" -> :commit
"diff" -> :diff
"fetch" -> :fetch
"log" -> :log
"lfs" -> :lfs
"pull" -> :pull
"push" -> :push
"remote" -> :remote
"rev-list" -> :rev_list
"rev-parse" -> :rev_parse
"show" -> :show
"status" -> :status
_other -> :git
end end
end end
@@ -482,10 +670,17 @@ defmodule BDS.Git do
defp category_for_path("templates/" <> _rest), do: :templates defp category_for_path("templates/" <> _rest), do: :templates
defp category_for_path(_path), do: nil defp category_for_path(_path), do: nil
defp structured_git_error(project_dir, _operation, message, opts) do defp structured_git_error(_project_dir, _operation, %{} = error, _opts), do: {:error, error}
defp structured_git_error(project_dir, operation, {:git_failed, message}, opts) do
structured_git_error(project_dir, operation, message, opts)
end
defp structured_git_error(project_dir, _operation, message, opts) when is_binary(message) do
provider = provider =
case remote_url(project_dir, opts) do case remote_url(project_dir, opts) do
{:ok, remote} -> provider_info(remote) {:ok, remote} -> provider_info(remote)
{:error, _reason} -> nil
end end
if auth_error?(message) do if auth_error?(message) do

View File

@@ -212,10 +212,74 @@ defmodule BDS.GitTest do
assert is_binary(error.guidance) assert is_binary(error.guidance)
end end
test "fetch returns a structured timeout error when a runner exceeds the deadline", %{
project: project
} do
with_git_timeouts(20, 20, fn ->
runner = fn _command, _args, _opts ->
Process.sleep(100)
{"", 0}
end
assert {:error, error} = Git.fetch(project.id, runner: runner)
assert %{kind: :timeout, operation: :fetch, timeout_ms: 20, message: _message} = error
assert error.message =~ "timed out"
end)
end
test "default runner kills the spawned git process when fetch times out", %{
project: project,
temp_root: temp_root
} do
pid_file = Path.join(temp_root, "git-timeout.pid")
sleep_executable = System.find_executable("sleep") || raise "missing sleep executable"
shell_executable = System.find_executable("sh") || raise "missing sh executable"
shell_script = "echo \"$$\" > \"#{pid_file}\"; exec \"#{sleep_executable}\" 2"
with_git_timeouts(50, 50, fn ->
assert {:error, error} =
Git.fetch(project.id,
command: shell_executable,
command_args: ["-c", shell_script]
)
assert %{kind: :timeout, operation: :fetch} = error
pid = pid_file |> File.read!() |> String.trim() |> String.to_integer()
refute os_process_alive?(pid)
end)
end
defp fake_runner(handler) do defp fake_runner(handler) do
fn command, args, opts -> handler.(command, args, opts) end fn command, args, opts -> handler.(command, args, opts) end
end end
defp with_git_timeouts(local_timeout_ms, network_timeout_ms, fun) do
original = Application.get_env(:bds, :git)
Application.put_env(:bds, :git,
local_timeout_ms: local_timeout_ms,
network_timeout_ms: network_timeout_ms
)
try do
fun.()
after
if original == nil do
Application.delete_env(:bds, :git)
else
Application.put_env(:bds, :git, original)
end
end
end
defp os_process_alive?(pid) when is_integer(pid) do
{_output, exit_code} =
System.cmd("kill", ["-0", Integer.to_string(pid)], stderr_to_stdout: true)
exit_code == 0
end
defp init_git_repo!(project_dir, message) do defp init_git_repo!(project_dir, message) do
run_git!(project_dir, ["init", "-b", "master"]) run_git!(project_dir, ["init", "-b", "master"])
run_git!(project_dir, ["config", "user.name", "bDS Tests"]) run_git!(project_dir, ["config", "user.name", "bDS Tests"])