From f088cfb77bc3cd057cc0ebb6fbbe56418818cac5 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Fri, 12 Jun 2026 12:31:23 +0200 Subject: [PATCH] Close TD-10 git command timeouts --- TECHDEBTS.md | 13 ++- config/config.exs | 4 + lib/bds/git.ex | 245 +++++++++++++++++++++++++++++++++++++----- test/bds/git_test.exs | 64 +++++++++++ 4 files changed, 300 insertions(+), 26 deletions(-) diff --git a/TECHDEBTS.md b/TECHDEBTS.md index 92fbd0f..cbee6b8 100644 --- a/TECHDEBTS.md +++ b/TECHDEBTS.md @@ -414,7 +414,18 @@ cleanup can trap exits locally. Keep the state bookkeeping identical **Acceptance.** A test worker with `trap_exit` observes `:shutdown` and runs its cleanup before dying; cancelled tasks still free a concurrency slot. -### TD-10: Timeouts for external `git` commands +### TD-10: Timeouts for external `git` commands ✅ DONE (2026-06-12) + +**Status: implemented.** `BDS.Git` now applies config-driven timeout budgets +to every shell-out (`config :bds, :git` with `:local_timeout_ms` defaulting to +15s and `:network_timeout_ms` defaulting to 120s for fetch/pull/push). The +default runner no longer uses unbounded `System.cmd/3`; it launches the child +through `Port.open/2`, collects stdout/stderr, and on timeout closes the port +and explicitly terminates the OS process if it is still alive. Timeout results +surface as structured `%{kind: :timeout, operation:, timeout_ms:, message:}` +errors, while existing structured auth guidance is preserved. Acceptance proof +now includes a bounded runner-stub timeout test and a real-process cleanup test +that launches `sleep`, times out, and asserts the spawned PID is gone. **Context.** `BDS.Git` shells out via `System.cmd`, which has **no timeout**. `GIT_TERMINAL_PROMPT=0` and SSH BatchMode prevent interactive hangs, but a diff --git a/config/config.exs b/config/config.exs index f75836c..04d705b 100644 --- a/config/config.exs +++ b/config/config.exs @@ -78,6 +78,10 @@ config :bds, :chat, stream_emit_interval_ms: 100, await_timeout_margin_ms: 5_000 +config :bds, :git, + local_timeout_ms: 15_000, + network_timeout_ms: 120_000 + config :bds, :embeddings, backend: BDS.Embeddings.Backends.Neural, model_id: "Xenova/multilingual-e5-small", diff --git a/lib/bds/git.ex b/lib/bds/git.ex index 84186fd..96af3a5 100644 --- a/lib/bds/git.ex +++ b/lib/bds/git.ex @@ -3,6 +3,9 @@ defmodule BDS.Git do alias BDS.Projects + @default_local_timeout_ms 15_000 + @default_network_timeout_ms 120_000 + @lfs_patterns [ "*.jpg", "*.jpeg", @@ -94,9 +97,10 @@ defmodule BDS.Git do when is_binary(project_id) and is_binary(file_path) and is_list(opts) do with {:ok, project_dir} <- project_dir(project_id) do runner = Keyword.get(opts, :runner, &system_runner/3) + timeout_ms = git_timeout_ms(["show"], opts) original = - case runner.("git", ["show", "HEAD:#{file_path}"], command_opts(project_dir)) do + case runner.("git", ["show", "HEAD:#{file_path}"], command_opts(project_dir, timeout_ms)) do {output, 0} -> output {_output, _status} -> "" end @@ -119,12 +123,8 @@ defmodule BDS.Git do project_dir, ["log", "--date=short", "--format=%H%x09%an%x09%ad%x09%s", branch], opts - ) do - remote_log = - case run_git(project_dir, ["log", "--format=%H", "origin/#{branch}"], opts) do - {:ok, output} -> output - {:error, {:git_failed, _message}} -> "" - end + ), + {:ok, remote_log} <- remote_history_log(project_dir, branch, opts) do local_commits = parse_history_log(local_log) remote_hashes = MapSet.new(parse_remote_history(remote_log)) @@ -166,8 +166,8 @@ defmodule BDS.Git do {:ok, output} -> {:ok, %{updated: true, output: output}} - {:error, {:git_failed, message}} -> - structured_git_error(project_dir, :fetch, message, opts) + {:error, reason} -> + structured_git_error(project_dir, :fetch, reason, opts) end end end @@ -244,14 +244,20 @@ defmodule BDS.Git do }} {:ok, upstream_branch} -> - {:ok, - %{ - local_branch: local_branch, - upstream_branch: upstream_branch, - has_upstream: true, - ahead: revision_count(project_dir, "#{upstream_branch}..HEAD", opts), - behind: revision_count(project_dir, "HEAD..#{upstream_branch}", opts) - }} + with {:ok, ahead} <- revision_count(project_dir, "#{upstream_branch}..HEAD", opts), + {:ok, behind} <- revision_count(project_dir, "HEAD..#{upstream_branch}", opts) do + {:ok, + %{ + local_branch: local_branch, + upstream_branch: upstream_branch, + has_upstream: true, + ahead: ahead, + behind: behind + }} + end + + {:error, _reason} = error -> + error end end end @@ -304,6 +310,15 @@ defmodule BDS.Git do case run_git(project_dir, ["remote", "get-url", "origin"], opts) do {:ok, output} -> {:ok, blank_to_nil(output)} {:error, {:git_failed, _message}} -> {:ok, nil} + {:error, _reason} = error -> error + end + end + + defp remote_history_log(project_dir, branch, opts) do + case run_git(project_dir, ["log", "--format=%H", "origin/#{branch}"], opts) do + {:ok, output} -> {:ok, output} + {:error, {:git_failed, _message}} -> {:ok, ""} + {:error, _reason} = error -> error end end @@ -311,6 +326,7 @@ defmodule BDS.Git do case run_git(project_dir, ["lfs", "ls-files"], opts) do {:ok, _output} -> {:ok, true} {:error, {:git_failed, _message}} -> {:ok, false} + {:error, _reason} = error -> error end end @@ -332,25 +348,53 @@ defmodule BDS.Git do end defp run_git(project_dir, args, opts) do - runner = Keyword.get(opts, :runner, &system_runner/3) + timeout_ms = git_timeout_ms(args, opts) + command_opts = command_opts(project_dir, timeout_ms) + command = git_command(opts) + command_args = git_command_args(args, opts) - case runner.("git", args, command_opts(project_dir)) do + result = + case Keyword.fetch(opts, :runner) do + {:ok, runner} -> run_runner_with_timeout(runner, command, command_args, command_opts) + :error -> system_runner(command, command_args, command_opts) + end + + case result do + {:timeout, output} -> {:error, timeout_error(args, timeout_ms, output)} {output, 0} -> {:ok, String.trim_trailing(output)} {output, _status} -> {:error, {:git_failed, String.trim(output)}} end end defp system_runner(command, args, opts) do - env = opts |> Keyword.get(:env, %{}) |> Enum.to_list() cwd = Keyword.fetch!(opts, :cd) - System.cmd(command, args, cd: cwd, env: env, stderr_to_stdout: true) + timeout_ms = Keyword.fetch!(opts, :timeout_ms) + executable = System.find_executable(command) || raise "missing executable: #{command}" + + port = + Port.open( + {:spawn_executable, executable}, + [ + :binary, + :stderr_to_stdout, + :exit_status, + :use_stdio, + :hide, + {:cd, cwd}, + {:env, port_env(opts |> Keyword.get(:env, %{}))}, + {:args, args} + ] + ) + + receive_port_result(port, [], timeout_ms) end - defp command_opts(project_dir) do + defp command_opts(project_dir, timeout_ms) do ssh_command = "ssh -oBatchMode=yes" [ cd: project_dir, + timeout_ms: timeout_ms, env: %{ "GIT_TERMINAL_PROMPT" => "0", "GCM_INTERACTIVE" => "never", @@ -367,6 +411,7 @@ defmodule BDS.Git do ) do {:ok, output} -> {:ok, blank_to_nil(output)} {:error, {:git_failed, _message}} -> {:ok, nil} + {:error, _reason} = error -> error end end @@ -465,8 +510,151 @@ defmodule BDS.Git do defp revision_count(project_dir, revision_range, opts) do case run_git(project_dir, ["rev-list", "--count", revision_range], opts) do - {:ok, output} -> parse_count(output) - {:error, {:git_failed, _message}} -> 0 + {:ok, output} -> {:ok, parse_count(output)} + {:error, {:git_failed, _message}} -> {:ok, 0} + {:error, _reason} = error -> error + end + end + + defp run_runner_with_timeout(runner, command, args, opts) do + timeout_ms = Keyword.fetch!(opts, :timeout_ms) + task = Task.async(fn -> runner.(command, args, opts) end) + + case Task.yield(task, timeout_ms) || Task.shutdown(task, :brutal_kill) do + {:ok, result} -> result + nil -> {:timeout, ""} + end + end + + defp receive_port_result(port, output, timeout_ms) do + receive do + {^port, {:data, data}} -> + receive_port_result(port, [data | output], timeout_ms) + + {^port, {:exit_status, status}} -> + {iodata_to_output(output), status} + after + timeout_ms -> + {:timeout, terminate_port(port, output)} + end + end + + defp terminate_port(port, output) do + os_pid = port_os_pid(port) + safe_port_close(port) + + output = drain_port_messages(port, output, 25) + maybe_kill_os_process(os_pid) + + drain_port_messages(port, output, 25) + |> iodata_to_output() + end + + defp drain_port_messages(port, output, timeout_ms) do + receive do + {^port, {:data, data}} -> drain_port_messages(port, [data | output], timeout_ms) + {^port, {:exit_status, _status}} -> output + after + timeout_ms -> output + end + end + + defp safe_port_close(port) do + Port.close(port) + catch + :error, _reason -> :ok + end + + defp port_os_pid(port) do + case Port.info(port, :os_pid) do + {:os_pid, os_pid} when is_integer(os_pid) -> os_pid + _other -> nil + end + end + + defp maybe_kill_os_process(nil), do: :ok + + defp maybe_kill_os_process(os_pid) when is_integer(os_pid) do + if os_process_alive?(os_pid) do + System.cmd("kill", ["-TERM", Integer.to_string(os_pid)], stderr_to_stdout: true) + + if os_process_alive?(os_pid) do + System.cmd("kill", ["-KILL", Integer.to_string(os_pid)], stderr_to_stdout: true) + end + end + + :ok + end + + defp os_process_alive?(os_pid) when is_integer(os_pid) do + {_output, exit_code} = + System.cmd("kill", ["-0", Integer.to_string(os_pid)], stderr_to_stdout: true) + + exit_code == 0 + end + + defp iodata_to_output(output) do + output + |> Enum.reverse() + |> IO.iodata_to_binary() + end + + defp port_env(env) do + Enum.map(env, fn {key, value} -> + {String.to_charlist(to_string(key)), String.to_charlist(to_string(value))} + end) + end + + defp git_timeout_ms(args, opts) do + Keyword.get(opts, :timeout_ms) || + if(network_git_command?(args), + do: git_timeout_config(:network_timeout_ms, @default_network_timeout_ms), + else: git_timeout_config(:local_timeout_ms, @default_local_timeout_ms) + ) + end + + defp git_command(opts), do: Keyword.get(opts, :command, "git") + + defp git_command_args(args, opts) do + Keyword.get(opts, :command_args, []) ++ args + end + + defp git_timeout_config(key, default) do + :bds + |> Application.get_env(:git, []) + |> Keyword.get(key, default) + end + + defp network_git_command?([operation | _rest]), do: operation in ["fetch", "pull", "push"] + + defp timeout_error(args, timeout_ms, output) do + operation = git_operation(args) + + %{ + kind: :timeout, + operation: operation, + timeout_ms: timeout_ms, + message: "Git #{operation} timed out after #{timeout_ms}ms", + output: String.trim(output) + } + end + + defp git_operation([operation | _rest]) do + case operation do + "add" -> :add + "commit" -> :commit + "diff" -> :diff + "fetch" -> :fetch + "log" -> :log + "lfs" -> :lfs + "pull" -> :pull + "push" -> :push + "remote" -> :remote + "rev-list" -> :rev_list + "rev-parse" -> :rev_parse + "show" -> :show + "status" -> :status + _other -> :git end end @@ -482,10 +670,17 @@ defmodule BDS.Git do defp category_for_path("templates/" <> _rest), do: :templates defp category_for_path(_path), do: nil - defp structured_git_error(project_dir, _operation, message, opts) do + defp structured_git_error(_project_dir, _operation, %{} = error, _opts), do: {:error, error} + + defp structured_git_error(project_dir, operation, {:git_failed, message}, opts) do + structured_git_error(project_dir, operation, message, opts) + end + + defp structured_git_error(project_dir, _operation, message, opts) when is_binary(message) do provider = case remote_url(project_dir, opts) do {:ok, remote} -> provider_info(remote) + {:error, _reason} -> nil end if auth_error?(message) do diff --git a/test/bds/git_test.exs b/test/bds/git_test.exs index 490ea87..879db27 100644 --- a/test/bds/git_test.exs +++ b/test/bds/git_test.exs @@ -212,10 +212,74 @@ defmodule BDS.GitTest do assert is_binary(error.guidance) end + test "fetch returns a structured timeout error when a runner exceeds the deadline", %{ + project: project + } do + with_git_timeouts(20, 20, fn -> + runner = fn _command, _args, _opts -> + Process.sleep(100) + {"", 0} + end + + assert {:error, error} = Git.fetch(project.id, runner: runner) + assert %{kind: :timeout, operation: :fetch, timeout_ms: 20, message: _message} = error + assert error.message =~ "timed out" + end) + end + + test "default runner kills the spawned git process when fetch times out", %{ + project: project, + temp_root: temp_root + } do + pid_file = Path.join(temp_root, "git-timeout.pid") + sleep_executable = System.find_executable("sleep") || raise "missing sleep executable" + shell_executable = System.find_executable("sh") || raise "missing sh executable" + shell_script = "echo \"$$\" > \"#{pid_file}\"; exec \"#{sleep_executable}\" 2" + + with_git_timeouts(50, 50, fn -> + assert {:error, error} = + Git.fetch(project.id, + command: shell_executable, + command_args: ["-c", shell_script] + ) + + assert %{kind: :timeout, operation: :fetch} = error + + pid = pid_file |> File.read!() |> String.trim() |> String.to_integer() + refute os_process_alive?(pid) + end) + end + defp fake_runner(handler) do fn command, args, opts -> handler.(command, args, opts) end end + defp with_git_timeouts(local_timeout_ms, network_timeout_ms, fun) do + original = Application.get_env(:bds, :git) + + Application.put_env(:bds, :git, + local_timeout_ms: local_timeout_ms, + network_timeout_ms: network_timeout_ms + ) + + try do + fun.() + after + if original == nil do + Application.delete_env(:bds, :git) + else + Application.put_env(:bds, :git, original) + end + end + end + + defp os_process_alive?(pid) when is_integer(pid) do + {_output, exit_code} = + System.cmd("kill", ["-0", Integer.to_string(pid)], stderr_to_stdout: true) + + exit_code == 0 + end + defp init_git_repo!(project_dir, message) do run_git!(project_dir, ["init", "-b", "master"]) run_git!(project_dir, ["config", "user.name", "bDS Tests"])