Close TD-10 git command timeouts
This commit is contained in:
13
TECHDEBTS.md
13
TECHDEBTS.md
@@ -414,7 +414,18 @@ cleanup can trap exits locally. Keep the state bookkeeping identical
|
||||
**Acceptance.** A test worker with `trap_exit` observes `:shutdown` and runs
|
||||
its cleanup before dying; cancelled tasks still free a concurrency slot.
|
||||
|
||||
### TD-10: Timeouts for external `git` commands
|
||||
### TD-10: Timeouts for external `git` commands ✅ DONE (2026-06-12)
|
||||
|
||||
**Status: implemented.** `BDS.Git` now applies config-driven timeout budgets
|
||||
to every shell-out (`config :bds, :git` with `:local_timeout_ms` defaulting to
|
||||
15s and `:network_timeout_ms` defaulting to 120s for fetch/pull/push). The
|
||||
default runner no longer uses unbounded `System.cmd/3`; it launches the child
|
||||
through `Port.open/2`, collects stdout/stderr, and on timeout closes the port
|
||||
and explicitly terminates the OS process if it is still alive. Timeout results
|
||||
surface as structured `%{kind: :timeout, operation:, timeout_ms:, message:}`
|
||||
errors, while existing structured auth guidance is preserved. Acceptance proof
|
||||
now includes a bounded runner-stub timeout test and a real-process cleanup test
|
||||
that launches `sleep`, times out, and asserts the spawned PID is gone.
|
||||
|
||||
**Context.** `BDS.Git` shells out via `System.cmd`, which has **no timeout**.
|
||||
`GIT_TERMINAL_PROMPT=0` and SSH BatchMode prevent interactive hangs, but a
|
||||
|
||||
@@ -78,6 +78,10 @@ config :bds, :chat,
|
||||
stream_emit_interval_ms: 100,
|
||||
await_timeout_margin_ms: 5_000
|
||||
|
||||
config :bds, :git,
|
||||
local_timeout_ms: 15_000,
|
||||
network_timeout_ms: 120_000
|
||||
|
||||
config :bds, :embeddings,
|
||||
backend: BDS.Embeddings.Backends.Neural,
|
||||
model_id: "Xenova/multilingual-e5-small",
|
||||
|
||||
233
lib/bds/git.ex
233
lib/bds/git.ex
@@ -3,6 +3,9 @@ defmodule BDS.Git do
|
||||
|
||||
alias BDS.Projects
|
||||
|
||||
@default_local_timeout_ms 15_000
|
||||
@default_network_timeout_ms 120_000
|
||||
|
||||
@lfs_patterns [
|
||||
"*.jpg",
|
||||
"*.jpeg",
|
||||
@@ -94,9 +97,10 @@ defmodule BDS.Git do
|
||||
when is_binary(project_id) and is_binary(file_path) and is_list(opts) do
|
||||
with {:ok, project_dir} <- project_dir(project_id) do
|
||||
runner = Keyword.get(opts, :runner, &system_runner/3)
|
||||
timeout_ms = git_timeout_ms(["show"], opts)
|
||||
|
||||
original =
|
||||
case runner.("git", ["show", "HEAD:#{file_path}"], command_opts(project_dir)) do
|
||||
case runner.("git", ["show", "HEAD:#{file_path}"], command_opts(project_dir, timeout_ms)) do
|
||||
{output, 0} -> output
|
||||
{_output, _status} -> ""
|
||||
end
|
||||
@@ -119,12 +123,8 @@ defmodule BDS.Git do
|
||||
project_dir,
|
||||
["log", "--date=short", "--format=%H%x09%an%x09%ad%x09%s", branch],
|
||||
opts
|
||||
) do
|
||||
remote_log =
|
||||
case run_git(project_dir, ["log", "--format=%H", "origin/#{branch}"], opts) do
|
||||
{:ok, output} -> output
|
||||
{:error, {:git_failed, _message}} -> ""
|
||||
end
|
||||
),
|
||||
{:ok, remote_log} <- remote_history_log(project_dir, branch, opts) do
|
||||
|
||||
local_commits = parse_history_log(local_log)
|
||||
remote_hashes = MapSet.new(parse_remote_history(remote_log))
|
||||
@@ -166,8 +166,8 @@ defmodule BDS.Git do
|
||||
{:ok, output} ->
|
||||
{:ok, %{updated: true, output: output}}
|
||||
|
||||
{:error, {:git_failed, message}} ->
|
||||
structured_git_error(project_dir, :fetch, message, opts)
|
||||
{:error, reason} ->
|
||||
structured_git_error(project_dir, :fetch, reason, opts)
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -244,15 +244,21 @@ defmodule BDS.Git do
|
||||
}}
|
||||
|
||||
{:ok, upstream_branch} ->
|
||||
with {:ok, ahead} <- revision_count(project_dir, "#{upstream_branch}..HEAD", opts),
|
||||
{:ok, behind} <- revision_count(project_dir, "HEAD..#{upstream_branch}", opts) do
|
||||
{:ok,
|
||||
%{
|
||||
local_branch: local_branch,
|
||||
upstream_branch: upstream_branch,
|
||||
has_upstream: true,
|
||||
ahead: revision_count(project_dir, "#{upstream_branch}..HEAD", opts),
|
||||
behind: revision_count(project_dir, "HEAD..#{upstream_branch}", opts)
|
||||
ahead: ahead,
|
||||
behind: behind
|
||||
}}
|
||||
end
|
||||
|
||||
{:error, _reason} = error ->
|
||||
error
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -304,6 +310,15 @@ defmodule BDS.Git do
|
||||
case run_git(project_dir, ["remote", "get-url", "origin"], opts) do
|
||||
{:ok, output} -> {:ok, blank_to_nil(output)}
|
||||
{:error, {:git_failed, _message}} -> {:ok, nil}
|
||||
{:error, _reason} = error -> error
|
||||
end
|
||||
end
|
||||
|
||||
defp remote_history_log(project_dir, branch, opts) do
|
||||
case run_git(project_dir, ["log", "--format=%H", "origin/#{branch}"], opts) do
|
||||
{:ok, output} -> {:ok, output}
|
||||
{:error, {:git_failed, _message}} -> {:ok, ""}
|
||||
{:error, _reason} = error -> error
|
||||
end
|
||||
end
|
||||
|
||||
@@ -311,6 +326,7 @@ defmodule BDS.Git do
|
||||
case run_git(project_dir, ["lfs", "ls-files"], opts) do
|
||||
{:ok, _output} -> {:ok, true}
|
||||
{:error, {:git_failed, _message}} -> {:ok, false}
|
||||
{:error, _reason} = error -> error
|
||||
end
|
||||
end
|
||||
|
||||
@@ -332,25 +348,53 @@ defmodule BDS.Git do
|
||||
end
|
||||
|
||||
defp run_git(project_dir, args, opts) do
|
||||
runner = Keyword.get(opts, :runner, &system_runner/3)
|
||||
timeout_ms = git_timeout_ms(args, opts)
|
||||
command_opts = command_opts(project_dir, timeout_ms)
|
||||
command = git_command(opts)
|
||||
command_args = git_command_args(args, opts)
|
||||
|
||||
case runner.("git", args, command_opts(project_dir)) do
|
||||
result =
|
||||
case Keyword.fetch(opts, :runner) do
|
||||
{:ok, runner} -> run_runner_with_timeout(runner, command, command_args, command_opts)
|
||||
:error -> system_runner(command, command_args, command_opts)
|
||||
end
|
||||
|
||||
case result do
|
||||
{:timeout, output} -> {:error, timeout_error(args, timeout_ms, output)}
|
||||
{output, 0} -> {:ok, String.trim_trailing(output)}
|
||||
{output, _status} -> {:error, {:git_failed, String.trim(output)}}
|
||||
end
|
||||
end
|
||||
|
||||
defp system_runner(command, args, opts) do
|
||||
env = opts |> Keyword.get(:env, %{}) |> Enum.to_list()
|
||||
cwd = Keyword.fetch!(opts, :cd)
|
||||
System.cmd(command, args, cd: cwd, env: env, stderr_to_stdout: true)
|
||||
timeout_ms = Keyword.fetch!(opts, :timeout_ms)
|
||||
executable = System.find_executable(command) || raise "missing executable: #{command}"
|
||||
|
||||
port =
|
||||
Port.open(
|
||||
{:spawn_executable, executable},
|
||||
[
|
||||
:binary,
|
||||
:stderr_to_stdout,
|
||||
:exit_status,
|
||||
:use_stdio,
|
||||
:hide,
|
||||
{:cd, cwd},
|
||||
{:env, port_env(opts |> Keyword.get(:env, %{}))},
|
||||
{:args, args}
|
||||
]
|
||||
)
|
||||
|
||||
receive_port_result(port, [], timeout_ms)
|
||||
end
|
||||
|
||||
defp command_opts(project_dir) do
|
||||
defp command_opts(project_dir, timeout_ms) do
|
||||
ssh_command = "ssh -oBatchMode=yes"
|
||||
|
||||
[
|
||||
cd: project_dir,
|
||||
timeout_ms: timeout_ms,
|
||||
env: %{
|
||||
"GIT_TERMINAL_PROMPT" => "0",
|
||||
"GCM_INTERACTIVE" => "never",
|
||||
@@ -367,6 +411,7 @@ defmodule BDS.Git do
|
||||
) do
|
||||
{:ok, output} -> {:ok, blank_to_nil(output)}
|
||||
{:error, {:git_failed, _message}} -> {:ok, nil}
|
||||
{:error, _reason} = error -> error
|
||||
end
|
||||
end
|
||||
|
||||
@@ -465,8 +510,151 @@ defmodule BDS.Git do
|
||||
|
||||
defp revision_count(project_dir, revision_range, opts) do
|
||||
case run_git(project_dir, ["rev-list", "--count", revision_range], opts) do
|
||||
{:ok, output} -> parse_count(output)
|
||||
{:error, {:git_failed, _message}} -> 0
|
||||
{:ok, output} -> {:ok, parse_count(output)}
|
||||
{:error, {:git_failed, _message}} -> {:ok, 0}
|
||||
{:error, _reason} = error -> error
|
||||
end
|
||||
end
|
||||
|
||||
defp run_runner_with_timeout(runner, command, args, opts) do
|
||||
timeout_ms = Keyword.fetch!(opts, :timeout_ms)
|
||||
task = Task.async(fn -> runner.(command, args, opts) end)
|
||||
|
||||
case Task.yield(task, timeout_ms) || Task.shutdown(task, :brutal_kill) do
|
||||
{:ok, result} -> result
|
||||
nil -> {:timeout, ""}
|
||||
end
|
||||
end
|
||||
|
||||
defp receive_port_result(port, output, timeout_ms) do
|
||||
receive do
|
||||
{^port, {:data, data}} ->
|
||||
receive_port_result(port, [data | output], timeout_ms)
|
||||
|
||||
{^port, {:exit_status, status}} ->
|
||||
{iodata_to_output(output), status}
|
||||
after
|
||||
timeout_ms ->
|
||||
{:timeout, terminate_port(port, output)}
|
||||
end
|
||||
end
|
||||
|
||||
defp terminate_port(port, output) do
|
||||
os_pid = port_os_pid(port)
|
||||
safe_port_close(port)
|
||||
|
||||
output = drain_port_messages(port, output, 25)
|
||||
maybe_kill_os_process(os_pid)
|
||||
|
||||
drain_port_messages(port, output, 25)
|
||||
|> iodata_to_output()
|
||||
end
|
||||
|
||||
defp drain_port_messages(port, output, timeout_ms) do
|
||||
receive do
|
||||
{^port, {:data, data}} -> drain_port_messages(port, [data | output], timeout_ms)
|
||||
{^port, {:exit_status, _status}} -> output
|
||||
after
|
||||
timeout_ms -> output
|
||||
end
|
||||
end
|
||||
|
||||
defp safe_port_close(port) do
|
||||
Port.close(port)
|
||||
catch
|
||||
:error, _reason -> :ok
|
||||
end
|
||||
|
||||
defp port_os_pid(port) do
|
||||
case Port.info(port, :os_pid) do
|
||||
{:os_pid, os_pid} when is_integer(os_pid) -> os_pid
|
||||
_other -> nil
|
||||
end
|
||||
end
|
||||
|
||||
defp maybe_kill_os_process(nil), do: :ok
|
||||
|
||||
defp maybe_kill_os_process(os_pid) when is_integer(os_pid) do
|
||||
if os_process_alive?(os_pid) do
|
||||
System.cmd("kill", ["-TERM", Integer.to_string(os_pid)], stderr_to_stdout: true)
|
||||
|
||||
if os_process_alive?(os_pid) do
|
||||
System.cmd("kill", ["-KILL", Integer.to_string(os_pid)], stderr_to_stdout: true)
|
||||
end
|
||||
end
|
||||
|
||||
:ok
|
||||
end
|
||||
|
||||
defp os_process_alive?(os_pid) when is_integer(os_pid) do
|
||||
{_output, exit_code} =
|
||||
System.cmd("kill", ["-0", Integer.to_string(os_pid)], stderr_to_stdout: true)
|
||||
|
||||
exit_code == 0
|
||||
end
|
||||
|
||||
defp iodata_to_output(output) do
|
||||
output
|
||||
|> Enum.reverse()
|
||||
|> IO.iodata_to_binary()
|
||||
end
|
||||
|
||||
defp port_env(env) do
|
||||
Enum.map(env, fn {key, value} ->
|
||||
{String.to_charlist(to_string(key)), String.to_charlist(to_string(value))}
|
||||
end)
|
||||
end
|
||||
|
||||
defp git_timeout_ms(args, opts) do
|
||||
Keyword.get(opts, :timeout_ms) ||
|
||||
if(network_git_command?(args),
|
||||
do: git_timeout_config(:network_timeout_ms, @default_network_timeout_ms),
|
||||
else: git_timeout_config(:local_timeout_ms, @default_local_timeout_ms)
|
||||
)
|
||||
end
|
||||
|
||||
defp git_command(opts), do: Keyword.get(opts, :command, "git")
|
||||
|
||||
defp git_command_args(args, opts) do
|
||||
Keyword.get(opts, :command_args, []) ++ args
|
||||
end
|
||||
|
||||
defp git_timeout_config(key, default) do
|
||||
:bds
|
||||
|> Application.get_env(:git, [])
|
||||
|> Keyword.get(key, default)
|
||||
end
|
||||
|
||||
defp network_git_command?([operation | _rest]), do: operation in ["fetch", "pull", "push"]
|
||||
|
||||
defp timeout_error(args, timeout_ms, output) do
|
||||
operation = git_operation(args)
|
||||
|
||||
%{
|
||||
kind: :timeout,
|
||||
operation: operation,
|
||||
timeout_ms: timeout_ms,
|
||||
message: "Git #{operation} timed out after #{timeout_ms}ms",
|
||||
output: String.trim(output)
|
||||
}
|
||||
end
|
||||
|
||||
defp git_operation([operation | _rest]) do
|
||||
case operation do
|
||||
"add" -> :add
|
||||
"commit" -> :commit
|
||||
"diff" -> :diff
|
||||
"fetch" -> :fetch
|
||||
"log" -> :log
|
||||
"lfs" -> :lfs
|
||||
"pull" -> :pull
|
||||
"push" -> :push
|
||||
"remote" -> :remote
|
||||
"rev-list" -> :rev_list
|
||||
"rev-parse" -> :rev_parse
|
||||
"show" -> :show
|
||||
"status" -> :status
|
||||
_other -> :git
|
||||
end
|
||||
end
|
||||
|
||||
@@ -482,10 +670,17 @@ defmodule BDS.Git do
|
||||
defp category_for_path("templates/" <> _rest), do: :templates
|
||||
defp category_for_path(_path), do: nil
|
||||
|
||||
defp structured_git_error(project_dir, _operation, message, opts) do
|
||||
defp structured_git_error(_project_dir, _operation, %{} = error, _opts), do: {:error, error}
|
||||
|
||||
defp structured_git_error(project_dir, operation, {:git_failed, message}, opts) do
|
||||
structured_git_error(project_dir, operation, message, opts)
|
||||
end
|
||||
|
||||
defp structured_git_error(project_dir, _operation, message, opts) when is_binary(message) do
|
||||
provider =
|
||||
case remote_url(project_dir, opts) do
|
||||
{:ok, remote} -> provider_info(remote)
|
||||
{:error, _reason} -> nil
|
||||
end
|
||||
|
||||
if auth_error?(message) do
|
||||
|
||||
@@ -212,10 +212,74 @@ defmodule BDS.GitTest do
|
||||
assert is_binary(error.guidance)
|
||||
end
|
||||
|
||||
test "fetch returns a structured timeout error when a runner exceeds the deadline", %{
|
||||
project: project
|
||||
} do
|
||||
with_git_timeouts(20, 20, fn ->
|
||||
runner = fn _command, _args, _opts ->
|
||||
Process.sleep(100)
|
||||
{"", 0}
|
||||
end
|
||||
|
||||
assert {:error, error} = Git.fetch(project.id, runner: runner)
|
||||
assert %{kind: :timeout, operation: :fetch, timeout_ms: 20, message: _message} = error
|
||||
assert error.message =~ "timed out"
|
||||
end)
|
||||
end
|
||||
|
||||
test "default runner kills the spawned git process when fetch times out", %{
|
||||
project: project,
|
||||
temp_root: temp_root
|
||||
} do
|
||||
pid_file = Path.join(temp_root, "git-timeout.pid")
|
||||
sleep_executable = System.find_executable("sleep") || raise "missing sleep executable"
|
||||
shell_executable = System.find_executable("sh") || raise "missing sh executable"
|
||||
shell_script = "echo \"$$\" > \"#{pid_file}\"; exec \"#{sleep_executable}\" 2"
|
||||
|
||||
with_git_timeouts(50, 50, fn ->
|
||||
assert {:error, error} =
|
||||
Git.fetch(project.id,
|
||||
command: shell_executable,
|
||||
command_args: ["-c", shell_script]
|
||||
)
|
||||
|
||||
assert %{kind: :timeout, operation: :fetch} = error
|
||||
|
||||
pid = pid_file |> File.read!() |> String.trim() |> String.to_integer()
|
||||
refute os_process_alive?(pid)
|
||||
end)
|
||||
end
|
||||
|
||||
defp fake_runner(handler) do
|
||||
fn command, args, opts -> handler.(command, args, opts) end
|
||||
end
|
||||
|
||||
defp with_git_timeouts(local_timeout_ms, network_timeout_ms, fun) do
|
||||
original = Application.get_env(:bds, :git)
|
||||
|
||||
Application.put_env(:bds, :git,
|
||||
local_timeout_ms: local_timeout_ms,
|
||||
network_timeout_ms: network_timeout_ms
|
||||
)
|
||||
|
||||
try do
|
||||
fun.()
|
||||
after
|
||||
if original == nil do
|
||||
Application.delete_env(:bds, :git)
|
||||
else
|
||||
Application.put_env(:bds, :git, original)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
defp os_process_alive?(pid) when is_integer(pid) do
|
||||
{_output, exit_code} =
|
||||
System.cmd("kill", ["-0", Integer.to_string(pid)], stderr_to_stdout: true)
|
||||
|
||||
exit_code == 0
|
||||
end
|
||||
|
||||
defp init_git_repo!(project_dir, message) do
|
||||
run_git!(project_dir, ["init", "-b", "master"])
|
||||
run_git!(project_dir, ["config", "user.name", "bDS Tests"])
|
||||
|
||||
Reference in New Issue
Block a user