222 lines
6.3 KiB
Elixir
222 lines
6.3 KiB
Elixir
defmodule BDS.AI.OpenAICompatibleRuntime do
|
|
@moduledoc false
|
|
|
|
require Logger
|
|
|
|
alias BDS.AI.HttpClient
|
|
|
|
@doc """
Fetches the model list from an OpenAI-compatible endpoint.

`endpoint` must provide `:url` and `:api_key`. `opts` may carry `:http_client`,
a module exposing `get/2`; it defaults to `BDS.AI.HttpClient`.

Returns the normalized model list for a 200 response, or
`{:error, %{kind: :http_error, ...}}` carrying either the non-200 `:status`
or the transport `:reason`.
"""
def list_models(endpoint, opts \\ []) when is_map(endpoint) and is_list(opts) do
  client = Keyword.get(opts, :http_client, HttpClient)
  target = models_url(endpoint.url)
  request_headers = maybe_put_auth(%{"accept" => "application/json"}, endpoint.api_key)

  with {:ok, response} <- client.get(target, request_headers),
       200 <- response.status do
    normalize_models_response(response.body)
  else
    # Transport-level failure reported by the client.
    {:error, reason} ->
      {:error, %{kind: :http_error, reason: reason}}

    # The request completed, but with a status other than 200.
    status when is_integer(status) ->
      {:error, %{kind: :http_error, status: status}}
  end
end
|
|
|
|
@doc """
Sends a chat-completion request to an OpenAI-compatible endpoint.

`endpoint` must provide `:url` and `:api_key`. `request` must provide `:model`,
`:messages` and `:max_output_tokens`; it may also provide `:tools` and
`:operation` (the latter is only used in the debug log line).

When `opts` is a keyword list it may carry `:http_client`, a module exposing
`post/3`. It defaults to `BDS.AI.HttpClient`, the same convention
`list_models/2` follows. Any other `opts` value uses the default client.

Returns the normalized result for a 200 response, or
`{:error, %{kind: :http_error, ...}}` for non-200 statuses (with `:status` and
`:body`) and transport failures (with `:reason`).
"""
def generate(endpoint, request, opts) when is_map(endpoint) and is_map(request) do
  # Honour an injected client like list_models/2 does, so the call can be
  # exercised without real network access.
  http_client =
    if Keyword.keyword?(opts), do: Keyword.get(opts, :http_client, HttpClient), else: HttpClient

  url = completions_url(endpoint.url)

  headers =
    maybe_put_auth(
      %{"content-type" => "application/json", "accept" => "application/json"},
      endpoint.api_key
    )

  payload =
    %{
      "model" => request.model,
      "messages" => request.messages,
      "max_tokens" => request.max_output_tokens
    }
    |> maybe_disable_thinking(request.model)
    |> maybe_put_tools(Map.get(request, :tools, []))

  payload_json = Jason.encode!(payload)

  Logger.debug(
    "AI OpenAI-compatible request operation=#{inspect(Map.get(request, :operation))} model=#{inspect(request.model)} url=#{url} tools=#{payload |> Map.get("tools", []) |> length()} payload_size=#{byte_size(payload_json)}"
  )

  case http_client.post(url, headers, payload_json) do
    {:ok, %{status: 200, body: body}} ->
      result = normalize_response(body)
      log_normalization(result, body)
      result

    {:ok, %{status: status, body: body}} ->
      Logger.error(
        "AI OpenAI-compatible HTTP error status=#{status} body=#{truncate_for_log(body, 2000)}"
      )

      {:error, %{kind: :http_error, status: status, body: body}}

    {:error, reason} ->
      Logger.error("AI OpenAI-compatible HTTP request failed: #{inspect(reason)}")
      {:error, %{kind: :http_error, reason: reason}}
  end
end

# Logs the outcome of normalizing a 200 response: a warning when textual
# content did not decode to a JSON object, an error when normalization failed.
defp log_normalization({:ok, %{json: nil, content: content}}, _body) when is_binary(content) do
  Logger.warning(
    "AI OpenAI-compatible response parsed but content is not valid JSON. Content: #{String.slice(content, 0, 500)}"
  )
end

defp log_normalization({:ok, _normalized}, _body), do: :ok

defp log_normalization({:error, reason}, body) do
  Logger.error(
    "AI OpenAI-compatible response normalization failed: #{inspect(reason)} body=#{truncate_for_log(body, 1000)}"
  )
end

# Shortens a response body for logging. Non-binary bodies are inspected first
# so that a log statement can never crash the request path.
defp truncate_for_log(value, limit) when is_binary(value), do: String.slice(value, 0, limit)
defp truncate_for_log(value, limit), do: value |> inspect() |> String.slice(0, limit)
|
|
|
|
# Decodes a chat-completion response body and flattens it into
# `{:ok, %{content: ..., json: ..., tool_calls: ..., usage: ...}}`.
#
# Only the first entry of "choices" is read. A payload without a usable first
# choice (missing, empty or non-list "choices", or no "message" object) yields
# `content: nil`, `json: nil` and `tool_calls: []`, the same result an empty
# "choices" list produces. The previous `Access.at(0)` lookup raised when
# "choices" was absent.
#
# `json` is the decoded content when the content is a JSON object, otherwise
# nil. Errors from decode_json_body/1 are returned unchanged.
defp normalize_response(body) do
  with {:ok, payload} <- decode_json_body(body) do
    message = first_choice_message(payload)
    content = normalize_content(message["content"])

    {:ok,
     %{
       content: content,
       json: decode_json_object(content),
       tool_calls: normalize_tool_calls(message["tool_calls"] || []),
       usage: payload |> usage_section() |> normalize_usage()
     }}
  end
end

# Returns the "message" object of the first choice, or an empty map when the
# payload does not have that shape.
defp first_choice_message(%{"choices" => [%{"message" => %{} = message} | _rest]}), do: message
defp first_choice_message(_payload), do: %{}

# Returns the "usage" object of the payload, or an empty map when absent.
defp usage_section(%{"usage" => %{} = usage}), do: usage
defp usage_section(_payload), do: %{}

# Decodes textual content that is expected to hold a JSON object; anything
# that is not a string, not valid JSON, or not an object yields nil.
defp decode_json_object(content) when is_binary(content) do
  case Jason.decode(content) do
    {:ok, %{} = decoded} -> decoded
    _other -> nil
  end
end

defp decode_json_object(_content), do: nil
|
|
|
|
# Builds the chat-completions URL from a configured endpoint URL. A URL that
# already ends in "/chat/completions" is returned as-is; otherwise the path is
# appended without doubling a trailing slash.
defp completions_url(url) do
  if String.ends_with?(url, "/chat/completions") do
    url
  else
    separator = if String.ends_with?(url, "/"), do: "", else: "/"
    url <> separator <> "chat/completions"
  end
end
|
|
|
|
# Builds the models-listing URL from a configured endpoint URL. A
# "/chat/completions" suffix is swapped for "/models", a URL already ending in
# "/models" is kept, and anything else gets "models" appended without doubling
# a trailing slash.
defp models_url(url) do
  cond do
    String.ends_with?(url, "/models") ->
      url

    String.ends_with?(url, "/chat/completions") ->
      String.replace_suffix(url, "/chat/completions", "/models")

    true ->
      separator = if String.ends_with?(url, "/"), do: "", else: "/"
      url <> separator <> "models"
  end
end
|
|
|
|
# Decodes a models-listing response body into `{:ok, models}`, where each model
# is `%{id: id, label: id}`. The list is de-duplicated by id and sorted
# case-insensitively.
#
# The body comes from a remote server, so unexpected shapes are tolerated
# rather than raised on: a payload without a list under "data" (missing key,
# null, or a non-object payload) yields an empty list, and entries that are
# not objects or have no string id are skipped.
# Errors from decode_json_body/1 are returned unchanged.
defp normalize_models_response(body) do
  with {:ok, payload} <- decode_json_body(body) do
    models =
      for %{} = entry <- model_entries(payload),
          id = entry["id"] || entry[:id],
          is_binary(id) do
        %{id: id, label: id}
      end

    {:ok,
     models
     |> Enum.uniq_by(& &1.id)
     |> Enum.sort_by(&String.downcase(&1.id))}
  end
end

# Returns the list stored under "data", or an empty list when the payload does
# not carry one.
defp model_entries(%{"data" => entries}) when is_list(entries), do: entries
defp model_entries(_payload), do: []
|
|
|
|
# Decodes a response body as JSON. Success is passed through as
# `{:ok, payload}`; a decode failure is wrapped as
# `{:error, %{kind: :invalid_json_response, reason: reason}}`.
defp decode_json_body(body) do
  with {:error, reason} <- Jason.decode(body) do
    {:error, %{kind: :invalid_json_response, reason: reason}}
  end
end
|
|
|
|
# Adds a bearer "authorization" header unless the API key is nil or empty.
defp maybe_put_auth(headers, api_key) when api_key in [nil, ""], do: headers

defp maybe_put_auth(headers, api_key) do
  Map.put(headers, "authorization", "Bearer #{api_key}")
end
|
|
|
|
# Attaches tool definitions to the payload and lets the model decide whether to
# call them ("tool_choice" => "auto"). A nil or empty tool list leaves the
# payload untouched.
defp maybe_put_tools(payload, tools) when tools in [[], nil], do: payload

defp maybe_put_tools(payload, tools) do
  payload
  |> Map.put("tools", tools)
  |> Map.put("tool_choice", "auto")
end
|
|
|
|
# Sets "chat_template_kwargs" => %{"enable_thinking" => false} on the payload
# when the catalog reports `disables_reasoning` for the model. Existing
# template kwargs are kept and only the "enable_thinking" entry is overwritten.
# A model that is not a string leaves the payload untouched.
defp maybe_disable_thinking(payload, model) when not is_binary(model), do: payload

defp maybe_disable_thinking(payload, model) do
  capabilities = BDS.AI.Catalog.model_capabilities(model)

  if capabilities.disables_reasoning do
    # A missing or nil entry starts from an empty kwargs map.
    current_kwargs = Map.get(payload, "chat_template_kwargs") || %{}
    Map.put(payload, "chat_template_kwargs", Map.put(current_kwargs, "enable_thinking", false))
  else
    payload
  end
end
|
|
|
|
# Converts raw tool-call entries into `%{id: ..., name: ..., arguments: ...}`
# maps, taking the name and arguments from each entry's "function" object.
defp normalize_tool_calls(tool_calls) do
  for raw_call <- tool_calls do
    function_name = get_in(raw_call, ["function", "name"])
    raw_arguments = get_in(raw_call, ["function", "arguments"])

    %{id: raw_call["id"], name: function_name, arguments: decode_arguments(raw_arguments)}
  end
end
|
|
|
|
# Turns the "arguments" of a tool call into a map.
#
# A JSON-encoded object string is decoded; arguments that already arrive as a
# map are passed through unchanged. Everything else (nil, undecodable strings,
# JSON that is not an object, any other type) falls back to an empty map, so
# one malformed tool call can no longer raise and abort normalization of the
# whole response.
defp decode_arguments(arguments) when is_map(arguments), do: arguments

defp decode_arguments(arguments) when is_binary(arguments) do
  case Jason.decode(arguments) do
    {:ok, decoded} when is_map(decoded) -> decoded
    _other -> %{}
  end
end

defp decode_arguments(_missing_or_unsupported), do: %{}
|
|
|
|
# Reduces message content to a single string (or nil). Plain strings and nil
# pass through; a list of content parts is flattened by concatenating each
# part's "text", with parts lacking text contributing nothing.
defp normalize_content(content) when is_nil(content) or is_binary(content), do: content

defp normalize_content(parts) when is_list(parts) do
  Enum.map_join(parts, fn part -> part["text"] || "" end)
end
|
|
|
|
# Maps the "usage" object of a response onto the internal token-count keys.
# Counters that are absent from the response come back as nil.
defp normalize_usage(usage) do
  prompt_details = usage["prompt_tokens_details"]
  completion_details = usage["completion_tokens_details"]

  %{
    input_tokens: usage["prompt_tokens"],
    output_tokens: usage["completion_tokens"],
    cache_read_tokens: prompt_details["cached_tokens"],
    cache_write_tokens: completion_details["cached_tokens"]
  }
end
|
|
end
|