/**
 * LLM API client.
 *
 * This module contains functions used to make requests to the LLM provider API via
 * the Grafana LLM app plugin. That plugin must be installed, enabled and configured
 * in order for these functions to work.
 *
 * The {@link enabled} function can be used to check if the plugin is enabled and configured.
 */

import {
  isLiveChannelMessageEvent,
  LiveChannelAddress,
  LiveChannelMessageEvent,
  LiveChannelScope,
} from '@grafana/data';
import { getBackendSrv, getGrafanaLiveSrv, logDebug /* logError */ } from '@grafana/runtime';

import React, { useEffect, useCallback, useState } from 'react';
import { useAsync } from 'react-use';
import { pipe, Observable, UnaryFunction, Subscription } from 'rxjs';
import { filter, map, scan, takeWhile, tap } from 'rxjs/operators';
import { v4 as uuidv4 } from 'uuid';

import { LLM_PLUGIN_ID, LLM_PLUGIN_ROUTE, setLLMPluginVersion } from './constants';
import { HealthCheckResponse, LLMProviderHealthDetails } from './types';

/**
 * Path of the chat-completions endpoint within the LLM plugin.
 *
 * It is appended to the plugin resources URL for one-shot requests and used as the
 * prefix of the live channel path for streaming requests.
 */
const LLM_CHAT_COMPLETIONS_PATH = 'llm/v1/chat/completions';

/**
 * The role of a message's author.
 *
 * Messages with the 'function' role must also set the `name` field of {@link Message}.
 */
export type Role = 'system' | 'user' | 'assistant' | 'function';

/** A message in a conversation. */
export interface Message {
  /** The role of the message's author. */
  role: Role;

  /**
   * The contents of the message.
   *
   * Required for all messages. The provider may send null for assistant messages that carry a function call.
   * NOTE(review): the declared type does not admit null - confirm whether null can actually reach callers.
   */
  content: string;

  /**
   * The name of the author of this message.
   *
   * This is required if role is 'function', and it should be the name of the function whose response is in the content.
   *
   * May contain a-z, A-Z, 0-9, and underscores, with a maximum length of 64 characters.
   */
  name?: string;

  /**
   * The name and arguments of a function that should be called, as generated by the model.
   *
   * NOTE(review): typed as the broad `Object`; presumably shaped like {@link FunctionCallMessage} - verify against the provider response.
   */
  function_call?: Object;
}

/**
 * A function the model may generate JSON inputs for.
 *
 * Note: within this module the name shadows the global `Function` type.
 */
export interface Function {
  /**
   * The name of the function to be called.
   *
   * Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
   */
  name: string;
  /**
   * A description of what the function does, used by the model to choose when and how to call the function.
   */
  description?: string;
  /**
   * The parameters the function accepts, described as a JSON Schema object. See the provider's guide for examples, and the JSON Schema reference for documentation about the format.
   *
   * To describe a function that accepts no parameters, provide the value {"type": "object", "properties": {}}.
   */
  parameters: Object;
}

/**
 * Enum representing abstracted models used by the backend app.
 *
 * The values are abstract tiers rather than concrete model names; see the `model` field of
 * {@link ChatCompletionsRequest} for how they are used.
 * @enum {string}
 */
export enum Model {
  /** The tier a request falls back to when it does not specify a model. */
  BASE = 'base',
  /** The tier used by default by {@link useLLMStream}. */
  LARGE = 'large',
}

/**
 * A free-form model name, still accepted by the `model` field of {@link ChatCompletionsRequest}
 * alongside the {@link Model} enum.
 *
 * @deprecated Use {@link Model} instead.
 */
type DeprecatedString = string;

/**
 * The body of a chat completions request, as accepted by {@link chatCompletions} and
 * {@link streamChatCompletions}.
 */
export interface ChatCompletionsRequest {
  /**
   * Model abstraction to use. These abstractions are then translated back into specific models based on the user's settings.
   *
   * If not specified, defaults to `Model.BASE`.
   */
  model?: Model | DeprecatedString;
  /** A list of messages comprising the conversation so far. */
  messages: Message[];
  /**
   * What sampling temperature to use, between 0 and 2.
   * Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
   *
   * We generally recommend altering this or top_p but not both.
   */
  temperature?: number;
  /**
   * An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
   * So 0.1 means only the tokens comprising the top 10% probability mass are considered.
   *
   * We generally recommend altering this or temperature but not both.
   */
  top_p?: number;
  /**
   * How many chat completion choices to generate for each input message.
   */
  n?: number;
  /**
   * Up to 4 sequences where the API will stop generating further tokens.
   */
  stop?: string | string[];
  /**
   * The maximum number of tokens to generate in the chat completion.
   *
   * The total length of input tokens and generated tokens is limited by the model's context length.
   */
  max_tokens?: number;
  /**
   * Number between -2.0 and 2.0.
   *
   * Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
   */
  presence_penalty?: number;
  /**
   * Number between -2.0 and 2.0.
   *
   * Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
   */
  frequency_penalty?: number;
  /**
   * Modify the likelihood of specified tokens appearing in the completion.
   *
   * Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100.
   * Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,
   * but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban
   * or exclusive selection of the relevant token.
   */
  logit_bias?: { [key: string]: number };
  /**
   * A unique identifier representing your end-user, which can help monitor and detect abuse.
   */
  user?: string;
}

/** A completion object from the LLM provider; the default element type of the `choices` list in {@link ChatCompletionsResponse}. */
export interface Choice {
  /** The message object generated by the model. */
  message: Message;
  /**
   * The reason the model stopped generating text.
   *
   * This may be one of:
   *  - stop: API returned complete message, or a message terminated by one of the stop sequences provided via the stop parameter
   *  - length: incomplete model output due to max_tokens parameter or token limit
   *  - function_call: the model decided to call a function
   *  - content_filter: omitted content due to a flag from our content filters
   *  - null: API response still in progress or incomplete
   *
   * NOTE(review): the declared type is `string`, which does not admit the null case listed above - confirm.
   */
  finish_reason: string;
  /** The index of the completion in the list of choices. */
  index: number;
}

/** The usage statistics for a request to the LLM provider, as carried in the `usage` field of {@link ChatCompletionsResponse}. */
export interface Usage {
  /** The number of tokens in the prompt. */
  prompt_tokens: number;
  /** The number of tokens in the completion. */
  completion_tokens: number;
  /** The total number of tokens. */
  total_tokens: number;
}

/**
 * The error response from the Grafana LLM app when trying to call the chat completions API.
 *
 * Use {@link isErrorResponse} to tell it apart from a {@link ChatCompletionsResponse}.
 */
interface ChatCompletionsErrorResponse {
  /** The error message. */
  error: string;
}

/**
 * A response from the LLM provider Chat Completions API.
 *
 * The type parameter is the element type of `choices`: {@link Choice} by default, or
 * {@link ChatCompletionsChunk} for the responses emitted by {@link streamChatCompletions}.
 */
export interface ChatCompletionsResponse<T = Choice> {
  /** The ID of the request. */
  id: string;
  /** The type of object returned (e.g. 'chat.completion'). */
  object: string;
  /** The timestamp of the request, as a UNIX timestamp. */
  created: number;
  /** The name of the model used to generate the response. */
  model: string;
  /** A list of completion objects (only one, unless `n > 1` in the request). */
  choices: T[];
  /** The number of tokens used to generate the replies, counting prompt, completion, and total. */
  usage: Usage;
}

/** A content message returned from the model; recognised by {@link isContentMessage}. */
export interface ContentMessage {
  /** The content of the message. */
  content: string;
}

/** A message returned from the model indicating that it is done; recognised by {@link isDoneMessage}. */
export interface DoneMessage {
  /** The done marker. {@link isDoneMessage} accepts any non-null value here, including `false`. */
  done: boolean;
}

/** A function call message returned from the model. */
export interface FunctionCallMessage {
  /** The name of the function to call. */
  name: string;
  /**
   * The arguments to the function call.
   *
   * NOTE(review): typed as an untyped array; the element shape is not visible in this module - verify against the provider response.
   */
  arguments: any[];
}

/**
 * A delta returned from a stream of chat completion responses.
 *
 * In practice this will be either a content message or a function call;
 * done messages are filtered out by the `streamChatCompletions` function.
 *
 * Use {@link isContentMessage} and {@link isDoneMessage} to narrow a delta to a specific variant.
 */
export type ChatCompletionsDelta = ContentMessage | FunctionCallMessage | DoneMessage;

/** A chunk included in a chat completion response; the element type of `choices` in streamed responses. */
export interface ChatCompletionsChunk {
  /** The delta since the previous chunk. */
  delta: ChatCompletionsDelta;
}

/**
 * Type guard for content deltas.
 *
 * @param message - The delta to inspect.
 * @returns true when the delta has a `content` property, whatever its value.
 */
export function isContentMessage(message: ChatCompletionsDelta): message is ContentMessage {
  const hasContentField = 'content' in message;
  return hasContentField;
}

/**
 * Type guard for the delta that marks the end of a stream.
 *
 * @param message - The delta to inspect.
 * @returns true when the delta has a `done` property whose value is neither null nor undefined.
 */
export function isDoneMessage(message: ChatCompletionsDelta): message is DoneMessage {
  if (!('done' in message)) {
    return false;
  }
  return message.done !== null && message.done !== undefined;
}

/**
 * Type guard distinguishing an error payload from a regular chat completions response.
 *
 * @param response - The payload received from the LLM app.
 * @returns true when the payload has an `error` property.
 */
export function isErrorResponse<T>(
  response: ChatCompletionsResponse<T> | ChatCompletionsErrorResponse
): response is ChatCompletionsErrorResponse {
  const carriesError = 'error' in response;
  return carriesError;
}

/**
 * An rxjs operator that extracts the content messages from a stream of chat completion responses.
 *
 * Only the first choice of each response is inspected. Responses whose first choice is not a content
 * message are dropped, as are responses that carry no choices at all (instead of erroring the stream).
 *
 * @returns An operator whose output observable emits one string per content message: the token
 *          emitted by the model.
 * @example <caption>Example of reading all tokens in a stream.</caption>
 * const stream = streamChatCompletions({ model: Model.BASE, messages: [
 *   { role: 'system', content: 'You are a great bot.' },
 *   { role: 'user', content: 'Hello, bot.' },
 * ]}).pipe(extractContent());
 * stream.subscribe({ next: console.log, error: console.error });
 * // Output (one emission per line):
 * // 'Hello'
 * // '! '
 * // 'How '
 * // 'are '
 * // 'you'
 * // '?'
 */
export function extractContent(): UnaryFunction<
  Observable<ChatCompletionsResponse<ChatCompletionsChunk>>,
  Observable<string>
> {
  return pipe(
    filter((response: ChatCompletionsResponse<ChatCompletionsChunk>) => {
      // Guard against chunks with an empty or missing `choices` list; indexing them blindly would throw.
      const delta = response.choices?.[0]?.delta;
      return delta != null && isContentMessage(delta);
    }),
    // The type assertion is needed here because the filter above does not narrow the response type.
    map(
      (response: ChatCompletionsResponse<ChatCompletionsChunk>) => (response.choices[0].delta as ContentMessage).content
    )
  );
}

/**
 * An rxjs operator that turns a stream of chat completion responses into a running transcript.
 *
 * Content tokens are pulled out with `extractContent` and concatenated, so every emission is the
 * full text received so far.
 *
 * @returns An operator whose output observable emits the accumulated content after each content message.
 * @example
 * const stream = streamChatCompletions({ model: Model.BASE, messages: [
 *   { role: 'system', content: 'You are a great bot.' },
 *   { role: 'user', content: 'Hello, bot.' },
 * ]}).pipe(accumulateContent());
 * stream.subscribe({ next: console.log, error: console.error });
 * // Output (one emission per line):
 * // 'Hello'
 * // 'Hello! '
 * // 'Hello! How '
 * // 'Hello! How are '
 * // 'Hello! How are you'
 * // 'Hello! How are you?'
 */
export function accumulateContent(): UnaryFunction<
  Observable<ChatCompletionsResponse<ChatCompletionsChunk>>,
  Observable<string>
> {
  // Reducer for `scan`: append the newest token to everything seen so far.
  const appendToken = (transcript: string, token: string): string => transcript + token;
  const toTokens = extractContent();
  return pipe(toTokens, scan(appendToken, ''));
}

/**
 * Send a single, non-streaming chat completions request through the Grafana LLM plugin proxy.
 *
 * The request is POSTed as JSON to the plugin's resources endpoint via the Grafana backend service.
 *
 * @param request - The chat completions request to send.
 * @returns A promise for the response body returned by the backend service.
 */
export async function chatCompletions(request: ChatCompletionsRequest): Promise<ChatCompletionsResponse> {
  const url = `/api/plugins/grafana-llm-app/resources/${LLM_CHAT_COMPLETIONS_PATH}`;
  const options = {
    headers: { 'Content-Type': 'application/json' },
  };
  return getBackendSrv().post<ChatCompletionsResponse>(url, request, options);
}

/**
 * Make a streaming request to the chat-completions API via the Grafana LLM plugin proxy.
 *
 * The request is sent as the data of a Grafana Live channel with a unique path, and the messages
 * received on that channel are returned as an observable of response chunks. Use the `extractContent`
 * operator to reduce the stream to content tokens only, or the `accumulateContent` operator to
 * obtain a stream of accumulated content.
 *
 * The 'done' message will not be emitted; the stream will simply end when this message is encountered.
 * Chunks that carry no choices are passed through unchanged rather than erroring the stream.
 *
 * If the plugin sends an error response, the returned observable errors with an `Error` carrying
 * the response's error message.
 *
 * @param request - The chat completions request to send.
 * @returns An observable of the response chunks received before the 'done' message.
 *
 * @example <caption>Example of reading all tokens in a stream.</caption>
 * const stream = streamChatCompletions({ model: Model.BASE, messages: [
 *   { role: 'system', content: 'You are a great bot.' },
 *   { role: 'user', content: 'Hello, bot.' },
 * ]}).pipe(extractContent());
 * stream.subscribe({ next: console.log, error: console.error });
 * // Output (one emission per line):
 * // 'Hello'
 * // '! '
 * // 'How '
 * // 'are '
 * // 'you'
 * // '?'
 *
 * @example <caption>Example of accumulating tokens in a stream.</caption>
 * const stream = streamChatCompletions({ model: Model.BASE, messages: [
 *   { role: 'system', content: 'You are a great bot.' },
 *   { role: 'user', content: 'Hello, bot.' },
 * ]}).pipe(accumulateContent());
 * stream.subscribe({ next: console.log, error: console.error });
 * // Output (one emission per line):
 * // 'Hello'
 * // 'Hello! '
 * // 'Hello! How '
 * // 'Hello! How are '
 * // 'Hello! How are you'
 * // 'Hello! How are you?'
 */
export function streamChatCompletions(
  request: ChatCompletionsRequest
): Observable<ChatCompletionsResponse<ChatCompletionsChunk>> {
  // Each request gets its own channel path so concurrent requests do not share a stream.
  const channel: LiveChannelAddress = {
    scope: LiveChannelScope.Plugin,
    namespace: LLM_PLUGIN_ID,
    path: LLM_CHAT_COMPLETIONS_PATH + '/' + uuidv4(),
    data: request,
  };
  // Keep only message events; other live channel events are dropped here.
  const messages = getGrafanaLiveSrv()
    .getStream(channel)
    .pipe(filter((event) => isLiveChannelMessageEvent(event))) as Observable<
    LiveChannelMessageEvent<ChatCompletionsResponse<ChatCompletionsChunk>>
  >;
  return messages.pipe(
    // Surface error responses as stream errors; nothing below ever sees an error response.
    tap((event) => {
      if (isErrorResponse(event.message)) {
        throw new Error(event.message.error);
      }
    }),
    // End the stream at the 'done' message. A chunk without choices cannot be the 'done'
    // message, so it is let through instead of throwing on the missing first choice.
    takeWhile((event) => {
      const delta = event.message.choices?.[0]?.delta;
      return delta == null || !isDoneMessage(delta);
    }),
    map((event) => event.message)
  );
}

/** Whether the "plugin unavailable" debug messages have already been logged by this module. */
let loggedWarning = false;

/**
 * Log a failed plugin request at debug level, at most once for the lifetime of this module,
 * so that repeated health checks against a missing plugin do not flood the log.
 */
function logPluginCheckFailure(e: unknown): void {
  if (loggedWarning) {
    return;
  }
  logDebug(String(e));
  logDebug(
    'Failed to check if LLM provider is enabled. This is expected if the Grafana LLM plugin is not installed, and the above error can be ignored.'
  );
  loggedWarning = true;
}

/**
 * Check if the LLM provider API is enabled via the LLM plugin.
 *
 * The check runs in two steps: the plugin settings are fetched to see whether the plugin is
 * enabled, then the plugin health endpoint is queried for the provider details. Request failures
 * are never thrown; they are reported through the returned details (and logged once at debug level).
 * When the health response carries a plugin version, it is recorded via `setLLMPluginVersion`.
 *
 * @returns The provider health details. `configured` and `ok` are both false, with an `error`
 *          message, when the plugin is disabled, unreachable or too old to report provider details.
 */
export const health = async (): Promise<LLMProviderHealthDetails> => {
  // First check if the plugin is enabled.
  try {
    const settings = await getBackendSrv().get(`${LLM_PLUGIN_ROUTE}/settings`, undefined, undefined, {
      showSuccessAlert: false,
      showErrorAlert: false,
    });
    if (!settings.enabled) {
      return { configured: false, ok: false, error: 'The Grafana LLM plugin is not enabled.' };
    }
  } catch (e) {
    logPluginCheckFailure(e);
    return { configured: false, ok: false, error: 'The Grafana LLM plugin is not installed.' };
  }

  // Run a health check to see if the LLM provider is configured on the plugin.
  let response: HealthCheckResponse;
  try {
    response = await getBackendSrv().get(`${LLM_PLUGIN_ROUTE}/health`, undefined, undefined, {
      showSuccessAlert: false,
      showErrorAlert: false,
    });
  } catch (e) {
    logPluginCheckFailure(e);
    return { configured: false, ok: false, error: 'The Grafana LLM plugin is not installed.' };
  }

  const { details } = response;
  // Update the version if it's present on the response.
  if (details?.version !== undefined) {
    setLLMPluginVersion(details.version);
  }
  if (details?.llmProvider === undefined) {
    return { configured: false, ok: false, error: 'The Grafana LLM plugin is outdated; please update it.' };
  }
  // The provider details may be a bare boolean; expand that form into the details object shape.
  if (typeof details.llmProvider === 'boolean') {
    return { configured: details.llmProvider, ok: details.llmProvider };
  }
  return details.llmProvider;
};

/**
 * Check whether the LLM plugin can be used.
 *
 * @returns The result of combining the `configured` and `ok` flags reported by `health()`;
 *          truthy only when both are.
 */
export const enabled = async (): Promise<boolean> => {
  const { configured, ok } = await health();
  return configured && ok;
};

/**
 * Enum representing different states for a stream, as reported by {@link useLLMStream}.
 * @enum {string}
 */
export enum StreamStatus {
  /** No request is in flight: the initial state, and the state restored after an error or shortly after completion. */
  IDLE = 'idle',
  /** A request has been started and its stream has neither completed nor failed yet. */
  GENERATING = 'generating',
  /** The stream has completed; the status is reset to IDLE right afterwards. */
  COMPLETED = 'completed',
}

/**
 * A constant representing the timeout value in milliseconds.
 *
 * {@link useLLMStream} reports a timeout error when a stream has been generating for this long
 * without producing any reply content.
 * @type {number}
 */
export const TIMEOUT = 10000;

/**
 * A type representing the state of an LLM stream, as returned by {@link useLLMStream}.
 * @typedef {Object} LLMStreamState
 * @property {React.Dispatch<React.SetStateAction<Message[]>>} setMessages - A function to set messages.
 * @property {string} reply - The reply associated with the stream.
 * @property {StreamStatus} streamStatus - The current status of the stream.
 * @property {Error|undefined} error - An optional error associated with the stream.
 * @property {{
 *    enabled: boolean|undefined;
 *    stream?: undefined;
 *  }|{
 *    enabled: boolean|undefined;
 *    stream: Subscription;
 *  }|undefined} value - A value that can be an object with 'enabled' and 'stream' properties or undefined.
 */
export type LLMStreamState = {
  setMessages: React.Dispatch<React.SetStateAction<Message[]>>;
  reply: string;
  streamStatus: StreamStatus;
  error: Error | undefined;
  // `stream` is only present while a request has been started for the current messages.
  value:
    | {
        enabled: boolean | undefined;
        stream?: undefined;
      }
    | {
        enabled: boolean | undefined;
        stream: Subscription;
      }
    | undefined;
};

/**
 * A custom React hook for managing an LLM stream that communicates with the provided model.
 *
 * Calling the returned `setMessages` with a non-empty list starts a streaming request (provided the
 * LLM plugin is enabled). While the stream runs, `reply` holds the content accumulated so far.
 * When the stream completes or fails, the messages are cleared and the status returns to idle.
 * If no reply content arrives within {@link TIMEOUT} milliseconds of starting, the request is
 * reported as failed.
 *
 * @param {string} [model=Model.LARGE] - The LLM model to use for communication.
 * @param {number} [temperature=1] - The temperature value for text generation (default is 1).
 * @param {function} [notifyError] - A callback function for handling errors. Prefer a stable
 *   (memoized) callback: the timeout timer is re-armed whenever this function's identity changes.
 *
 * @returns {LLMStreamState} - An object containing the state of the LLM stream.
 * @property {function} setMessages - A function to update the list of messages in the stream.
 * @property {string} reply - The most recent reply received from the LLM stream.
 * @property {StreamStatus} streamStatus - The status of the stream ("idle", "generating" or "completed").
 * @property {Error|undefined} error - An error object if an error occurs, or undefined if no error.
 * @property {object|undefined} value - The current value of the stream.
 * @property {boolean|undefined} value.enabled - Indicates whether the stream is enabled (true or false).
 * @property {Subscription|undefined} value.stream - The stream subscription object if the stream is active, or undefined if not.
 */
export function useLLMStream(
  model = Model.LARGE,
  temperature = 1,
  notifyError: (title: string, text?: string, traceId?: string) => void = () => {}
): LLMStreamState {
  // The messages array to send to the LLM.
  const [messages, setMessages] = useState<Message[]>([]);
  // The latest reply from the LLM.
  const [reply, setReply] = useState('');
  const [streamStatus, setStreamStatus] = useState<StreamStatus>(StreamStatus.IDLE);
  const [error, setError] = useState<Error>();

  // Shared failure path for stream errors and timeouts: reset the request state,
  // record the error and notify the caller.
  const onError = useCallback(
    (e: Error) => {
      setStreamStatus(StreamStatus.IDLE);
      setMessages([]);
      setError(e);
      notifyError(
        'Failed to generate content using LLM provider',
        `Please try again or if the problem persists, contact your organization admin.`
      );
      console.error(e);
    },
    [notifyError]
  );

  const { error: enabledError, value: isEnabled } = useAsync(async () => await enabled(), [enabled]);

  const { error: asyncError, value } = useAsync(async () => {
    // Nothing to do until the plugin is known to be enabled and there is something to send.
    if (!isEnabled || !messages.length) {
      return { enabled: isEnabled };
    }

    setStreamStatus(StreamStatus.GENERATING);
    setError(undefined);
    // Stream the completions. Each element is the next stream chunk.
    const stream = streamChatCompletions({
      model,
      temperature,
      messages,
    }).pipe(
      // Accumulate the stream content into a stream of strings, where each
      // element contains the accumulated message so far.
      accumulateContent()
      // The stream is just a regular Observable, so we can use standard rxjs
      // functionality to update state, e.g. recording when the stream
      // has completed.
      // The operator decision tree on the rxjs website is a useful resource:
      // https://rxjs.dev/operator-decision-tree.)
    );
    // Subscribe to the stream and update the state for each returned value.
    return {
      enabled: isEnabled,
      stream: stream.subscribe({
        next: setReply,
        error: onError,
        complete: () => {
          setStreamStatus(StreamStatus.COMPLETED);
          // Drop back to IDLE on a later tick so COMPLETED is observable first.
          setTimeout(() => {
            setStreamStatus(StreamStatus.IDLE);
          });
          setMessages([]);
          setError(undefined);
        },
      }),
    };
  }, [messages, isEnabled]);

  // Unsubscribe from the stream when it is replaced or when the component unmounts.
  useEffect(() => {
    return () => {
      if (value?.stream) {
        value.stream.unsubscribe();
      }
    };
  }, [value]);

  // If the stream is generating and we haven't received a reply, it times out.
  useEffect(() => {
    let timeout: ReturnType<typeof setTimeout> | undefined;
    if (streamStatus === StreamStatus.GENERATING && reply === '') {
      timeout = setTimeout(() => {
        onError(new Error(`LLM stream timed out after ${TIMEOUT}ms`));
      }, TIMEOUT);
    }
    return () => {
      if (timeout !== undefined) {
        clearTimeout(timeout);
      }
    };
  }, [streamStatus, reply, onError]);

  // Mirror failures of the async helpers into the error state. This must run in an effect:
  // setting state unconditionally during render re-renders on every pass while the error persists.
  useEffect(() => {
    const failure = asyncError || enabledError;
    if (failure) {
      setError(failure);
    }
  }, [asyncError, enabledError]);

  return {
    setMessages,
    reply,
    streamStatus,
    error,
    value,
  };
}
