//go:build go1.18
// +build go1.18

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See License.txt in the project root for license information.

// Code generated by Microsoft (R) AutoRest Code Generator.
// Changes may cause incorrect behavior and will be lost if the code is regenerated.

package azopenai

import (
	"encoding/json" // used by the illustrative marshaling sketch below
	"time"
)
// ChatCompletionsClientCreateOptions contains the optional parameters for the ChatCompletionsClient.Create method.
type ChatCompletionsClientCreateOptions struct {
// endpoint - server parameter
Endpoint *string
}
// CompletionsClientCreateOptions contains the optional parameters for the CompletionsClient.Create method.
type CompletionsClientCreateOptions struct {
// endpoint - server parameter
Endpoint *string
}
// EmbeddingsClientCreateOptions contains the optional parameters for the EmbeddingsClient.Create method.
type EmbeddingsClientCreateOptions struct {
// endpoint - server parameter
Endpoint *string
}

// ErrorResponse - the error envelope returned by the service on failed requests.
type ErrorResponse struct {
Error *ErrorResponseError `json:"error,omitempty"`
}

// ErrorResponseError - details of a service error: a machine-readable code, a
// human-readable message, the offending parameter (if any), and the error type.
type ErrorResponseError struct {
Code *string `json:"code,omitempty"`
Message *string `json:"message,omitempty"`
Param *string `json:"param,omitempty"`
Type *string `json:"type,omitempty"`
}

// EmbeddingsCreateParameters - the request body for the EmbeddingsClient.Create method.
type EmbeddingsCreateParameters struct {
// REQUIRED; Input text to get embeddings for, encoded as a string. To get embeddings for multiple inputs in a single request,
// pass an array of strings. Each input must not exceed 2048 tokens in length. Unless you
// are embedding code, we suggest replacing newlines (\n) in your input with a single space, as we have observed inferior
// results when newlines are present.
Input *EmbeddingsInput `json:"input,omitempty"`
// OPTIONAL; Contains additional key/value pairs not defined in the schema.
AdditionalProperties map[string]any
// Input type of embedding search to use.
InputType *string `json:"input_type,omitempty"`
// ID of the model to use. You can use the ModelsList operation to see all of your available models, or see our ModelsGet
// overview for descriptions of them.
Model *string `json:"model,omitempty"`
// A unique identifier representing your end-user, which can help in monitoring and detecting abuse.
User *string `json:"user,omitempty"`
}

// Embeddings - the response returned by an embeddings request.
type Embeddings struct {
// REQUIRED
Data []*EmbeddingsData `json:"data,omitempty"`
// REQUIRED
Model *string `json:"model,omitempty"`
// REQUIRED
Object *string `json:"object,omitempty"`
// REQUIRED
Usage *EmbeddingsUsage `json:"usage,omitempty"`
}

// EmbeddingsUsage - token accounting for an embeddings request.
type EmbeddingsUsage struct {
// REQUIRED
PromptTokens *int32 `json:"prompt_tokens,omitempty"`
// REQUIRED
TotalTokens *int32 `json:"total_tokens,omitempty"`
}

// ChatCompletionsChoice - a single generated chat completion.
type ChatCompletionsChoice struct {
FinishReason *string `json:"finish_reason,omitempty"`
Index *int32 `json:"index,omitempty"`
Message *ChatCompletionsChoiceMessage `json:"message,omitempty"`
Delta *ChatCompletionsChoiceMessage `json:"delta,omitempty"` // populated instead of Message on streaming responses
}
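
// Reading a choice's text (a minimal sketch, not part of the generated surface):
// a non-streaming response fills Message; when Stream was requested the service
// sends incremental deltas, so Delta is filled and Message stays nil.
func exampleChoiceText(c *ChatCompletionsChoice) *string {
	if c.Message != nil {
		return c.Message.Content
	}
	if c.Delta != nil {
		return c.Delta.Content
	}
	return nil
}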

// ChatCompletions - the response returned by a chat completions request.
type ChatCompletions struct {
// REQUIRED
Choices []*ChatCompletionsChoice `json:"choices,omitempty"`
// REQUIRED
Created *time.Time `json:"created,omitempty"`
// REQUIRED
ID *string `json:"id,omitempty"`
// REQUIRED
Model *string `json:"model,omitempty"`
// REQUIRED
Object *string `json:"object,omitempty"`
Usage *ChatCompletionsUsage `json:"usage,omitempty"`
}

// ChatCompletionsCreateParameters - the request body for the ChatCompletionsClient.Create method.
type ChatCompletionsCreateParameters struct {
// REQUIRED; The messages to generate chat completions for, in the chat format.
Messages []*ChatCompletionsMessage `json:"messages,omitempty"`
// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,
// decreasing the model's likelihood to repeat the same line verbatim.
FrequencyPenalty *float32 `json:"frequency_penalty,omitempty"`
// Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified
// by their token ID in the tokenizer) to an associated bias value from -100 to
// 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary
// per model, but values between -1 and 1 should decrease or increase likelihood
// of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
LogitBias *any `json:"logit_bias,omitempty"`
// The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will
// be (4096 - prompt tokens).
MaxTokens *int32 `json:"max_tokens,omitempty"`
// How many chat completion choices to generate for each input message.
N *int32 `json:"n,omitempty"`
// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing
// the model's likelihood to talk about new topics.
PresencePenalty *float32 `json:"presence_penalty,omitempty"`
// Up to 4 sequences where the API will stop generating further tokens.
Stop *ChatCompletionsStop `json:"stop,omitempty"`
// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they
// become available, with the stream terminated by a data: [DONE] message.
Stream *bool `json:"stream,omitempty"`
// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower
// values like 0.2 will make it more focused and deterministic. We generally
// recommend altering this or top_p but not both.
Temperature *float32 `json:"temperature,omitempty"`
// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens
// with top_p probability mass. So 0.1 means only the tokens comprising the top
// 10% probability mass are considered. We generally recommend altering this or temperature but not both.
TopP *float32 `json:"top_p,omitempty"`
// A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse.
User *string `json:"user,omitempty"`
}
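
// Building a request (a minimal sketch, not part of the generated surface). It
// assumes ChatCompletionsMessageRole is a string-backed type defined elsewhere in
// this package; the small to() helper for taking the address of a literal is a
// local convenience introduced here, not an SDK helper.
func to[T any](v T) *T { return &v }

func exampleChatRequest() ChatCompletionsCreateParameters {
	role := ChatCompletionsMessageRole("user")
	return ChatCompletionsCreateParameters{
		Messages: []*ChatCompletionsMessage{
			{Role: &role, Content: to("Summarize this document.")},
		},
		MaxTokens:   to(int32(256)),
		Temperature: to(float32(0.2)), // alter Temperature or TopP, not both
	}
}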

// CompletionsUsage - token accounting for a completions request.
type CompletionsUsage struct {
// REQUIRED
CompletionTokens *int32 `json:"completion_tokens,omitempty"`
// REQUIRED
PromptTokens *int32 `json:"prompt_tokens,omitempty"`
// REQUIRED
TotalTokens *int32 `json:"total_tokens,omitempty"`
}

// CompletionsCreateParameters - the request body for the CompletionsClient.Create method.
type CompletionsCreateParameters struct {
// Generates best_of completions server-side and returns the "best" (the one with the highest log probability per token).
// Results cannot be streamed. When used with n, best_of controls the number of candidate completions and n specifies how
// many to return; best_of must be greater than n. Note: Because this parameter generates many completions, it can quickly
// consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop. Has a
// maximum value of 128.
BestOf *int32 `json:"best_of,omitempty"`
// Can be used to disable any server-side caching: 0=no cache, 1=prompt prefix enabled, 2=full cache.
CacheLevel *int32 `json:"cache_level,omitempty"`
CompletionConfig *string `json:"completion_config,omitempty"`
// Echo back the prompt in addition to the completion
Echo *bool `json:"echo,omitempty"`
// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,
// decreasing the model's likelihood to repeat the same line verbatim.
FrequencyPenalty *float32 `json:"frequency_penalty,omitempty"`
// Defaults to null. Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps
// tokens (specified by their token ID in the GPT tokenizer) to an associated bias
// value from -100 to 100. You can use this tokenizer tool (which works for both GPT-2 and GPT-3) to convert text to token
// IDs. Mathematically, the bias is added to the logits generated by the model
// prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood
// of selection; values like -100 or 100 should result in a ban or exclusive
// selection of the relevant token. As an example, you can pass {"50256" : -100} to prevent the token from being generated.
LogitBias *any `json:"logit_bias,omitempty"`
// Include the log probabilities on the logprobs most likely tokens, as well as the chosen tokens. For example, if logprobs is
// 5, the API will return a list of the 5 most likely tokens. The API will always
// return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response. Minimum of 0 and maximum
// of 5 allowed.
Logprobs *int32 `json:"logprobs,omitempty"`
// The token count of your prompt plus max_tokens cannot exceed the model's context length. Most models have a context length
// of 2048 tokens (except for the newest models, which support 4096). Has
// minimum of 0.
MaxTokens *int32 `json:"max_tokens,omitempty"`
// ID of the model to use. You can use the ModelsList operation to see all of your available models, or see our ModelsGet
// overview for descriptions of them.
Model *string `json:"model,omitempty"`
// How many completions to generate for each prompt. Minimum of 1 and maximum of 128 allowed. Note: Because this parameter
// generates many completions, it can quickly consume your token quota. Use
// carefully and ensure that you have reasonable settings for max_tokens and stop.
N *int32 `json:"n,omitempty"`
// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing
// the model's likelihood to talk about new topics.
PresencePenalty *float32 `json:"presence_penalty,omitempty"`
// The prompt(s) to generate completions for, encoded as a string or array of strings. Note that <|endoftext|> is the
// document separator that the model sees during training, so if a prompt is not specified the model will generate as if
// from the beginning of a new document. Maximum allowed size of the string list is 2048.
Prompt *CompletionsPrompt `json:"prompt,omitempty"`
// Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Stop *CompletionsStop `json:"stop,omitempty"`
// Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent events as they become available,
// with the stream terminated by a data: [DONE] message.
Stream *bool `json:"stream,omitempty"`
// The suffix that comes after a completion of inserted text.
Suffix *string `json:"suffix,omitempty"`
// What sampling temperature to use. Higher values mean the model will take more risks. Try 0.9 for more creative applications,
// and 0 (argmax sampling) for ones with a well-defined answer. We generally
// recommend altering this or top_p but not both.
Temperature *float32 `json:"temperature,omitempty"`
// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens
// with top_p probability mass. So 0.1 means only the tokens comprising the top
// 10% probability mass are considered. We generally recommend altering this or temperature but not both.
TopP *float32 `json:"top_p,omitempty"`
// A unique identifier representing your end-user, which can help in monitoring and detecting abuse.
User *string `json:"user,omitempty"`
}
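
// Combining best_of and n (a minimal sketch reusing the to() helper above):
// best_of must be greater than n when both are set.
func exampleCompletionsRequest() CompletionsCreateParameters {
	return CompletionsCreateParameters{
		Prompt:    &CompletionsPrompt{Text: to("Once upon a time")},
		MaxTokens: to(int32(64)),
		N:         to(int32(2)), // return two completions...
		BestOf:    to(int32(4)), // ...chosen from four server-side candidates
	}
}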

// ChatCompletionsUsage - token accounting for a chat completions request.
type ChatCompletionsUsage struct {
// REQUIRED
CompletionTokens *int32 `json:"completion_tokens,omitempty"`
// REQUIRED
PromptTokens *int32 `json:"prompt_tokens,omitempty"`
// REQUIRED
TotalTokens *int32 `json:"total_tokens,omitempty"`
}

// EmbeddingsData - a single embedding vector and its index in the input batch.
type EmbeddingsData struct {
// REQUIRED
Embedding []*float32 `json:"embedding,omitempty"`
// REQUIRED
Index *int32 `json:"index,omitempty"`
// REQUIRED
Object *string `json:"object,omitempty"`
}
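
// Collecting the raw vector from one result (a minimal sketch): Embedding is a
// slice of *float32 in this generated model, so nil entries are skipped defensively.
func exampleVector(d *EmbeddingsData) []float32 {
	out := make([]float32, 0, len(d.Embedding))
	for _, v := range d.Embedding {
		if v != nil {
			out = append(out, *v)
		}
	}
	return out
}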

// ChatCompletionsStop - Up to 4 sequences where the API will stop generating further tokens.
type ChatCompletionsStop struct {
Text *string
Sequences []string
}
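
// ChatCompletionsStop, CompletionsStop, CompletionsPrompt, and EmbeddingsInput are
// unions over a single string or a list of strings; set exactly one field. A minimal
// marshaling sketch (the SDK generates its own marshalers elsewhere; this function
// is illustrative only):
func exampleMarshalStop(c ChatCompletionsStop) ([]byte, error) {
	if c.Text != nil {
		return json.Marshal(*c.Text) // serializes as a bare JSON string
	}
	return json.Marshal(c.Sequences) // serializes as a JSON array of strings
}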

// Completions - the response returned by a completions request.
type Completions struct {
// REQUIRED
Choices []*CompletionsChoice `json:"choices,omitempty"`
// REQUIRED
Created *int32 `json:"created,omitempty"`
// REQUIRED
ID *string `json:"id,omitempty"`
// REQUIRED
Model *string `json:"model,omitempty"`
// REQUIRED
Object *string `json:"object,omitempty"`
Usage *CompletionsUsage `json:"usage,omitempty"`
}

// CompletionsChoice - a single generated completion.
type CompletionsChoice struct {
FinishReason *string `json:"finish_reason,omitempty"`
Index *int32 `json:"index,omitempty"`
Logprobs *CompletionsChoiceLogprobs `json:"logprobs,omitempty"`
Text *string `json:"text,omitempty"`
}
// EmbeddingsInput - Input text to get embeddings for, encoded as a string. To get embeddings for multiple inputs in
// a single request, pass an array of strings. Each input must not exceed 2048 tokens in length. Unless you
// are embedding code, we suggest replacing newlines (\n) in your input with a single space, as we have observed inferior
// results when newlines are present.
type EmbeddingsInput struct {
Text *string
Sequences []string
}
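
// Requesting embeddings for a batch (a minimal sketch; the User value is a
// hypothetical placeholder): set Sequences for multiple inputs, or Text for one.
func exampleEmbeddingsParams() EmbeddingsCreateParameters {
	return EmbeddingsCreateParameters{
		Input: &EmbeddingsInput{Sequences: []string{"first input", "second input"}},
		User:  to("example-user"),
	}
}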

// ChatCompletionsMessage - a message in the chat format.
type ChatCompletionsMessage struct {
// REQUIRED; The contents of the message
Content *string `json:"content,omitempty"`
// REQUIRED; The role of the author of this message.
Role *ChatCompletionsMessageRole `json:"role,omitempty"`
// The name of the user in a multi-user chat
Name *string `json:"name,omitempty"`
}

// CompletionsPrompt - The prompt(s) to generate completions for, encoded as a string or array of strings. Note that
// <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model
// will generate as if from the beginning of a new document. Maximum allowed size of the string list is 2048.
type CompletionsPrompt struct {
Text *string
Sequences []string
}
// CompletionsStop - Up to 4 sequences where the API will stop generating further tokens. The returned text will not
// contain the stop sequence.
type CompletionsStop struct {
Text *string
Sequences []string
}

// CompletionsChoiceLogprobs - per-token log probability details. TextOffset, TokenLogprobs,
// Tokens, and TopLogprobs are parallel arrays with one entry per sampled token.
type CompletionsChoiceLogprobs struct {
TextOffset []*int32 `json:"text_offset,omitempty"`
TokenLogprobs []*float32 `json:"token_logprobs,omitempty"`
Tokens []*string `json:"tokens,omitempty"`
TopLogprobs []map[string]*float32 `json:"top_logprobs,omitempty"`
}
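
// Summing per-token log probabilities (a minimal sketch) gives the log probability
// of the whole sampled sequence, since the arrays are parallel per token.
func exampleSequenceLogprob(lp *CompletionsChoiceLogprobs) float32 {
	var sum float32
	for _, p := range lp.TokenLogprobs {
		if p != nil {
			sum += *p
		}
	}
	return sum
}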

// ChatCompletionsChoiceMessage - the message (or streamed delta) of a chat completions choice.
type ChatCompletionsChoiceMessage struct {
// REQUIRED; The contents of the message
Content *string `json:"content,omitempty"`
// REQUIRED; The role of the author of this message.
Role *ChatCompletionsMessageRole `json:"role,omitempty"`
}