# https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
# Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)
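# Example, commented out. `num_predict` is the parameter name from the Ollama docs
# linked above; the `key = value` syntax is assumed to match the `stop` line at the
# bottom of this file:
# num_predict = 128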
# Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers,
# while a lower value (e.g. 10) will be more conservative. (Default: 40)
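# Example, commented out (`top_k` per the Ollama docs; syntax assumed as above):
# top_k = 40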
# Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text,
# while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
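# Example, commented out (`top_p` per the Ollama docs; syntax assumed as above):
# top_p = 0.9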
# The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)
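# Example, commented out (`temperature` per the Ollama docs; syntax assumed as above):
# temperature = 0.8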
# Tail-free sampling is used to reduce the impact of less probable tokens on the output.
# A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (Default: 1)
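# Example, commented out (`tfs_z` per the Ollama docs; syntax assumed as above):
# tfs_z = 1.0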
# Enable Mirostat sampling for controlling perplexity. (Default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
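# Example, commented out (`mirostat` per the Ollama docs; syntax assumed as above):
# mirostat = 0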
# Influences how quickly the algorithm responds to feedback from the generated text.
# A lower learning rate will result in slower adjustments, while a higher learning rate
# will make the algorithm more responsive. (Default: 0.1)
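# Example, commented out (`mirostat_eta` per the Ollama docs; syntax assumed as above):
# mirostat_eta = 0.1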
# Controls the balance between coherence and diversity of the output.
# A lower value will result in more focused and coherent text. (Default: 5.0)
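# Example, commented out (`mirostat_tau` per the Ollama docs; syntax assumed as above):
# mirostat_tau = 5.0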
# The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable Metal support, 0 to disable.
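# Example, commented out (`num_gpu` per the Ollama docs; syntax assumed as above;
# 1 is the macOS default noted above):
# num_gpu = 1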
# Sets how far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)
# repeat_last_n 64 256
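# Example, commented out (`repeat_last_n` per the Ollama docs; syntax assumed as above):
# repeat_last_n = 64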
# Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly,
# while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
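# Example, commented out (`repeat_penalty` per the Ollama docs; syntax assumed as above):
# repeat_penalty = 1.1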
stop = ["As an AI", "I'm an AI", "I am an AI", "I am a bot", "I'm a bot", "assistant"]