Local Whisper stores its macOS service settings in ~/.whisper/config.toml. Most settings can be changed from the app or with the wh config command; edit the TOML file directly when you need exact control.

Full reference

# Trigger key for dictation. Presumably this is the key that is held for
# hold-to-record and double-tapped for double-tap dictation (both gestures are
# described in the notes at the end of this page) — confirm in schema.py.
[hotkey]
key = "alt_r"
# NOTE(review): presumably the maximum gap between taps, in seconds — confirm.
double_tap_threshold = 0.4

# Selects the speech-to-text engine. The value shown matches the [parakeet_v3]
# table name; [qwen3_asr] and [whisper] appear to be the other engine tables
# on this page.
[transcription]
engine = "parakeet_v3"

# Settings for the parakeet_v3 transcription engine.
[parakeet_v3]
model = "mlx-community/parakeet-tdt-0.6b-v3"
# NOTE(review): 0 presumably means "no timeout" — confirm in schema.py.
timeout = 0
# NOTE(review): chunk and overlap lengths are presumably in seconds — confirm.
chunk_duration = 120.0
overlap_duration = 15.0
decoding = "greedy"
# NOTE(review): the next four keys look like beam-search tuning and may be
# ignored while decoding = "greedy" — confirm against the schema.
beam_size = 5
length_penalty = 0.013
patience = 3.5
duration_reward = 0.67
# local_attention_context_size presumably only matters when
# local_attention = true — confirm.
local_attention = false
local_attention_context_size = 256

# Settings for the qwen3_asr transcription engine.
[qwen3_asr]
model = "mlx-community/Qwen3-ASR-1.7B-bf16"
# NOTE(review): 0 presumably means "no timeout" — confirm in schema.py.
timeout = 0
# Sampling and repetition controls passed to the model.
temperature = 0.0
top_p = 1.0
top_k = 0
repetition_context_size = 100
repetition_penalty = 1.2
# NOTE(review): presumably seconds of audio per chunk — confirm.
chunk_duration = 1200.0
# NOTE(review): 0 presumably means "no token cap" — confirm.
max_tokens = 0

# Settings for the whisper transcription engine, which is reached over HTTP on
# localhost (see url / check_url below).
[whisper]
model = "whisper-large-v3-v20240930"
language = "auto"
# url is the transcription endpoint; check_url is presumably probed to test
# whether the server is up — confirm.
url = "http://localhost:50060/v1/audio/transcriptions"
check_url = "http://localhost:50060/"
# NOTE(review): 0 presumably means "no timeout" — confirm in schema.py.
timeout = 0
# Decoding temperature and the thresholds / fallback count that go with it.
temperature = 0.0
compression_ratio_threshold = 2.4
no_speech_threshold = 0.6
logprob_threshold = -1.0
temperature_fallback_count = 5
# NOTE(review): how prompt_preset and prompt interact is not shown on this
# page — confirm in schema.py.
prompt_preset = "none"
prompt = ""

# Grammar correction. The backend value shown matches the [apple_intelligence]
# table name; [ollama] and [lm_studio] appear to be the other backend tables on
# this page. Correction is off as written (enabled = false).
[grammar]
backend = "apple_intelligence"
enabled = false

# Settings for the ollama backend, reached over HTTP on localhost.
[ollama]
# url is the generate endpoint; check_url is presumably probed to test whether
# the server is up — confirm.
url = "http://localhost:11434/api/generate"
check_url = "http://localhost:11434/"
model = "gemma3:4b-it-qat"
keep_alive = "60m"
# NOTE(review): 0 in the four keys below presumably means "no limit / use the
# server default" — confirm in schema.py.
timeout = 0
max_chars = 0
max_predict = 0
num_ctx = 0
unload_on_exit = false

# Settings for the apple_intelligence backend (the backend selected in
# [grammar] on this page).
# NOTE(review): 0 presumably means "no limit" for both keys — confirm.
[apple_intelligence]
max_chars = 0
timeout = 0

# Settings for the lm_studio backend, reached over HTTP on localhost.
[lm_studio]
# url is the chat-completions endpoint; check_url is presumably probed to test
# whether the server is up — confirm.
url = "http://localhost:1234/v1/chat/completions"
check_url = "http://localhost:1234/"
model = "google/gemma-3-4b"
# NOTE(review): 0 in the three keys below presumably means "no limit" —
# confirm in schema.py.
max_chars = 0
max_tokens = 0
timeout = 0

# Text replacements; off as written (enabled = false).
[replacements]
enabled = false

# Each rule maps a phrase (quoted key) to its replacement text. The examples
# below are commented out, so no rules are defined as written.
[replacements.rules]
# "gonna" = "going to"
# "wanna" = "want to"

[dictation]
# Voice commands also remove high-confidence speech fillers such as "um", "uh",
# "ah", "er", and pause-like "oh" before grammar correction.
enabled = true

# Custom commands: each entry maps a spoken phrase (quoted key) to the text to
# insert. The examples below are commented out, so none are defined as written.
[dictation.commands]
# "next bullet" = "\n- "
# "smiley" = " :)"

# Audio capture and pre-processing.
[audio]
# NOTE(review): presumably in Hz — confirm.
sample_rate = 16000
# NOTE(review): 0 presumably disables the minimum / maximum length check;
# units are presumably seconds — confirm in schema.py.
min_duration = 0
max_duration = 0
# NOTE(review): presumably the RMS level below which a recording is treated as
# silence — confirm.
min_rms = 0.005
vad_enabled = true
noise_reduction = true
normalize_audio = true
# NOTE(review): presumably seconds of audio kept from before the hotkey press;
# 0.0 would then mean none — confirm.
pre_buffer = 0.0

# Background service behaviour.
# NOTE(review): presumably loaded models are unloaded after this many idle
# minutes — confirm in schema.py.
[service]
idle_unload_minutes = 20

# Backup / history storage. The directory shown is the same one that holds
# config.toml. TOML does not expand "~" itself, so expansion is presumably done
# by the application — confirm.
[backup]
directory = "~/.whisper"
# NOTE(review): presumably the maximum number of history entries kept — confirm.
history_limit = 100

# Overlay, sound, notification, and paste behaviour.
[ui]
show_overlay = true
overlay_opacity = 0.92
sounds_enabled = true
notifications_enabled = false
# Per the note at the end of this page: hold-to-record always pastes at the
# active cursor; setting auto_paste = true makes double-tap dictation paste at
# the cursor as well.
auto_paste = false

# Keyboard shortcuts for the proofread, rewrite, and prompt_engineer actions.
# Combos are written as "+"-joined modifier and key names.
[shortcuts]
enabled = true
proofread = "ctrl+shift+g"
rewrite = "ctrl+shift+r"
prompt_engineer = "ctrl+shift+p"

# Text-to-speech; off as written (enabled = false).
[tts]
enabled = false
# Note: the provider value is "kokoro", while its settings table on this page
# is named [kokoro_tts].
provider = "kokoro"
speak_shortcut = "alt+t"

# Model and voice for the Kokoro text-to-speech provider (selected in [tts]
# with provider = "kokoro").
[kokoro_tts]
model = "mlx-community/Kokoro-82M-bf16"
voice = "af_sky"

Source of truth

The Python schema and defaults live in src/whisper_voice/config/schema.py. When a setting changes, update this page and the schema together. Hold-to-record always pastes the transcription at the active cursor. Setting auto_paste = true in [ui] makes double-tap dictation use the same paste-at-cursor behavior.