#!/usr/bin/env sh
# OCR a screen region (default) or an image file → clipboard.
#
# Usage:
#   ocr           # interactive: select a region with slurp, OCR it
#   ocr FILE      # OCR an image file
#
# Requires: tesseract (+tesseract-data-eng, tesseract-data-por),
#           grim, slurp, wl-clipboard, libnotify, awk, sed.
#
# Override languages via TESSERACT_LANG (e.g. TESSERACT_LANG=eng).
#
# Exit status:
#   0  text was copied, or slurp exited non-zero (e.g. selection cancelled)
#   1  FILE is not readable, or no text was recognised

set -eu

lang="${TESSERACT_LANG:-eng+por}"

# Normalise OCR output read from stdin: trim trailing whitespace on every
# line, squeeze runs of blank lines into a single one, and drop leading
# blank lines. Trailing blank lines are removed by the caller's $(...).
clean_text() {
  awk '
    { sub(/[[:space:]]+$/, "") }
    NF { print; blank = 0; next }
    !blank { print; blank = 1 }
  ' | sed -e '/./,$!d'
}

# Print at most $1 characters of stdin (newlines between lines count).
# Truncation is done with awk's substr/length rather than `head -c`, so
# with a multibyte-aware awk (e.g. gawk in a UTF-8 locale) the cut never
# lands in the middle of a multi-byte character.
# The whole input is consumed (no early exit) so the writer never sees a
# closed pipe.
preview_text() {
  awk -v max="$1" '
    BEGIN { left = max }
    left <= 0 { next }
    NR > 1 { printf "\n"; left-- }
    {
      piece = substr($0, 1, left)
      printf "%s", piece
      left -= length(piece)
    }
  '
}

# Entry point: $1 (optional) is an image file; without it a screen region
# is selected interactively.
main() {
  if [ "${1:-}" ]; then
    if [ ! -r "$1" ]; then
      # `--` keeps a path starting with "-" from being parsed as an option.
      notify-send -u critical -- "📋 OCR" "Cannot read: $1"
      return 1
    fi
    # tesseract failures are tolerated on purpose: an empty result is
    # reported as "No text detected." further down.
    text="$(tesseract "$1" - -l "$lang" 2>/dev/null || true)"
  else
    region="$(slurp 2>/dev/null)" || return 0
    text="$(grim -g "$region" - | tesseract - - -l "$lang" 2>/dev/null || true)"
  fi

  text="$(printf '%s\n' "$text" | clean_text)"

  if [ -z "$text" ]; then
    notify-send -u low -- "📋 OCR" "No text detected."
    return 1
  fi

  printf '%s' "$text" | wl-copy
  # `--` is required here: recognised text may begin with "-" (bullet
  # lists, command-line flags) and would otherwise be taken as an option.
  notify-send -t 3000 -- "📋 OCR copied" \
    "$(printf '%s\n' "$text" | preview_text 200)"
}

main "$@"