# Python for Engineers: Voice-Controlled Desktop Assistant

import speech_recognition as sr

import pyttsx3

import subprocess

import os

import webbrowser

import time

from datetime import datetime

import threading

# -------------------- Configuration --------------------

# Change these to match your environment

MUSIC_FOLDER = os.path.expanduser("~/Music")  # folder to play music from
SEARCH_ROOT = os.path.expanduser("~")  # root folder for file searches
DEFAULT_BROWSER = None  # None will use webbrowser.open

# Map simple app names to launch commands, keyed by platform:
#   "win"   -> Windows, "mac" -> macOS, "linux" -> Linux.
# A missing platform key means the app has no known command on that platform.
COMMANDS = {
    "notepad": {"win": "notepad"},
    "calculator": {
        "win": "calc",
        "mac": "open -a Calculator",
        "linux": "gnome-calculator",
    },
    "vscode": {
        # %USERNAME% is left for the launcher to expand via os.path.expandvars.
        "win": r"C:\Users\%USERNAME%\AppData\Local\Programs\Microsoft VS Code\Code.exe",
        "mac": "open -a Visual\\ Studio\\ Code",
        "linux": "code",
    },
    # `open` and `xdg-open` need a target; "." opens the file manager on the
    # current working directory instead of just printing a usage message.
    "explorer": {"win": "explorer", "mac": "open .", "linux": "xdg-open ."},
}

# -------------------- Helpers --------------------

# Only one worker at a time may drive the TTS engine: overlapping
# say()/runAndWait() calls on the same engine object are not safe.
_SPEAK_LOCK = threading.Lock()


def speak(text, engine):
    """Speak `text` on a background thread so the main loop is not blocked.

    Utterances are serialized with a module-level lock, so several calls in
    quick succession are spoken one at a time instead of calling
    ``engine.runAndWait()`` concurrently from multiple threads.

    Args:
        text: The sentence to speak.
        engine: A TTS engine object exposing ``say(text)`` and ``runAndWait()``.

    Returns:
        The started daemon ``threading.Thread``. Callers that must not proceed
        until the sentence has been spoken (for example right before exiting)
        can ``join()`` it; callers that do not care may ignore it.
    """

    def _say():
        with _SPEAK_LOCK:
            engine.say(text)
            engine.runAndWait()

    worker = threading.Thread(target=_say, daemon=True)
    worker.start()
    return worker

def recognize_speech_from_mic(recognizer, microphone, timeout=5, phrase_time_limit=6):
    """Record one phrase from the microphone and transcribe it.

    Args:
        recognizer: Object providing ``adjust_for_ambient_noise``, ``listen``
            and ``recognize_google``.
        microphone: Context manager that yields the audio source.
        timeout: Passed to ``listen`` as ``timeout``.
        phrase_time_limit: Passed to ``listen`` as ``phrase_time_limit``.

    Returns:
        The recognized text; ``None`` when listening timed out or the audio
        could not be understood; the sentinel string ``"[error_api]"`` when
        the recognition request itself failed.
    """
    with microphone as mic_source:
        # Re-calibrate against background noise before every capture.
        recognizer.adjust_for_ambient_noise(mic_source, duration=0.6)
        try:
            captured = recognizer.listen(
                mic_source,
                timeout=timeout,
                phrase_time_limit=phrase_time_limit,
            )
        except sr.WaitTimeoutError:
            return None

    try:
        # Using Google Web Speech API (requires internet) — good accuracy
        transcript = recognizer.recognize_google(captured)
    except sr.UnknownValueError:
        transcript = None
    except sr.RequestError:
        # API unreachable
        transcript = "[error_api]"
    return transcript

def open_app(app_key):
    """Launch the application registered under `app_key` in COMMANDS.

    The command is chosen per platform: the "win" entry on Windows, the "mac"
    entry on macOS and the "linux" entry on every other POSIX system.

    Args:
        app_key: Application name; matched case-insensitively against the
            keys of COMMANDS.

    Returns:
        A ``(success, message)`` tuple. ``success`` is False when there is no
        mapping, no command for the current platform, or the command could
        not be started.
    """
    import shlex
    import sys

    cmd_map = COMMANDS.get(app_key.strip().lower())
    if not cmd_map:
        return False, f"No mapping for app '{app_key}'."

    try:
        if os.name == "nt":
            cmd = cmd_map.get("win")
            if not cmd:
                return False, "No Windows command available for this app."
            # Support environment vars (e.g. %USERNAME%) in the path.
            subprocess.Popen(os.path.expandvars(cmd), shell=True)
        else:
            # os.name is "posix" on both macOS and Linux, so sys.platform is
            # needed to avoid running a macOS `open -a ...` command on Linux.
            platform_key = "mac" if sys.platform == "darwin" else "linux"
            cmd = cmd_map.get(platform_key)
            if not cmd:
                return False, "No command available for this platform."
            # Run without a shell so a missing executable raises here and is
            # reported to the user instead of failing silently.
            subprocess.Popen(shlex.split(cmd))
    except (OSError, ValueError) as exc:
        return False, str(exc)
    return True, f"Opened {app_key}."

def open_folder(folder_name):
    """Open a folder in the platform's file manager.

    Common names (downloads, documents, desktop, pictures, music) map to the
    matching directory in the user's home; any other name is treated as a
    path relative to the home directory.

    Args:
        folder_name: Spoken folder name; surrounding whitespace and case are
            ignored.

    Returns:
        A ``(success, message)`` tuple. ``success`` is False when the folder
        does not exist or the file manager could not be started.
    """
    import sys

    name = folder_name.strip().lower()
    user = os.path.expanduser("~")
    mapping = {
        "downloads": os.path.join(user, "Downloads"),
        "documents": os.path.join(user, "Documents"),
        "desktop": os.path.join(user, "Desktop"),
        "pictures": os.path.join(user, "Pictures"),
        "music": os.path.join(user, "Music"),
    }
    path = mapping.get(name) or os.path.join(user, name)

    if not os.path.exists(path):
        return False, f"Folder {path} does not exist."

    try:
        if os.name == "nt":
            os.startfile(path)
        else:
            # macOS ships `open`; xdg-open only exists on Linux desktops.
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            # Argument list, not a shell string: the name comes from speech
            # input and must never be interpreted by a shell.
            subprocess.Popen([opener, path])
    except OSError as exc:
        return False, str(exc)
    return True, f"Opened folder {path}"

def search_files(query, root=None, limit=10):
    """Search file names under `root` for a case-insensitive substring.

    Args:
        query: Text that must appear somewhere in the file name.
        root: Directory to walk recursively. ``None`` (the default) means
            SEARCH_ROOT, looked up at call time.
        limit: Stop as soon as this many matches have been collected.

    Returns:
        A list of full paths to matching files, in ``os.walk`` order. The
        list is empty when nothing matches and holds at most `limit` entries
        for any positive `limit`.
    """
    if root is None:
        root = SEARCH_ROOT

    needle = query.lower()
    matches = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if needle in filename.lower():
                matches.append(os.path.join(dirpath, filename))
                if len(matches) >= limit:
                    return matches
    # Fewer than `limit` hits: still return the list rather than falling off
    # the end of the function and handing callers None.
    return matches

def play_random_music():
    """Play a randomly chosen music file from MUSIC_FOLDER.

    Only files directly inside MUSIC_FOLDER with a .mp3, .wav, .ogg or .flac
    extension are considered. The file is handed to the platform's default
    opener (``os.startfile`` on Windows, ``open`` on macOS, ``xdg-open``
    elsewhere).

    Returns:
        A ``(success, message)`` tuple. ``success`` is False when the folder
        is missing, contains no music files, or the opener could not start.
    """
    import random
    import sys

    if not os.path.isdir(MUSIC_FOLDER):
        return False, f"Music folder {MUSIC_FOLDER} not found."

    exts = (".mp3", ".wav", ".ogg", ".flac")
    tracks = [f for f in os.listdir(MUSIC_FOLDER) if f.lower().endswith(exts)]
    if not tracks:
        return False, "No music files found in your music folder."

    choice = os.path.join(MUSIC_FOLDER, random.choice(tracks))
    try:
        if os.name == "nt":
            os.startfile(choice)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            # Argument list, not a shell string: file names may contain
            # quotes or other shell metacharacters.
            subprocess.Popen([opener, choice])
    except OSError as exc:
        return False, str(exc)
    return True, f"Playing {os.path.basename(choice)}"

def confirm_action(recognizer, microphone, engine, prompt="Are you sure? Say 'yes' to confirm"):
    """Ask for spoken confirmation and report whether the user agreed.

    Args:
        recognizer: Passed through to recognize_speech_from_mic.
        microphone: Passed through to recognize_speech_from_mic.
        engine: TTS engine used to speak the prompt.
        prompt: Sentence spoken before listening.

    Returns:
        True only when the reply contains "yes", "yeah" or "yup" as a whole
        word; False for any other reply, for silence, or when recognition
        failed.
    """
    spoken = speak(prompt, engine)
    # If speak() hands back something joinable, wait for the prompt to finish
    # so the microphone does not record the prompt's own "yes".
    wait = getattr(spoken, "join", None)
    if callable(wait):
        wait(timeout=10)

    text = recognize_speech_from_mic(recognizer, microphone, timeout=5, phrase_time_limit=4)
    if not text:
        return False

    # Whole-word match: a substring test would accept "eyes" or "yesterday".
    words = {word.strip(".,!?'\"") for word in text.lower().split()}
    return not words.isdisjoint(("yes", "yeah", "yup"))

# -------------------- Main Assistant --------------------

def _site_url(site):
    """Turn a spoken site name into a URL ("youtube" -> https://youtube.com)."""
    if site.startswith("http"):
        return site
    if "." in site:
        # Already a host name such as "example.org"; do not append ".com".
        return f"https://{site}"
    return f"https://{site}.com"


def main():
    """Run the assistant: listen, interpret one command, act, repeat.

    Commands are matched in this order, and the first match wins:

    1. exit / quit / stop / bye / "shutdown assistant" -> leave the loop
    2. help -> speak and print the command list
    3. time -> speak the current time
    4. "open ..." -> "open website|url X" opens a site, "open folder X"
       opens a folder, a name found in COMMANDS launches that app, a known
       folder name opens that folder, anything else is treated as a site name
    5. "search file ..." / "find file ..." / "search for file ..." -> file search
    6. anything mentioning "music" -> play music
    7. "shutdown" / "restart system" -> system power action, after a spoken
       confirmation
    8. "search ..." / "google ..." -> web search
    9. anything else -> "did not understand"

    Keywords in steps 1-3 are matched as whole words, so "open desktop" is
    not mistaken for "stop".
    """
    from urllib.parse import quote_plus

    exit_words = ("exit", "quit", "stop", "bye", "goodbye")
    folder_words = ("downloads", "documents", "desktop", "pictures", "music")

    # Initialize recognizer and TTS
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()
    engine = pyttsx3.init()
    engine.setProperty('rate', 160)

    speak("Hello — voice assistant activated. Say a command.", engine)
    print("Assistant is listening... (say 'help' for suggestions)")

    while True:
        print("\nListening...")
        text = recognize_speech_from_mic(recognizer, microphone)

        if text is None:
            print("No speech detected.")
            continue

        if text == "[error_api]":
            print("Speech API not reachable. Check internet or use offline recognizer.")
            speak("Sorry, speech service is unreachable.", engine)
            continue

        command = text.lower().strip()
        words = set(command.split())
        print("You said:", command)

        # Exit
        if "shutdown assistant" in command or not words.isdisjoint(exit_words):
            farewell = speak("Goodbye!", engine)
            print("Exiting assistant.")
            # If speak() returned something joinable, let the farewell finish
            # before the process exits and takes the daemon thread with it.
            if hasattr(farewell, "join"):
                farewell.join(timeout=5)
            break

        # Help
        if "help" in words:
            help_text = ("You can say: open notepad, open calculator, open downloads, search file invoice, "
                         "play music, open website youtube, what's the time, shutdown system (requires confirmation).")
            speak(help_text, engine)
            print(help_text)
            continue

        # Time
        if "time" in words:
            now = datetime.now().strftime("%I:%M %p")
            speak(f"The time is {now}", engine)
            print("Time:", now)
            continue

        # Open: website / folder / app, decided by what follows "open".
        if command.startswith("open "):
            target = command[len("open "):].strip()
            head, _, rest = target.partition(" ")
            # "vs code" is usually transcribed as two words; COMMANDS uses "vscode".
            app_key = target if target in COMMANDS else target.replace(" ", "")

            if head in ("website", "url"):
                # e.g. "open website youtube"
                site_words = rest.split()
                if not site_words:
                    speak("Please say the website to open.", engine)
                    continue
                site = site_words[0]
            elif head == "folder":
                # e.g. "open folder downloads"
                success, msg = open_folder(rest.strip() or "downloads")
                speak(msg, engine)
                print(msg)
                continue
            elif app_key in COMMANDS:
                # e.g. "open notepad" or "open vscode"
                success, msg = open_app(app_key)
                speak(msg, engine)
                print(msg)
                continue
            elif target in folder_words:
                # e.g. "open downloads"
                success, msg = open_folder(target)
                speak(msg, engine)
                print(msg)
                continue
            else:
                # e.g. "open youtube"
                site = head

            url = _site_url(site)
            webbrowser.open(url)
            speak(f"Opening {site}", engine)
            print("Open URL:", url)
            continue

        # Search files
        if command.startswith(("search file", "find file", "search for file")):
            # e.g. "search file invoice 2024"
            raw_query = command.split("file", 1)[1]
            if raw_query == "s" or raw_query.startswith("s "):
                # The user said "files": drop the plural "s".
                raw_query = raw_query[1:]
            query = raw_query.strip()
            if not query:
                speak("Please say the file name to search for.", engine)
                continue
            speak(f"Searching for files that match {query}", engine)
            print("Searching for:", query)
            results = search_files(query)
            if not results:
                speak("No files found.", engine)
                print("No results.")
            else:
                speak(f"I found {len(results)} files. First one is {os.path.basename(results[0])}", engine)
                print("\n".join(results[:10]))
            continue

        # Play music
        if "music" in command:
            ok, msg = play_random_music()
            speak(msg if ok else "Could not play music.", engine)
            print(msg)
            continue

        # Shutdown system (dangerous) — confirm first
        if "shutdown" in command or "restart system" in command:
            is_shutdown = "shutdown" in command
            action = "shutting down" if is_shutdown else "restarting"
            speak("You asked to perform a system operation. This requires confirmation.", engine)
            confirmed = confirm_action(recognizer, microphone, engine,
                                       prompt=f"Please say yes to confirm {action} the system.")
            if not confirmed:
                speak("Operation cancelled.", engine)
                continue

            speak("Performing the operation now.", engine)
            print("Confirmed. Executing shutdown/restart.")
            if os.name == "nt":
                argv = ["shutdown", "/s" if is_shutdown else "/r", "/t", "5"]
            elif is_shutdown:
                argv = ["sudo", "shutdown", "-h", "now"]
            else:
                argv = ["sudo", "reboot"]
            try:
                subprocess.Popen(argv)
            except OSError as exc:
                print("System command failed:", exc)
                speak("Could not run the system command.", engine)
            continue

        # Search web (fallback)
        if command.startswith(("search ", "google ")):
            # Drop only the leading verb; the rest is the query verbatim.
            query = command.partition(" ")[2].strip()
            url = f"https://www.google.com/search?q={quote_plus(query)}"
            webbrowser.open(url)
            speak(f"Searching the web for {query}", engine)
            print("Web search:", query)
            continue

        # Unknown command fallback
        speak("I did not understand that. Say help for suggestions.", engine)
        print("Unhandled command.")

if __name__ == "__main__":
    # Script entry point. Ctrl+C is the normal way to stop the listening
    # loop from the terminal, so report it with a short notice instead of a
    # traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("\nAssistant stopped by user.")

Python for Engineers

Blog Pages

Voice-Controlled Desktop Assistant

No comments:

Post a Comment