halp/local_agent_ui.py
#!/usr/bin/env python
import customtkinter as ctk
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import subprocess
import threading
import sys
import os
from pynput import keyboard
# --- Configuration ---
MODEL_ID = "01-ai/Yi-Coder-1.5B-Chat"
HOTKEY = keyboard.KeyCode.from_char('`')
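# The toggle hotkey is the backtick (`) key. Any pynput key object works here;
# for example, HOTKEY = keyboard.Key.f9 would move the toggle to F9 (shown only
# as an illustrative alternative).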
# --- 1. AI Model Loading (happens once at startup, before the UI is created) ---
print("Loading model... This may take a moment.")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # place weights on the GPU when available, otherwise CPU
)
print("Model loaded successfully.")
# --- 2. Tool: Shell Command Executor ---
def execute_shell_command(command: str, working_dir: str):
    """Run a shell command in working_dir and return stdout/stderr/returncode as a dict."""
    try:
        # Use shell=True for better compatibility with Windows built-in commands
        # and to handle complex commands without shlex.
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            shell=True,
            cwd=working_dir,
        )
        return {
            "stdout": result.stdout,
            "stderr": result.stderr,
            "returncode": result.returncode,
        }
    except Exception as e:
        return {"stdout": "", "stderr": str(e), "returncode": 1}
# --- 3. The Main Application Window ---
class ChatWindow(ctk.CTk):
    def __init__(self, working_dir):
        super().__init__()
        self.working_dir = working_dir
        self.title(f"Local AI Assistant - CWD: {self.working_dir}")
        self.geometry("700x500")
        self.grid_columnconfigure(0, weight=1)
        self.grid_rowconfigure(0, weight=1)
        # Output Textbox
        self.output_textbox = ctk.CTkTextbox(self, state="disabled", wrap="word")
        self.output_textbox.grid(row=0, column=0, padx=10, pady=10, sticky="nsew")
        # Input Entry
        self.input_entry = ctk.CTkEntry(self, placeholder_text="Type your task here and press Enter...")
        self.input_entry.grid(row=1, column=0, padx=10, pady=10, sticky="ew")
        self.input_entry.bind("<Return>", self.start_agent_task)
        self.is_minimized = True  # Start hidden
        self.withdraw()

    def add_message(self, message_type: str, content: str):
        # Callers schedule this via self.after() so UI updates happen on the Tk main thread
        self.output_textbox.configure(state="normal")
        self.output_textbox.insert("end", f"[{message_type}]\n{content}\n\n")
        self.output_textbox.configure(state="disabled")
        self.output_textbox.see("end")

    def toggle_visibility(self):
        if self.is_minimized:
            self.deiconify()  # Show the window
            self.attributes('-topmost', 1)  # Bring to front
            self.focus()
            self.attributes('-topmost', 0)
        else:
            self.withdraw()  # Hide the window
        self.is_minimized = not self.is_minimized

    def start_agent_task(self, event=None):
        task = self.input_entry.get()
        if not task:
            return
        self.input_entry.delete(0, "end")
        self.after(0, self.add_message, "User", task)
        # Run the agent in a separate daemon thread to avoid freezing the UI
        agent_thread = threading.Thread(target=self.run_agent, args=(task,), daemon=True)
        agent_thread.start()

    def run_agent(self, task: str):
        system_prompt = f"""You are a helpful AI assistant that executes shell commands on Windows in the directory '{self.working_dir}'.
You can use the `execute_shell_command(command)` function.
Based on the output, decide the next step.
When finished, respond with "DONE" and a summary.
Example:
User: List all files in the current folder.
Assistant: I need to list files. I will use the `dir` command.
<execute_shell_command>dir</execute_shell_command>
<observation>
{{"stdout": " Volume in drive C is OS...", "stderr": "", "returncode": 0}}
</observation>
I have listed the files.
DONE: I have listed the files and folders in the current directory."""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": task},
        ]
        for _ in range(5):  # Limit steps to prevent loops
            self.after(0, self.add_message, "Agent", "Thinking...")
            input_ids = tokenizer.apply_chat_template(
                conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt'
            )
            output_ids = model.generate(input_ids.to(model.device), max_new_tokens=200, pad_token_id=tokenizer.eos_token_id)
            response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
            self.after(0, self.add_message, "Agent Thought", response)
            if "DONE" in response:
                break
            if "<execute_shell_command>" in response and "</execute_shell_command>" in response:
                command = response.split("<execute_shell_command>")[1].split("</execute_shell_command>")[0].strip()
                self.after(0, self.add_message, "Executing", command)
                result = execute_shell_command(command, self.working_dir)
                observation_text = f'STDOUT:\n{result["stdout"]}\nSTDERR:\n{result["stderr"]}\nRETURN CODE: {result["returncode"]}'
                self.after(0, self.add_message, "Observation", observation_text)
                messages.append({"role": "assistant", "content": response})
                messages.append({"role": "user", "content": f"<observation>\n{str(result)}\n</observation>"})
            else:
                self.after(0, self.add_message, "Agent", "Could not determine a command. Stopping.")
                break
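# A possible hardening of the tag parsing in run_agent() (sketch only, not wired in):
# a regex keeps malformed or nested tags from slipping through the naive
# split()-based extraction above, e.g.
#   import re
#   match = re.search(r"<execute_shell_command>(.*?)</execute_shell_command>", response, re.DOTALL)
#   command = match.group(1).strip() if match else None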
# --- 4. Main Execution Logic ---
def main():
    # Set the current working directory.
    # If launched from the context menu, sys.argv[1] will be the folder path.
    if len(sys.argv) > 1:
        current_dir = sys.argv[1]
        try:
            os.chdir(current_dir)
        except Exception as e:
            print(f"Failed to change directory to {current_dir}: {e}")
            current_dir = os.getcwd()  # Fall back to the directory the script was launched from
    else:
        current_dir = os.getcwd()
    print(f"Application starting in directory: {current_dir}")
    app = ChatWindow(working_dir=current_dir)

    def on_press(key):
        if key == HOTKEY:
            # pynput callbacks run on the listener thread; tkinter is not
            # thread-safe, so marshal the call back onto the Tk main loop.
            app.after(0, app.toggle_visibility)

    listener = keyboard.Listener(on_press=on_press)
    listener.start()
    app.mainloop()
    listener.stop()


if __name__ == "__main__":
    main()
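# Illustrative launch examples (the folder path is a placeholder):
#   python local_agent_ui.py "C:\Projects\my_repo"   # start in a specific folder
#   python local_agent_ui.py                         # start in the current directory
# Once running, press the backtick (`) key to show or hide the chat window.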