Skip to content

Commit fd492ae

Browse files
Merge pull request #3179 from Alibehram11/master
Revise README for Jarvis with detailed features and setup
2 parents 89f8daa + d0cab73 commit fd492ae

15 files changed

Lines changed: 932 additions & 28 deletions

JARVIS/README.md

Lines changed: 74 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,74 @@
1-
# JARVIS
2-
patch-5<br>
3-
It can Control windows programs with your voice.<br>
4-
What can it do:
5-
1. It can tell you time.<br/>
6-
2. It can open, These of the following:-<br/>a.) Notepad<br/>
7-
b.) Calculator<br/>
8-
c.) Sticky Note<br/>
9-
d.) PowerShell<br/>
10-
e.) MS Paint<br/>
11-
f.) cmd<br/>
12-
g.) Browser (Internet Explorer)<br/>
13-
14-
It will make your experience better while using the Windows computer.
15-
===========================================================================
16-
It demonstrates Controlling windows programs with your voice.
1+
# Jarvis Local Desktop Assistant
2+
3+
Jarvis is a local voice-first desktop assistant for Windows. It talks through the terminal, uses LM Studio on `localhost`, and can safely open apps, close visible windows, open websites, and search Google.
4+
5+
## Features
6+
7+
- Voice-first terminal assistant with optional type mode.
8+
- Understands Turkish or English input and answers in English.
9+
- Developer mode for prompts, raw model output, and token usage when LM Studio reports it.
10+
- Safe app launcher using Start Menu/Desktop/Programs indexes.
11+
- Explicit memory only: Jarvis remembers notes only when you say `remember that ...`.
12+
- Extra tools: `/help` and `/apps <name>` in type mode.
13+
14+
## Safety
15+
16+
Jarvis does not run arbitrary shell commands from the model. AI output is restricted to safe actions like:
17+
18+
- `open_app:<name>`
19+
- `open_web:<site-or-url>`
20+
- `search_google:<query>`
21+
- `close_app:<window-name>`
22+
- `open_cmd`
23+
- `chat`
24+
25+
Blocked intents include install, uninstall, delete, remove, update, download, edit, modify, registry, PowerShell, terminal, scripts, screenshots, recordings, and email.
26+
27+
## Setup
28+
29+
1. Start LM Studio.
30+
2. Load `google/gemma-3-4b`.
31+
3. Start the LM Studio local server at:
32+
33+
```text
34+
http://localhost:1234/v1
35+
```
36+
37+
4. Install Python dependencies if needed:
38+
39+
```powershell
40+
python -m pip install -r requirements.txt
41+
```
42+
43+
## Run
44+
45+
Voice mode:
46+
47+
```powershell
48+
python .\jarvis.py
49+
```
50+
51+
Type mode:
52+
53+
```powershell
54+
python .\jarvis.py --type
55+
```
56+
57+
Or double-click:
58+
59+
```text
60+
start_jarvis_agent.bat
61+
```
62+
63+
## Useful Commands
64+
65+
- `developer mode` or `development mode` - show prompts/raw outputs/token usage.
66+
- `normal mode` - hide developer details.
67+
- `remember that my favorite editor is VS Code` - save an explicit memory note.
68+
- `clear memory` - clear saved notes.
69+
- `/apps code` - list matching indexed apps in type mode.
70+
- `/help` - show command help.
71+
72+
## Notes
73+
74+
The microphone feature uses Google speech recognition through `SpeechRecognition`. This is the one privacy tradeoff in the current version. LM Studio model calls stay on localhost.

JARVIS/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"""Jarvis local desktop assistant."""
2+

JARVIS/actions.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import importlib
2+
import os
3+
import webbrowser
4+
from urllib.parse import quote_plus, urlparse
5+
6+
from .ai import ask_model, classify_action
7+
from .apps import find_application
8+
from .config import CMD_OPEN_PHRASES, KNOWN_SITES
9+
from .memory import remember_note
10+
from .safety import BLOCKED_APPS, is_dangerous_request
11+
from .text_utils import normalize_text
12+
13+
try:
14+
win32con = importlib.import_module("win32con")
15+
win32gui = importlib.import_module("win32gui")
16+
except ImportError:
17+
win32con = None
18+
win32gui = None
19+
20+
21+
def open_application(name):
    """Launch an application resolved from a user-supplied name.

    The name is looked up with ``find_application``; only the path returned
    by that lookup is ever started.

    Args:
        name: Application name as requested by the user.

    Returns:
        A short status sentence describing what happened.
    """
    app = find_application(name)
    if not app:
        return f"I could not find a safe installed app named {name}."
    try:
        # os.startfile raises OSError when the target cannot be opened;
        # report that to the user instead of crashing the assistant loop.
        os.startfile(app["path"])
    except OSError:
        return f"I could not open {app['name']}."
    return f"Opening {app['name']}."
28+
29+
30+
def close_visible_window(name):
    """Post a close request to the first visible window matching *name*.

    A window matches when the normalized *name* is a substring of its
    normalized title. The window is sent ``WM_CLOSE`` rather than being
    terminated.

    Args:
        name: Window or application name requested by the user.

    Returns:
        A short status sentence describing what happened.
    """
    if win32con is None or win32gui is None:
        return "Close is not available because pywin32 is missing."

    needle = normalize_text(name)
    if not needle or needle in BLOCKED_APPS:
        return "That close request is blocked for safety."

    found = []

    def collect(handle, _extra):
        # Only windows that are currently visible are candidates.
        if win32gui.IsWindowVisible(handle):
            caption = win32gui.GetWindowText(handle)
            if needle in normalize_text(caption):
                found.append((handle, caption))

    win32gui.EnumWindows(collect, None)
    if not found:
        return f"I could not find an open window matching {name}."

    handle, caption = found[0]
    win32gui.PostMessage(handle, win32con.WM_CLOSE, 0, 0)
    return f"Closing {caption or name}."
51+
52+
53+
def is_safe_url(url):
    """Return True when *url* is an absolute http(s) URL with a host part.

    Args:
        url: Candidate URL string.

    Returns:
        True if the scheme is ``http`` or ``https`` and a network location
        is present; False otherwise, including for input that cannot be
        parsed at all.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. an unbalanced "[" in
        # the host); an unparseable URL is by definition not safe.
        return False
    return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
56+
57+
58+
def open_web_target(target):
    """Open a known site or a web address in the default browser.

    The normalized target is first looked up in ``KNOWN_SITES``. Otherwise,
    a target containing a dot is treated as an address: it is used as-is when
    it already carries a scheme, and prefixed with ``https://`` when it does
    not. The final URL must pass ``is_safe_url`` before it is opened.

    Args:
        target: Site name or address requested by the user or the model.

    Returns:
        A short status sentence describing what happened.
    """
    cleaned = normalize_text(target)
    url = KNOWN_SITES.get(cleaned)
    if not url and "." in cleaned:
        address = target.strip()
        # URL schemes are case-insensitive, and an address that already has
        # some other scheme must not get "https://" glued in front of it:
        # that would turn e.g. "ftp://host" into a bogus but "safe-looking"
        # URL. Pass it through unchanged so is_safe_url can reject it.
        if "://" in address or address.lower().startswith(("http://", "https://")):
            url = address
        else:
            url = f"https://{address}"
    if not url or not is_safe_url(url):
        return "I can only open safe web addresses."
    webbrowser.open(url)
    return f"Opening {url}."
67+
68+
69+
def run_action(action):
    """Execute one action string and return the spoken result.

    Recognized forms are ``open_cmd``, ``blocked``, ``open_web:<target>``,
    ``search_google:<query>``, ``open_app:<name>``, ``close_app:<name>`` and
    ``chat``. Matching on the action keyword is case-insensitive.

    Args:
        action: Action string, or a falsy value for "nothing to do".

    Returns:
        A status sentence, or an empty string when no action was performed
        (including ``chat`` and any unrecognized action).
    """
    if not action:
        return ""

    action = action.strip()
    command = action.lower()

    if command == "open_cmd":
        os.startfile("cmd.exe")
        return "Opening Command Prompt."

    if command == "blocked" or is_dangerous_request(action):
        return "I cannot do that for safety."

    # Everything after the first colon is the argument of the action.
    _keyword, _colon, payload = action.partition(":")
    payload = payload.strip()

    if command.startswith("open_web:"):
        return open_web_target(payload)

    if command.startswith("search_google:"):
        query = payload.strip("<> ")
        if not query or is_dangerous_request(query):
            return "I cannot search that for safety."
        webbrowser.open(f"https://www.google.com/search?q={quote_plus(query)}")
        return f"Searching Google for {query}."

    if command.startswith("open_app:"):
        return open_application(payload)

    if command.startswith("close_app:"):
        return close_visible_window(payload)

    # "chat" and anything unrecognized: no desktop action to perform.
    return ""
94+
95+
96+
def _strip_longest_prefix(text, prefixes):
    """Return *text* minus its longest matching prefix, or None if none match."""
    for prefix in sorted(prefixes, key=len, reverse=True):
        if text.startswith(prefix):
            return text.removeprefix(prefix).strip()
    return None


def _strip_longest_suffix(text, suffixes):
    """Return *text* minus its longest matching suffix, or None if none match."""
    for suffix in sorted(suffixes, key=len, reverse=True):
        if text.endswith(suffix):
            return text[: -len(suffix)].strip()
    return None


def rule_based_action(text):
    """Map a user utterance to an action string using fixed phrase rules.

    The text is normalized, then checked in this order: command-prompt
    phrases, dangerous requests, search phrases, close phrases, known sites,
    and finally open phrases (prefix form, then suffix form).

    Prefixes and suffixes are matched longest-first. Matching in list order
    let a short entry such as ``"close "`` or ``" ac"`` shadow a longer one
    such as ``"close the "`` or ``" i ac"``, leaving the filler word in the
    target.

    Args:
        text: Raw user input.

    Returns:
        An action string such as ``open_app:<name>``, or an empty string when
        no rule applies (or a rule matched with nothing after the keyword).
    """
    cleaned = normalize_text(text)
    if not cleaned:
        return ""
    if cleaned in CMD_OPEN_PHRASES:
        return "open_cmd"
    if is_dangerous_request(cleaned):
        return "blocked"

    search_prefixes = ["search for ", "google search ", "look up ", "find ", "ara ", "google da ara "]
    query = _strip_longest_prefix(cleaned, search_prefixes)
    if query is None:
        query = _strip_longest_suffix(cleaned, [" ara"])
    if query is not None:
        return f"search_google:{query}" if query else ""

    close_prefixes = ["close ", "kapat ", "close the ", "can you close "]
    app = _strip_longest_prefix(cleaned, close_prefixes)
    if app is None:
        app = _strip_longest_suffix(cleaned, [" kapat"])
    if app is not None:
        return f"close_app:{app}" if app else ""

    open_prefixes = ["open ", "launch ", "start ", "can you open ", "please open ", "ac ", "aç "]
    suffix_open_words = [" ac", " aç", " i ac", " i aç", " u ac", " u aç"]

    # Exact site phrases win over the generic open rules below.
    for site, url in KNOWN_SITES.items():
        if cleaned in {site, f"open {site}", f"{site} ac", f"{site} aç"}:
            return f"open_web:{url}"

    target = _strip_longest_prefix(cleaned, open_prefixes)
    if target is None:
        target = _strip_longest_suffix(cleaned, suffix_open_words)
    if target is None:
        return ""
    if target in KNOWN_SITES:
        return f"open_web:{target}"
    return f"open_app:{target}" if target else ""
141+
142+
143+
def handle_user_text(text):
    """Handle one user utterance and return the assistant's reply.

    Order of handling: an explicit ``remember that ...`` note, then a
    rule-based action, then a model-classified action, and finally a plain
    chat answer from the model when no action produced a reply.

    Args:
        text: Raw user input.

    Returns:
        The reply to show or speak to the user.
    """
    import re  # local import: only needed for the memory-note split below

    cleaned = normalize_text(text)
    if cleaned.startswith("remember that "):
        # The guard runs on the normalized text, so the raw text may differ
        # in case or spacing. A case-sensitive literal split on the raw text
        # raised IndexError for input such as "Remember that ...".
        parts = re.split(r"remember\s+that", text, maxsplit=1, flags=re.IGNORECASE)
        if len(parts) > 1:
            note = parts[1].strip()
        else:
            # Raw text does not contain the phrase verbatim; fall back to the
            # normalized form rather than failing.
            note = cleaned.removeprefix("remember that ").strip()
        return remember_note(note)

    action = rule_based_action(text) or classify_action(text)
    # Do not let a "blocked" verdict stand unless the safety check agrees;
    # otherwise treat the request as ordinary chat.
    if action.strip().lower() == "blocked" and not is_dangerous_request(text):
        action = "chat"
    answer = run_action(action)
    if answer:
        return answer
    return ask_model(text)

JARVIS/ai.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from openai import OpenAI, OpenAIError
2+
3+
from . import state
4+
from .config import MAX_OUTPUT_TOKENS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MODEL
5+
from .memory import memory_context
6+
from .prompts import ACTION_CLASSIFIER_PROMPT, ASSISTANT_PROMPT
7+
from .text_utils import clean_assistant_output
8+
9+
10+
def lm_client():
    """Create an ``OpenAI`` client from the configured API key and base URL."""
    settings = {"api_key": OPENAI_API_KEY, "base_url": OPENAI_BASE_URL}
    return OpenAI(**settings)
12+
13+
14+
def usage_dict(response):
    """Extract token-usage information from a model response as a dict.

    Args:
        response: Any object that may carry a ``usage`` attribute.

    Returns:
        ``{}`` when there is no usage; the result of ``usage.model_dump()``
        when that method exists; the usage object itself when it is a dict;
        otherwise a dict of whichever of ``input_tokens``, ``output_tokens``
        and ``total_tokens`` the usage object exposes.
    """
    usage = getattr(response, "usage", None)
    if usage is None:
        return {}
    if hasattr(usage, "model_dump"):
        return usage.model_dump()
    if isinstance(usage, dict):
        return usage

    token_fields = ("input_tokens", "output_tokens", "total_tokens")
    collected = {}
    for field in token_fields:
        if hasattr(usage, field):
            collected[field] = getattr(usage, field)
    return collected
27+
28+
29+
def debug_response(kind, prompt, response):
    """Send the model name, prompt, raw output and usage to ``state.debug``.

    Args:
        kind: Label prefix for the debug entries (e.g. ``"chat"``).
        prompt: The prompt that was sent to the model.
        response: The response object returned by the model call.
    """
    log = state.debug
    log(f"{kind} model", OPENAI_MODEL)
    log(f"{kind} prompt", prompt)

    raw_output = getattr(response, "output_text", "")
    log(f"{kind} raw output", raw_output)

    usage = usage_dict(response)
    log(f"{kind} usage", usage if usage else "not reported by server")
34+
35+
36+
def ask_model(text):
    """Ask the model for a conversational answer to *text*.

    The prompt is the assistant prompt, the saved memory context, an
    instruction to answer in English, and the user's text.

    Args:
        text: Raw user input.

    Returns:
        The cleaned model answer, or a fixed fallback sentence when the
        request raises ``OpenAIError``.
    """
    preamble = f"{ASSISTANT_PROMPT}\nSaved memory:\n{memory_context()}\n\n"
    prompt = f"{preamble}Answer in English.\nUser: {text}"

    try:
        response = lm_client().responses.create(
            model=OPENAI_MODEL,
            input=prompt,
            max_output_tokens=MAX_OUTPUT_TOKENS,
        )
    except OpenAIError as exc:
        state.debug("chat error", str(exc))
        return "I cannot reach LM Studio right now. Start the local server and try again."
    else:
        debug_response("chat", prompt, response)
        return clean_assistant_output(response.output_text)
54+
55+
56+
def classify_action(text):
    """Ask the model to classify *text* into a single action string.

    Args:
        text: Raw user input.

    Returns:
        The first line of the model output, stripped. Falls back to
        ``"chat"`` when the request raises ``OpenAIError`` or when the model
        returns no usable text.
    """
    prompt = f"{ACTION_CLASSIFIER_PROMPT}\nUser: {text}"
    try:
        response = lm_client().responses.create(model=OPENAI_MODEL, input=prompt, max_output_tokens=40)
    except OpenAIError as exc:
        state.debug("action error", str(exc))
        return "chat"
    debug_response("action", prompt, response)

    # An empty or whitespace-only reply has no first line; indexing [0] on
    # the empty list raised IndexError. Treat it like any other
    # unclassifiable input and fall back to chat.
    raw_output = getattr(response, "output_text", "") or ""
    lines = raw_output.strip().splitlines()
    if not lines:
        return "chat"
    return lines[0].strip()

0 commit comments

Comments
 (0)