Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,40 @@ and then you can start as many instances as you want; each of them is isolated:
vix
```

### Remote control

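`vixd` can optionally accept prompts from Telegram or WhatsApp and reply with the
agent's answer. Remote control is disabled by default. To turn it on, set the
`features.remote_control` flag, set `remote_control.enabled` and a non-empty
`remote_control.cwd`, and enable at least one provider with a sender allowlist in
`~/.vix/settings.json`: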

```json
{
"features": { "remote_control": true },
"remote_control": {
"enabled": true,
"cwd": "/absolute/project/path",
"telegram": {
"enabled": true,
"bot_token": "<telegram bot token>",
"allowed_chat_ids": ["123456789"]
},
"whatsapp": {
"enabled": true,
"access_token": "<whatsapp cloud api token>",
"app_secret": "<meta app secret>",
"phone_number_id": "<phone number id>",
"verify_token": "<webhook verify token>",
"webhook_addr": "127.0.0.1:1340",
"allowed_contacts": ["15551234567"]
}
}
}
```

Telegram uses bot long-polling. WhatsApp exposes `GET/POST /whatsapp` on
`webhook_addr` for Cloud API webhook verification and messages. Only allowlisted
chat IDs/contacts can control vix.

<div align="center">

## Why is vix faster and cheaper in plan mode?
Expand Down
12 changes: 12 additions & 0 deletions cmd/vixd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,18 @@ func main() {
server.RegisterHandler(cmd, handler)
}, cred, ctx)
daemon.RegisterToolHandlers(server)
if config.RemoteControlEnabled() {
remoteCfg, err := daemon.LoadRemoteControlConfig()
if err != nil {
log.Printf("remote control: config load failed: %v", err)
} else if remoteCfg.Enabled {
if err := server.StartRemoteControl(ctx, remoteCfg); err != nil {
log.Printf("remote control: disabled: %v", err)
}
}
} else {
log.Printf("remote control: disabled (features.remote_control=false or VIX_DISABLE_REMOTE_CONTROL)")
}
if *webPort > 0 && !*noMissionControl {
go daemon.StartWebServer(ctx, server, *webPort)
}
Expand Down
10 changes: 10 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,16 @@ func HooksEnabled() bool {
return feature("hooks", true)
}

// RemoteControlEnabled reports whether the remote_control feature flag is on.
// The flag defaults to false because chat-service control requires explicit
// credentials and sender allowlists. Setting the VIX_DISABLE_REMOTE_CONTROL
// environment variable to "1" or "true" is an emergency kill switch: it forces
// the result to false without consulting the flag.
func RemoteControlEnabled() bool {
	switch os.Getenv("VIX_DISABLE_REMOTE_CONTROL") {
	case "1", "true":
		// Kill switch engaged; the feature flag is ignored.
		return false
	default:
		return feature("remote_control", false)
	}
}

// JobsMaxConcurrentRuns reads jobs.max_concurrent_runs from
// ~/.vix/settings.json. Returns 0 when absent/invalid, letting the scheduler
// apply its default.
Expand Down
22 changes: 21 additions & 1 deletion internal/config/defaults/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,27 @@
"read_claude_md": true,
"show_thinking": false,
"telemetry": true,
"tool_orchestrator": false
"tool_orchestrator": false,
"remote_control": false
},
"remote_control": {
"enabled": false,
"cwd": "",
"workflow": "",
"telegram": {
"enabled": false,
"bot_token": "",
"allowed_chat_ids": []
},
"whatsapp": {
"enabled": false,
"access_token": "",
"app_secret": "",
"phone_number_id": "",
"verify_token": "",
"webhook_addr": "127.0.0.1:1340",
"allowed_contacts": []
}
},
"elevenlabs": {
"agent_id": "agent_7501kqrztj1te17ssqz5wqpnvkf3",
Expand Down
251 changes: 251 additions & 0 deletions internal/daemon/remote_control.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
package daemon

import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"

"github.com/get-vix/vix/internal/config"
"github.com/get-vix/vix/internal/protocol"
)

// RemoteControlConfig configures opt-in remote control from chat services.
// LoadRemoteControlConfig decodes it from the "remote_control" object of
// settings.json.
type RemoteControlConfig struct {
	// Enabled gates the whole subsystem; StartRemoteControl does nothing when
	// it is false.
	Enabled bool `json:"enabled"`
	// CWD is the working directory handed to every remote session.
	// StartRemoteControl rejects a blank value.
	CWD string `json:"cwd"`
	// Workflow, when non-empty, makes each prompt start the named workflow
	// instead of being sent as plain session input.
	Workflow string `json:"workflow,omitempty"`
	// Telegram holds the Telegram provider settings.
	Telegram TelegramRemoteConfig `json:"telegram,omitempty"`
	// WhatsApp holds the WhatsApp provider settings.
	WhatsApp WhatsAppRemoteConfig `json:"whatsapp,omitempty"`
}

// TelegramRemoteConfig holds the settings of the Telegram remote-control
// provider (the "remote_control.telegram" object of settings.json).
type TelegramRemoteConfig struct {
	// Enabled turns the provider on; StartRemoteControl starts Telegram only
	// when it is set.
	Enabled bool `json:"enabled"`
	// BotToken is the Telegram bot token.
	BotToken string `json:"bot_token"`
	// AllowedChatIDs lists the chat IDs that may control vix.
	AllowedChatIDs []string `json:"allowed_chat_ids"`
	// PollIntervalMs is presumably the delay between polls in milliseconds;
	// its consumer is not visible here — TODO confirm against the Telegram
	// provider.
	PollIntervalMs int `json:"poll_interval_ms,omitempty"`
}

// WhatsAppRemoteConfig holds the settings of the WhatsApp remote-control
// provider (the "remote_control.whatsapp" object of settings.json).
type WhatsAppRemoteConfig struct {
	// Enabled turns the provider on; StartRemoteControl starts WhatsApp only
	// when it is set.
	Enabled bool `json:"enabled"`
	// AccessToken is the WhatsApp Cloud API token.
	AccessToken string `json:"access_token"`
	// AppSecret is the Meta app secret.
	AppSecret string `json:"app_secret"`
	// PhoneNumberID is the WhatsApp phone number ID.
	PhoneNumberID string `json:"phone_number_id"`
	// VerifyToken is the webhook verify token.
	VerifyToken string `json:"verify_token"`
	// WebhookAddr is the listen address of the webhook endpoint; the shipped
	// default settings use "127.0.0.1:1340".
	WebhookAddr string `json:"webhook_addr,omitempty"`
	// AllowedContacts lists the contacts that may control vix.
	AllowedContacts []string `json:"allowed_contacts"`
}

// remoteReplyFunc delivers text back to the sender of a remote message.
// handleMessage calls it exactly once per processed message.
type remoteReplyFunc func(ctx context.Context, text string) error

// remoteMessage is one inbound chat message in provider-neutral form, as
// consumed by handleMessage.
type remoteMessage struct {
	// Provider names the originating chat service; used in log lines.
	Provider string
	// SenderID identifies the sender within that provider; used in log lines.
	SenderID string
	// Text is the prompt; it is trimmed before use and ignored when blank.
	Text string
	// Reply sends the answer (or an error notice) back to the sender.
	Reply remoteReplyFunc
}

// remoteHTTPClient is the subset of *http.Client used for outbound provider
// calls. StartRemoteControl supplies http.DefaultClient; the interface leaves
// room for substituting another implementation.
type remoteHTTPClient interface {
	Do(*http.Request) (*http.Response, error)
}

// remoteControl bundles what the chat providers need at runtime. One value is
// created per StartRemoteControl call.
type remoteControl struct {
	// server runs the prompts (see runRemotePrompt).
	server *Server
	// cfg is the configuration StartRemoteControl was called with.
	cfg RemoteControlConfig
	// http performs outbound HTTP requests to the chat services.
	http remoteHTTPClient
}

// LoadRemoteControlConfig reads the "remote_control" object from settings.json
// in the vix home directory.
//
// A missing settings file is not an error: the zero RemoteControlConfig
// (remote control disabled) is returned with a nil error. Any other read
// failure, or a file that is not valid JSON, is returned wrapped so callers
// can still inspect the cause with errors.Is / errors.As.
func LoadRemoteControlConfig() (RemoteControlConfig, error) {
	settingsPath := filepath.Join(config.HomeVixDir(), "settings.json")
	data, err := os.ReadFile(settingsPath)
	if errors.Is(err, os.ErrNotExist) {
		return RemoteControlConfig{}, nil
	}
	if err != nil {
		// The *os.PathError from ReadFile already names the path.
		return RemoteControlConfig{}, fmt.Errorf("read settings: %w", err)
	}

	// Only the remote_control key is decoded; every other key is ignored.
	var settings struct {
		RemoteControl RemoteControlConfig `json:"remote_control"`
	}
	if err := json.Unmarshal(data, &settings); err != nil {
		// JSON errors carry no file name, so add it here.
		return RemoteControlConfig{}, fmt.Errorf("parse %s: %w", settingsPath, err)
	}
	return settings.RemoteControl, nil
}

// StartRemoteControl starts every chat provider enabled in cfg.
//
// It returns nil without doing anything when cfg.Enabled is false. It returns
// an error when cfg.CWD is blank, when no provider is enabled, or when a
// provider fails to start. Telegram is started before WhatsApp; if WhatsApp
// then fails, this function returns the error without stopping Telegram.
func (s *Server) StartRemoteControl(ctx context.Context, cfg RemoteControlConfig) error {
	switch {
	case !cfg.Enabled:
		return nil
	case strings.TrimSpace(cfg.CWD) == "":
		return fmt.Errorf("remote control: missing cwd")
	case !cfg.Telegram.Enabled && !cfg.WhatsApp.Enabled:
		return fmt.Errorf("remote control: enabled but no provider is enabled")
	}

	rc := &remoteControl{server: s, cfg: cfg, http: http.DefaultClient}
	if cfg.Telegram.Enabled {
		if err := rc.startTelegram(ctx); err != nil {
			return err
		}
	}
	if cfg.WhatsApp.Enabled {
		if err := rc.startWhatsApp(ctx); err != nil {
			return err
		}
	}
	return nil
}

// handleMessage runs one inbound chat message as a remote prompt and sends the
// outcome back through msg.Reply.
//
// A message whose text is blank after trimming is dropped silently. When the
// prompt fails, the error is logged and its text, prefixed with
// "vix remote control error: ", is sent to the sender in place of an answer.
// A failure to deliver the reply is only logged.
//
// NOTE(review): no allowlist check happens here; presumably each provider
// calls authorizedRemoteID before dispatching — verify against the providers.
func (rc *remoteControl) handleMessage(ctx context.Context, msg remoteMessage) {
	prompt := strings.TrimSpace(msg.Text)
	if len(prompt) == 0 {
		return
	}
	LogInfo("remote control: received %s message from %s", msg.Provider, msg.SenderID)

	answer, runErr := rc.server.runRemotePrompt(ctx, rc.cfg.CWD, rc.cfg.Workflow, prompt)
	if runErr != nil {
		LogError("remote control: %s", runErr)
		answer = "vix remote control error: " + runErr.Error()
	}

	sendErr := msg.Reply(ctx, answer)
	if sendErr == nil {
		return
	}
	LogError("remote control: reply to %s %s failed: %v", msg.Provider, msg.SenderID, sendErr)
}

// remoteSessionAutomaticPermissions returns the two automatic-permission flags
// that runRemotePrompt passes to NewSession. Both are always false.
func remoteSessionAutomaticPermissions() (autoWrite, autoDirs bool) {
	const denied = false
	return denied, denied
}

// runRemotePrompt executes prompt in a fresh, temporary session rooted at cwd
// and returns the concatenated text of the session's stream chunks.
//
// The session is registered in s.sessions for the duration of the call and is
// removed and cancelled on return. When workflow is non-empty the prompt is
// sent as a "session.workflow" command for that workflow; otherwise it is sent
// as "session.input".
//
// Nobody is present to answer the session, so interactive events are handled
// by remoteCommandForUnattendedEvent: confirmation requests are answered with
// a refusal, while user questions and plan proposals abort the run with an
// error.
//
// An "event.error" is reported as the returned error only when no text was
// streamed at all; otherwise the streamed text is returned with a nil error.
// Cancellation of ctx returns ctx.Err().
func (s *Server) runRemotePrompt(ctx context.Context, cwd, workflow, prompt string) (string, error) {
	id := generateSessionID()
	autoWrite, autoDirs := remoteSessionAutomaticPermissions()
	sess := NewSession(id, s, nil, s.model, cwd, "", false, autoWrite, autoDirs, true, ctx)
	sess.origin = "vix"
	sess.trigger = &protocol.TriggerInfo{Type: "remote_control", Ref: "remote"}
	sess.title = "Remote control - " + time.Now().Format(jobTitleTimeFormat)

	// Publish the session for the lifetime of this call.
	s.sessionMu.Lock()
	s.sessions[id] = sess
	s.sessionMu.Unlock()
	s.broadcastSessionsChanged()
	defer func() {
		s.sessionMu.Lock()
		delete(s.sessions, id)
		s.sessionMu.Unlock()
		sess.cancel()
		s.broadcastSessionsChanged()
	}()

	go sess.Run()

	var start protocol.SessionCommand
	if workflow == "" {
		payload, _ := json.Marshal(protocol.SessionInputData{Text: prompt})
		start = protocol.SessionCommand{Type: "session.input", Data: payload}
	} else {
		payload, _ := json.Marshal(protocol.SessionWorkflowData{Name: workflow, Text: prompt})
		start = protocol.SessionCommand{Type: "session.workflow", Data: payload}
	}
	if !sess.pushCommand(ctx, start) {
		return "", fmt.Errorf("session refused start command")
	}

	var (
		reply   strings.Builder
		failed  bool
		failure string
	)
	// settle turns the accumulated state into the final result: the last
	// reported error when nothing was streamed, the streamed text otherwise.
	settle := func() (string, error) {
		if failed && strings.TrimSpace(reply.String()) == "" {
			return "", errors.New(failure)
		}
		return reply.String(), nil
	}

	for {
		select {
		case <-ctx.Done():
			sess.persist()
			return "", ctx.Err()
		case <-sess.ctx.Done():
			// NOTE(review): unlike the other exits, this path does not persist
			// the session — confirm that is intended.
			return settle()
		case ev := <-sess.eventChan:
			switch ev.Type {
			case "event.agent_done":
				if _, err := settle(); err != nil {
					return "", err
				}
				sess.persist()
				return reply.String(), nil
			case "event.error":
				failed = true
				failure = decodeRemoteEvent[protocol.EventError](ev.Data).Message
			case "event.stream_chunk":
				reply.WriteString(decodeRemoteEvent[protocol.EventStreamChunk](ev.Data).Text)
			case "event.confirm_request", "event.user_question", "event.plan_proposed":
				answer, err := remoteCommandForUnattendedEvent(ev)
				if err != nil {
					sess.persist()
					return "", err
				}
				sess.pushCommand(ctx, answer)
			}
		}
	}
}

// remoteCommandForUnattendedEvent decides how a session with no interactive
// user responds to an event that normally needs one.
//
// A confirmation request is answered with a "session.confirm" command whose
// Approved field is false. A user question or a plan proposal cannot be
// answered unattended and yields an error, as does any other event type.
func remoteCommandForUnattendedEvent(ev protocol.SessionEvent) (protocol.SessionCommand, error) {
	var none protocol.SessionCommand

	if ev.Type == "event.confirm_request" {
		payload, _ := json.Marshal(protocol.SessionConfirmData{Approved: false})
		return protocol.SessionCommand{Type: "session.confirm", Data: payload}, nil
	}
	if ev.Type == "event.user_question" {
		return none, fmt.Errorf("remote control requires an interactive answer")
	}
	if ev.Type == "event.plan_proposed" {
		return none, fmt.Errorf("remote control requires interactive approval")
	}
	return none, fmt.Errorf("unsupported unattended event: %s", ev.Type)
}

// decodeRemoteEvent converts an event payload of unknown shape into T by
// round-tripping it through JSON. It is best-effort: when data cannot be
// marshalled, or the JSON does not fit T, the errors are dropped and whatever
// was decoded so far (the zero value of T in the worst case) is returned.
func decodeRemoteEvent[T any](data any) T {
	var decoded T
	if encoded, err := json.Marshal(data); err == nil {
		// Decoding errors are deliberately ignored; see the function comment.
		_ = json.Unmarshal(encoded, &decoded)
	}
	return decoded
}

// authorizedRemoteID reports whether the sender id is on the allowed list.
//
// Allowlist entries are compared after trimming surrounding whitespace; id is
// compared exactly as received. The check fails closed: an empty or nil list
// authorizes nobody, and a blank id is never authorized — not even when the
// list contains a blank entry such as an unfilled "" placeholder, which would
// otherwise match any message that arrives without a sender ID.
func authorizedRemoteID(id string, allowed []string) bool {
	if strings.TrimSpace(id) == "" {
		return false
	}
	for _, entry := range allowed {
		if strings.TrimSpace(entry) == id {
			return true
		}
	}
	return false
}

// postForm sends form to endpoint as an application/x-www-form-urlencoded POST
// using hc, bound to ctx.
//
// It returns nil for any 2xx response. For any other status it returns an
// error containing the status line and up to 4096 bytes of the response body.
// Request-construction and transport errors are returned unchanged.
//
// NOTE(review): when hc is an *http.Client, transport errors are *url.Error
// values whose text includes the request URL. If endpoint embeds a credential
// (for example a bot token in the path), it will appear wherever the error is
// logged — confirm against the callers.
func postForm(ctx context.Context, hc remoteHTTPClient, endpoint string, form url.Values) error {
	// Upper bound on how much unread body is consumed before closing.
	const drainLimit = 64 << 10

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(form.Encode()))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	resp, err := hc.Do(req)
	if err != nil {
		return err
	}
	defer func() {
		// Read the rest of the body (bounded) before closing so the transport
		// can reuse the connection; the outcome of the drain is irrelevant.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, drainLimit))
		_ = resp.Body.Close()
	}()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best-effort read: a truncated or unreadable body still yields a
		// useful error through the status line.
		b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return fmt.Errorf("remote provider returned %s: %s", resp.Status, strings.TrimSpace(string(b)))
	}
	return nil
}
Loading