Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
99ac125
chore: gitignore per-machine device-state cache + Appium client deps
IrenaRistova May 12, 2026
5d52d8c
core: AndroidRunner engine + BatteryManager plugin adjustments for th…
IrenaRistova May 12, 2026
ff86583
matrix: APK provenance + tracking-matrix updater + 3-window energy split
IrenaRistova May 12, 2026
3872cc3
hooks: crash/ANR detection so failed runs are isolated, not silently …
IrenaRistova May 12, 2026
0c14f3b
hooks: device-state controls + per-run discharge-validity snapshot (E…
IrenaRistova May 12, 2026
c4d3cb7
hooks: BATTERY_STATS auto-grant + before_experiment hook-chain assemb…
IrenaRistova May 12, 2026
dd5f562
hooks: per-app uninstall hooks for metronome / tipuous / repertoire /…
IrenaRistova May 12, 2026
9b28eb5
appium: generic dispatcher + per-app wrappers + monkey-only fallback …
IrenaRistova May 12, 2026
b1dd09f
aux: postprocess CPU + memory samples from AndroidRunner's android pr…
IrenaRistova May 12, 2026
420a34d
configs: per-device templates + per-(app, variant, device) experiment…
IrenaRistova May 12, 2026
cea2b3d
docs: experiment / template / hook / Espresso-mirror validation READMEs
IrenaRistova May 12, 2026
63b3d32
configs: add Pixel 3 sibling for the v2.1.1 Bangcle-packed Metronome …
IrenaRistova May 12, 2026
22c1631
configs: Documenter + LinkHub + tipuous Bangcle-on-Pixel-9 smoke configs
IrenaRistova May 12, 2026
8f1f870
configs: update app tagging for accurate tracking in after_experiment.py
IrenaRistova May 14, 2026
01dab94
hooks + configs for 7 new per-app modules + Option D bangcle pixel9 c…
IrenaRistova May 15, 2026
8ea1f7d
configs: add diaguard stub-Bangcle configuration for Pixel 9
IrenaRistova May 15, 2026
8bc577c
Add baseline and Bangcle configurations for various applications on P…
IrenaRistova May 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,6 @@ AndroidRunner/Plugins/monsoon/script/monsoon_config.csv

# ignore any outputs of experiments
output/

# Per-machine device-state capability cache (E0.T8) — keyed by adb serial
examples/batterymanager/Scripts/.device_state_capabilities/
16 changes: 13 additions & 3 deletions AndroidRunner/NativeExperiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,15 @@ def __init__(self, config, progress, restart):
self.autostart_subject = config.get('autostart_subject', True)
self.experiment_args = config.get('experiment_args', [0]) # Just a single argument, if none are specified
super(NativeExperiment, self).__init__(config, progress, restart)
# If True, the interaction script blocks for the full ``duration`` window itself (e.g. sysfs loop,
# or Appium subprocess hold — see interaction_appium_metronome.py). Skip the extra sleep below so
# the profiled window is not doubled.
self.interaction_covers_duration = bool(config.get('interaction_covers_duration', False))
self.pre_installed_apps = config.get('apps', [])
# When installing from ``paths``, the runner otherwise derives the package name from the APK
# filename (splitext basename). Obfuscated or packed builds often use unrelated filenames;
# set ``application_id`` to the manifest packageName (e.g. com.bobek.metronome).
self.application_id = config.get('application_id')
for apk in config.get('paths', []):
if not op.isfile(apk):
raise ConfigError('File %s not found' % apk)
Expand All @@ -33,9 +41,10 @@ def before_run_subject(self, device, path, *args, **kwargs):
else:
filename = op.basename(path)
self.logger.info('APK: %s' % filename)
if filename not in device.get_app_list():
pkg = self.application_id or op.splitext(filename)[0]
if pkg not in device.get_app_list():
device.install(path)
self.package = op.splitext(op.basename(path))[0]
self.package = pkg

def get_run_count(self):
return self.repetitions * len(self.experiment_args)
Expand All @@ -53,7 +62,8 @@ def start_profiling(self, device, path, run, *args, **kwargs):

def interaction(self, device, path, run, *args, **kwargs):
super(NativeExperiment, self).interaction(device, path, run, *args, **kwargs)
time.sleep(self.duration)
if not self.interaction_covers_duration:
time.sleep(self.duration)

def after_run(self, device, path, run, *args, **kwargs):
self.before_close(device, path, run)
Expand Down
38 changes: 23 additions & 15 deletions AndroidRunner/Plugins/android/Android.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,30 @@ def start_profiling(self, device, **kwargs):
def get_data(self, device, app):
"""Runs the profiling methods every self.interval seconds in a separate thread"""
self.lock.acquire()
if not self.profile:
try:
if not self.profile:
return
start = timeit.default_timer()
try:
device_time = device.shell('date -u')
row = [device_time]
if 'cpu' in self.data_points:
row.append(self.get_cpu_usage(device))
if 'mem' in self.data_points:
row.append(self.get_mem_usage(device, app))
self.data.append(row)
except Exception:
# Transient errors (e.g. "No process found for <pkg>" during a brief
# terminate_app+activate_app recovery in the interaction script) must
# NOT leak the lock — otherwise stop_profiling() will deadlock and
# teardown hangs forever. Swallow and let the next Timer retry.
pass
end = timeit.default_timer()
interval = max(float(0), self.interval - max(0, int(end - start)))
finally:
self.lock.release()
return
start = timeit.default_timer()
device_time = device.shell('date -u')
row = [device_time]
if 'cpu' in self.data_points:
row.append(self.get_cpu_usage(device))
if 'mem' in self.data_points:
row.append(self.get_mem_usage(device, app))
self.data.append(row)
end = timeit.default_timer()
# timer results could be negative
interval = max(float(0), self.interval - max(0, int(end - start)))
self.lock.release()
threading.Timer(interval, self.get_data, args=(device, app)).start()
if self.profile:
threading.Timer(interval, self.get_data, args=(device, app)).start()

def stop_profiling(self, device, **kwargs):
self.lock.acquire()
Expand Down
9 changes: 7 additions & 2 deletions AndroidRunner/Plugins/batterymanager/Batterymanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,12 @@ def calculate_power(df):

@staticmethod
def trapezoid_method(df):
return np.trapz(df['power'].values, df['Timestamp'].values)
# NumPy 2.0+ renamed trapz to trapezoid; keep both for venvs on 1.x and 2.x
y, x = df['power'].values, df['Timestamp'].values
trapezoid = getattr(np, "trapezoid", None) or getattr(np, "trapz", None)
if trapezoid is None:
raise RuntimeError("NumPy trapezoidal integration API not found")
return trapezoid(y, x)

@staticmethod
def aggregate_batterymanager_runs(logs_dir):
Expand All @@ -211,7 +216,7 @@ def aggregate_batterymanager_runs(logs_dir):

runs = pd.concat([runs, pd.DataFrame(stats, index=[0])], ignore_index=True)

runs = runs.drop(columns=['Timestamp', 'power'], axis=1)
runs = runs.drop(columns=['Timestamp', 'power'], errors='ignore')
return runs

@staticmethod
Expand Down
5 changes: 4 additions & 1 deletion devices.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,8 @@
"GalaxyJ7-W": "192.168.0.106:5555",

"Nexus 4": "emulator-5554",
"Pixel 6": "emulator-5554"
"Pixel 3": "89WX0HWVF",
"Pixel 6": "18131FDF6002S9",
"Pixel 9": "56040DLAQ0027U",
"Pixel 9-W": "10.15.10.93:5555"
}
100 changes: 100 additions & 0 deletions examples/batterymanager/ESPRESSO_MIRROR_VALIDATION.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Espresso vs Appium “espresso_mirror” workload — validation matrix

This document supports **thesis / committee validation**: it states how closely the Android Runner
Appium workload (`APPIUM_WORKLOAD=espresso_mirror`, hook
`Scripts/interaction_appium_metronome_espresso_mirror.py`) relates to the **upstream** Metronome
**Espresso** suite.

**Source of truth (Espresso)**
- `app/src/androidTest/java/com/bobek/metronome/InstrumentedTest.kt`
- `app/src/androidTest/java/com/bobek/metronome/AbstractAndroidTest.kt` (shared `R.id` helpers, `applyTempo`, `verifyTempoMarking`)

**Appium implementation**
- `Scripts/interaction_appium_metronome.py` — function `run_espresso_mirror_workload` (and helpers such as `_type_numeric_edit_by_id`, `_ESPRESSO_TEMPO_MARKING_WALK`, UI pulse fillers)

---

## 1. Methodology difference (all scenarios)

| Aspect | Espresso (`androidTest`) | Appium (black-box) |
|--------|-------------------------|-------------------|
| **APK** | Typically debug + **test runner** on device/emulator | Same **installable** app id (`com.bobek.metronome`) as user experiments (debug / R8 / …) |
| **Selectors** | `withId(R.id.*)`, `SliderUtils.setValue`, Hamcrest | `find_element(ID, "package:id/…")`, `UiAutomator` text/description for pulse |
| **Tempo / beats / subdivisions** | Often **slider** `setValue` then **assert** edit + slider | We **type into `*_edit` fields** (equivalent end-state if UI sync matches Espresso’s coupling tests; **not** identical gestures for energy) |
| **Assertions** | Strict (`matches(withText(…))`, `displaysError()`, slider `withValue`) | **Soft / logging**: booleans in `appium_workload_coverage.jsonl`, optional substring checks on `tempo_marking_text` |
| **Error tests** | Assert `TextInputLayout` shows error | **“Touch only”**: enter invalid text then restore — **does not** assert error drawable/state |

**Validation claim:** The Appium suite is **scenario-aligned** (same screens and IDs, same numeric journeys where possible), **not** a step-for-step replay of Espresso gestures or assertions.

---

## 2. Scenario-by-scenario comparison

### Legend

- **Close** — Same logical steps and views; gesture path may differ (edit vs slider).
- **Partial** — Subset of Espresso steps, or substring check instead of exact `withText(R.string.…)`.
- **Touch-only** — Same inputs as Espresso’s “bad value” phase; **no** Espresso-style error UI assertion.
- **Not implemented** — No dedicated Appium scenario (may overlap indirectly).

| Espresso `@Test` (InstrumentedTest) | Appium `scenario_name` | Relationship | Notes |
|------------------------------------|-------------------------|--------------|--------|
| `contentVisible` | *(none)* | **Not implemented** | Espresso checks `loading_indicator` gone + `content` visible. Appium uses `await_app_ready` (FAB / strings), not the same view IDs. |
| `initialState` | `initialState` | **Partial / Close** | Espresso: sliders → 4, 1, `applyTempo(80)` + **many** `check()` on sliders/edits/markings. Appium: types `4`, `1`, `80` into edits + checks **Andante** substring on `tempo_marking_text`. Does **not** assert slider `withValue`. |
| `beatsSliderAndEditReflectEachOther` | `beatsReflect` | **Close** | Espresso: slider 1 → edit shows `1` → `replaceText("2")` → slider `withValue(2)`. Appium: `beats_edit` `1` → `2` only (skips explicit slider drag). |
| `subdivisionsSliderAndEditReflectEachOther` | `subdivisionsReflect` | **Close** | Same pattern as beats; edit-only path. |
| `tempoSliderAndEditReflectEachOther` | `tempoReflect` | **Close** | Espresso: slider 30 → edit `30` → `replaceText("40")`. Appium: `tempo_edit` `30` → `40`. |
| `tempoMarkings` | `tempoMarkingsWalk` | **Partial** | Espresso: **18** `applyTempo` / `verifyTempoMarking` pairs (exact string resource match). Appium: **8** tempo checkpoints with **English** substring defaults (`_ESPRESSO_TEMPO_MARKING_WALK`). Missing intermediate tempos (e.g. 59, 65, 75, 107, 119, 167, 169, 252). Env vars `METRONOME_TEMPO_MARKING_*_SUBSTR` for locale. |
| `beatsErrorWhenValueTooBig` | `invalidBeatsReset` | **Touch-only** | Espresso: slider 1, type `9`, **assert** `beats_edit_layout` error + slider unchanged. Appium: `1` → `9` → restore `4`; **no** layout error assertion. |
| `beatsErrorWhenValueNotANumber` | `beatsErrorNonNumericTouch` | **Touch-only** | Espresso: type `.`, assert error. Appium: `1` → `.` → restore `4`. |
| `subdivisionsErrorWhenValueTooBig` | `subdivisionsErrorTooBigTouch` | **Touch-only** | Espresso: subdivisions `5` invalid from base 1. Appium: `1` → `5` → restore `2` (same restore shape as our suite, not necessarily Espresso’s implied “2”). |
| `subdivisionsErrorWhenValueNotANumber` | *(none)* | **Not implemented** | Could add mirror of `.` + restore if needed. |
| `tempoErrorWhenValueTooBig` | `tempoErrorTooBigTouch` | **Touch-only** | Espresso: tempo 30, `253`, assert error on layout. Appium: `30` → `253` → restore `80`. |
| `tempoErrorWhenValueNotANumber` | *(none)* | **Not implemented** | Same gap as subdivisions non-number for tempo field. |

---

## 3. What Appium adds that Espresso does not define as `@Test`

| Mechanism | Purpose |
|-----------|---------|
| **`_espresso_mirror_ui_pulse_until_near_deadline`** | Keeps **continuous UI** (FAB, tempo ±, tick viz, tap tempo, swipes) between suite rounds until ~8s before workload deadline — fills the `duration` window configured in the experiment JSON with interaction. |
| **`_espresso_mirror_ui_pulse_final_gap`** | Covers the final gap — roughly the last second(s) — before the workload deadline. |
| **Baseline workload** (`APPIUM_WORKLOAD=metronome`) | Separate longer structured tap loop — **not** Espresso-mapped; documented elsewhere. |

These mechanisms exist to **saturate the energy-measurement window with workload**; they are not claims of Espresso parity.

---

## 4. Summary counts (for validation slides)

| Category | Count |
|----------|-------|
| Espresso `@Test` methods in `InstrumentedTest` | **13** |
| Named Appium espresso_mirror scenarios per suite round | **9** |
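| Espresso tests with a **dedicated** Appium analogue (full / partial / touch-only) | **9** of the **12** tests enumerated in §2 (`contentVisible`, `subdivisionsErrorWhenValueNotANumber`, `tempoErrorWhenValueNotANumber` have **no** dedicated scenario). *Note: the row above states 13 `@Test` methods, but §2 lists only 12 — reconcile both figures against `InstrumentedTest.kt`.* |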
| `tempoMarkings` | Espresso **18** checkpoints → Appium **8** tempo stops in `_ESPRESSO_TEMPO_MARKING_WALK` (subset) |

---

## 5. Suggested wording for a thesis / defence

> We automated the **same Metronome UI surfaces** addressed in `InstrumentedTest`, using **resource IDs**
> and **edit fields** on the **same APK** used in energy experiments. Where Espresso asserts internal
> slider positions and `TextInputLayout` errors, our black-box driver records **success booleans** and
> **optional** marking substrings. **Gesture paths** differ (typing edits vs `SliderUtils.setValue`)
> but align with the app’s **slider–edit coupling** tests as behavioural intent. **Continuous tap/swipe**
> fillers occupy remaining experiment time without claiming equivalence to a specific Espresso `@Test`.

---

## 6. References (paths in this repo workspace)

- Espresso:
`app_repositories_newest/app_repositories/Kr0oked_Metronome/app/src/androidTest/java/com/bobek/metronome/InstrumentedTest.kt`
- Appium mirror:
`android-runner/examples/batterymanager/Scripts/interaction_appium_metronome.py` (`run_espresso_mirror_workload`)
- Thin hook:
`android-runner/examples/batterymanager/Scripts/interaction_appium_metronome_espresso_mirror.py`

*Generated for validation; update this file if scenarios or `_ESPRESSO_TEMPO_MARKING_WALK` change.*
Loading