Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ Oobee can perform the following to scan the target URL.

- To **run** Oobee in **terminal**, run `npm start`. Questions will be prompted to assist you in providing the right inputs.
- Results will be compiled in JSON format, followed by generating a HTML report.
- **NEW**: Automatic detection and neutralization of accessibility overlays (UserWay, accessiBe, etc.) for accurate baseline testing. See [OVERLAY-DETECTION.md](./OVERLAY-DETECTION.md) for details.

> NOTE: For your initial scan, there may be some loading time required before use. Oobee will also ask for your name and email address and collect your app usage data to personalise your experience. Your information fully complies with [GovTechs Privacy Policy](https://www.tech.gov.sg/privacy/).
> NOTE: For your initial scan, there may be some loading time required before use. Oobee will also ask for your name and email address and collect your app usage data to personalise your experience. Your information fully complies with [GovTech's Privacy Policy](https://www.tech.gov.sg/privacy/).

#### Delete/Edit Details

Expand Down
57 changes: 42 additions & 15 deletions src/constants/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,37 @@ const checkUrlConnectivityWithBrowser = async (
try {
const page = await browserContext.newPage();

// Apply stealth techniques to bypass bot detection.
// The callback is executed inside the page, not in Node, so it must only
// touch browser globals (navigator, window).
await page.addInitScript(() => {
  // Remove webdriver property
  Object.defineProperty(navigator, 'webdriver', {
    get: () => undefined,
  });

  // Override plugins to make it look real
  Object.defineProperty(navigator, 'plugins', {
    get: () => [1, 2, 3, 4, 5],
  });

  // Override languages
  Object.defineProperty(navigator, 'languages', {
    get: () => ['en-US', 'en'],
  });

  // Mock chrome object
  (window as any).chrome = {
    runtime: {},
  };

  // Override permissions.
  // Skip quietly when the Permissions API is not exposed, instead of throwing
  // inside the init script.
  const permissions = window.navigator.permissions;
  if (permissions && typeof permissions.query === 'function') {
    // Keep the original receiver: calling the native method unbound throws
    // "TypeError: Illegal invocation" for every non-notification query.
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (parameters: any) =>
      parameters.name === 'notifications'
        ? Promise.resolve({ state: 'denied' } as PermissionStatus)
        : originalQuery(parameters);
  }
});

// Block native Chrome download UI
try {
const cdp = await browserContext.newCDPSession(page as any);
Expand All @@ -401,24 +432,20 @@ const checkUrlConnectivityWithBrowser = async (
try {
await page.waitForLoadState('networkidle', { timeout: 8000 });
} catch {
consoleLogger.info('networkidle not reached; proceeding with verification GET');
consoleLogger.info('networkidle not reached; proceeding with page response');
}

// STEP 3: Verify final URL with a GET (follows redirects)
// STEP 3: Get final URL and status from the page navigation
// Note: We skip the verification GET because some sites block API requests
// but allow browser navigation (403 on fetch but 200 on browser navigation)
const finalUrl = page.url();
let verifyResp = response;
try {
verifyResp = await page.request.fetch(finalUrl, {
method: 'GET',
headers: extraHTTPHeaders,
});
} catch (e) {
consoleLogger.info(`Verification GET failed, falling back to navigation response: ${e.message}`);
}

// Prefer verification GET; fall back to nav response
const finalStatus = verifyResp?.status?.() ?? response?.status?.() ?? 0;
const headers = (verifyResp?.headers?.() ?? response?.headers?.()) || {};
const navigationStatus = response?.status?.() ?? 0;

consoleLogger.info(`Navigation to ${finalUrl} returned status: ${navigationStatus}`);

// Use navigation response directly
const finalStatus = navigationStatus;
const headers = response?.headers?.() || {};
contentType = headers['content-type'] || '';

if (!isAllowedContentType(contentType)) {
Expand Down
17 changes: 15 additions & 2 deletions src/constants/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -270,11 +270,24 @@ export const guiInfoStatusTypes = {
DUPLICATE: 'duplicate',
};

let launchOptionsArgs: string[] = [];
// Default browser launch arguments; reassigned below when a Docker container is detected.
let launchOptionsArgs: string[] = [
// Stealth options to bypass bot detection
'--disable-blink-features=AutomationControlled',
// NOTE(review): the next two switches appear to turn off Chromium site isolation,
// which is a security boundary rather than an automation signal — confirm they are
// actually required for the bot-detection bypass before keeping them.
'--disable-features=IsolateOrigins,site-per-process',
'--disable-site-isolation-trials',
];

// Check if running in docker container
if (fs.existsSync('/.dockerenv')) {
launchOptionsArgs = ['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage'];
// The Docker branch replaces the default argument list instead of extending it,
// so the stealth switches are repeated here and must be kept in sync with the
// default list by hand.
launchOptionsArgs = [
'--disable-gpu',
'--no-sandbox',
'--disable-dev-shm-usage',
// Keep stealth options in Docker too
'--disable-blink-features=AutomationControlled',
'--disable-features=IsolateOrigins,site-per-process',
'--disable-site-isolation-trials',
];
}

export const impactOrder = {
Expand Down
45 changes: 44 additions & 1 deletion src/crawlers/commonCrawlerFunc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import type { Response as PlaywrightResponse } from 'playwright';
import fs from 'fs';
import { getStoragePath } from '../utils.js';
import path from 'path';
import { detectOverlaysInDom, type OverlayDetection } from '../overlays/overlayDetector.js';
import { scrubOverlaysOnPage } from '../overlays/overlayNeutralizer.js';

// types
interface AxeResultsWithScreenshot extends AxeResults {
Expand Down Expand Up @@ -72,6 +74,7 @@ type FilteredResults = {
needsReview: ResultCategory;
passed: ResultCategory;
actualUrl?: string;
overlayDetections?: OverlayDetection[];
};

const truncateHtml = (html: string, maxBytes = 1024, suffix = '…'): string => {
Expand Down Expand Up @@ -102,6 +105,7 @@ export const filterAxeResults = (
results: AxeResultsWithScreenshot,
pageTitle: string,
customFlowDetails?: CustomFlowDetails,
overlayDetections?: OverlayDetection[],
): FilteredResults => {
const { violations, passes, incomplete, url } = results;

Expand Down Expand Up @@ -230,6 +234,7 @@ export const filterAxeResults = (
goodToFix,
needsReview,
passed,
...(overlayDetections && overlayDetections.length > 0 && { overlayDetections }),
};
};

Expand All @@ -240,13 +245,15 @@ export const runAxeScript = async ({
customFlowDetails = null,
selectors = [],
ruleset = [],
crawler = null,
}: {
includeScreenshots: boolean;
page: Page;
randomToken: string;
customFlowDetails?: CustomFlowDetails;
selectors?: string[];
ruleset?: RuleFlags[];
crawler?: any;
}) => {
const browserContext: BrowserContext = page.context();
const requestUrl = page.url();
Expand Down Expand Up @@ -328,6 +335,42 @@ export const runAxeScript = async ({
const disableOobee = ruleset.includes(RuleFlags.DISABLE_OOBEE);
const enableWcagAaa = ruleset.includes(RuleFlags.ENABLE_WCAG_AAA);

// Overlay pre-pass: report any accessibility overlays found for this page, then
// scrub them from the DOM so the axe scan that follows runs without them.
const overlayLogRule = `[overlay-neutralizer] ═══════════════════════════════════════════════`;

consoleLogger.info(overlayLogRule);
consoleLogger.info(`[overlay-neutralizer] Starting overlay detection and neutralization`);
consoleLogger.info(`[overlay-neutralizer] Page URL: ${requestUrl}`);
consoleLogger.info(overlayLogRule);

// Overlays recorded by network interception, when the crawler exposes a getter for them
const getBlockedOverlays = (crawler as any)?.__getBlockedOverlays;
const blockedOverlays = typeof getBlockedOverlays === 'function' ? getBlockedOverlays() : [];

// Overlays found by inspecting the DOM
const domDetections = await detectOverlaysInDom(page);

// Combined list: network-level entries first, DOM entries after
const overlayDetections = [...blockedOverlays, ...domDetections];

if (overlayDetections.length === 0) {
  consoleLogger.info(`[overlay-neutralizer] ✓ No overlays detected on ${requestUrl}`);
} else {
  const overlayVendors = overlayDetections.map(d => d.vendor).join(', ');
  consoleLogger.info(
    `[overlay-neutralizer] ⚠️ OVERLAYS FOUND: ${overlayVendors} on ${requestUrl}`,
  );
  // One line per detection with how it was found and the recorded details
  for (const detection of overlayDetections) {
    consoleLogger.info(
      `[overlay-neutralizer] - ${detection.vendor}: detected by ${detection.detectedBy.join(' & ')}, details: ${detection.details.join(', ')}`,
    );
  }
}

// Remove overlay DOM elements before axe is injected
await scrubOverlaysOnPage(page);

consoleLogger.info(overlayLogRule);
consoleLogger.info(`[overlay-neutralizer] Overlay neutralization complete, proceeding with axe scan`);
consoleLogger.info(overlayLogRule);

const gradingReadabilityFlag = await extractAndGradeText(page); // Ensure flag is obtained before proceeding

await playwrightUtils.injectFile(page, axeScript);
Expand Down Expand Up @@ -473,7 +516,7 @@ export const runAxeScript = async ({
}
}

return filterAxeResults(results, pageTitle, customFlowDetails);
return filterAxeResults(results, pageTitle, customFlowDetails, overlayDetections);
};

export const createCrawleeSubFolders = async (
Expand Down
48 changes: 46 additions & 2 deletions src/crawlers/crawlDomain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
shouldSkipClickDueToDisallowedHref,
shouldSkipDueToUnsupportedContent,
} from './commonCrawlerFunc.js';
import { attachOverlayNeutralization } from '../overlays/overlayNeutralizer.js';
import constants, {
UrlsCrawled,
blackListedFileExtensions,
Expand Down Expand Up @@ -381,7 +382,50 @@ const crawlDomain = async ({
requestQueue,
postNavigationHooks: [
async crawlingContext => {
const { page, request } = crawlingContext;
const { page, request, crawler } = crawlingContext;

// Apply stealth techniques to bypass bot detection.
// The callback is executed inside the page, not in Node, so it must only
// touch browser globals (navigator, window).
// NOTE(review): this is registered from a post-navigation hook. Playwright
// evaluates init scripts when a document is created, so the page that has just
// loaded is not affected — only later navigations and frames of this page are.
// Consider registering it before navigation instead.
await page.addInitScript(() => {
  // Remove webdriver property
  Object.defineProperty(navigator, 'webdriver', {
    get: () => undefined,
  });

  // Override plugins to make it look real
  Object.defineProperty(navigator, 'plugins', {
    get: () => [1, 2, 3, 4, 5],
  });

  // Override languages
  Object.defineProperty(navigator, 'languages', {
    get: () => ['en-US', 'en'],
  });

  // Mock chrome object
  (window as any).chrome = {
    runtime: {},
  };

  // Override permissions.
  // Skip quietly when the Permissions API is not exposed, instead of throwing
  // inside the init script.
  const permissions = window.navigator.permissions;
  if (permissions && typeof permissions.query === 'function') {
    // Keep the original receiver: calling the native method unbound throws
    // "TypeError: Illegal invocation" for every non-notification query.
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (parameters: any) =>
      parameters.name === 'notifications'
        ? Promise.resolve({ state: 'denied' } as PermissionStatus)
        : originalQuery(parameters);
  }
});

// Attach overlay neutralization to the page's browser context and publish the
// returned getter on the crawler object so runAxeScript can read it.
// NOTE(review): this hook runs after each navigation, so attachOverlayNeutralization()
// is called again for a context that may already have been set up, and
// __getBlockedOverlays is overwritten each time. Playwright's context.route() adds a
// handler on every call rather than replacing earlier ones, so repeating this is only
// harmless if attachOverlayNeutralization() itself guards against double
// registration — TODO confirm.
const context = page.context();
if (context && typeof context.route === 'function') {
consoleLogger.info(`[overlay-neutralizer] 🔧 Attaching overlay neutralization to browser context for: ${page.url()}`);
const getBlockedOverlays = attachOverlayNeutralization(context);
// Store in crawler state so runAxeScript can access it
(crawler as any).__getBlockedOverlays = getBlockedOverlays;
} else {
// Without context.route() no interception is attached; only a warning is logged.
consoleLogger.warn('[overlay-neutralizer] ⚠️ Unable to attach overlay neutralization - no context.route() available');
}

await page.evaluate(() => {
return new Promise(resolve => {
Expand Down Expand Up @@ -586,7 +630,7 @@ const crawlDomain = async ({
return;
}

const results = await runAxeScript({ includeScreenshots, page, randomToken, ruleset });
const results = await runAxeScript({ includeScreenshots, page, randomToken, ruleset, crawler });

if (isRedirected) {
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(
Expand Down
50 changes: 47 additions & 3 deletions src/crawlers/crawlSitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ import {
} from '../constants/common.js';
import { areLinksEqual, isWhitelistedContentType, register } from '../utils.js';
import { handlePdfDownload, runPdfScan, mapPdfScanResults, doPdfScreenshots } from './pdfScanFunc.js';
import { guiInfoLog } from '../logs.js';
import { guiInfoLog, consoleLogger } from '../logs.js';
import { ViewportSettingsClass } from '../combine.js';
import * as path from 'path';
import fsp from 'fs/promises';
import { attachOverlayNeutralization } from '../overlays/overlayNeutralizer.js';

const crawlSitemap = async ({
sitemapUrl,
Expand Down Expand Up @@ -146,7 +147,50 @@ const crawlSitemap = async ({
},
requestList,
postNavigationHooks: [
async ({ page }) => {
async ({ page, crawler }) => {
// Apply stealth techniques to bypass bot detection.
// The callback is executed inside the page, not in Node, so it must only
// touch browser globals (navigator, window).
// NOTE(review): this is registered from a post-navigation hook. Playwright
// evaluates init scripts when a document is created, so the page that has just
// loaded is not affected — only later navigations and frames of this page are.
// Consider registering it before navigation instead.
await page.addInitScript(() => {
  // Remove webdriver property
  Object.defineProperty(navigator, 'webdriver', {
    get: () => undefined,
  });

  // Override plugins to make it look real
  Object.defineProperty(navigator, 'plugins', {
    get: () => [1, 2, 3, 4, 5],
  });

  // Override languages
  Object.defineProperty(navigator, 'languages', {
    get: () => ['en-US', 'en'],
  });

  // Mock chrome object
  (window as any).chrome = {
    runtime: {},
  };

  // Override permissions.
  // Skip quietly when the Permissions API is not exposed, instead of throwing
  // inside the init script.
  const permissions = window.navigator.permissions;
  if (permissions && typeof permissions.query === 'function') {
    // Keep the original receiver: calling the native method unbound throws
    // "TypeError: Illegal invocation" for every non-notification query.
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (parameters: any) =>
      parameters.name === 'notifications'
        ? Promise.resolve({ state: 'denied' } as PermissionStatus)
        : originalQuery(parameters);
  }
});

// Attach overlay neutralization to the page's browser context and publish the
// returned getter on the crawler object so runAxeScript can read it.
// NOTE(review): this hook runs after each navigation, so attachOverlayNeutralization()
// is called again for a context that may already have been set up, and
// __getBlockedOverlays is overwritten each time. Playwright's context.route() adds a
// handler on every call rather than replacing earlier ones, so repeating this is only
// harmless if attachOverlayNeutralization() itself guards against double
// registration — TODO confirm.
const context = page.context();
if (context && typeof context.route === 'function') {
consoleLogger.info(`[overlay-neutralizer] 🔧 Attaching overlay neutralization to browser context for: ${page.url()}`);
const getBlockedOverlays = attachOverlayNeutralization(context);
// Store in crawler state so runAxeScript can access it
(crawler as any).__getBlockedOverlays = getBlockedOverlays;
} else {
// Without context.route() no interception is attached; only a warning is logged.
consoleLogger.warn('[overlay-neutralizer] ⚠️ Unable to attach overlay neutralization - no context.route() available');
}

try {
// Wait for a quiet period in the DOM, but with safeguards
await page.evaluate(() => {
Expand Down Expand Up @@ -313,7 +357,7 @@ const crawlSitemap = async ({
return;
}

const results = await runAxeScript({ includeScreenshots, page, randomToken });
const results = await runAxeScript({ includeScreenshots, page, randomToken, crawler });

guiInfoLog(guiInfoStatusTypes.SCANNED, {
numScanned: urlsCrawled.scanned.length,
Expand Down
Loading