diff --git a/tests/test_xqueue_client.py b/tests/test_xqueue_client.py index 24cee9d..f57e6e7 100644 --- a/tests/test_xqueue_client.py +++ b/tests/test_xqueue_client.py @@ -3,6 +3,7 @@ import json import collections import requests +import requests.cookies import requests.exceptions from xqueue_watcher import client @@ -20,6 +21,8 @@ def __init__(self): self._fail = False self._url_checker = None self._open = True + self.cookies = requests.cookies.RequestsCookieJar() + self.headers = {} def close(self): self._open = False @@ -152,8 +155,17 @@ def login(url, response, session): self.session._url_checker = login reply = self.client.process_one() - req = self.session._requests[1] - self.assertTrue(req.url, 'TEST/xqueue/login/') + # _requests[0]: original GET /xqueue/get_submission/ → 302 + # _requests[1]: GET /xqueue/login/ (CSRF prefetch) + # _requests[2]: POST /xqueue/login/ + # _requests[3]: retry GET /xqueue/get_submission/ → 200 + get_csrf_req = self.session._requests[1] + self.assertEqual(get_csrf_req.method, 'get') + self.assertEqual(get_csrf_req.url, 'TEST/xqueue/login/') + post_login_req = self.session._requests[2] + self.assertEqual(post_login_req.method, 'post') + self.assertEqual(post_login_req.url, 'TEST/xqueue/login/') + self.assertEqual(post_login_req.kwargs['headers']['Referer'], 'TEST/xqueue/login/') self.assertTrue(reply) def test_bad_login(self): @@ -169,10 +181,158 @@ def login(url, response, session): self.session._url_checker = login reply = self.client.process_one() - req = self.session._requests[1] - self.assertTrue(req.url, 'TEST/xqueue/login/') + post_login_req = self.session._requests[2] + self.assertEqual(post_login_req.method, 'post') + self.assertEqual(post_login_req.url, 'TEST/xqueue/login/') self.assertFalse(reply) + def test_login_sets_referer_header(self): + """POST to login always includes Referer pointing at the login URL.""" + self.session._json = {'return_code': 0, 'msg': 'logged in'} + self.client._login() + post_req = self.session._requests[1] + self.assertEqual(post_req.method, 'post') + self.assertEqual(post_req.kwargs['headers']['Referer'], 'TEST/xqueue/login/') + + def test_login_sets_csrf_token_header(self): + """X-CSRFToken is set on the POST when the CSRF cookie is present.""" + def set_csrf_cookie(url, response, session): + if url.endswith('xqueue/login/'): + session.cookies.set('csrftoken', 'testcsrf123') + response.status_code = 200 + response.json.return_value = {'return_code': 0, 'msg': 'logged in'} + self.session._url_checker = set_csrf_cookie + self.client._login() + post_req = self.session._requests[1] + self.assertEqual(post_req.method, 'post') + self.assertEqual(post_req.kwargs['headers']['X-CSRFToken'], 'testcsrf123') + + def test_login_sets_edx_csrf_token_header(self): + """X-CSRFToken falls back to the edx-csrftoken cookie name.""" + def set_edx_csrf_cookie(url, response, session): + if url.endswith('xqueue/login/'): + session.cookies.set('edx-csrftoken', 'edxcsrf456') + response.status_code = 200 + response.json.return_value = {'return_code': 0, 'msg': 'logged in'} + self.session._url_checker = set_edx_csrf_cookie + self.client._login() + post_req = self.session._requests[1] + self.assertEqual(post_req.kwargs['headers']['X-CSRFToken'], 'edxcsrf456') + + def test_login_no_csrf_token_header_when_no_cookie(self): + """X-CSRFToken is omitted from the POST when no CSRF cookie is available.""" + self.session._json = {'return_code': 0, 'msg': 'logged in'} + self.client._login() + post_req = self.session._requests[1] + self.assertNotIn('X-CSRFToken', post_req.kwargs['headers']) + + def test_login_clears_stale_session(self): + """Stale cookies and X-CSRFToken header are cleared before the CSRF GET.""" + self.session._json = {'return_code': 0, 'msg': 'logged in'} + self.session.cookies.set('sessionid', 'stale_session') + self.session.headers['X-CSRFToken'] = 'stale_token' + self.client._login() + self.assertIsNone(self.session.cookies.get('sessionid')) + self.assertNotIn('X-CSRFToken', self.session.headers) + + def test_login_persists_csrf_in_session_headers(self): + """After a successful login, the session X-CSRFToken header is updated.""" + def set_cookie_on_post(url, response, session): + if url.endswith('xqueue/login/') and response.status_code == 200: + session.cookies.set('csrftoken', 'post_login_csrf') + response.json.return_value = {'return_code': 0} + + self.session._url_checker = set_cookie_on_post + self.client._login() + self.assertEqual(self.session.headers.get('X-CSRFToken'), 'post_login_csrf') + + def test_login_persists_referer_in_session_headers(self): + """After login, Referer is set in session headers so POST requests pass + Django's HTTPS Referer check (required by Django 4+ CSRF enforcement).""" + self.session._json = {'return_code': 0, 'msg': 'logged in'} + self.client._login() + self.assertEqual(self.session.headers.get('Referer'), self.client.xqueue_server) + + def test_reauth_only_once_per_request(self): + """Re-authentication is attempted at most once; persistent failures return False.""" + self.client.add_handler(self._simple_handler) + login_call_count = [0] + + def always_auth_error(url, response, session): + if url.endswith('xqueue/login/'): + login_call_count[0] += 1 + response.status_code = 200 + response.json.return_value = {'return_code': 0, 'msg': 'logged in'} + else: + response.status_code = 403 + + self.session._url_checker = always_auth_error + self.session.status_code = 403 + + reply = self.client.process_one() + self.assertFalse(reply) + # _login() makes GET + POST = 2 requests to the login URL, but only once + self.assertEqual(login_call_count[0], 2) + + def test_reauth_on_401(self): + """A 401 response triggers re-authentication just like a 302 redirect.""" + self.client.add_handler(self._simple_handler) + self.session.status_code = 401 + + def login(url, response, session): + if url.endswith('xqueue/login/'): + response.status_code = 200 + response.json.return_value = {'return_code': 0, 'msg': 'logged in'} + session.status_code = 200 + self.session._url_checker = login + + reply = self.client.process_one() + self.assertTrue(reply) + + def test_reauth_on_403(self): + """A 403 response (e.g. CSRF failure) triggers re-authentication.""" + self.client.add_handler(self._simple_handler) + self.session.status_code = 403 + + def login(url, response, session): + if url.endswith('xqueue/login/'): + response.status_code = 200 + response.json.return_value = {'return_code': 0, 'msg': 'logged in'} + session.status_code = 200 + self.session._url_checker = login + + reply = self.client.process_one() + self.assertTrue(reply) + + def test_put_result_reauth_with_csrf(self): + """When put_result returns 403 (CSRF failure), re-auth obtains a fresh + CSRF token via the GET /xqueue/login/ endpoint and the retry succeeds.""" + put_result_attempts = [0] + + def handler(content): + return {'result': True} + self.client.add_handler(handler) + + def url_checker(url, response, session): + if url.endswith('xqueue/login/'): + session.cookies.set('csrftoken', 'fresh_csrf_token') + response.status_code = 200 + response.json.return_value = {'return_code': 0, 'content': ''} + elif url.endswith('put_result/'): + put_result_attempts[0] += 1 + if put_result_attempts[0] == 1: + response.status_code = 403 + else: + response.status_code = 200 + response.json.return_value = {'return_code': 0, 'content': ''} + + self.session._url_checker = url_checker + result = self.client.process_one() + + self.assertTrue(result) + self.assertEqual(put_result_attempts[0], 2) + self.assertEqual(self.client.session.headers.get('X-CSRFToken'), 'fresh_csrf_token') + def test_post_back(self): def handler(content): return {'result': True} diff --git a/xqueue_watcher/client.py b/xqueue_watcher/client.py index fbae997..04fb0ca 100644 --- a/xqueue_watcher/client.py +++ b/xqueue_watcher/client.py @@ -83,6 +83,7 @@ def _parse_response(self, response, is_reply=True): def _request(self, method, uri, **kwargs): url = self.xqueue_server + uri r = None + reauthenticated = False while not r: try: r = self.session.request( @@ -102,14 +103,22 @@ def _request(self, method, uri, **kwargs): # Django can issue both a 302 to the login page and a # 301 if the original URL did not have a trailing / and # APPEND_SLASH is true in XQueue deployment, which is the default. - elif r.status_code in (301, 302): + # DRF returns 401/403 directly when the session is missing or expired. + elif r.status_code in (301, 302, 401, 403): + if reauthenticated: + # Already re-authenticated once; give up to avoid a login storm. + message = "Received {} after re-authentication, calling {}.".format( + r.status_code, url) + log.error(message) + return (False, message) + reauthenticated = True if self._login(): r = None else: return (False, "Could not log in") else: - message = "Received un expected response status code, {}, calling {}.".format( - r.status_code,url) + message = "Received unexpected response status code, {}, calling {}. Response: {}".format( + r.status_code, url, r.content) log.error(message) return (False, message) @@ -118,13 +127,62 @@ def _login(self): return True url = self.xqueue_server + '/xqueue/login/' log.debug("Trying to login to %s with user: %s", url, self.username) - response = self.session.request('post', url, auth=self.http_basic_auth, verify=_VERIFY_TLS, data={ - 'username': self.username, - 'password': self.password, - }) + # Clear any stale session/CSRF state so the GET arrives as an anonymous + # request. A stale session cookie causes DRF to return 403 on the GET, + # preventing us from obtaining a fresh CSRF cookie. + self.session.cookies.clear() + self.session.headers.pop('X-CSRFToken', None) + # GET the login page so Django sets the csrftoken cookie before we POST. + # edx-submissions exposes GET /xqueue/login/ for this purpose + # (openedx/edx-submissions#352). Older deployments that only allow POST + # will return 405; we log a warning and proceed without a token — the + # login POST is AllowAny so it is CSRF-exempt. + get_response = self.session.request( + 'get', + url, + auth=self.http_basic_auth, + timeout=self.requests_timeout, + verify=_VERIFY_TLS, + ) + if get_response.status_code != 200: + log.debug( + "Login CSRF prefetch returned %d from %s; " + "proceeding without CSRF token (older server?)", + get_response.status_code, url, + ) + csrf_token = ( + self.session.cookies.get('csrftoken') + or self.session.cookies.get('edx-csrftoken') + ) + login_headers = {'Referer': url} + if csrf_token: + login_headers['X-CSRFToken'] = csrf_token + response = self.session.request( + 'post', + url, + auth=self.http_basic_auth, + timeout=self.requests_timeout, + verify=_VERIFY_TLS, + headers=login_headers, + data={ + 'username': self.username, + 'password': self.password, + }, + ) if response.status_code != 200: log.error('Log in error %s %s', response.status_code, response.content) return False + # Persist the CSRF token and Referer in the session so every subsequent + # mutating request (put_result POST) automatically includes them. + # Django 4+ rejects HTTPS POST requests that carry no Referer or Origin + # header even when the CSRF token itself is correct. + csrf_token = ( + self.session.cookies.get('csrftoken') + or self.session.cookies.get('edx-csrftoken') + ) + self.session.headers.update({'Referer': self.xqueue_server}) + if csrf_token: + self.session.headers.update({'X-CSRFToken': csrf_token}) msg = response.json() log.debug("login response from %r: %r", url, msg) return msg['return_code'] == 0