diff --git a/ofxtools/header.py b/ofxtools/header.py
index 166713f..38a7127 100644
--- a/ofxtools/header.py
+++ b/ofxtools/header.py
@@ -273,7 +273,8 @@ def parse_header(source: BinaryIO) -> Tuple[OFXHeaderType, str]:
 
         # OFX header is read by nice clean machines, not meatbags -
         # should not contain 💩, 漢字, or what have you.
-        line = source.readline().decode("ascii")
+        # However, the first line may contain the XML body, which CAN contain non-ascii.
+        line = source.readline().decode("ascii", "replace")
         if line.strip():
             found_header = True
             break
diff --git a/tests/test_header.py b/tests/test_header.py
index a1dfce6..3de6f1a 100644
--- a/tests/test_header.py
+++ b/tests/test_header.py
@@ -474,6 +474,16 @@ def testParseHeaderV2NoNewlineBetweenHeaderAndBody(self):
         self.assertIsNone(root.text)
         self.assertEqual(len(root), 1)
 
+    def testParseHeaderV2NoNewlineBetweenHeaderAndBodyWithUnicode(self):
+        """OFXv2 may contain non-ascii characters in the first line,
+        if the header is not separated from the body by newlines."""
+        ofxtools.header.parse_header(BytesIO(
+            b''
+            b''
+            b'Dummy unicode data: A:\xc3\x83\xe2\x80\x9e\xc3\x83\xc2\xa4, '
+            b'O:\xc3\x83\xe2\x80\x93\xc3\x83\xc2\xb6, U:\xc3\x83\xc5\x93\xc3\x83\xc2\xbc, and SZ:\xc3\x83\xc5\xb8'
+        ))
+
     def testParseInvalid(self):
         header = str(self.headerClass(self.defaultVersion))
         with self.assertRaises(ofxtools.header.OFXHeaderError):