From 67361985060fdf22c70c5ed30c8d3f537bb5f387 Mon Sep 17 00:00:00 2001 From: Evan Trimboli Date: Fri, 22 Aug 2025 11:41:43 +1000 Subject: [PATCH] Fix eof issue with seekWhile. Add helper function for cloning at different start/end. Add tests --- package-lock.json | 5 +- src/charstream.ts | 64 ++++---- test/charstream.spec.ts | 321 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 357 insertions(+), 33 deletions(-) diff --git a/package-lock.json b/package-lock.json index 82be0a5..c42ebea 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,12 @@ { "name": "parsecco", - "version": "1.5.0", + "version": "1.5.2", "lockfileVersion": 2, "requires": true, "packages": { "": { - "version": "1.5.0", + "name": "parsecco", + "version": "1.5.2", "license": "BSD-2-Clause", "devDependencies": { "@types/chai": "^4.2.15", diff --git a/src/charstream.ts b/src/charstream.ts index 320702c..dc2d43a 100644 --- a/src/charstream.ts +++ b/src/charstream.ts @@ -33,6 +33,15 @@ export namespace CharUtil { } } + private withNewEndPos(newEndPos: number): CharStream { + const newHasEOF = newEndPos >= this.endpos && this.hasEOF; + return new CharStream(this.input, this.startpos, newEndPos, newHasEOF); + } + + private withNewStartPos(newStartPos: number): CharStream { + return new CharStream(this.input, newStartPos, this.endpos, this.hasEOF); + } + /** * Returns true of the end of the input has been reached. */ @@ -59,8 +68,7 @@ export namespace CharUtil { if (this.startpos + num >= this.endpos) { return this; } else { - const newHasEOF = this.startpos + num == this.endpos && this.hasEOF; - return new CharStream(this.input, this.startpos, this.startpos + num, newHasEOF); + return this.withNewEndPos(this.startpos + num); } } @@ -83,11 +91,10 @@ export namespace CharUtil { */ public seekWhile(pred: (char: string) => boolean): CharStream { let pos = this.startpos; - let end = this.endpos; - while (pos < end && pred(this.input.charAt(pos))) { + while (pos < this.endpos && pred(this.input.charAt(pos))) { pos++; } - return new CharStream(this.input, pos, end, pos == end); + return this.withNewStartPos(pos); } /** @@ -98,14 +105,10 @@ export namespace CharUtil { */ public seekWhileCharCode(okCodes: (n: number) => boolean): [CharStream, CharStream] { let pos = this.startpos; - let end = this.endpos; - while (pos < end && okCodes(this.input.charCodeAt(pos))) { + while (pos < this.endpos && okCodes(this.input.charCodeAt(pos))) { pos++; } - return [ - new CharStream(this.input, this.startpos, pos, pos == end), - new CharStream(this.input, pos, end, this.hasEOF), - ]; + return [this.withNewEndPos(pos), this.withNewStartPos(pos)]; } /** @@ -114,11 +117,8 @@ export namespace CharUtil { * @param num */ public seek(num: number): CharStream { - if (this.startpos + num > this.endpos) { - return new CharStream(this.input, this.endpos, this.endpos, this.hasEOF); - } else { - return new CharStream(this.input, this.startpos + num, this.endpos, this.hasEOF); - } + const newStart = Math.min(this.startpos + num, this.endpos); + return this.withNewStartPos(newStart); } /** @@ -127,12 +127,10 @@ export namespace CharUtil { * empty. */ public head(): CharStream { - if (!this.isEmpty()) { - const newHasEOF = this.startpos + 1 == this.endpos && this.hasEOF; - return new CharStream(this.input, this.startpos, this.startpos + 1, newHasEOF); - } else { + if (this.isEmpty()) { throw new Error('Cannot get the head of an empty string.'); } + return this.withNewEndPos(this.startpos + 1); } /** @@ -141,11 +139,10 @@ export namespace CharUtil { * empty. */ public tail(): CharStream { - if (!this.isEmpty()) { - return new CharStream(this.input, this.startpos + 1, this.endpos, this.hasEOF); - } else { + if (this.isEmpty()) { throw new Error('Cannot get the tail of an empty string.'); } + return this.withNewStartPos(this.startpos + 1); } /** @@ -175,7 +172,7 @@ export namespace CharUtil { public substring(start: number, end: number): CharStream { const start2 = this.startpos + start; const end2 = this.startpos + end; - const newHasEOF = this.endpos == end2 && this.hasEOF; + const newHasEOF = end2 >= this.endpos && this.hasEOF; return new CharStream(this.input, start2, end2, newHasEOF); } @@ -199,14 +196,19 @@ export namespace CharUtil { */ static concat(css: CharStream[]): CharStream { if (css.length == 0) { - return new CharStream('', 0, 0, false); - } else { - let cs = css[0]; - for (let i = 1; i < css.length; i++) { - cs = cs.concat(css[i]); - } - return cs; + return new CharStream('', 0, 0, true); } + + let s = ''; + let hasEOF = true; + for (const cs of css) { + s += cs.toString(); + // We only want the EOF of the last item so + // it's fine to keep overwriting it + hasEOF = cs.hasEOF; + } + + return new CharStream(s, 0, s.length, hasEOF); } } } diff --git a/test/charstream.spec.ts b/test/charstream.spec.ts index 1e2bd0f..d7186f8 100644 --- a/test/charstream.spec.ts +++ b/test/charstream.spec.ts @@ -1,4 +1,5 @@ import { CharUtil } from '../src/charstream'; +import { Primitives } from '../src/primitives'; import { assert, expect } from 'chai'; import 'mocha'; @@ -116,4 +117,324 @@ describe('CharStream', () => { const b_cs = new CharUtil.CharStream(b); expect(a_cs.concat(b_cs).toString()).to.equal(a + b); }); + + describe('EOF management', () => { + const z = (input: string, hasEOF?: boolean) => new CharUtil.CharStream(input, 0, input.length, hasEOF); + + describe('peek', () => { + describe('starting with eof', () => { + it('should retain eof when peeking past the end of input', () => { + const result = z('peeking').peek(100); + expect(result.hasEOF).to.equal(true); + }); + + it('should retain eof when peeking at the end of input', () => { + const result = z('peeking').peek(7); + expect(result.hasEOF).to.equal(true); + }); + + it('should not retain eof when peeking before the end of input', () => { + const result = z('peeking').peek(3); + expect(result.hasEOF).to.equal(false); + }); + }); + + describe('starting without eof', () => { + it('should not have eof when peeking past the end of input', () => { + const result = z('peeking', false).peek(100); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof when peeking at the end of input', () => { + const result = z('peeking', false).peek(7); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof when peeking before the end of input', () => { + const result = z('peeking', false).peek(3); + expect(result.hasEOF).to.equal(false); + }); + }); + }); + + describe('seekWhile', () => { + describe('starting with eof', () => { + it('should retain eof with no matches', () => { + const result = z('seekWhile').seekWhile(() => false); + expect(result.hasEOF).to.equal(true); + }); + + it('should retain eof with all matches', () => { + const result = z('seekWhile').seekWhile(() => true); + expect(result.hasEOF).to.equal(true); + }); + + it('should retain eof with partial matches', () => { + const result = z('seekWhile').seekWhile(c => c === 's' || c === 'e'); + expect(result.hasEOF).to.equal(true); + }); + }); + + describe('starting without eof', () => { + it('should not have with no matches', () => { + const result = z('seekWhile', false).seekWhile(() => false); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof with all matches', () => { + const result = z('seekWhile', false).seekWhile(() => true); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof with partial matches', () => { + const result = z('seekWhile', false).seekWhile(c => c === 's' || c === 'e'); + expect(result.hasEOF).to.equal(false); + }); + }); + }); + + describe('seekWhileCharCode', () => { + describe('starting with eof', () => { + it('should head:not retain, tail:retain with no matches', () => { + const [head, tail] = z('seekWhileCharCode').seekWhileCharCode(() => false); + expect(head.hasEOF).to.equal(false); + expect(tail.hasEOF).to.equal(true); + }); + + it('should head:retain, tail:retain with no matches', () => { + const [head, tail] = z('seekWhileCharCode').seekWhileCharCode(() => true); + expect(head.hasEOF).to.equal(true); + expect(tail.hasEOF).to.equal(true); + }); + + it('should head:not retain, tail:retain with no matches', () => { + const sCode = 115; + const eCode = 101; + const [head, tail] = z('seekWhileCharCode').seekWhileCharCode(c => c === sCode || c === eCode); + expect(head.hasEOF).to.equal(false); + expect(tail.hasEOF).to.equal(true); + }); + }); + + describe('starting without eof', () => { + it('should head:no eof, tail:no eof with no matches', () => { + const [head, tail] = z('seekWhileCharCode', false).seekWhileCharCode(() => false); + expect(head.hasEOF).to.equal(false); + expect(tail.hasEOF).to.equal(false); + }); + + it('should head:no eof, tail:no eof with all matches', () => { + const [head, tail] = z('seekWhileCharCode', false).seekWhileCharCode(() => true); + expect(head.hasEOF).to.equal(false); + expect(tail.hasEOF).to.equal(false); + }); + + it('should head:no eof, tail:no eof with partial matches', () => { + const sCode = 115; + const eCode = 101; + const [head, tail] = z('seekWhileCharCode', false).seekWhileCharCode(c => c === sCode || c === eCode); + expect(head.hasEOF).to.equal(false); + expect(tail.hasEOF).to.equal(false); + }); + }); + }); + + describe('seek', () => { + describe('starting with eof', () => { + it('should retain eof when seeking past the end of input', () => { + const result = z('seeking').seek(100); + expect(result.hasEOF).to.equal(true); + }); + + it('should retain eof when seeking at the end of input', () => { + const result = z('seeking').seek(7); + expect(result.hasEOF).to.equal(true); + }); + + it('should retain eof when seeking before the end of input', () => { + const result = z('seeking').seek(1); + expect(result.hasEOF).to.equal(true); + }); + }); + + describe('starting without eof', () => { + it('should not have eof when seeking past the end of input', () => { + const result = z('seeking', false).seek(100); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof when seeking at the end of input', () => { + const result = z('seeking', false).seek(7); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof when seeking before the end of input', () => { + const result = z('seeking', false).seek(1); + expect(result.hasEOF).to.equal(false); + }); + }); + }); + + describe('head', () => { + describe('starting with eof', () => { + it('should retain eof when at the end of input', () => { + const result = z('head').seek(3).head(); + expect(result.hasEOF).to.equal(true); + }); + + it('should not retain eof when not at the end of input', () => { + const result = z('head').head(); + expect(result.hasEOF).to.equal(false); + }); + }); + + describe('starting without eof', () => { + it('should not have eof when at the end of input', () => { + const result = z('head', false).seek(3).head(); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof when not at the end of input', () => { + const result = z('head', false).head(); + expect(result.hasEOF).to.equal(false); + }); + }); + }); + + describe('tail', () => { + describe('starting with eof', () => { + it('should retain eof when at the end of input', () => { + const result = z('tail').seek(3).tail(); + expect(result.hasEOF).to.equal(true); + }); + + it('should retain eof when not at the end of input', () => { + const result = z('tail').tail(); + expect(result.hasEOF).to.equal(true); + }); + }); + + describe('starting without eof', () => { + it('should not have eof when at the end of input', () => { + const result = z('tail', false).seek(3).tail(); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof when not at the end of input', () => { + const result = z('tail', false).tail(); + expect(result.hasEOF).to.equal(false); + }); + }); + }); + + describe('substring', () => { + describe('starting with eof', () => { + it('should retain eof when substring past the end of input', () => { + const result = z('a-long-string-for-substring').substring(0, 1000); + expect(result.hasEOF).to.equal(true); + }); + + it('should retain eof when substring to the end of input', () => { + const result = z('a-long-string-for-substring').substring(0, 27); + expect(result.hasEOF).to.equal(true); + }); + + it('should not retain eof when substring before the end of input', () => { + const result = z('a-long-string-for-substring').substring(0, 12); + expect(result.hasEOF).to.equal(false); + }); + }); + + describe('starting without eof', () => { + it('should not have eof when substring past the end of input', () => { + const result = z('a-long-string-for-substring', false).substring(0, 1000); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof when substring to the end of input', () => { + const result = z('a-long-string-for-substring', false).substring(0, 27); + expect(result.hasEOF).to.equal(false); + }); + + it('should not have eof when substring before the end of input', () => { + const result = z('a-long-string-for-substring', false).substring(0, 12); + expect(result.hasEOF).to.equal(false); + }); + }); + }); + + describe('concat', () => { + describe('starting with eof', () => { + it('should retain eof when arg is at eof', () => { + const base = z('concat-base'); + const other = z('concat'); + + const result = base.concat(other); + expect(result.hasEOF).to.equal(true); + }); + + it('should not retain eof when arg is not at eof', () => { + const base = z('concat-base'); + const other = z('concat').peek(1); + + const result = base.concat(other); + expect(result.hasEOF).to.equal(false); + }); + }); + + describe('starting without eof', () => { + it('should retain eof when arg is at eof', () => { + const base = z('concat-base', false); + const other = z('concat'); + + const result = base.concat(other); + expect(result.hasEOF).to.equal(true); + }); + + it('should not retain eof when arg is not at eof', () => { + const base = z('concat-base', false); + const other = z('concat').peek(1); + + const result = base.concat(other); + expect(result.hasEOF).to.equal(false); + }); + }); + }); + + describe('static-concat', () => { + it('should retain eof when no args are passed', () => { + const result = CharUtil.CharStream.concat([]); + expect(result.hasEOF).to.equal(true); + }); + + it('should be retain eof if the only item is eof', () => { + const a = z('static-concat-1'); + const result = CharUtil.CharStream.concat([a]); + expect(result.hasEOF).to.equal(true); + }); + + it('should retain eof if the last item is eof', () => { + const a = z('static-concat-1', false); + const b = z('static-concat-2'); + const result = CharUtil.CharStream.concat([a, b]); + expect(result.hasEOF).to.equal(true); + }); + + it('should retain eof if all items are eof', () => { + const a = z('static-concat-1'); + const b = z('static-concat-2'); + const c = z('static-concat-3'); + const result = CharUtil.CharStream.concat([a, b, c]); + expect(result.hasEOF).to.equal(true); + }); + + it('should not retain eof if the last item is not eof', () => { + const a = z('static-concat-1'); + const b = z('static-concat-2'); + const c = z('static-concat-3', false); + const result = CharUtil.CharStream.concat([a, b, c]); + expect(result.hasEOF).to.equal(false); + }); + }); + }); });