Skip to content

Commit 05e2ef2

Browse files
committed
Expand testing suites for text fragment params
1 parent 8bf9cc0 commit 05e2ef2

2 files changed

Lines changed: 177 additions & 9 deletions

File tree

src/util/TextSelectionManager.js

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,10 @@ export function createTextFragmentUrlParam(selection, pageLayer) {
274274
const endPhraseMatchRe = new RegExp(String.raw`(${textStartRe})(?=.*?(${textEndRe}))`, "gis");
275275

276276
// Duplicated spaces in pageLayer.textContent for some reason
277-
const wholePageText = pageLayer.textContent.replace(/\s+/g, " ");
277+
const wholePageText = Array.from(document.querySelectorAll('.BRpage-visible'))
278+
.map((item) => item.textContent)
279+
.join(' ')
280+
.replace(/\s+/g, " ") || pageLayer.textContent.replace(/\s+/g, " ");
278281
const startPhraseFoundMatches = wholePageText.matchAll(startPhraseMatchRe).toArray();
279282
const endPhraseFoundMatches = wholePageText.matchAll(endPhraseMatchRe).toArray();
280283
if (startPhraseFoundMatches.length == 1 && endPhraseFoundMatches.length == 1) {
@@ -284,12 +287,24 @@ export function createTextFragmentUrlParam(selection, pageLayer) {
284287

285288
// Need to add some additional context to `startWord...endWord` by including surrounding words before and after the keywords
286289
const preStartRange = document.createRange();
287-
preStartRange.setStart(pageLayer.firstElementChild, 0);
290+
291+
const previousPageContainer = pageLayer.parentElement?.previousElementSibling;
292+
if (previousPageContainer?.classList.contains("BRpage-visible")) {
293+
preStartRange.setStart(previousPageContainer, 0);
294+
} else {
295+
preStartRange.setStart(pageLayer.firstElementChild, 0);
296+
}
288297
preStartRange.setEnd(startNode, 0);
289298
const postEndRange = document.createRange();
290299
postEndRange.setStart(endNode, endNode.textContent.length);
291-
postEndRange.setEnd(pageLayer.lastElementChild, pageLayer.lastElementChild.childElementCount);
292-
300+
const nextPageContainer = pageLayer.parentElement.nextElementSibling;
301+
if (nextPageContainer?.classList.contains("BRpage-visible")) {
302+
const nextPageLastWord = getLastestElement(nextPageContainer);
303+
postEndRange.setEnd(nextPageLastWord, Math.max(0, nextPageLastWord.textContent.length - 1));
304+
} else {
305+
const lastWordOfPageEl = getLastestElement(pageLayer);
306+
postEndRange.setEnd(lastWordOfPageEl, Math.max(0, lastWordOfPageEl.textContent.length - 1));
307+
}
293308
// prefixes/suffixes cannot contain paragraph breaks, words that are from more than one line break away should not be included
294309
const prefix = getLastWords(3, preStartRange.toString())
295310
.replace(/[ ]+/g, " ")
@@ -301,9 +316,22 @@ export function createTextFragmentUrlParam(selection, pageLayer) {
301316
.replace(/\n[^\n]*$/gm, "");
302317

303318
// Partially selected words need to be captured completely
319+
// Guarantee that all whitespace is replaced with just one space and that the first/last word of the highlight is not a space
304320
const fullHighlight = selection.toString().replace(/\s+/g, " ").trim().split(/\s/g);
305-
fullHighlight[0] = startNode.textContent;
306-
fullHighlight[fullHighlight.length - 1] = endNode.textContent;
321+
// Capture start/end words that may be partially highlighted
322+
if (startNode.textContent.trim().length != 0) {
323+
if (!startNode.textContent.includes(fullHighlight[0])) {
324+
fullHighlight.unshift(startNode.textContent);
325+
} else {
326+
fullHighlight[0] = startNode.textContent;
327+
}
328+
}
329+
if (endNode.textContent.trim().length != 0) {
330+
if (!endNode.textContent.includes(fullHighlight[fullHighlight.length - 1])) {
331+
fullHighlight.push(endNode.textContent);
332+
}
333+
fullHighlight[fullHighlight.length - 1] = endNode.textContent;
334+
}
307335

308336
let quote = [fullHighlight.join(" ")];
309337
if (fullHighlight.length > 6) {
@@ -492,3 +520,14 @@ export function getLastWords(numWords, text) {
492520
const m = text.match(re);
493521
return m ? m[0].trim() : "";
494522
}
523+
524+
/**
 * Follows the chain of `lastElementChild` links starting at `parent` and
 * returns the deepest element reached, i.e. the last element descendant in
 * document order. If `parent` has no element children, `parent` itself is
 * returned.
 *
 * Only element children are considered (text nodes are skipped), so the
 * result is always an Element, never a bare text node.
 *
 * @param {HTMLElement | Element} parent element to start descending from
 * @returns {Element} the deepest last element descendant, or `parent` itself
 */
export function getLastestElement(parent) {
  // Walk with a local cursor instead of reassigning the parameter.
  let deepest = parent;
  while (deepest.lastElementChild) {
    deepest = deepest.lastElementChild;
  }
  return deepest;
}

tests/jest/util/TextSelectionManager.test.js

Lines changed: 132 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ const FAKE_XML_MULT_LINES = `
1010
<PARAGRAPH>
1111
<LINE>
1212
<WORD coords="119,2050,230,2014" x-confidence="29">way </WORD>
13-
<WORD coords="230,2038,320,2002" x-confidence="30">can </WORD>
13+
<WORD coords="230,2038,320,2002" x-confidence="30">can </WORD>
1414
<WORD coords="320,2039,433,2002" x-confidence="28">false </WORD>
1515
<WORD coords="433,2051,658,2003" x-confidence="29">judgment </WORD>
1616
<WORD coords="658,2039,728,2002" x-confidence="30">be </WORD>
1717
<WORD coords="658,2039,728,2002" x-confidence="30">-</WORD>
18-
<WORD coords="728,2039,939,2001" x-confidence="29">formed. </WORD>
18+
<WORD coords="728,2039,939,2001" x-confidence="29"> formed. </WORD>
1919
<WORD coords="939,2039,1087,2001" x-confidence="29">There </WORD>
2020
<WORD coords="1087,2039,1187,2002" x-confidence="29">still </WORD>
2121
<WORD coords="1187,2038,1370,2003" x-confidence="29">remains </WORD>
@@ -58,7 +58,7 @@ const MULTIPLE_REPEAT_LINES = `
5858
<WORD coords="331,1425,412,1378" x-confidence="96">clay</WORD>
5959
<WORD coords="431,1413,505,1378" x-confidence="96">and</WORD>
6060
<WORD coords="526,1413,637,1378" x-confidence="96">chalk</WORD>
61-
<WORD coords="657,1414,814,1378" x-confidence="96">mixture</WORD>
61+
<WORD coords="657,1414,814,1378" x-confidence="96">mixture,</WORD>
6262
<WORD coords="836,1414,957,1378" x-confidence="96">which</WORD>
6363
<WORD coords="976,1414,994,1380" x-confidence="96">I</WORD>
6464
<WORD coords="1017,1414,1080,1390" x-confidence="96">see</WORD>
@@ -138,6 +138,46 @@ const FAKE_DIALOGUE = `
138138
</PARAGRAPH>
139139
</OBJECT>
140140
`;
141+
const PAGE_ONE = `
142+
<OBJECT>
143+
<PARAGRAPH>
144+
<LINE>
145+
<WORD coords="177,1591,308,1556" x-confidence="84">Book</WORD>
146+
<WORD coords="327,1592,359,1556" x-confidence="96">header</WORD>
147+
<WORD coords="376,1604,493,1556" x-confidence="96">test</WORD>
148+
<WORD coords="509,1603,610,1557" x-confidence="96">replica</WORD>
149+
</LINE>
150+
</PARAGRAPH>
151+
<PARAGRAPH>
152+
<LINE>
153+
<WORD coords="177,1591,308,1556" x-confidence="84">This</WORD>
154+
<WORD coords="327,1592,359,1556" x-confidence="96">is</WORD>
155+
<WORD coords="376,1604,493,1556" x-confidence="96">page</WORD>
156+
<WORD coords="509,1603,610,1557" x-confidence="96">one</WORD>
157+
</LINE>
158+
</PARAGRAPH>
159+
</OBJECT>
160+
`;
161+
const PAGE_TWO = `
162+
<OBJECT>
163+
<PARAGRAPH>
164+
<LINE>
165+
<WORD coords="177,1591,308,1556" x-confidence="84">Book</WORD>
166+
<WORD coords="327,1592,359,1556" x-confidence="96">header</WORD>
167+
<WORD coords="376,1604,493,1556" x-confidence="96">test</WORD>
168+
<WORD coords="509,1603,610,1557" x-confidence="96">replica</WORD>
169+
</LINE>
170+
</PARAGRAPH>
171+
<PARAGRAPH>
172+
<LINE>
173+
<WORD coords="177,1591,308,1556" x-confidence="84">Currently</WORD>
174+
<WORD coords="327,1592,359,1556" x-confidence="96">on</WORD>
175+
<WORD coords="376,1604,493,1556" x-confidence="96">page</WORD>
176+
<WORD coords="509,1603,610,1557" x-confidence="96">two</WORD>
177+
</LINE>
178+
</PARAGRAPH>
179+
</OBJECT>
180+
`;
141181
document.body.innerHTML = '<div id="BookReader">';
142182
const br = window.br = new BookReader({
143183
data: [
@@ -239,9 +279,50 @@ describe("TextFragment tests", () => {
239279
afterEach(() => {
240280
sinon.restore();
241281
$('.BRtextLayer').remove();
282+
$('.BRpage').remove();
242283
window.getSelection().direction = null;
243284
});
244285

286+
test("Text fragment generation accounts for text at the end of the first page/beginning of second page", async () => {
287+
sinon.stub(br.plugins.textSelection, "getPageText")
288+
.returns($(new DOMParser().parseFromString(PAGE_ONE, "text/xml")));
289+
const $pageContainer1 = $("<div class='BRpage'></div>").appendTo(br.refs.$brContainer);
290+
await br.plugins.textSelection.createTextLayer({ $container: $pageContainer1, page: {index: 1, width: 100, height: 100 }});
291+
292+
sinon.restore();
293+
294+
sinon.stub(br.plugins.textSelection, "getPageText")
295+
.returns($(new DOMParser().parseFromString(PAGE_TWO, "text/xml")));
296+
const $pageContainer2 = $("<div class='BRpage'></div>").appendTo(br.refs.$brContainer);
297+
await br.plugins.textSelection.createTextLayer({ $container: $pageContainer2, page: {index: 2, width: 100, height: 100 }});
298+
299+
Array.from($(br.refs.$brContainer).find(".BRtextLayer")).forEach((layer) => {
300+
layer.parentElement.classList.add("BRpage-visible");
301+
});
302+
303+
const rangePageOne = document.createRange();
304+
rangePageOne.setStart($(br.refs.$brContainer).find(".BRtextLayer").find(".BRparagraphElement").find(".BRwordElement")[0].firstChild, 0);
305+
rangePageOne.setEnd($(br.refs.$brContainer).find(".BRtextLayer").find(".BRparagraphElement").find(".BRwordElement")[3].firstChild, 7);
306+
307+
const selectionPageOne = window.getSelection();
308+
selectionPageOne.removeAllRanges();
309+
selectionPageOne.addRange(rangePageOne);
310+
311+
const pageOneUrlParam = createTextFragmentUrlParam(selectionPageOne, br.refs.$brContainer.find(".BRtextLayer")[0]);
312+
313+
const rangePageTwo = document.createRange();
314+
rangePageTwo.setStart($($(br.refs.$brContainer).find(".BRtextLayer")[1]).find(".BRparagraphElement").find(".BRwordElement")[0].firstChild, 0);
315+
rangePageTwo.setEnd($($(br.refs.$brContainer).find(".BRtextLayer")[1]).find(".BRparagraphElement").find(".BRwordElement")[3].firstChild, 6);
316+
const selectionPageTwo = window.getSelection();
317+
selectionPageTwo.removeAllRanges();
318+
selectionPageTwo.addRange(rangePageTwo);
319+
320+
const pageTwoUrlParam = createTextFragmentUrlParam(selectionPageTwo, br.refs.$brContainer.find(".BRtextLayer")[1]);
321+
322+
expect(pageOneUrlParam).toMatch("text=Book%20header%20test%20replica,-This%20is%20page");
323+
expect(pageTwoUrlParam).toMatch("text=is%20page%20one-,Book%20header%20test%20replica,-Currently%20on%20page");
324+
});
325+
245326
test("Forward and Backward selection without prefix", async () => {
246327
const $container = br.refs.$brContainer;
247328
sinon.stub(br.plugins.textSelection, "getPageText")
@@ -300,6 +381,54 @@ describe("TextFragment tests", () => {
300381
expect(backwardTest).toMatch(forwardTest);
301382
});
302383

384+
test("Handle start/end word with space before/after meaningful text content", async () => {
385+
const $container = br.refs.$brContainer;
386+
sinon.stub(br.plugins.textSelection, "getPageText")
387+
.returns($(new DOMParser().parseFromString(FAKE_XML_MULT_LINES, "text/xml")));
388+
await br.plugins.textSelection.createTextLayer({ $container, page: {index: 1, width: 100, height: 100 }});
389+
390+
const startWordRange = document.createRange();
391+
startWordRange.setStart($container.find(".BRwordElement")[1].firstChild, 3);
392+
startWordRange.setEnd($container.find(".BRwordElement")[6].firstChild, 4);
393+
394+
const selection = window.getSelection();
395+
selection.removeAllRanges();
396+
selection.addRange(startWordRange);
397+
const startingSpaceTextFragmentUrl = createTextFragmentUrlParam(selection, document.querySelector('.BRtextLayer'));
398+
399+
expect(startingSpaceTextFragmentUrl).toBe(`text=way-,can%20false%20judgment%20be%20-%20formed.,-There%20still%20remains`);
400+
401+
const endWordRange = document.createRange();
402+
endWordRange.setStart($container.find(".BRwordElement")[1].firstChild, 0);
403+
endWordRange.setEnd($container.find(".BRwordElement")[6].firstChild, 0);
404+
405+
selection.removeAllRanges();
406+
selection.addRange(endWordRange);
407+
const endingSpaceTextFragmentUrl = createTextFragmentUrlParam(selection, document.querySelector('.BRtextLayer'));
408+
409+
expect(endingSpaceTextFragmentUrl).toBe(startingSpaceTextFragmentUrl);
410+
});
411+
412+
test("Quote and comma included in text selection should be URI encoded", async () => {
413+
const $container = br.refs.$brContainer;
414+
sinon.stub(br.plugins.textSelection, "getPageText")
415+
.returns($(new DOMParser().parseFromString(MULTIPLE_REPEAT_LINES, "text/xml")));
416+
await br.plugins.textSelection.createTextLayer({ $container, page: {index: 1, width: 100, height: 100 }});
417+
418+
const rangeIncludesComma = document.createRange();
419+
rangeIncludesComma.setStart(
420+
$($container.find('.BRparagraphElement')[0]).find(".BRwordElement")[0].firstChild, 0);
421+
rangeIncludesComma.setEnd($($container.find('.BRparagraphElement')[0]).find(".BRwordElement")[4].firstChild, 0);
422+
423+
const commaSelection = window.getSelection();
424+
commaSelection.removeAllRanges();
425+
commaSelection.addRange(rangeIncludesComma);
426+
const commaTextFragmentUrl = createTextFragmentUrlParam(commaSelection, document.querySelector('.BRtextLayer'));
427+
428+
expect(commaTextFragmentUrl.includes("“")).toBeFalsy();
429+
expect(commaTextFragmentUrl).toBe("text=%E2%80%9CThat,mixture%2C");
430+
});
431+
303432
test("Should be able to differentiate overlapping matches", async () => {
304433
const $container = br.refs.$brContainer;
305434
sinon.stub(br.plugins.textSelection, "getPageText")

0 commit comments

Comments
 (0)