Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
Changelog
---------
2.2.24
^^^^^^
- Fix an SCC ingestion error that occurred when a doubled italic-off
  mid-row code (9120 9120) appears immediately before punctuation. The
  punctuation lookahead now skips the error-correction duplicate,
  preventing an unwanted space that pushed lines past the 32-character
  limit.


2.2.23
^^^^^^
- bumps nltk from 3.9.1 to 3.9.4.
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
# built documents.
#
# The short X.Y version.
version = "2.2.23"
version = "2.2.24.dev1"
# The full version, including alpha/beta/rc tags.
release = "2.2.23"

Expand Down
10 changes: 9 additions & 1 deletion pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,15 @@ def _translate_line(self, line):
for idx, word in enumerate(word_list):
word = word.strip()
if len(word) == 4:
next_command = word_list[idx + 1] if idx + 1 < len(word_list) else None
# Look ahead for the next command, skipping the duplicate
# that SCC uses for error-correction (same word repeated).
next_idx = idx + 1
if (next_idx < len(word_list)
and word_list[next_idx].strip() == word):
next_idx += 1
next_command = (
word_list[next_idx] if next_idx < len(word_list) else None
)
self._translate_word(word=word, next_command=next_command)

def _translate_word(self, word, next_command=None):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name="pycaption",
version="2.2.23",
version="2.2.24.dev1",
description="Closed caption converter",
long_description=open(README_PATH).read(),
author="Joe Norton",
Expand Down
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
sample_no_positioning_at_all_scc,
sample_scc_created_dfxp_with_wrongly_closing_spans,
sample_scc_duplicate_special_characters,
sample_scc_doubled_mid_row_before_punctuation,
sample_scc_duplicate_tab_offset,
sample_scc_empty,
sample_scc_eoc_first_command,
Expand Down
10 changes: 10 additions & 0 deletions tests/fixtures/scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,3 +659,13 @@ def sample_scc_paint_on_edm():
00:00:04;00 942c

"""


@pytest.fixture(scope="session")
def sample_scc_doubled_mid_row_before_punctuation():
    """Return an SCC document containing a doubled ``9120`` code before ``ae80``.

    The single caption line carries every control code twice, and the
    ``9120 9120`` pair sits directly in front of the final ``ae80`` word.
    The payload is kept flush-left because it is the literal file content.
    """
    scc_payload = """\
Scenarist_SCC V1.0

00:26:48;29\t9420 9420 94d0 94d0 97a1 97a1 3e3e 2057 e5a7 ecec 2062 e520 6261 e36b 206e e5f8 f420 f7e5 e56b 20f7 e9f4 6880 9470 9470 616e eff4 68e5 f220 e570 e973 ef64 e520 efe6 91ae 91ae 4361 6e61 6461 2046 e9ec e573 9120 9120 ae80 942c 942c 8080 8080 942f 942f

"""
    return scc_payload
18 changes: 18 additions & 0 deletions tests/test_scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,24 @@ def test_mid_row_codes_not_adding_space_if_there_is_one_before(
]
assert expected_lines == actual_lines

def test_doubled_mid_row_before_punctuation_no_extra_space(
    self, sample_scc_doubled_mid_row_before_punctuation
):
    """Check that no space is emitted before the closing period.

    Reads the fixture with ``SCCReader``, concatenates the content of every
    TEXT node of the "en-US" captions, and verifies that the joined text
    contains no " ." sequence and ends with "Files.".
    """
    reader = SCCReader()
    parsed = reader.read(sample_scc_doubled_mid_row_before_punctuation)

    # Gather only the textual nodes; other node types carry no content
    # relevant to the spacing check.
    pieces = []
    for caption in parsed.get_captions("en-US"):
        for node in caption.nodes:
            if node.type_ == CaptionNode.TEXT:
                pieces.append(node.content)
    joined = "".join(pieces)

    assert " ." not in joined
    assert joined.endswith("Files.")

def test_removing_spaces_at_end_of_lines(
self,
sample_scc_with_spaces_at_eol_pop,
Expand Down
Loading