diff --git a/app.py b/app.py
index 8460995..b0382ed 100644
--- a/app.py
+++ b/app.py
@@ -676,6 +676,55 @@ def _repl(_m):
out.append(tvar_name.sub(_repl, piece))
return ''.join(out)
+email_regrex = re.compile(r'[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}')  # Bare e-mail address: local part, '@', dotted domain, then a final label of 2+ ASCII letters.
+
+def email_to_nospam(addr):  # Turn 'user@domain' into the wikitext template call '{{nospam|user|domain}}'.
+ user, domain = addr.split('@', 1)  # Split on the first '@' only; an addr without '@' raises ValueError on unpacking.
+ return f'{{{{nospam|{user}|{domain}}}}}'  # Doubled braces are f-string escapes for the literal '{{' and '}}'.
+
+def process_emails_in_units(text):  # Rewrite e-mail addresses inside each span matched by the pattern on the last line as {{nospam|user|domain}}; returns the rewritten text.
+ def replace_in_unit(match):  # re.sub callback: receives one matched span and returns its replacement.
+ content = match.group(1)
+ # Spans that contain no address are returned exactly as matched.
+ emails = email_regrex.findall(content)
+ if not emails:
+ return match.group(0)
+
+ # Single address: candidate for the "short label + address" form handled below.
+ if len(emails) == 1:
+ match_email = next(email_regrex.finditer(content))  # Re-locate the address to get its start/end offsets.
+ prefix = content[:match_email.start()]
+ suffix = content[match_email.end():]
+ # Drop anything that looks like '<...>' before measuring the surrounding text.
+ plain_prefix = re.sub(r'<[^>]+>', '', prefix).strip()
+ plain_suffix = re.sub(r'<[^>]+>', '', suffix).strip()
+
+ # The visible suffix must be empty or exactly one of the listed punctuation marks.
+ if len(plain_suffix) <= 1 and plain_suffix in ('', '.', ',', ';', ':', '!', '?'):
+ # The visible prefix must be under 50 characters and at most 10 whitespace-separated words.
+ if len(plain_prefix) < 50 and len(plain_prefix.split()) <= 10:
+ nospam = email_to_nospam(match_email.group(0))
+ # Nothing but whitespace before the address: emit the template followed by the raw suffix.
+ if not prefix.strip():
+ return f'{nospam}{suffix}'
+ # Otherwise: right-trimmed prefix, one space, the template, then the raw suffix.
+ return f'{prefix.rstrip()} {nospam}{suffix}'
+
+ # Remaining cases: replace every address in place. NOTE(review): the original comment mentions a tvar, but none is emitted in the code as shown - confirm.
+ email_counter = 1  # NOTE(review): incremented below but never read in the code as shown - confirm whether it should appear in the output.
+ def email_regrexpl(m):  # re.sub callback: one matched address -> its nospam template.
+ nonlocal email_counter
+ addr = m.group(0)
+ nospam = email_to_nospam(addr)
+ res = f'{nospam}'
+ email_counter += 1
+ return res
+
+ new_content = email_regrex.sub(email_regrexpl, content)
+ return f'{new_content}'
+ # NOTE(review): as displayed, r'(.*?)' has no delimiters and lazily matches only the empty string, so group(1) is always '' and the text comes back unchanged; presumably the unit delimiters are missing from this view - confirm against the real file.
+ return re.sub(r'(.*?)', replace_in_unit, text, flags=re.DOTALL)
+
# --- Main Tokenisation Logic ---
@@ -700,19 +749,6 @@ def convert_to_translatable_wikitext(wikitext):
while curr < text_length :
found = None
- if wikitext[curr] == '=':
- # Find the end of the line
- end_line = wikitext.find('\n', curr)
- if end_line == -1:
- end_line = text_length
- line = wikitext[curr:end_line]
- if re.match(r'^(=+)[^=]+(=+)$', line.strip()):
- if last < curr:
- parts.append((wikitext[last:curr], _wrap_in_translate))
- parts.append((line, process_section_heading))
- curr = end_line
- last = curr
- continue
# Syntax highlight block
pattern = '[[m:Special:MyLanguage/Main Page|Main Page]]"
)
+
+ def test_email_label_only_pushed_outside(self):  # "label: address" input: the address must come back as a {{nospam}} template after the label. NOTE(review): the expected string may be missing markup in this view - confirm.
+ self.assertEqual(
+ convert_to_translatable_wikitext("Contact: foo@bar.com"),
+ "Contact: {{nospam|foo|bar.com}}"
+ )
+
+ def test_email_already_in_nospam_untouched(self):  # Input that already uses {{nospam|...}} (no '@' present) must be returned with the template unchanged.
+ self.assertEqual(
+ convert_to_translatable_wikitext("Email: {{nospam|foo|bar.com}}"),
+ "Email: {{nospam|foo|bar.com}}"
+ )
+
+ def test_email_with_trailing_punctuation(self):  # A full stop directly after the address must stay outside the generated template.
+ self.assertEqual(
+ convert_to_translatable_wikitext("Write to: hello@example.org."),
+ "Write to: {{nospam|hello|example.org}}."
+ )
+
+ def test_email_with_surrounding_text_keeps_tvar(self):  # Address in the middle of a sentence is replaced in place. NOTE(review): the name mentions a tvar but the expected string shown contains none - confirm against the real test file.
+ self.assertEqual(
+ convert_to_translatable_wikitext("Send questions to info@wiki.org or use the form."),
+ 'Send questions to {{nospam|info|wiki.org}} or use the form.'
+ )
+
if __name__ == '__main__':  # Run the test suite only when this file is executed as a script.
unittest.main(exit=False, failfast=True)  # exit=False: do not call sys.exit() after the run; failfast=True: stop at the first failure or error.