From 43a95c23c782b04be14a188d28aa3c7c79e604b2 Mon Sep 17 00:00:00 2001
From: ashish-066 <ashishkaranam06@Gmail.com>
Date: Sun, 28 Jun 2026 12:28:24 +0530
Subject: [PATCH 1/2] Handle email addresses in translate tagger

---
 app.py | 69 +++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 15 deletions(-)
diff --git a/app.py b/app.py
index 8460995..b0382ed 100644
--- a/app.py
+++ b/app.py
@@ -676,6 +676,55 @@ def _repl(_m):
         out.append(tvar_name.sub(_repl, piece))
     return ''.join(out)
 
+email_regrex = re.compile(r'[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}')
+
+def email_to_nospam(addr):
+    user, domain = addr.split('@', 1)
+    return f'{{{{nospam|{user}|{domain}}}}}'
+
+def process_emails_in_units(text):
+    def replace_in_unit(match):
+        content = match.group(1)
+        
+        emails = email_regrex.findall(content)
+        if not emails:
+            return match.group(0)
+            
+        # A unit that is only a short label followed by one email: move the email outside <translate>
+        if len(emails) == 1:
+            match_email = next(email_regrex.finditer(content))
+            prefix = content[:match_email.start()]
+            suffix = content[match_email.end():]
+            
+            plain_prefix = re.sub(r'<[^>]+>', '', prefix).strip()
+            plain_suffix = re.sub(r'<[^>]+>', '', suffix).strip()
+            
+            # Nothing may follow the email except a single punctuation mark (markup tags are ignored)
+            if len(plain_suffix) <= 1 and plain_suffix in ('', '.', ',', ';', ':', '!', '?'):
+                # The prefix counts as a short label: under 50 characters and at most 10 words
+                if len(plain_prefix) < 50 and len(plain_prefix.split()) <= 10:
+                    nospam = email_to_nospam(match_email.group(0))
+                    
+                    if not prefix.strip():
+                        return f'{nospam}{suffix}'
+                    
+                    return f'<translate>{prefix.rstrip()}</translate> {nospam}{suffix}'
+
+        # Otherwise keep each email inside the unit, wrapped in its own numbered <tvar>
+        email_counter = 1
+        def email_regrexpl(m):
+            nonlocal email_counter
+            addr = m.group(0)
+            nospam = email_to_nospam(addr)
+            res = f'<tvar name="email{email_counter}">{nospam}</tvar>'
+            email_counter += 1
+            return res
+            
+        new_content = email_regrex.sub(email_regrexpl, content)
+        return f'<translate>{new_content}</translate>'
+
+    return re.sub(r'<translate>(.*?)</translate>', replace_in_unit, text, flags=re.DOTALL)
+
 
 # --- Main Tokenisation Logic ---
 
@@ -700,19 +749,6 @@ def convert_to_translatable_wikitext(wikitext):
 
     while curr < text_length :
         found = None
-        if wikitext[curr] == '=':
-            # Find the end of the line
-            end_line = wikitext.find('\n', curr)
-            if end_line == -1:
-                end_line = text_length
-            line = wikitext[curr:end_line]
-            if re.match(r'^(=+)[^=]+(=+)$', line.strip()):
-                if last < curr:
-                    parts.append((wikitext[last:curr], _wrap_in_translate))
-                parts.append((line, process_section_heading))
-                curr = end_line
-                last = curr
-                continue
         # Syntax highlight block
         pattern = '<syntaxhighlight'
         if wikitext.startswith(pattern, curr):
@@ -1077,8 +1113,11 @@ def convert_to_translatable_wikitext(wikitext):
     """
     
     # Join the processed parts into a single string and renumber tvars per unit
-    return renumber_tvars_per_unit(''.join(processed_parts)[1:])  # Remove the leading newline added at the beginning
-
+    result = ''.join(processed_parts)[1:] # Remove the leading newline added at the beginning
+    result = renumber_tvars_per_unit(result)
+    return process_emails_in_units(result)
+    
+    
 @app.route('/')
 def index():
     return render_template('home.html', last_updated=get_last_updated_date())

From ab81a1f9fa173c548fc45aeaca2730780264a200 Mon Sep 17 00:00:00 2001
From: ashish-066 <ashishkaranam06@Gmail.com>
Date: Sun, 28 Jun 2026 13:30:36 +0530
Subject: [PATCH 2/2] Add tests for email handling in translate tagger

---
 tests.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests.py b/tests.py
index 82f1c37..ce6786c 100644
--- a/tests.py
+++ b/tests.py
@@ -271,5 +271,30 @@ def test_italic_text(self):
         ),
         "<translate>[[<tvar name=0>m:Special:MyLanguage/Main Page</tvar>|Main Page]]</translate>"
     )
+
+    def test_email_label_only_pushed_outside(self):
+        self.assertEqual(
+            convert_to_translatable_wikitext("Contact: foo@bar.com"),
+            "<translate>Contact:</translate> {{nospam|foo|bar.com}}"
+        )
+
+    def test_email_already_in_nospam_untouched(self):
+        self.assertEqual(
+            convert_to_translatable_wikitext("Email: {{nospam|foo|bar.com}}"),
+            "<translate>Email:</translate> {{nospam|foo|bar.com}}"
+        )
+
+    def test_email_with_trailing_punctuation(self):
+        self.assertEqual(
+            convert_to_translatable_wikitext("Write to: hello@example.org."),
+            "<translate>Write to:</translate> {{nospam|hello|example.org}}."
+        )
+
+    def test_email_with_surrounding_text_keeps_tvar(self):
+        self.assertEqual(
+            convert_to_translatable_wikitext("Send questions to info@wiki.org or use the form."),
+            '<translate>Send questions to <tvar name="email1">{{nospam|info|wiki.org}}</tvar> or use the form.</translate>'
+        )
+
 if __name__ == '__main__':
     unittest.main(exit=False, failfast=True)