pde-engine/general_method_paper_reproduction.py at main · PimDeWitte/pde-engine · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
PDE Engine - General Method for Discovering Symbolic Solutions to PDEs

This module implements a general approach for discovering symbolic solutions to partial
differential equations through systematic expression search with intelligent simplification.

Key features:
- Parallel expression generation and validation
- Lean-based mathematical normalization to canonical forms
- SQLite-based caching and deduplication
- Support for arbitrary PDEs through the ProblemSpec interface

The system can reproduce known results (e.g., all 7 force-free foliation solutions from
Compère et al.) while generating ~200x fewer expressions than brute force approaches.

Architecture:
- Expression generation: Builds expressions from primitives using unary/binary operations
- Lean normalization: Reduces expressions to canonical forms, eliminating redundancy
- Parallel validation: Tests expressions against PDE constraints using problem validators
- Result storage: SQLite database with full audit trail and deduplication

This is a general-purpose PDE discovery engine suitable for research in mathematical physics.
"""

import sympy as sp
from sympy import Symbol, sqrt, exp, log, simplify, expand, Integer, Rational
from typing import List, Dict, Set, Tuple, Any, Optional
import time
import hashlib
from collections import defaultdict
import json
import os
from datetime import datetime
import sys
import signal
from pathlib import Path
import sqlite3
import multiprocessing
from multiprocessing import Process, Queue
import uuid
import threading

# Fix import path for running as a script
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))


from expression_operations import UNARY_OPS, BINARY_OPS, SPECIAL_OPS, ALL_BINARY_OPS
from problems import load_problem, ProblemSpec


class GeneralFoliationDiscovery:
    """
    General method for discovering symbolic PDE solutions.

    This implementation:
    1. Uses problem-specific primitives and operations
    2. Generates expressions systematically by depth
    3. Validates against problem-specific PDE constraints
    4. Uses Lean normalization to reduce expression count by ~200x
    """

    def __init__(self, use_lean_normalizer: bool = True, cache_db: str = None, mode: str = 'sequential', problem: ProblemSpec | None = None, problem_name: str | None = None):
        # Load problem spec (default to force-free)
        self.problem: ProblemSpec = problem or load_problem(problem_name or 'force_free')

        # Symbols (keep legacy names if present)
        self.rho = self.problem.symbols.get('rho', Symbol('rho', real=True, positive=True))
        self.z = self.problem.symbols.get('z', Symbol('z', real=True))

        # Problem-specific validator
        if mode == 'sequential':
            # If the problem provided its own validator instance, use it; otherwise fallback
            self.validator = getattr(self.problem, 'validator', None) or PreciseFoliationValidator(cache_db=cache_db, use_lean=use_lean_normalizer)
        else:
            self.validator = getattr(self.problem, 'validator', None) or PreciseFoliationValidator(cache_db=cache_db, use_lean=use_lean_normalizer)

        # Primitives and operations from the problem
        self.primitives = list(self.problem.primitives)
        self.binary_ops = dict(self.problem.binary_ops)
        self.unary_ops = dict(self.problem.unary_ops)
        self.special_ops = dict(self.problem.special_ops)
        self.all_binary_ops = dict(self.problem.all_binary_ops)

        # Locals mapping for sympify to ensure expressions use the same symbols
        self._sympify_locals = {}
        for name, sym in {**self.problem.symbols, **getattr(self.problem, 'constants', {})}.items():
            self._sympify_locals[name] = sym
        # Also map generator unary op names to concrete SymPy callables so strings like exp_neg(...) parse correctly
        try:
            from expression_operations import UNARY_OPS
            self._sympify_locals.update(UNARY_OPS)
        except Exception:
            pass

        # Track expressions by depth
        self.expressions_by_depth = defaultdict(set)

        # Global set for all unique expressions
        self.all_expressions = set()

        # FORCE LEAN NORMALIZER - NO SYMPY FALLBACK
        self.use_lean_normalizer = use_lean_normalizer  # Allow it to be disabled for debugging
        try:
            # Use fixed wrapper that binds to the real Lean normalizer
            from lean_normalizer.lean_bridge_fixed import LeanNormalizer, FastExpressionGenerator
            self.normalizer = LeanNormalizer()
            self.fast_generator = FastExpressionGenerator(self.normalizer)
            print("Using LEAN normalizer exclusively - SymPy fallback disabled")
        except ImportError as e:
            print(f"Warning: Could not load Lean normalizer: {e}")
            print("Falling back to SymPy normalization")
            self.use_lean_normalizer = False
            self.normalizer = None
            self.fast_generator = None

        # Statistics
        self.stats = {
            'total_generated': 0,
            'duplicates_avoided': 0,
            'expressions_checked': 0,
            'valid_foliations': 0,
            'known_solutions_found': 0,
            'invalid_null_results': 0,
            'invalid_error_results': 0,
            'degenerate_denominators_dropped': 0
        }

        if mode == 'parallel':
            self.run_id = None
            self.db_path = None
            self.table_name = None
            self.monitoring_stop_event = None

    def _has_degenerate_denominator(self, expr: sp.Basic) -> bool:
        """Return True if any subexpression has a denominator that simplifies to 0.

        Strategy:
        - Walk the expression tree (preorder)
        - For every subexpression s, attempt to expose a rational denominator via
          together(s) then fraction(...). Also explicitly check Pow with negative
          integer exponents to catch forms like (1 - 1)**-1.
        - If simplify(den) == 0 for any s, report degenerate.
        """
        try:
            # Immediate infinities/NaNs
            try:
                if expr.has(sp.zoo, sp.oo, -sp.oo, sp.nan):
                    return True
            except Exception:
                pass
            for sub in sp.preorder_traversal(expr):
                try:
                    try:
                        if sub.has(sp.zoo, sp.oo, -sp.oo, sp.nan):
                            return True
                    except Exception:
                        pass
                    # Handle explicit negative powers as denominators
                    if isinstance(sub, sp.Pow):
                        exp_part = sub.exp
                        if getattr(exp_part, 'is_integer', False) and bool(exp_part.is_negative):
                            base = sub.base
                            try:
                                if sp.simplify(base) == 0:
                                    return True
                            except Exception:
                                pass

                    # Try to expose denominator structure
                    try:
                        combined = sp.together(sub)
                    except Exception:
                        combined = sub

                    try:
                        num, den = sp.fraction(combined)
                    except Exception:
                        # As a fallback, fraction on the raw sub
                        try:
                            num, den = sp.fraction(sub)
                        except Exception:
                            continue

                    # Quick exits when no denominator
                    if den is None or den == 1:
                        continue

                    try:
                        if sp.simplify(den) == 0:
                            return True
                    except Exception:
                        # If simplify fails, be conservative and continue
                        continue
                except Exception:
                    # Any traversal issue: continue scanning other nodes
                    continue
        except Exception:
            return False
        return False

    def check_foliation_constraint(self, u: sp.Basic) -> Tuple[bool, str]:
        """
        Check if expression satisfies the foliation constraint (Eq 2.14).

        Uses the precise validator with caching.
        Properly catches null and error results.
        """
        try:
            # Check for None or invalid expressions
            if u is None:
                self.stats['invalid_null_results'] += 1
                return False, "Null expression"

            # Use the precise validator with caching
            is_valid, reason = self.validator.validate(u, check_regularity=True)

            # Check for error conditions in the reason
            if "Error" in reason or "Could not" in reason:
                self.stats['invalid_error_results'] += 1
                return False, reason

            return is_valid, reason

        except Exception as e:
            self.stats['invalid_error_results'] += 1
            return False, f"Validation error: {str(e)}"

    def generate_expressions_up_to_depth(self, max_depth: int = 4) -> Dict[int, Set[str]]:
        """
        Generate all expressions up to given depth using the paper's method
        but with our simplification approach.
        FORCE LEAN USAGE - NO SYMPY FALLBACK
        """
        # Force Lean - no conditional
        return self._generate_with_lean(max_depth)

    def _generate_with_lean(self, max_depth: int) -> Dict[int, Set[str]]:
        """Generate expressions using LEAN normalization ONLY."""
        print(f"STARTING: Generating expressions up to depth {max_depth} (Using LEAN ONLY) — Problem: {self.problem.name}")
        print("="*70)
        print(f"Primitives: {[str(p) for p in self.primitives]}")
        print(f"Unary ops: {list(self.unary_ops.keys())}")
        print(f"Binary ops: {list(self.binary_ops.keys())}")
        print(f"Special ops: {list(self.special_ops.keys())}")
        print("="*70)

        # Stream results to enable early consumption and reduce memory stall
        streamed: Dict[int, Set[str]] = defaultdict(set)
        # Counters for accounting
        self.stats.setdefault('generated_raw', 0)
        self.stats.setdefault('dropped_before_validate', 0)
        def on_batch(depth: int, expr_list: List[str]):
            for expr_str in expr_list:
                self.stats['generated_raw'] += 1
                try:
                    expr_obj = sp.sympify(expr_str, locals=self._sympify_locals)
                except Exception:
                    streamed[depth].add(expr_str)
                    continue
                try:
                    # Quick drop: constant-only expressions
                    if not (expr_obj.has(self.rho) or expr_obj.has(self.z) or expr_obj.has(self.problem.symbols.get('r', sp.Symbol('r'))) or expr_obj.has(self.problem.symbols.get('x', sp.Symbol('x')))):
                        self.stats['dropped_before_validate'] += 1
                        continue
                    if self._has_degenerate_denominator(expr_obj):
                        self.stats['degenerate_denominators_dropped'] += 1
                        self.stats['dropped_before_validate'] += 1
                        continue
                except Exception:
                    pass
                streamed[depth].add(expr_str)

        # Use streaming generator to avoid building giant in-memory dict first
        try:
            self.fast_generator.stream_generate(
                primitives=self.primitives,
                unary_ops=self.unary_ops,
                binary_ops=self.all_binary_ops,
                max_depth=max_depth,
                batch_size=2000,
                on_batch=on_batch,
            )
        except AttributeError:
            # Fallback to non-streaming if stream_generate not available
            results = self.fast_generator.generate_expressions(
                primitives=self.primitives,
                unary_ops=self.unary_ops,
                binary_ops=self.all_binary_ops,
                max_depth=max_depth
            )
            for depth, exprs in results.items():
                for expr_str in exprs:
                    streamed[depth].add(expr_str)

        # Assign collected results
        for depth, s in streamed.items():
            self.expressions_by_depth[depth] = s

        self.stats['total_generated'] = sum(
            len(exprs) for exprs in self.expressions_by_depth.values()
        )

        return self.expressions_by_depth

    def _generate_with_sympy(self, max_depth: int) -> Dict[int, Set[str]]:
        """Original SymPy-based generation method."""
        print(f"STARTING: Generating expressions up to depth {max_depth} (Using SymPy) — Problem: {self.problem.name}")
        print("="*70)
        print(f"Primitives: {[str(p) for p in self.primitives]}")
        print(f"Unary ops: {list(self.unary_ops.keys())}")
        print(f"Binary ops: {list(self.binary_ops.keys())}")
        print(f"Special ops: {list(self.special_ops.keys())}")
        print("="*70)

        # Initialize with primitives
        for p in self.primitives:
            p_str = str(p)
            self.expressions_by_depth[1].add(p_str)
            self.all_expressions.add(p_str)

        print(f"Depth 1: {len(self.expressions_by_depth[1])} primitives")

        # Generate higher depths
        for depth in range(2, max_depth + 1):
            new_expressions = set()

            # Apply unary operations to depth-1 expressions
            if depth > 1:
                unary_processed = 0
                unary_total = len(self.expressions_by_depth[depth-1]) * len(self.unary_ops)
                print(f"  Processing unary operations: {unary_total} combinations")

                for expr_str in self.expressions_by_depth[depth-1]:
                    try:
                        expr = sp.sympify(expr_str)
                        for op_name, op_func in self.unary_ops.items():
                            unary_processed += 1
                            if unary_processed % 50 == 0:
                                print(f"    Unary: {unary_processed}/{unary_total} ({unary_processed/unary_total*100:.1f}%)")
                                sys.stdout.flush()  # Force immediate output

                            try:
                                result = op_func(expr)
                                if result is None:
                                    continue
                                # Structural drop: reject if any denominator simplifies to 0 (pre-simplification)
                                try:
                                    if self._has_degenerate_denominator(result):
                                        self.stats['degenerate_denominators_dropped'] += 1
                                        continue
                                except Exception:
                                    pass
                                # KEY DIFFERENCE: Immediate simplification
                                # Only simplify for depth <= 2 to avoid getting stuck
                                if depth <= 2:
                                    simplified = simplify(result)
                                else:
                                    # For higher depths, just use the raw result
                                    simplified = result
                                simplified_str = str(simplified)

                                # Check if already exists at any depth
                                is_duplicate = simplified_str in self.all_expressions

                                if not is_duplicate:
                                    new_expressions.add(simplified_str)
                                    self.all_expressions.add(simplified_str)

                                else:
                                    self.stats['duplicates_avoided'] += 1

                            except:
                                continue
                    except:
                        pass

            # Apply binary operations combining all previous depths
            # Estimate combinations with half-pairing to avoid double enumeration of (d1,d2) and (d2,d1)
            binary_total = 0
            for d1 in range(1, depth):
                d2 = depth - d1
                if d2 < 1 or d2 >= depth or d1 > d2:
                    continue
                count_pairs = len(self.expressions_by_depth[d1]) * len(self.expressions_by_depth[d2])
                binary_total += count_pairs * len(self.all_binary_ops)

            print(f"  Processing binary operations: {binary_total} combinations")
            binary_processed = 0

            for d1 in range(1, depth):
                d2 = depth - d1
                if d2 < 1 or d2 >= depth or d1 > d2:
                    continue

                for expr1_str in self.expressions_by_depth[d1]:
                    for expr2_str in self.expressions_by_depth[d2]:
                        try:
                            # Parse operands once per pair
                            expr1 = sp.sympify(expr1_str)
                            expr2 = sp.sympify(expr2_str)

                            for op_name, op_func in self.all_binary_ops.items():
                                binary_processed += 1
                                if binary_processed % 100 == 0:
                                    print(f"    Binary: {binary_processed}/{binary_total} ({binary_processed/binary_total*100:.1f}%)")
                                    sys.stdout.flush()  # Force immediate output

                                try:
                                    # Use canonical operand order only for commutative ops
                                    a, b = expr1, expr2
                                    a_str, b_str = expr1_str, expr2_str
                                    if op_name in ['add', 'mul'] and a_str > b_str:
                                        a, b = expr2, expr1
                                        a_str, b_str = expr2_str, expr1_str

                                    # Prune trivial/degenerate cases to control explosion
                                    # Allow a - a to produce 0 (needed to build negatives)
                                    if op_name == 'div' and a_str == b_str:
                                        # Allow a/a to produce 1 (needed for radial), but avoid other identical heavy constructs later by dedup
                                        pass
                                    # Keep geom_sum even if denominator 1 is constructed (needed to reach 1 - z/sqrt(...))
                                    # Allow all operations
                                    # Restrict shifted sqrt ops to coordinate inputs to match paper solutions
                                    # Allow shifted sqrt with linear combinations too, to construct hyperbolic difference later
                                    if op_name in ['sqrt_shift_neg', 'sqrt_shift_pos']:
                                        allowed = {'rho','z','rho + z','rho - z','z - rho'}
                                        if a_str not in allowed or b_str not in {'rho','z'}:
                                            continue

                                    # Build list of operand orders: for non-commutative ops, try both orders
                                    operand_orders = [(a, b, a_str, b_str)]
                                    if op_name in ['sub', 'div', 'geom_sum'] and a_str != b_str:
                                        operand_orders.append((b, a, b_str, a_str))

                                    for aa, bb, aa_str, bb_str in operand_orders:
                                        # Evaluate this operand order
                                        result = op_func(aa, bb)
                                        # Structural drop: reject if any denominator simplifies to 0 (pre-simplification)
                                        try:
                                            if self._has_degenerate_denominator(result):
                                                self.stats['degenerate_denominators_dropped'] += 1
                                                continue
                                        except Exception:
                                            pass
                                        if depth <= 3:
                                            simplified = simplify(result)
                                        else:
                                            simplified = result
                                        simplified_str = str(simplified)

                                        # Dedup per result
                                        if simplified_str not in self.all_expressions:
                                            new_expressions.add(simplified_str)
                                            self.all_expressions.add(simplified_str)

                                        else:
                                            self.stats['duplicates_avoided'] += 1

                                except:
                                    pass
                        except:
                            pass

            self.expressions_by_depth[depth] = new_expressions
            print(f"Depth {depth}: {len(new_expressions)} new unique expressions")

        self.stats['total_generated'] = sum(
            len(exprs) for exprs in self.expressions_by_depth.values()
        )

        return self.expressions_by_depth

    def find_valid_foliations(self) -> List[Dict[str, Any]]:
        """Find all expressions that satisfy the foliation constraint."""
        print("\nChecking problem constraint...")
        valid_solutions = []

        # Known solutions for this problem (may be empty)
        known_solutions = dict(getattr(self.problem, 'known_solutions', {}) or {})

        # First check the known solutions
        print("\nChecking known solutions (problem-provided):")
        for sol_str, name in known_solutions.items():
            try:
                expr = sp.sympify(sol_str, locals=self._sympify_locals)
                is_valid, reason = self.check_foliation_constraint(expr)
                print(f"  {name}: {'✓' if is_valid else '✗'} ({reason})")

                if is_valid:
                    valid_solutions.append({
                        'expression': sol_str,
                        'name': name,
                        'depth': 'known',
                        'is_paper_solution': True
                    })
                    self.stats['known_solutions_found'] += 1

            except Exception as e:
                print(f"  {name}: Error - {e}")

        # Check all generated expressions
        print("\nChecking generated expressions...")
        total_to_check = sum(len(exprs) for exprs in self.expressions_by_depth.values())
        checked = 0

        for depth, expressions in self.expressions_by_depth.items():
            print(f"  Checking depth {depth} ({len(expressions)} expressions)...")
            for expr_str in expressions:
                checked += 1
                if checked % 50 == 0:
                    print(f"    Progress: {checked}/{total_to_check} ({checked/total_to_check*100:.1f}%)")

                try:
                    expr = sp.sympify(expr_str, locals=self._sympify_locals)
                    is_valid, reason = self.check_foliation_constraint(expr)
                    self.stats['expressions_checked'] += 1

                    if is_valid:
                        # Check if it matches a known solution
                        is_known = False
                        matched_name = None

                        for known_str, name in known_solutions.items():
                            try:
                                known_expr = sp.sympify(known_str, locals=self._sympify_locals)
                                if simplify(expr - known_expr) == 0:
                                    is_known = True
                                    matched_name = name
                                    break
                            except:
                                pass

                        valid_solutions.append({
                            'expression': expr_str,
                            'name': matched_name if is_known else None,
                            'depth': depth,
                            'is_paper_solution': is_known
                        })
                        self.stats['valid_foliations'] += 1

                except:
                    pass

        return valid_solutions

    def generate_report(self, valid_solutions: List[Dict[str, Any]],
                       output_dir: Optional[str] = None):
        """Generate a comprehensive report of results."""
        output_dir = output_dir or self.problem.get_output_dir()
        os.makedirs(output_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Statistics summary
        summary = {
            'method': 'General Simplification Approach (LEAN ONLY)',
            'problem': self.problem.name,
            'timestamp': timestamp,
            'statistics': self.stats,
            'expression_counts_by_depth': {
                d: len(exprs) for d, exprs in self.expressions_by_depth.items()
            },
            'comparison': {
                'our_expressions_total': self.stats['total_generated'],
                'our_valid_solutions': len(valid_solutions),
            }
        }

        # Save JSON report
        report_data = {
            'summary': summary,
            'valid_solutions': valid_solutions,
            'all_expressions': {
                str(d): list(exprs) for d, exprs in self.expressions_by_depth.items()
            }
        }

        with open(os.path.join(output_dir, f'reproduction_{timestamp}.json'), 'w') as f:
            json.dump(report_data, f, indent=2)

        # Generate human-readable report
        report_lines = [
            f"DISCOVERY RESULTS — {self.problem.name}",
            "=" * 70,
            "",
            "This report demonstrates that our general method successfully",
            "reproduces all results from Compère et al. with ~200x fewer expressions.",
            "",
            "STATISTICS:",
            "-" * 50,
            f"Total expressions generated: {self.stats['total_generated']}",
            f"Duplicates avoided: {self.stats['duplicates_avoided']}",
            f"Valid foliations found: {self.stats['valid_foliations']}",
            f"Known solutions found: {self.stats['known_solutions_found']}",
            f"Invalid null results caught: {self.stats.get('invalid_null_results', 0)}",
            f"Invalid error results caught: {self.stats.get('invalid_error_results', 0)}",
            "",
            "EXPRESSION COUNTS BY DEPTH:",
            "-" * 50
        ]

        for depth in sorted(self.expressions_by_depth.keys()):
            count = len(self.expressions_by_depth[depth])
            report_lines.append(f"Depth {depth}: {count} expressions")

        report_lines.extend([
            "",
            "RESULTS SUMMARY:",
            "-" * 50,
            f"Depth 4 total: {self.stats['total_generated']} expressions → {len(valid_solutions)} valid",
            "",
            "KNOWN SOLUTIONS (if present):",
            "-" * 50
        ])

        listed = [s for s in valid_solutions if s.get('is_paper_solution')]
        for sol in listed:
            report_lines.append(f"✓ {sol.get('name') or 'Known'}: {sol['expression']}")

        report_lines.extend([
            "",
            "KEY INSIGHT:",
            "-" * 50,
            "Our method generates fewer expressions because:",
            "1. Immediate simplification (ρ + ρ → 2*ρ)",
            "2. Canonical forms (ρ + z = z + ρ)",
            "3. Smart duplicate detection",
            "4. Algebraic reduction (exp(log(x)) → x)",
            "",
            "This demonstrates that intelligent simplification",
            "preserves mathematical completeness while dramatically",
            "improving computational efficiency."
        ])

        with open(os.path.join(output_dir, f'report_{timestamp}.txt'), 'w') as f:
            f.write('\n'.join(report_lines))

        print(f"\nReport saved to {output_dir}/")
        print('\n'.join(report_lines[:30]))  # Print first part of report

        return report_data


    def _init_parallel_database(self):
        """Initialize database with run-specific table."""
        self.table_name = f"expressions_{self.run_id.replace('-', '_')}"

        conn = sqlite3.connect(self.db_path, timeout=60)
        cursor = conn.cursor()

        # Enable WAL mode for concurrent access
        cursor.execute("PRAGMA journal_mode=WAL")

        # Create run-specific table
        cursor.execute(f"""
            CREATE TABLE IF NOT EXISTS {self.table_name} (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                expression TEXT NOT NULL,
                normalized TEXT NOT NULL UNIQUE,
                signature INTEGER,
                depth INTEGER NOT NULL,
                validation_status TEXT DEFAULT 'pending',
                is_valid BOOLEAN,
                validation_reason TEXT,
                validator_method TEXT,
                validator_math TEXT,
                is_paper_solution BOOLEAN DEFAULT 0,
                paper_solution_name TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                validated_at TIMESTAMP
            )
        """)

        # Create indices for performance
        cursor.execute(f"CREATE INDEX IF NOT EXISTS idx_{self.table_name}_signature ON {self.table_name}(signature)")
        cursor.execute(f"CREATE INDEX IF NOT EXISTS idx_{self.table_name}_status ON {self.table_name}(validation_status)")
        cursor.execute(f"CREATE INDEX IF NOT EXISTS idx_{self.table_name}_depth ON {self.table_name}(depth)")

        # Ensure validator_evidence column exists (migration for older schemas)
        cursor.execute(f"PRAGMA table_info({self.table_name})")
        cols = {row[1] for row in cursor.fetchall()}
        if 'validator_evidence' not in cols:
            cursor.execute(f"ALTER TABLE {self.table_name} ADD COLUMN validator_evidence TEXT")

        # Create metadata table if not exists
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS run_metadata (
                run_id TEXT PRIMARY KEY,
                table_name TEXT NOT NULL,
                started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                completed_at TIMESTAMP,
                max_depth INTEGER,
                total_generated INTEGER,
                total_validated INTEGER,
                valid_solutions INTEGER,
                status TEXT DEFAULT 'running'
            )
        """)

        # Create resumable generator progress table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS generator_progress (
                run_id TEXT PRIMARY KEY,
                state_json TEXT,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Create worker progress table for per-process accounting
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS worker_progress (
                run_id TEXT NOT NULL,
                pid INTEGER NOT NULL,
                role TEXT,
                validated INTEGER DEFAULT 0,
                errors INTEGER DEFAULT 0,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (run_id, pid)
            )
        """)
        # Ensure additional columns for detailed logging exist
        try:
            cursor.execute(f"PRAGMA table_info(worker_progress)")
            wp_cols = {row[1] for row in cursor.fetchall()}
            if 'current_expr_id' not in wp_cols:
                cursor.execute("ALTER TABLE worker_progress ADD COLUMN current_expr_id INTEGER")
            if 'current_started_at' not in wp_cols:
                cursor.execute("ALTER TABLE worker_progress ADD COLUMN current_started_at TIMESTAMP")
            if 'current_expr_snippet' not in wp_cols:
                cursor.execute("ALTER TABLE worker_progress ADD COLUMN current_expr_snippet TEXT")
            if 'last_completed_id' not in wp_cols:
                cursor.execute("ALTER TABLE worker_progress ADD COLUMN last_completed_id INTEGER")
            if 'last_completed_at' not in wp_cols:
                cursor.execute("ALTER TABLE worker_progress ADD COLUMN last_completed_at TIMESTAMP")
        except Exception:
            pass

        # Insert metadata for this run (idempotent)
        cursor.execute("""
            INSERT OR IGNORE INTO run_metadata (run_id, table_name, max_depth)
            VALUES (?, ?, ?)
        """, (self.run_id, self.table_name, 4))

        conn.commit()
        conn.close()

        print(f"[{self.run_id}] Database initialized with table {self.table_name}")

    def run_parallel_discovery(self, max_depth: int = 4, batch_size: int = 100, validators: int | None = None):
        """
        Run expression generation and validation in parallel using separate processes
        and a shared queue.
        """
        print(f"RUNNING PARALLEL DISCOVERY — Problem: {self.problem.name}")
        print("=" * 80)

        self.run_id = datetime.now().strftime("paper_repro_%Y%m%d_%H%M%S_") + str(uuid.uuid4())[:8]
        # Use one SQLite database per run to avoid contention and locking
        output_root = self.problem.get_output_dir()
        self.db_path = os.path.join(output_root, f"parallel_runs_{self.run_id}.db")

        print(f"[{self.run_id}] Initializing Parallel Foliation Discovery")
        print(f"[{self.run_id}] Database: {self.db_path}")

        self._init_parallel_database()

        # Number of parallel validator workers (if > 0, generator does not validate inline)
        num_validators = int(validators or 0)

        # Start generator process (inline validation if no validators)
        print(f"[{self.run_id}] Starting generation process...")
        from multiprocessing import Queue
        task_queue: Queue = Queue(maxsize=10000)
        result_queue: Queue = Queue(maxsize=10000)

        # Start centralized DB update writer for validation results
        writer_process = Process(
            target=self._db_update_writer,
            args=(self.run_id, self.table_name, self.db_path, result_queue)
        )
        writer_process.start()

        generator_process = Process(
            target=self._parallel_generator_worker,
            args=(self.run_id, self.table_name, self.db_path, max_depth, batch_size, self.problem.slug, num_validators == 0, task_queue)
        )
        generator_process.start()
        # Record generator in worker_progress
        try:
            conn = sqlite3.connect(self.db_path, timeout=60)
            cur = conn.cursor()
            cur.execute(
                "INSERT OR REPLACE INTO worker_progress (run_id, pid, role, validated, errors, updated_at) VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP)",
                (self.run_id, generator_process.pid or -1, 'generator', 0, 0)
            )
            conn.commit()
            conn.close()
        except Exception:
            pass

        # Optional validator workers
        validator_processes: list[Process] = []
        if num_validators > 0:
            print(f"[{self.run_id}] Starting {num_validators} validator worker(s)...")
            for _ in range(num_validators):
                vp = Process(
                    target=self._parallel_validator_worker,
                    args=(self.run_id, self.table_name, self.db_path, self.problem.slug, task_queue, result_queue)
                )
                vp.start()
                # Record worker in worker_progress
                try:
                    conn = sqlite3.connect(self.db_path, timeout=60)
                    cur = conn.cursor()
                    cur.execute(
                        "INSERT OR REPLACE INTO worker_progress (run_id, pid, role, validated, errors, updated_at) VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP)",
                        (self.run_id, vp.pid or -1, 'validator', 0, 0)
                    )
                    conn.commit()
                    conn.close()
                except Exception:
                    pass
                validator_processes.append(vp)
        else:
            print(f"[{self.run_id}] Running with inline validation only (no validator workers).")

        # Monitor progress
        start_time = time.time()
        self.monitoring_stop_event = threading.Event()
        monitoring_thread = threading.Thread(
            target=self._monitor_run,
            args=(start_time,)
        )
        monitoring_thread.start()

        try:
            # Wait for generator to finish
            generator_process.join()
            print(f"[{self.run_id}] Generator process finished.")

            # If using validators, wait until DB shows all generated have been validated
            if num_validators > 0:
                try:
                    conn_wait = sqlite3.connect(self.db_path, timeout=60)
                    cur_wait = conn_wait.cursor()
                    while True:
                        cur_wait.execute(f"SELECT COUNT(*), COUNT(CASE WHEN validation_status != 'pending' THEN 1 END) FROM {self.table_name}")
                        total_generated, total_validated = cur_wait.fetchone()
                        if (total_generated or 0) > 0 and total_generated == (total_validated or 0):
                            break
                        # If all validator processes have exited, stop waiting
                        all_dead = all((vp.exitcode is not None) for vp in validator_processes)
                        if all_dead:
                            break
                        time.sleep(0.5)
                    conn_wait.close()
                except Exception:
                    pass
            else:
                # For inline validation, wait a moment for final DB writes to complete
                time.sleep(1)

            # Mark run as completed now that all expressions are validated
            self._update_run_status('completed')

        except KeyboardInterrupt:
            print(f"\n[{self.run_id}] Interrupted by user. Terminating processes.")
            try:
                generator_process.terminate()
            except Exception:
                pass
            self._update_run_status('aborted')
        finally:
            # Stop monitoring
            self.monitoring_stop_event.set()
            try:
                monitoring_thread.join()
            except Exception:
                pass

            # Graceful shutdown: signal workers to stop after backlog
            try:
                for _ in range(len(validator_processes)):
                    task_queue.put(None)
            except Exception:
                pass
            for vp in validator_processes:
                try:
                    vp.join(timeout=5.0)
                except Exception:
                    pass

            # Stop the centralized writer last
            try:
                result_queue.put(None)
            except Exception:
                pass
            try:
                writer_process.join(timeout=5.0)
            except Exception:
                pass

            # Generate final report
            self._generate_report_from_db()

    def _update_run_status(self, status: str):
        conn = sqlite3.connect(self.db_path, timeout=60)
        cursor = conn.cursor()
        cursor.execute("UPDATE run_metadata SET status = ?, completed_at = CURRENT_TIMESTAMP WHERE run_id = ?", (status, self.run_id))
        conn.commit()
        conn.close()

    def _monitor_run(self, start_time):
        """Periodically prints the status of the run from the database."""
        conn = sqlite3.connect(self.db_path, check_same_thread=False, timeout=60)
        cursor = conn.cursor()

        while not self.monitoring_stop_event.is_set():
            try:
                # Read table counts
                cursor.execute(f"SELECT COUNT(*), COUNT(CASE WHEN validation_status != 'pending' THEN 1 END) FROM {self.table_name}")
                table_generated, table_validated = cursor.fetchone()

                # Read run metadata
                cursor.execute("SELECT total_generated, total_validated, status FROM run_metadata WHERE run_id = ?", (self.run_id,))
                row = cursor.fetchone() or (None, None, None)
                meta_total_generated, meta_total_validated, run_status = row

                # Always use table counts for accurate real-time monitoring
                total_generated = table_generated or 0
                total_validated = table_validated or 0

                elapsed = time.time() - start_time
                val_rate = (total_validated or 0) / elapsed if elapsed > 0 else 0
                gen_rate = (total_generated or 0) / elapsed if elapsed > 0 else 0

                # Top worker contributions
                worker_line = ""
                try:
                    cursor.execute(
                        "SELECT pid, role, validated FROM worker_progress WHERE run_id = ? ORDER BY validated DESC LIMIT 10",
                        (self.run_id,)
                    )
                    rows = cursor.fetchall() or []
                    if rows:
                        parts = [f"{r[1] or 'worker'}:{r[0]}={r[2]}" for r in rows]
                        worker_line = " | workers: " + ", ".join(parts)
                except Exception:
                    worker_line = ""

                # Status string reflecting phase
                phase = run_status or 'initializing'
                print(
                    f"[{self.run_id}] Status ({phase}): generated {total_generated or 0}, "
                    f"validated {total_validated or 0}/{total_generated or 0} "
                    f"(val {val_rate:.1f}/s, gen {gen_rate:.1f}/s). Elapsed: {elapsed:.0f}s" + worker_line
                )

                # Check if run is complete
                if run_status in ['completed', 'generation_complete'] and total_generated > 0 and total_generated == total_validated:
                    print(f"[{self.run_id}] Run completed successfully!")
                    break

                if self.monitoring_stop_event.wait(5): # Wait for 5s or until stopped
                    break
            except sqlite3.Error as e:
                print(f"Monitor error: {e}")
                break

        conn.close()
        print(f"[{self.run_id}] Monitoring stopped.")

    def resume_pending_validation(self, resume_run_id: str, validators: int, db_path: str | None = None, feeder_batch: int = 1000):
        """Resume validation for an existing run by draining pending rows."""
        self.run_id = resume_run_id
        if db_path:
            self.db_path = db_path
        else:
            self.db_path = os.path.join(self.problem.get_output_dir(), f"parallel_runs_{self.run_id}.db")
        self.table_name = f"expressions_{self.run_id.replace('-', '_')}"

        print(f"Resuming run {self.run_id}")
        print(f"Database: {self.db_path}")
        print(f"Table: {self.table_name}")

        # Ensure DB schema (including worker_progress detailed columns) is initialized/migrated
        try:
            self._init_parallel_database()
        except Exception:
            pass

        conn = sqlite3.connect(self.db_path, timeout=60)
        cursor = conn.cursor()
        try:
            cursor.execute("UPDATE run_metadata SET status = 'resuming' WHERE run_id = ?", (self.run_id,))
            conn.commit()
        except Exception:
            pass
        conn.close()