Fix pagerank convergence threshold for large sparse graphs

pathway · pathway · commit d59f6abfdd48 · 2026-04-14T10:40:13.000-07:00
The previous convergence check `norm < n * tol` scaled the L1 tolerance by graph size. Because the L1 distance between two probability vectors is bounded by 2, the check can never fail once N > 2/tol, and it is already far too loose well below that size (with the default tol = 1e-6, `n * tol` is 0.002 at N = 2000). On large sparse graphs, the first power-iteration step's L1 diff from the uniform starting vector could trivially fall below `n * tol`, causing `pagerank` to return the initial uniform 1/N distribution without any indication of failure. Minimal reproduction: a 2000-node graph with 2 edges (path 0->1->2) returns `pr[2] = 0.0005` (uniform) instead of the correct `pr[2] = 0.00128` (about 2.6x the uniform value). This patch changes the check to `norm < tol` (absolute L1 tolerance, matching the docstring semantics) and adds a regression test. Fixes #1575.
diff --git a/releasenotes/notes/fix-pagerank-convergence-threshold-1575.yaml b/releasenotes/notes/fix-pagerank-convergence-threshold-1575.yaml
@@ -0,0 +1,13 @@
+---
+fixes:
+  - |
+    Fixed a silent convergence bug in :func:`~rustworkx.pagerank` that caused
+    the function to return the initial uniform ``1/N`` distribution on large
+    sparse graphs with no error. The convergence check ``norm < n * tol``
+    scaled the L1 tolerance by graph size, which (since L1 distance between
+    probability vectors is bounded by 2) rendered the threshold useless once
+    ``N > 2/tol``. On such graphs the first power-iteration step's L1 diff
+    from the uniform starting vector could trivially fall below ``n * tol``,
+    incorrectly reporting convergence. The threshold is now an absolute
+    ``norm < tol``, matching the docstring semantics. See
+    `#1575 <https://github.com/Qiskit/rustworkx/issues/1575>`__ for details.
diff --git a/src/link_analysis.rs b/src/link_analysis.rs
@@ -199,7 +199,14 @@ pub fn pagerank(
         let new_popularity =
             alpha * ((&a * &popularity) + (dangling_sum * &dangling_weights)) + &damping;
         let norm: f64 = new_popularity.l1_dist(&popularity).unwrap();
-        if norm < (n as f64) * tol {
+        // The L1 distance between two probability vectors is bounded by 2, so
+        // `(n as f64) * tol` can never reject once N > 2/tol (N > 2,000,000
+        // with the default tol = 1e-6) and is far too loose long before that
+        // (0.002 at N = 2000). On large sparse graphs the first power-iteration
+        // step's L1 diff from the uniform starting vector can fall below
+        // `n * tol`, which returned that uniform vector as if it had converged.
+        // See https://github.com/Qiskit/rustworkx/issues/1575
+        if norm < tol {
             has_converged = true;
             break;
         } else {
diff --git a/tests/digraph/test_pagerank.py b/tests/digraph/test_pagerank.py
@@ -321,3 +321,34 @@ def test_multi_digraph_versus_weighted(self):
 
         for v in multi_graph.node_indices():
             self.assertAlmostEqual(ranks_multi[v], ranks_weight[v], delta=1.0e-4)
+
+    def test_sparse_large_graph_does_not_return_uniform(self):
+        """Regression test for #1575.
+
+        On a large graph with very few active edges, the first power-iteration
+        step's L1 diff from the uniform starting vector can be very small.
+        The old convergence check `norm < n * tol` would trip on iteration 0
+        (because `n * tol` grows with graph size) and return the uniform
+        initial vector, silently corrupting results.
+
+        This test builds a 2000-node graph with only 2 edges (path 0->1->2)
+        and verifies that pagerank returns non-uniform scores — specifically
+        that node 2 has a higher score than node 0 (since mass flows 0->1->2).
+        """
+        graph = rustworkx.PyDiGraph()
+        for _ in range(2000):
+            graph.add_node(None)
+        graph.add_edge(0, 1, None)
+        graph.add_edge(1, 2, None)
+
+        ranks = rustworkx.pagerank(graph, alpha=0.85)
+
+        uniform_value = 1.0 / 2000
+        # Node 2 should have meaningfully higher PR than uniform
+        self.assertGreater(ranks[2], uniform_value * 2.0)
+        # Node 1 should also have higher PR than uniform (mass flows through it)
+        self.assertGreater(ranks[1], uniform_value * 1.5)
+        # Node 0 (source of the path, no incoming edges) should be near uniform
+        self.assertAlmostEqual(ranks[0], uniform_value, delta=uniform_value * 0.5)
+        # A randomly picked isolated node should be uniform
+        self.assertAlmostEqual(ranks[500], uniform_value, delta=uniform_value * 0.1)