import math

import numpy as np
import scipy
import scipy.sparse
4+
def Naive(matrix, threshold):
    """Hard-threshold ``matrix`` and return the result in CSR format.

    Every entry whose absolute value is at most ``threshold`` is replaced
    by zero; all other entries are kept as they are.  The input array is
    left untouched because the work is done on a copy.

    Args:
        matrix: Dense NumPy array to sparsify.
        threshold: Inclusive magnitude cutoff.

    Returns:
        A ``scipy.sparse.csr_matrix`` built from the thresholded copy.
    """
    pruned = matrix.copy()
    negligible = np.abs(matrix) <= threshold
    np.putmask(pruned, negligible, 0)
    return scipy.sparse.csr_matrix(pruned)
10+
def Naive_nonzeros(matrix, threshold):
    """Count the entries that survive hard thresholding.

    Args:
        matrix: Dense NumPy array.
        threshold: Magnitude cutoff; entries must be strictly larger in
            absolute value to be counted.

    Returns:
        Number of entries with ``|entry| > threshold``.
    """
    survivors = np.abs(matrix) > threshold
    return survivors.sum()
13+
def AHK06(matrix, threshold):
    """Randomly round small entries to ``0`` or ``sign(entry) * threshold``.

    Entries with ``|entry| >= threshold`` are kept unchanged.  Every smaller
    entry is replaced by ``sign(entry) * threshold`` with probability
    ``|entry| / threshold`` and by zero otherwise, so each rounded entry is
    correct in expectation.  (Presumably the Arora-Hazan-Kale 2006 scheme,
    judging by the function name -- confirm against the intended reference.)

    The input is not modified.  Randomness comes from the global
    ``np.random`` state.

    Args:
        matrix: Dense 2-D NumPy array.
        threshold: Positive rounding magnitude.

    Returns:
        A ``scipy.sparse.csr_matrix`` with the rounded entries.
    """
    source = np.asarray(matrix)
    if not np.issubdtype(source.dtype, np.inexact):
        # An integer buffer would silently truncate a fractional threshold
        # when it is written back, so work in floating point.
        source = source.astype(float)

    magnitude = np.abs(source)
    small = magnitude < threshold
    draws = np.random.random(source.shape)

    rounded = source.copy()
    rounded[small] = 0
    # draws < |x| / threshold, written without the division so that a zero
    # threshold cannot produce inf/nan (no entry is "small" in that case).
    promote = small & (draws * threshold < magnitude)
    # Keep the sign of the original entry; writing +threshold for a negative
    # entry would flip its sign and bias the result.
    rounded[promote] = np.sign(source[promote]) * threshold

    return scipy.sparse.csr_matrix(rounded)
23+
def AHK06_nonzeros(matrix, threshold):
    """Expected number of non-zeros produced by ``AHK06``-style rounding.

    Entries with ``|entry| >= threshold`` always survive; each smaller entry
    survives with probability ``|entry| / threshold``.  The expectation is
    therefore the count of large entries plus the sum of those
    probabilities.

    Args:
        matrix: Dense NumPy array.
        threshold: Rounding magnitude used by the sparsifier.

    Returns:
        The expected non-zero count (a float in general).
    """
    magnitude = np.abs(matrix)
    small = magnitude < threshold
    always_kept = magnitude.size - np.count_nonzero(small)
    # Use magnitudes: a signed sum would let negative entries cancel
    # positive ones and under-report the expected count.
    return always_kept + np.sum(magnitude[small] / threshold)
28+
def compute_row_distribution(matrix, s, delta, row_norms):
    """Compute a per-row weight vector that sums to (approximately) one.

    With ``z = row_norms / sum(row_norms)``, the weights have the form
    ``(alpha*z/(2*zeta) + sqrt((alpha*z/(2*zeta))**2 + beta*z/zeta))**2``
    where ``alpha = sqrt(log((m+n)/delta)/s)`` and
    ``beta = log((m+n)/delta)/(3*s)``.  The scalar ``zeta`` starts at 1 and
    is repeatedly multiplied by the current sum of the weights until that
    sum is within ``1e-5`` of one.

    Args:
        matrix: Array whose ``shape`` supplies ``(m, n)``; its values are
            not read here.
        s: Sample budget used in ``alpha`` and ``beta``.
        delta: Value dividing ``m + n`` inside the logarithm.
        row_norms: 1-D array of row norms, one per row.

    Returns:
        1-D array of row weights, same length as ``row_norms``.
    """
    num_rows, num_cols = matrix.shape
    log_term_a = np.log((num_rows + num_cols) / delta) / s
    log_term_b = np.log((num_rows + num_cols) / delta) / (3 * s)
    alpha = math.sqrt(log_term_a)
    beta = log_term_b
    shares = row_norms / np.sum(row_norms)

    def weights_for(scale):
        # Single evaluation of the closed-form weights for a given zeta.
        half = alpha * shares / (2 * scale)
        return (half + (half ** 2 + beta * shares / scale) ** (1 / 2)) ** 2

    zeta = 1
    while True:
        rou = weights_for(zeta)
        total = np.sum(rou)
        # Phrased as "not (... > tol)" so that a NaN total ends the loop.
        if not (np.abs(total - 1) > 1e-5):
            return rou
        zeta *= total
41+
def AKL13(matrix, s):
    """Sparsify ``matrix`` by sampling ``s`` entries with replacement.

    The work is done on the transpose of ``matrix``.  Each non-zero entry
    ``(i, j)`` of the transpose is drawn with probability proportional to
    ``rou[i] * |entry| / row_l1_norm[i]``, where ``rou`` comes from
    ``compute_row_distribution`` (called with ``delta = 0.1``).  Every draw
    adds ``entry / (probability * s)`` at its position, and the accumulated
    array is transposed back.  (Presumably the Achlioptas-Karnin-Liberty
    2013 scheme, judging by the name -- confirm.)

    Randomness comes from the global ``np.random`` state.

    Args:
        matrix: Dense 2-D NumPy array.
        s: Number of samples to draw; converted with ``int``.

    Returns:
        A ``scipy.sparse.csr_matrix`` with the same shape as ``matrix``.
    """
    transposed = matrix.T
    s = int(s)
    n, d = transposed.shape

    rows, cols = transposed.nonzero()
    if rows.size == 0:
        # Nothing to sample from; normalising would divide zero by zero.
        return scipy.sparse.csr_matrix(matrix.shape)

    row_norms = np.linalg.norm(transposed, axis=1, ord=1)
    rou = compute_row_distribution(transposed, s, 0.1, row_norms)
    # All-zero rows have no entries to sample; avoid 0/0 below.
    row_norms[row_norms == 0] = 1

    # Probabilities must be non-negative, so weight by magnitude; the
    # signed value is only used for the amount that is added back.
    weights = rou.reshape((n, 1)) * np.abs(transposed) / row_norms.reshape((n, 1))
    probs = weights[rows, cols]
    probs /= np.sum(probs)

    values = transposed[rows, cols]
    selected = np.random.choice(values.size, s, p=probs, replace=True)

    result = np.zeros((n, d))
    # np.add.at accumulates repeated indices instead of overwriting them.
    np.add.at(
        result,
        (rows[selected], cols[selected]),
        values[selected] / (probs[selected] * s),
    )
    return scipy.sparse.csr_matrix(result.T)
62+
def AKL13_nonzeros(matrix, s):
    """Number of distinct entries hit by one run of ``AKL13``-style sampling.

    Uses the same sampling distribution as ``AKL13``: on the transpose of
    ``matrix``, each non-zero entry ``(i, j)`` is drawn with probability
    proportional to ``rou[i] * |entry| / row_l1_norm[i]``.  ``s`` draws are
    made with replacement and the distinct positions are counted.  The
    result is one random realisation, not an expectation.

    Args:
        matrix: Dense 2-D NumPy array.
        s: Number of samples to draw; converted with ``int``.

    Returns:
        Count of distinct sampled entries (``0`` for an all-zero matrix).
    """
    transposed = matrix.T
    s = int(s)
    n = transposed.shape[0]

    rows, cols = transposed.nonzero()
    if rows.size == 0:
        # Nothing to sample from; normalising would divide zero by zero.
        return 0

    row_norms = np.linalg.norm(transposed, axis=1, ord=1)
    rou = compute_row_distribution(transposed, s, 0.1, row_norms)
    # All-zero rows have no entries to sample; avoid 0/0 below.
    row_norms[row_norms == 0] = 1

    # Probabilities must be non-negative, so weight by magnitude.
    weights = rou.reshape((n, 1)) * np.abs(transposed) / row_norms.reshape((n, 1))
    probs = weights[rows, cols]
    probs /= np.sum(probs)

    selected = np.random.choice(probs.size, s, p=probs, replace=True)
    return len(np.unique(selected))
79+
def row_operation(copy_row, threshold):
    """Redistribute the small entries of one row, in place.

    Non-zero entries with ``|entry| <= threshold`` are zeroed.  Their total
    magnitude ``S`` is then handed back in ``k = ceil(S / threshold)`` equal
    portions: each portion goes to one of the zeroed positions, chosen with
    probability ``|entry| / S`` (with replacement), and carries the sign of
    the entry that was originally there.  For a row without negative small
    entries this keeps the row sum unchanged.

    Randomness comes from the global ``np.random`` state.

    Args:
        copy_row: 1-D NumPy array; modified in place.
        threshold: Inclusive magnitude cutoff for "small" entries.

    Returns:
        None.
    """
    small = np.flatnonzero((np.abs(copy_row) <= threshold) & (copy_row != 0))
    if small.size == 0:
        return

    small_values = copy_row[small]  # fancy indexing copies, safe to zero below
    copy_row[small] = 0

    small_magnitudes = np.abs(small_values)
    mass = np.sum(small_magnitudes)
    if mass == 0:
        return

    # Magnitudes (not signed values) drive both k and the probabilities;
    # signed values give a negative k or negative probabilities as soon as
    # the row contains negative small entries.
    k = math.ceil(mass / threshold)
    picks = np.random.choice(small.size, k, p=small_magnitudes / mass, replace=True)
    # np.add.at accumulates when the same position is picked more than once.
    np.add.at(copy_row, small[picks], np.sign(small_values[picks]) * (mass / k))
91+
def RMR(matrix, threshold):
    """Apply ``row_operation`` to every row of a copy of ``matrix``.

    Args:
        matrix: Dense 2-D NumPy array; not modified.
        threshold: Cutoff forwarded to ``row_operation``.

    Returns:
        A ``scipy.sparse.csr_matrix`` built from the processed copy.
    """
    copy_matrix = matrix.copy()
    # Iterating a 2-D ndarray yields writable row views, so the in-place
    # edits made by row_operation land in copy_matrix.  An explicit loop
    # avoids depending on np.apply_along_axis passing views to a callback
    # that returns None, and it also copes with a matrix that has no rows.
    for row in copy_matrix:
        row_operation(row, threshold)
    return scipy.sparse.csr_matrix(copy_matrix)
97+
def RMR_nonzeros(matrix, threshold):
    """Expected non-zero count after ``RMR``-style row redistribution.

    For each row, entries with ``|entry| > threshold`` each count as one.
    The remaining entries have total magnitude ``S``; when ``S`` is
    non-zero, ``k = ceil(S / threshold)`` draws are assumed and each small
    entry contributes ``1 - (1 - |entry| / S) ** k``, the chance that it is
    picked at least once.

    Args:
        matrix: Dense 2-D NumPy array.
        threshold: Inclusive magnitude cutoff for "small" entries.

    Returns:
        The expected number of non-zeros summed over all rows.
    """
    num_rows, num_cols = matrix.shape
    expected = 0
    for row_index in range(num_rows):
        magnitudes = np.abs(matrix[row_index, :])
        small_magnitudes = magnitudes[magnitudes <= threshold]
        large_count = num_cols - len(small_magnitudes)
        small_mass = np.sum(small_magnitudes)
        if small_mass == 0:
            # No mass to redistribute: only the large entries remain.
            expected += large_count
            continue
        draws = math.ceil(small_mass / threshold)
        hit_chance = 1 - (1 - small_magnitudes / small_mass) ** draws
        expected += large_count + np.sum(hit_chance)
    return expected
110+
def DZ11(matrix, threshold):
    """Sparsify ``matrix`` by trimming tiny entries and sampling the rest.

    Steps, for an ``n x d`` input:

    1. Zero every entry with ``|entry| <= threshold / (n + d)`` (on a copy).
    2. Set the sample count to
       ``int(14 * (n + d) * log(sqrt(2)/2 * (n + d)) * (||A||_F / threshold)**2)``.
    3. Draw that many surviving entries with replacement, with probability
       proportional to the squared entry, and add
       ``entry / (probability * samples)`` at each drawn position.

    (Presumably the Drineas-Zouzias 2011 scheme, judging by the name --
    confirm.)  Randomness comes from the global ``np.random`` state.

    Args:
        matrix: Dense 2-D NumPy array; not modified.
        threshold: Value controlling both the trimming cutoff and the
            sample count.

    Returns:
        A ``scipy.sparse.csr_matrix`` of shape ``(n, d)``.
    """
    n, d = matrix.shape
    frobenius = np.linalg.norm(matrix, ord="fro")

    trimmed = matrix.copy()
    trimmed[np.abs(matrix) <= threshold / (n + d)] = 0

    s = int(14 * (n + d) * np.log(np.sqrt(2) / 2 * (n + d)) * (frobenius / threshold) ** 2)

    rows, cols = trimmed.nonzero()
    values = trimmed[rows, cols]
    probs = values * values
    probs /= np.sum(probs)

    selected = np.random.choice(len(values), s, p=probs, replace=True)

    accumulated = np.zeros((n, d))
    # Repeated positions must accumulate, hence np.add.at.
    contributions = values[selected] / (probs[selected] * s)
    np.add.at(accumulated, (rows[selected], cols[selected]), contributions)
    return scipy.sparse.csr_matrix(accumulated)
128+
def DZ11_nonzeros(matrix, threshold):
    """Number of distinct entries hit by one run of ``DZ11``-style sampling.

    Repeats the trimming and sampling steps of ``DZ11``: entries with
    ``|entry| <= threshold / (n + d)`` are dropped, then
    ``int(14 * (n + d) * log(sqrt(2)/2 * (n + d)) * (||A||_F / threshold)**2)``
    draws are made with replacement, with probability proportional to the
    squared entry.  The result is one random realisation, not an expectation.

    Args:
        matrix: Dense 2-D NumPy array; not modified.
        threshold: Value controlling the trimming cutoff and sample count.

    Returns:
        Count of distinct sampled entries.
    """
    n, d = matrix.shape
    frobenius = np.linalg.norm(matrix, ord="fro")

    trimmed = matrix.copy()
    trimmed[np.abs(matrix) <= threshold / (n + d)] = 0

    s = int(14 * (n + d) * np.log(np.sqrt(2) / 2 * (n + d)) * (frobenius / threshold) ** 2)

    survivors = trimmed[trimmed.nonzero()]
    probs = survivors * survivors
    probs /= np.sum(probs)

    selected = np.random.choice(len(survivors), s, p=probs, replace=True)
    return len(np.unique(selected))
143+
def BKKS21(matrix, s):
    """Sparsify ``matrix`` by keeping each entry independently at random.

    Entry ``(i, j)`` is kept with probability
    ``p = min(1, s * max(p1, p2, p3))`` where

    * ``p1 = |a_ij| / sum(|A|)``,
    * ``p2 = |a_ij| * row_l1[i] / sum(row_l1 ** 2)``,
    * ``p3 = |a_ij| * col_l1[j] / sum(col_l1 ** 2)``.

    Kept entries are rescaled to ``a_ij / p``; entries with ``p == 0`` are
    never kept.  (Presumably the Braverman-Krauthgamer-Krishnan-Sapir 2021
    scheme, judging by the name -- confirm.)  Randomness comes from the
    global ``np.random`` state.

    Args:
        matrix: Dense 2-D NumPy array.
        s: Scale factor applied to the probabilities.

    Returns:
        A ``scipy.sparse.csr_matrix`` with the sampled, rescaled entries.
    """
    n, d = matrix.shape
    magnitudes = np.abs(matrix)
    row_norms = np.linalg.norm(matrix, axis=1, ord=1)
    col_norms = np.linalg.norm(matrix, axis=0, ord=1)

    by_entry = magnitudes / np.sum(magnitudes)
    by_row = magnitudes * (row_norms / np.sum(row_norms ** 2))[:, np.newaxis]
    by_col = magnitudes * (col_norms / np.sum(col_norms ** 2))[np.newaxis, :]
    keep_prob = np.minimum(1, s * np.maximum.reduce([by_entry, by_row, by_col]))

    draws = np.random.random((n, d))
    never_kept = keep_prob == 0
    # Divide by 1 where the probability is zero to avoid 0/0; those
    # positions are masked out below anyway.
    divisor = np.where(never_kept, 1, keep_prob)
    kept = (draws < keep_prob) & ~never_kept

    return scipy.sparse.csr_matrix((matrix / divisor) * kept)
159+
def BKKS21_nonzeros(matrix, s):
    """Expected non-zero count of ``BKKS21``-style sampling.

    Sums the per-entry keep probabilities
    ``min(1, s * max(p1, p2, p3))`` with

    * ``p1 = |a_ij| / sum(|A|)``,
    * ``p2 = |a_ij| * row_l1[i] / sum(row_l1 ** 2)``,
    * ``p3 = |a_ij| * col_l1[j] / sum(col_l1 ** 2)``.

    Args:
        matrix: Dense 2-D NumPy array.
        s: Scale factor applied to the probabilities.

    Returns:
        Sum of the keep probabilities over all entries.
    """
    magnitudes = np.abs(matrix)
    row_norms = np.linalg.norm(matrix, axis=1, ord=1)
    col_norms = np.linalg.norm(matrix, axis=0, ord=1)

    by_entry = magnitudes / np.sum(magnitudes)
    by_row = magnitudes * (row_norms / np.sum(row_norms ** 2))[:, np.newaxis]
    by_col = magnitudes * (col_norms / np.sum(col_norms ** 2))[np.newaxis, :]

    largest = np.maximum.reduce([by_entry, by_row, by_col])
    keep_prob = np.minimum(1, s * largest)
    return np.sum(keep_prob)
0 commit comments