4444
4545
4646def get_reward_arm (df_graph , df_weights , new_seed ):
47- """ Runs independent cascade model.
48- Input: df_g -- a dataframe representing the graph (with the probabilities)
49- S -- initial set of vertices
50- tracking -- whether we want to check for active/observed nodes
51- Output: T -- resulted influenced set of vertices (including S)
47+ """ Run the IC model and get the reward of adding a seed node
48+
49+ Parameters
50+ ----------
51+ df_graph : pandas.DataFrame
52+ The graph we run the RSB on, in the form of a DataFrame. A row represents one
53+ edge in the graph, with columns being named "source", "target", "probab".
54+ "probab" column is the "true" activation probability used for the simulation.
55+ df_weights : pandas.DataFrame
56+ A dataframe of the node weights used by RSB.
57+ new_seed : int
58+ An id of the new seed node that we are going to test.
59+
60+ Returns
61+ -------
62+ results : numpy.ndarray
63+ An array of the nodes affected by adding new_seed to the seed nodes
64+
5265 """
5366 prev_affected = df_weights [df_weights ["walked" ] == 1 ].index .tolist ()
5467 new_affected = [new_seed ]
@@ -69,76 +82,70 @@ def get_reward_arm(df_graph, df_weights, new_seed):
6982 return np .array (new_affected )
7083
7184
72- def rsb (df_edges , nodes , times , num_seeds = 10 , C = 1 , gamma = 0.2 , num_repeats_expect = 25 ):
73- num_nodes = nodes .shape [0 ]
74- df_weights = pd .DataFrame (
75- data = 1 , index = nodes , columns = [f"weight_{ k } " for k in range (num_seeds )]
76- )
77- df_weights ["walked" ] = False
78- results = []
79- for t in tqdm (times ):
80- # print(t)
81- df_t = df_edges [df_edges ["day" ] <= t ]
82- df_weights ["walked" ] = False
83- selected = []
84- for cur_seed in range (num_seeds ):
85- df_weights ["temp_weight" ] = (
86- gamma / num_nodes
87- + (1 - gamma )
88- * df_weights [f"weight_{ cur_seed } " ]
89- / df_weights [f"weight_{ cur_seed } " ].sum ()
90- )
91-
92- selection_probab = (
93- df_weights [~ df_weights .index .isin (selected )]["temp_weight" ]
94- / df_weights [~ df_weights .index .isin (selected )]["temp_weight" ].sum ()
95- )
96- # Draw an arm
97- random_pt = random .uniform (0 , df_weights [f"weight_{ cur_seed } " ].sum ())
98- selected_node = (
99- df_weights [f"weight_{ cur_seed } " ].cumsum () >= random_pt
100- ).idxmax ()
101- # Receiving the reward
102- affected_arm = get_reward_arm (df_t , df_weights , selected_node )
103- df_weights .loc [affected_arm , "walked" ] = True
104- marginal_gain = len (affected_arm )
105- df_weights ["expected_gain" ] = 0
106- df_weights .loc [selected_node , "expected_gain" ] = (
107- marginal_gain / selection_probab [selected_node ]
108- )
109-
110- selected .append (selected_node )
111- df_weights [f"weight_{ cur_seed } " ] = df_weights [
112- f"weight_{ cur_seed } "
113- ] * np .exp ((gamma * df_weights ["expected_gain" ]) / (num_nodes * C ))
114-
115- results .append (
116- {
117- "time" : t ,
118- "reward" : get_avg_reward (df_t , selected , num_repeats_expect ),
119- "selected" : selected ,
120- }
121- )
122- return pd .DataFrame (results )
123-
124-
12585# --------------------------------------------------------------------------------------
126- # %% ------------------------------------- RSB 2 ---------------------------------------
86+ # %% -------------------------------------- RSB - ---------------------------------------
12787# --------------------------------------------------------------------------------------
12888
12989
130- def rsb2 (
90+ def rsb (
13191 df_edges ,
13292 nodes ,
13393 times ,
13494 num_seeds = 10 ,
135- C = 1 ,
95+ C = 1.0 ,
13696 gamma = 0.2 ,
13797 num_repeats_expect = 25 ,
13898 persist_params = True ,
13999 style = "additive" ,
140100 hide_tqdm = False ,
141101):
102+ """ Run the RSB algorithm on a graph
103+
104+ Parameters
105+ ----------
106+ df_edges : pandas.DataFrame
107+ The graph we run the RSB on, in the form of a DataFrame. A row represents one
108+ edge in the graph, with columns being named "source", "target", "probab",
109+ and "day". "probab" column is the "true" activation probability and "day" should
110+ correspond to the days specified in times.
111+ nodes : pandas.Series
112+ A series containing all unique nodes in df_edges.
113+ times : pandas.Series, list
114+ A series or a list of the times that we are going to iterate through. Useful
115+ if you don't want to iterate through every day in the network.
116+ num_seeds : int, optional
117+ Number of seed nodes to find. Default: 10
118+ C : float, optional
119+ A hyperparameter used by the RSB algorithm. Refer to the RSB paper for
120+ more details. [1] Default: 1.0
121+ gamma : float, optional
122+ A hyperparameter used by the RSB algorithm. Refer to the RSB paper for
123+ more details. [1] Default: 0.2
124+ num_repeats_expect : int, optional
125+ Default: 25
126+ persist_params : boolean, optional
127+ Determines if we want to persist the OIM parameters. Default: True
128+ style : str, optional
129+ Determines whether we take into account all edges up to t ("additive") or just
130+ the ones that were formed at t ("dynamic"). Default: "additive"
131+ hide_tqdm : boolean, optional
132+ A parameter used if you want to hide all tqdm progress bars. It's useful if
133+ you want to parallelize the algorithm. Default: False
134+
135+ Returns
136+ -------
137+ results : DataFrame
138+ A dataframe with the following columns
139+ - time_t, the time step at which everything else was obtained
140+ - reward, the average reward obtained by running IC with s_best
141+ - selected, the list of the selected seed nodes
142+
143+
144+ .. [1] Bao, Yixin, et al.
145+ "Online influence maximization in non-stationary social networks."
146+ 2016 IEEE/ACM 24th International Symposium on Quality of Service (IWQoS). IEEE, 2016
147+
148+ """
142149 num_nodes = nodes .shape [0 ]
143150 df_weights = pd .DataFrame (
144151 data = 1 ,
0 commit comments