Skip to content

Commit 5546ac4

Browse files
committed
Add numpy docstrings to rsb.py
1 parent 2e0de3a commit 5546ac4

1 file changed

Lines changed: 68 additions & 61 deletions

File tree

rsb.py

Lines changed: 68 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,24 @@
4444

4545

4646
def get_reward_arm(df_graph, df_weights, new_seed):
47-
""" Runs independent cascade model.
48-
Input: df_g -- a dataframe representing the graph (with the probabilities)
49-
S -- initial set of vertices
50-
tracking -- whether we want to check for active/observed nodes
51-
Output: T -- resulted influenced set of vertices (including S)
47+
""" Run the IC model and get the reward of adding a seed node
48+
49+
Parameters
50+
----------
51+
df_graph : pandas.DataFrame
52+
The graph we run the RSB on, in the form of a DataFrame. A row represents one
53+
edge in the graph, with columns being named "source", "target", "probab".
54+
"probab" column is the "true" activation probability used for the simulation.
55+
df_weights : pandas.DataFrame
56+
A dataframe of the node weights used by RSB.
57+
new_seed : int
58+
An id of the new seed node that we are going to test.
59+
60+
Returns
61+
-------
62+
results : numpy.ndarray
63+
An array of the nodes affected by adding the new_seed to the seed nodes
64+
5265
"""
5366
prev_affected = df_weights[df_weights["walked"] == 1].index.tolist()
5467
new_affected = [new_seed]
@@ -69,76 +82,70 @@ def get_reward_arm(df_graph, df_weights, new_seed):
6982
return np.array(new_affected)
7083

7184

72-
def rsb(df_edges, nodes, times, num_seeds=10, C=1, gamma=0.2, num_repeats_expect=25):
73-
num_nodes = nodes.shape[0]
74-
df_weights = pd.DataFrame(
75-
data=1, index=nodes, columns=[f"weight_{k}" for k in range(num_seeds)]
76-
)
77-
df_weights["walked"] = False
78-
results = []
79-
for t in tqdm(times):
80-
# print(t)
81-
df_t = df_edges[df_edges["day"] <= t]
82-
df_weights["walked"] = False
83-
selected = []
84-
for cur_seed in range(num_seeds):
85-
df_weights["temp_weight"] = (
86-
gamma / num_nodes
87-
+ (1 - gamma)
88-
* df_weights[f"weight_{cur_seed}"]
89-
/ df_weights[f"weight_{cur_seed}"].sum()
90-
)
91-
92-
selection_probab = (
93-
df_weights[~df_weights.index.isin(selected)]["temp_weight"]
94-
/ df_weights[~df_weights.index.isin(selected)]["temp_weight"].sum()
95-
)
96-
# Draw an arm
97-
random_pt = random.uniform(0, df_weights[f"weight_{cur_seed}"].sum())
98-
selected_node = (
99-
df_weights[f"weight_{cur_seed}"].cumsum() >= random_pt
100-
).idxmax()
101-
# Receiving the reward
102-
affected_arm = get_reward_arm(df_t, df_weights, selected_node)
103-
df_weights.loc[affected_arm, "walked"] = True
104-
marginal_gain = len(affected_arm)
105-
df_weights["expected_gain"] = 0
106-
df_weights.loc[selected_node, "expected_gain"] = (
107-
marginal_gain / selection_probab[selected_node]
108-
)
109-
110-
selected.append(selected_node)
111-
df_weights[f"weight_{cur_seed}"] = df_weights[
112-
f"weight_{cur_seed}"
113-
] * np.exp((gamma * df_weights["expected_gain"]) / (num_nodes * C))
114-
115-
results.append(
116-
{
117-
"time": t,
118-
"reward": get_avg_reward(df_t, selected, num_repeats_expect),
119-
"selected": selected,
120-
}
121-
)
122-
return pd.DataFrame(results)
123-
124-
12585
# --------------------------------------------------------------------------------------
126-
# %% ------------------------------------- RSB 2 ---------------------------------------
86+
# %% -------------------------------------- RSB ----------------------------------------
12787
# --------------------------------------------------------------------------------------
12888

12989

130-
def rsb2(
90+
def rsb(
13191
df_edges,
13292
nodes,
13393
times,
13494
num_seeds=10,
135-
C=1,
95+
C=1.0,
13696
gamma=0.2,
13797
num_repeats_expect=25,
13898
persist_params=True,
13999
style="additive",
140100
hide_tqdm=False,
141101
):
102+
""" Run the RSB algorithm on a graph
103+
104+
Parameters
105+
----------
106+
df_edges : pandas.DataFrame
107+
The graph we run the RSB on, in the form of a DataFrame. A row represents one
108+
edge in the graph, with columns being named "source", "target", "probab",
109+
and "day". "probab" column is the "true" activation probability and "day" should
110+
correspond to the days specified in times.
111+
nodes : pandas.Series
112+
A series containing all unique nodes in df_edges.
113+
times : pandas.Series, list
114+
A series or a list of the times that we are going to iterate through. Useful
115+
if you don't want to iterate through every day in the network.
116+
num_seeds : int, optional
117+
Number of seed nodes to find. Default: 10
118+
C : float, optional
119+
A hyperparameter used by the RSB algorithm. Refer to the RSB paper for
120+
more details. [1] Default: 1.0
121+
gamma : float, optional
122+
A hyperparameter used by the RSB algorithm. Refer to the RSB paper for
123+
more details. [1] Default: 0.2
124+
num_repeats_expect : int, optional
125+
Number of repeats used when estimating the average reward. Default: 25
126+
persist_params : boolean, optional
127+
Determines if we want to persist the OIM parameters. Default: True
128+
style : str, optional
129+
Determines whether we take into account all edges up to t ("additive") or just
130+
the ones that were formed at t ("dynamic"). Default: "additive"
131+
hide_tqdm : boolean, optional
132+
A parameter used if you want to hide all tqdm progress bars. It's useful if
133+
you want to parallelize the algorithm. Default: False
134+
135+
Returns
136+
-------
137+
results : DataFrame
138+
A dataframe with the following columns
139+
- time_t, the time step at which everything else was obtained
140+
- reward, the average reward obtained by running IC with s_best
141+
- selected, the list of the selected seed nodes
142+
143+
144+
.. [1] Bao, Yixin, et al.
145+
"Online influence maximization in non-stationary social networks."
146+
2016 IEEE/ACM 24th International Symposium on Quality of Service (IWQoS). IEEE, 2016
147+
148+
"""
142149
num_nodes = nodes.shape[0]
143150
df_weights = pd.DataFrame(
144151
data=1,

0 commit comments

Comments
 (0)