corr-im/config.py at main · justanothergithubber/corr-im · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""Configuration used in the experiments."""

from enum import IntEnum
from pathlib import Path
from typing import Type

from igraph import Graph
from numpy import float64
from numpy.typing import NDArray

# Defaults
# For data
## Folders
COMMON_PAPER_FOLDER = Path("paper_dir")
"""Parent folder of the per-paper output folders."""
PAPER_MS_OUTPUT_FOLDER = COMMON_PAPER_FOLDER / "ms"
"""Folder to contain outputs shown in Management Science paper."""
NEURIPS_FOLDER = COMMON_PAPER_FOLDER / "neurips"
"""
Folder to contain outputs shown in NeurIPS paper.
Some outputs shared with Management Science paper.
"""

## Inputs
DATA_FOLDER = Path("data")
"""Input data folder"""

## Intermediate outputs
### Folders
TSV_FOLDER = Path("tsvs")
"""Folder containing the .tsv version of each graph."""

RAW_OUTPUT_FOLDER = Path("out")
"""Folder for intermediate outputs"""
CONV_CVAR_OUT_PATH = RAW_OUTPUT_FOLDER / "conv_cvar"  # includes cvar-only
"""Folder for convex combination outputs"""
AMAZON_FILE = DATA_FOLDER / "amazon-meta.txt"
"""Amazon metadata input file, located inside the input data folder."""
AMAZON_PAPER_OUTPUTS = PAPER_MS_OUTPUT_FOLDER / "amazon"
"""Folder for amazon processed outputs"""
AMAZON_RAW_OUTPUTS = RAW_OUTPUT_FOLDER / "amazon"
"""Folder for amazon relatively more unprocessed outputs."""

### Files
# The `/` operator of Path accepts plain strings, so no extra Path() is needed.
SUMMARY_CSV = COMMON_PAPER_FOLDER / "summary.csv"
AMAZON_ALL_GENRE_FILE = AMAZON_RAW_OUTPUTS / "amazon_allgenres.csv"
AMAZON_NODEIDS = AMAZON_RAW_OUTPUTS / "amazon_nodeids.csv"

## Small Random graph constants
DEFAULT_RANDOM_GRAPH_SIZE = 1000

DEFAULT_TARGET_SIZE = 40
"""Capital *K* inside paper"""
K_RANGE = range(1, DEFAULT_TARGET_SIZE + 1)
"""Alias for all values that k takes: 1 up to and including K."""
EST_SEEDS = range(0, 100000, 10000)
"""Alias for estimation seeds we use: 0, 10000, ..., 90000."""

## Executables for gathering data
CONV_GREED_EXE_NAME = "conv_greed"
PMC_GREED_EXE_NAME = "pmc_greed"
CVAR_GREED_EXE_NAME = "cvar_greed"
PMC_EST_EXE_NAME = "pmc_est"
CVAR_EST_EXE_NAME = "cvar_est"

# Type hint aliases
GRT = tuple[list[int], list[float], list[float]]
"""Type hint alias for greedy algorithm results tuple."""
DISTMAT = NDArray[float64]
"""Type hint alias for distance matrix"""

#  More aliases at end of file


class GraphType(IntEnum):
    """Integer-valued identifiers for the graphs used in the experiments."""

    polblogs = 0
    """
    The `polblogs` data set of Adamic and Glance.

    Lada A. Adamic and Natalie Glance. 2005. The political blogosphere
    and the 2004 U.S. election: divided they blog. In Proceedings of
    the 3rd international workshop on Link discovery (LinkKDD '05).
    Association for Computing Machinery, New York, NY, USA, 36-43.
    https://doi.org/10.1145/1134271.1134277
    """
    wikivote = 1
    """
    The Wikipedia adminship vote network.

    See https://snap.stanford.edu/data/wiki-Vote.html
    """
    random_scale_free = 2
    """
    A randomly generated directed scale-free graph.

    Generated with the algorithm from `<https://networkx.org>`_, which uses
    B. Bollobás, C. Borgs, J. Chayes, and O. Riordan,
    Directed scale-free graphs, Proceedings of the fourteenth
    annual ACM-SIAM Symposium on Discrete Algorithms, 132-139,
    2003.
    """
    amazon = 3
    """
    The Amazon co-purchasing network.

    See https://snap.stanford.edu/data/amazon-meta.html
    """

    # Kept as a template so that a custom graph type is easy to add.
    # other = 4

    def __str__(self) -> str:
        """Return the bare member name so help output reads nicely."""
        return self.name


class HetEdgeWeightType(IntEnum):
    """IntEnum to denote different heterogeneous edge weights."""

    uniform = 0
    """Uniform random edge types between 0 and 1"""
    trivalency = 1
    """Equal probability between 1e-1, 1e-2, 1e-3"""
    weightedcascade = 2
    """Edge weight depending on indegree of target of edge"""
    amazonlow = 3
    """For amazon, low p values"""
    amazonhigh = 4
    """For amazon, high p values"""
    # NOTE: this used to be 3, which collided with `amazonlow`. In an Enum a
    # repeated value makes the later name a mere alias of the earlier member,
    # so `custom` was indistinguishable from `amazonlow`. It now has its own
    # value; the integer values of all other members are unchanged.
    custom = 5
    """
    For special graph weight functions that do not fit the other
    possibilities.

    Specially given a definition within
    :py:func:`graph_functions.standardize_graph`.
    """

    def __str__(self) -> str:
        """Redefining __str__ for nicer printing in help."""
        return str(self.name)


class SolveMethod(IntEnum):
    """
    Integer-valued identifiers for the available solution methods.

    A 'solution' here means obtaining a seed set.
    """

    correlation_robust = 0
    """
    The method corresponding to the distributionally robust model.

    Leverages shortest paths in graph.
    """
    independence_cascade = 1
    """Uses the independence cascade program."""
    highest_outdegree = 2
    """Uses the highest outdegree heuristic."""
    linear_program = 3
    """Solves as a linear program using Pyomo."""

    def __str__(self) -> str:
        """Return the bare member name so help output reads nicely."""
        return self.name


class EnumParser:
    """Callable that converts a string into a member of a given IntEnum.

    The string may hold either a member's integer value or its name.
    """

    def __init__(self, enumcls: Type[IntEnum]) -> None:
        """Store the IntEnum subclass whose members will be produced."""
        self.cls = enumcls

    def __call__(self, arg: str) -> IntEnum:
        """Return the member of the stored enum identified by ``arg``.

        Lookup by integer value is tried first, then lookup by name.
        A ValueError is raised when neither lookup succeeds.
        """
        enum_cls = self.cls

        # First attempt: interpret the text as an integer value.
        try:
            by_value = enum_cls(int(arg))
        except ValueError:
            # Either not an integer, or no member carries that value.
            pass
        else:
            return by_value

        # Second attempt: interpret the text as a member name.
        try:
            by_name = enum_cls[arg]
        except KeyError:
            pass
        else:
            return by_name

        raise ValueError(f"Invalid {enum_cls.__name__} input:'{arg}'")


# More Aliases
GRAPHS_USED = (GraphType.polblogs, GraphType.wikivote)
# NOTE(review): the second tuple element is typed `float | GraphType`;
# possibly `float | HetEdgeWeightType` was intended — confirm against callers.
GraphDict = dict[GraphType | tuple[GraphType, float | GraphType], Graph]
"""Graph Dictionary Type"""
HETEDGEWEIGHT = [
    HetEdgeWeightType.uniform,
    HetEdgeWeightType.trivalency,
    HetEdgeWeightType.weightedcascade,
]

# Final constants
P_LIST = [0.01, *(x / 20 for x in range(1, 20))]
"""0.01, 0.05, 0.1, ... 0.95"""
# Homogeneous probabilities first, followed by the heterogeneous weight types.
EDGE_WEIGHTS: list[float | HetEdgeWeightType] = [*P_LIST, *HETEDGEWEIGHT]

## NeurIPS constants
## Here to prevent circular imports - config is the "root" of all Python imports
NEURIPS_METHODS_USED = (
    SolveMethod.correlation_robust,
    SolveMethod.independence_cascade,
)
NEURIPS_SOL_NAMES = {
    SolveMethod.correlation_robust: "corr",
    SolveMethod.independence_cascade: "ic",
}
"""Solution used in the NeurIPS paper."""
# Same short names, keyed by the member's name instead of the member itself.
NEURIPS_SOL_SHORT_NAMES = {
    method.name: short for method, short in NEURIPS_SOL_NAMES.items()
}
"""Maps longer names like `correlation_robust` to something shorter, like `corr`"""
NEURIPS_SOL_TUPLE: tuple[str, str] = ("corr", "ic")