@@ -39,6 +39,101 @@ def create_demo_dataset(seed=9999, N=20):
3939 return df
4040
4141
42+
def create_demo_dataset_rm(seed=9999, N=20):
    """Create a demo repeated-measures dataset with nine time points.

    Every row holds one normally distributed sample per time point
    ('Time Point 0' .. 'Time Point 8'), a 'Group' label and a 1-based 'ID'.

    Parameters
    ----------
    seed : int, default 9999
        Value passed to ``np.random.seed`` so the samples are replicable.
        Note that this reseeds NumPy's global random state.
    N : int, default 20
        Number of rows (samples drawn per time point).

    Returns
    -------
    pandas.DataFrame
        ``N`` rows and 11 columns: the nine time-point columns, then
        'Group' and 'ID'.
    """
    import numpy as np
    import pandas as pd
    from scipy.stats import norm  # Used in generation of populations.

    # Honour the caller's seed; it was previously hard-coded to 9999, which
    # silently ignored the `seed` argument.
    np.random.seed(seed)

    # (mean, standard deviation) of the population behind each time point,
    # listed in column order. Samples are drawn in this same order so that
    # the random stream is consumed exactly as one draw per time point.
    timepoint_params = [
        (3, 0.4),
        (3.5, 0.75),
        (3.25, 0.4),
        (3.5, 0.5),
        (2.5, 0.6),
        (3, 0.75),
        (3.5, 0.75),
        (3.25, 0.4),
        (3.25, 0.4),
    ]
    columns = {
        'Time Point {}'.format(index): norm.rvs(loc=loc, scale=scale, size=N)
        for index, (loc, scale) in enumerate(timepoint_params)
    }

    # Add a `Group` column for coloring the data. Integer arithmetic is used
    # because a float repeat count (N / 2) is rejected by NumPy; when N is
    # odd the extra row goes to 'Group 2' so the column still has N entries.
    first_half = N // 2
    columns['Group'] = ['Group 1'] * first_half + ['Group 2'] * (N - first_half)

    # Add an `ID` column (1..N) for paired data plotting.
    columns['ID'] = pd.Series(range(1, N + 1))

    # Combine samples, group labels and IDs into a DataFrame.
    return pd.DataFrame(columns)
87+
88+
def create_demo_dataset_delta(seed=9999, N=20):
    """Create a demo long-format dataset with two categorical variables.

    The frame has ``2 * N`` rows. 'Y' is sampled from a normal distribution
    (mean 3, standard deviation 0.4); the remaining columns are labels:

    * 'Experiment': 'Control' for the first ``N`` rows, 'Test' for the rest.
    * 'Light': alternates 'L1', 'L2' row by row.
    * 'Genotype': 'G1' for the first ``N // 2`` rows, 'G2' up to row ``N``,
      and 'G3' for the last ``N`` rows.
    * 'ID': 0, 0, 1, 1, ... so each consecutive pair of rows shares an ID.

    Parameters
    ----------
    seed : int, default 9999
        Value passed to ``np.random.seed`` so the samples are replicable.
        Note that this reseeds NumPy's global random state.
    N : int, default 20
        Half the number of rows in the returned frame.

    Returns
    -------
    pandas.DataFrame
        ``2 * N`` rows with columns 'ID', 'Light', 'Genotype',
        'Experiment' and 'Y'.
    """
    import numpy as np
    import pandas as pd
    from scipy.stats import norm  # Used in generation of populations.

    np.random.seed(seed)  # Fix the seed so the results are replicable.

    # Create samples.
    y = norm.rvs(loc=3, scale=0.4, size=N * 2)

    # Add experiment column.
    experiment = ['Control'] * N + ['Test'] * N

    # Add a `Light` column as the first variable.
    light = ['L1', 'L2'] * N

    # Add a `Genotype` column as the second variable. Integer arithmetic is
    # used because a float repeat count (N / 2) is rejected by NumPy; when N
    # is odd the extra row goes to 'G2' so the column still has 2 * N entries.
    first_half = N // 2
    genotype = ['G1'] * first_half + ['G2'] * (N - first_half) + ['G3'] * N

    # Add an `ID` column for paired data plotting: each ID appears on two
    # consecutive rows.
    id_col = [subject for subject in range(N) for _ in range(2)]

    # Combine samples and labels into a DataFrame.
    df = pd.DataFrame({'ID': id_col,
                       'Light': light,
                       'Genotype': genotype,
                       'Experiment': experiment,
                       'Y': y
                       })

    return df
135+
136+
42137def create_demo_prop_dataset (seed = 9999 , N = 40 ):
43138 import numpy as np
44139 import pandas as pd
@@ -135,4 +230,4 @@ def get_swarm_yspans(coll, round_result=False, decimals=12):
135230# df["idcol"] = pd.Series(range(1, n+1))
136231# df.columns = [str(c) for c in df.columns]
137232#
138- # return random_seed, max_mean_diff, df
233+ # return random_seed, max_mean_diff, df
0 commit comments