@@ -30,19 +30,20 @@ Reading the HDF5 format
3030Suppose that you have completed the training of the ``torchbiggraph_example_fb15k `` command and want to now
3131look up the embedding of some entity. For that, we'll need to read:
3232
33- - the embeddings, from the checkpoint files (the :file: `.h5 ` files in the `model/fb15k ` directory, or
33+ - the embeddings, from the checkpoint files (the :file: `.h5 ` files in the :file: `model/fb15k ` directory, or
3434 whatever directory was specified as the ``checkpoint_path ``); and
35- - the mapping from entity names to their partitions and offsets, from the :file: `data/FB15k/dictionary.json `
36- file created by the ``torchbiggraph_import_from_tsv `` command.
35+ - the names of the entities of a certain type and partition (ordered by their offset), from the files in the
36+ :file: `data/FB15k ` directory (or an alternative directory given as the ``entity_path ``), created by the
37+ ``torchbiggraph_import_from_tsv `` command.
3738
3839The embedding of, say, entity ``/m/05hf_5 `` can be found as follows::
3940
4041 import json
4142 import h5py
4243
43- with open("data/FB15k/dictionary .json", "rt") as tf:
44- dictionary = json.load(tf)
45- offset = dictionary["entities"]["all"] .index("/m/05hf_5")
44+ with open("data/FB15k/entity_names_all_0 .json", "rt") as tf:
45+ names = json.load(tf)
46+ offset = names .index("/m/05hf_5")
4647
4748 with h5py.File("model/fb15k/embeddings_all_0.v50.h5", "r") as hf:
4849 embedding = hf["embeddings"][offset, :]
@@ -162,12 +163,16 @@ being the capital of France::
162163 operator.load_state_dict(operator_state_dict)
163164 comparator = DotComparator()
164165
165- # Load the offsets of the entities and the index of the relation type
166- with open("data/FB15k/dictionary.json", "rt") as tf:
167- dictionary = json.load(tf)
168- src_entity_offset = dictionary["entities"]["all"].index("/m/0f8l9c") # France
169- dest_entity_offset = dictionary["entities"]["all"].index("/m/05qtj") # Paris
170- rel_type_index = dictionary["relations"].index("/location/country/capital")
166+ # Load the names of the entities, ordered by offset.
167+ with open("data/FB15k/entity_names_all_0.json", "rt") as tf:
168+ entity_names = json.load(tf)
169+ src_entity_offset = entity_names.index("/m/0f8l9c") # France
170+ dest_entity_offset = entity_names.index("/m/05qtj") # Paris
171+
172+ # Load the names of the relation types, ordered by index.
173+ with open("data/FB15k/dynamic_rel_names.json", "rt") as tf:
174+ rel_type_names = json.load(tf)
175+ rel_type_index = rel_type_names.index("/location/country/capital")
171176
172177 # Load the trained embeddings
173178 with h5py.File("model/fb15k/embeddings_all_0.v50.h5", "r") as hf:
@@ -220,10 +225,12 @@ entities are most likely to be the capital of France::
220225 comparator = DotComparator()
221226
222227 # Load the offsets of the entities and the index of the relation type
223- with open("data/FB15k/dictionary.json", "rt") as tf:
224- dictionary = json.load(tf)
225- src_entity_offset = dictionary["entities"]["all"].index("/m/0f8l9c") # France
226- rel_type_index = dictionary["relations"].index("/location/country/capital")
228+ with open("data/FB15k/entity_names_all_0.json", "rt") as tf:
229+ entity_names = json.load(tf)
230+ src_entity_offset = entity_names.index("/m/0f8l9c") # France
231+ with open("data/FB15k/dynamic_rel_names.json", "rt") as tf:
232+ rel_type_names = json.load(tf)
233+ rel_type_index = rel_type_names.index("/location/country/capital")
227234
228235 # Load the trained embeddings
229236 with h5py.File("model/fb15k/embeddings_all_0.v50.h5", "r") as hf:
@@ -245,7 +252,7 @@ entities are most likely to be the capital of France::
245252
246253 # Sort the entities by their score
247254 permutation = scores.flatten().argsort(descending=True)
248- top5_entities = [dictionary["entities"]["all"] [index] for index in permutation[:5]]
255+ top5_entities = [entity_names [index] for index in permutation[:5]]
249256
250257 print(top5_entities)
251258
@@ -271,17 +278,17 @@ library. The following code looks for the entities that are closest to Paris::
271278 index.add(hf["embeddings"][...])
272279
273280 # Get trained embedding of Paris
274- with open("data/FB15k/dictionary .json", "rt") as f :
275- dictionary = json.load(f )
276- target_entity_offset = dictionary["entities"]["all"] .index("/m/05qtj") # Paris
281+ with open("data/FB15k/entity_names_all_0 .json", "rt") as tf :
282+ entity_names = json.load(tf )
283+ target_entity_offset = entity_names .index("/m/05qtj") # Paris
277284 with h5py.File("model/fb15k/embeddings_all_0.v50.h5", "r") as hf:
278285 target_embedding = hf["embeddings"][target_entity_offset, :]
279286
280287 # Search nearest neighbors
281288 _, neighbors = index.search(target_embedding.reshape((1, 400)), 5)
282289
283290 # Map back to entity names
284- top5_entities = [dictionary["entities"]["all"] [index] for index in neighbors[0]]
291+ top5_entities = [entity_names [index] for index in neighbors[0]]
285292
286293 print(top5_entities)
287294
0 commit comments