|
16 | 16 | from .layers.utils import Hash |
17 | 17 |
|
18 | 18 |
|
def _create_embedding_layer(feat, l2_reg, prefix, name_suffix, mask_zero=False):
    """Build one ``Embedding`` layer configured from a feature column.

    :param feat: feature column; this function reads ``vocabulary_size``,
        ``embedding_dim``, ``embeddings_initializer``, ``embedding_name`` and
        ``trainable`` from it.
    :param l2_reg: value passed to ``l2(...)`` for the embeddings regularizer.
    :param prefix: first component of the layer name.
    :param name_suffix: middle component of the layer name.
    :param mask_zero: forwarded unchanged to ``Embedding(mask_zero=...)``.
    :return: the new ``Embedding`` layer with ``trainable`` copied from ``feat``.
    """
    # Layer name is "<prefix>_<name_suffix>_<embedding_name>".
    layer_name = prefix + '_' + name_suffix + '_' + feat.embedding_name
    emb = Embedding(feat.vocabulary_size, feat.embedding_dim,
                    embeddings_initializer=feat.embeddings_initializer,
                    embeddings_regularizer=l2(l2_reg),
                    name=layer_name,
                    mask_zero=mask_zero)
    emb.trainable = feat.trainable
    return emb
| 27 | + |
| 28 | + |
| 29 | +def _check_embedding_compatible(embedding_name, existing_feat, feat): |
| 30 | + for attr in ('vocabulary_size', 'embedding_dim', 'trainable'): |
| 31 | + if getattr(existing_feat, attr) != getattr(feat, attr): |
| 32 | + raise ValueError( |
| 33 | + "Feature columns with the same embedding_name must share the same " |
| 34 | + "{}. embedding_name='{}' has {} and {}.".format( |
| 35 | + attr, embedding_name, getattr(existing_feat, attr), getattr(feat, attr) |
| 36 | + ) |
| 37 | + ) |
| 38 | + |
| 39 | + |
def get_inputs_list(inputs):
    """Flatten the values of several mappings into a single list.

    :param inputs: iterable whose items are either ``None`` or objects with a
        ``values()`` method (e.g. dicts); ``None`` items are skipped.
    :return: list of all values, in the order the mappings appear in ``inputs``.
    """
    return list(chain.from_iterable(
        group.values() for group in inputs if group is not None
    ))
21 | 42 |
|
22 | 43 |
|
def create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, l2_reg,
                          prefix='sparse_', seq_mask_zero=True):
    """Create one embedding layer per distinct ``embedding_name``.

    Feature columns that share an ``embedding_name`` share a single layer; the
    first column seen creates it and later ones are only checked for
    compatibility via ``_check_embedding_compatible``. Fixed-length sparse
    columns are processed before variable-length ones.

    :param sparse_feature_columns: sparse feature columns (``None`` is treated
        as empty).
    :param varlen_sparse_feature_columns: variable-length sparse feature
        columns (``None`` is treated as empty).
    :param seed: not used in this function body; kept for interface
        compatibility.
    :param l2_reg: regularization value forwarded to the layer factory.
    :param prefix: layer-name prefix forwarded to the layer factory.
    :param seq_mask_zero: whether layers used by variable-length columns are
        created with ``mask_zero`` enabled.
    :return: dict mapping ``embedding_name`` to its embedding layer.
    :raises ValueError: propagated from ``_check_embedding_compatible`` when
        columns sharing a name disagree.
    """
    # Materialize once: the variable-length columns are traversed twice below.
    sparse_columns = list(sparse_feature_columns or ())
    varlen_columns = list(varlen_sparse_feature_columns or ())

    varlen_embedding_names = {feat.embedding_name for feat in varlen_columns}

    sparse_embedding = {}
    # First feature column registered for each embedding name, kept so later
    # columns with the same name can be validated against it.
    embedding_feature_dict = {}

    for feat in sparse_columns:
        embedding_name = feat.embedding_name
        if embedding_name in sparse_embedding:
            _check_embedding_compatible(embedding_name, embedding_feature_dict[embedding_name], feat)
            continue
        # A layer also consumed by a variable-length column takes the sequence
        # masking setting; otherwise masking stays off.
        mask_zero = seq_mask_zero and embedding_name in varlen_embedding_names
        sparse_embedding[embedding_name] = _create_embedding_layer(feat, l2_reg, prefix, 'emb', mask_zero)
        embedding_feature_dict[embedding_name] = feat

    for feat in varlen_columns:
        embedding_name = feat.embedding_name
        if embedding_name in sparse_embedding:
            _check_embedding_compatible(embedding_name, embedding_feature_dict[embedding_name], feat)
            continue
        sparse_embedding[embedding_name] = _create_embedding_layer(feat, l2_reg, prefix, 'seq_emb', seq_mask_zero)
        embedding_feature_dict[embedding_name] = feat

    return sparse_embedding
46 | 72 |
|
47 | 73 |
|
|
0 commit comments