Skip to content

Commit a5d6bbe

Browse files
authored
Concept Set Optimize update (#2459)
New implementation for optimizing concept set. Thanks to @schuemie for implementation.
1 parent 47bc438 commit a5d6bbe

3 files changed

Lines changed: 485 additions & 74 deletions

File tree

src/main/java/org/ohdsi/webapi/service/VocabularyService.java

Lines changed: 83 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
import org.ohdsi.webapi.vocabulary.ConceptRecommendedNotInstalledException;
6262
import org.ohdsi.webapi.vocabulary.ConceptRelationship;
6363
import org.ohdsi.webapi.vocabulary.ConceptSearch;
64+
import org.ohdsi.webapi.vocabulary.ConceptSetCondenser;
6465
import org.ohdsi.webapi.vocabulary.DescendentOfAncestorSearch;
6566
import org.ohdsi.webapi.vocabulary.Domain;
6667
import org.ohdsi.webapi.vocabulary.RecommendedConcept;
@@ -1673,107 +1674,115 @@ public Collection<ConceptSetComparison> compareConceptSets(ConceptSetExpression[
16731674
return compareConceptSets(defaultSourceKey, conceptSetExpressionList);
16741675
}
16751676

1677+
16761678
/**
16771679
* Optimizes a concept set expression to find redundant concepts specified
1678-
* in a concept set expression for the selected source key.
1680+
* in a concept set expression.
16791681
*
1680-
* @summary Optimize concept set
1682+
* @summary Optimize concept set (default vocabulary)
16811683
* @param sourceKey The source containing the vocabulary
16821684
* @param conceptSetExpression The concept set expression to optimize
16831685
* @return A concept set optimization
16841686
*/
1685-
@Path("{sourceKey}/optimize")
1687+
@Path("optimize")
16861688
@POST
16871689
@Produces(MediaType.APPLICATION_JSON)
16881690
@Consumes(MediaType.APPLICATION_JSON)
1689-
public ConceptSetOptimizationResult optimizeConceptSet(@PathParam("sourceKey") String sourceKey, ConceptSetExpression conceptSetExpression) throws Exception {
1690-
Source source = getSourceRepository().findBySourceKey(sourceKey);
1691-
String tableQualifier = source.getTableQualifier(SourceDaimon.DaimonType.Vocabulary);
1692-
1693-
// Get the optimization script
1694-
String sql_statement = ResourceHelper.GetResourceAsString("/resources/vocabulary/sql/optimizeConceptSet.sql");
1695-
1696-
// Find all of the concepts that should be considered for optimization
1697-
// Create a hashtable to hold all of the contents of the ConceptSetExpression
1698-
// for use later
1699-
Hashtable<Integer, ConceptSetExpression.ConceptSetItem> allConceptSetItems = new Hashtable<>();
1700-
ArrayList<Integer> includedConcepts = new ArrayList<>();
1701-
ArrayList<Integer> descendantConcepts = new ArrayList<>();
1702-
ArrayList<Integer> allOtherConcepts = new ArrayList<>();
1703-
for(ConceptSetExpression.ConceptSetItem item : conceptSetExpression.items) {
1704-
allConceptSetItems.put(item.concept.conceptId.intValue(), item);
1705-
if (!item.isExcluded) {
1706-
includedConcepts.add(item.concept.conceptId.intValue());
1707-
if (item.includeDescendants) {
1708-
descendantConcepts.add(item.concept.conceptId.intValue());
1709-
}
1710-
} else {
1711-
allOtherConcepts.add(item.concept.conceptId.intValue());
1712-
}
1713-
}
1714-
1715-
// If no descendant concepts are specified, initialize this field to use concept_id = 0 so the query will work properly
1716-
if (descendantConcepts.isEmpty())
1717-
descendantConcepts.add(0);
1718-
1719-
String allConceptsList = includedConcepts.stream().map(Object::toString).collect(Collectors.joining(", "));
1720-
String descendantConceptsList = descendantConcepts.stream().map(Object::toString).collect(Collectors.joining(", "));
1691+
public ConceptSetOptimizationResult optimizeConceptSet(ConceptSetExpression conceptSetExpression) throws Exception {
1692+
String defaultSourceKey = getDefaultVocabularySourceKey();
17211693

1722-
sql_statement = SqlRender.renderSql(sql_statement, new String[]{"allConcepts", "descendantConcepts", "cdm_database_schema"}, new String[]{allConceptsList, descendantConceptsList, tableQualifier});
1723-
sql_statement = SqlTranslate.translateSql(sql_statement, source.getSourceDialect());
1694+
if (defaultSourceKey == null)
1695+
throw new WebApplicationException(new Exception("No vocabulary or cdm daimon was found in configured sources. Search failed."), Response.Status.SERVICE_UNAVAILABLE); // http 503
17241696

1725-
// Execute the query to obtain a result set that contains the
1726-
// most optimized version of the concept set. Then, using these results,
1727-
// construct a new ConceptSetExpression object that only contains the
1728-
// concepts that were identified as optimal to achieve the same definition
1729-
ConceptSetOptimizationResult returnVal = new ConceptSetOptimizationResult();
1730-
ArrayList<ConceptSetExpression.ConceptSetItem> optimzedExpressionItems = new ArrayList<>();
1731-
ArrayList<ConceptSetExpression.ConceptSetItem> removedExpressionItems = new ArrayList<>();
1732-
List<Map<String, Object>> rows = getSourceJdbcTemplate(source).queryForList(sql_statement);
1733-
for (Map rs : rows) {
1734-
Integer conceptId = Integer.parseInt(rs.get("concept_id").toString());
1735-
String removed = String.valueOf(rs.get("removed"));
1736-
ConceptSetExpression.ConceptSetItem csi = allConceptSetItems.get(conceptId);
1737-
if (removed.equals("0")) {
1738-
optimzedExpressionItems.add(csi);
1739-
} else {
1740-
removedExpressionItems.add(csi);
1741-
}
1742-
}
1743-
// Re-add back the other concepts that are not considered
1744-
// as part of the optimization process
1745-
for(Integer conceptId : allOtherConcepts) {
1746-
ConceptSetExpression.ConceptSetItem csi = allConceptSetItems.get(conceptId);
1747-
optimzedExpressionItems.add(csi);
1748-
}
1749-
returnVal.optimizedConceptSet.items = optimzedExpressionItems.toArray(new ConceptSetExpression.ConceptSetItem[optimzedExpressionItems.size()]);
1750-
returnVal.removedConceptSet.items = removedExpressionItems.toArray(new ConceptSetExpression.ConceptSetItem[removedExpressionItems.size()]);
1751-
1752-
return returnVal;
1697+
return optimizeConceptSet(defaultSourceKey, conceptSetExpression);
17531698
}
1754-
1699+
17551700
/**
17561701
* Optimizes a concept set expression to find redundant concepts specified
1757-
* in a concept set expression.
1702+
* in a concept set expression for the selected source key.
17581703
*
1759-
* @summary Optimize concept set (default vocabulary)
1704+
* @summary Optimize concept set
17601705
* @param sourceKey The source containing the vocabulary
17611706
* @param conceptSetExpression The concept set expression to optimize
17621707
* @return A concept set optimization
17631708
*/
1764-
@Path("optimize")
1709+
@Path("{sourceKey}/optimize")
17651710
@POST
17661711
@Produces(MediaType.APPLICATION_JSON)
17671712
@Consumes(MediaType.APPLICATION_JSON)
1768-
public ConceptSetOptimizationResult optimizeConceptSet(ConceptSetExpression conceptSetExpression) throws Exception {
1769-
String defaultSourceKey = getDefaultVocabularySourceKey();
1713+
public ConceptSetOptimizationResult optimizeConceptSet(@PathParam("sourceKey") String sourceKey, ConceptSetExpression conceptSetExpression) throws Exception {
1714+
// resolve the concept set to get included concepts
1715+
Collection<Long> includedConcepts = this.resolveConceptSetExpression(sourceKey, conceptSetExpression);
1716+
long[] includedConceptsArray = includedConcepts.stream().mapToLong(Long::longValue).toArray();
17701717

1771-
if (defaultSourceKey == null)
1772-
throw new WebApplicationException(new Exception("No vocabulary or cdm daimon was found in configured sources. Search failed."), Response.Status.SERVICE_UNAVAILABLE); // http 503
1718+
// perform vocabulary search to find ancestor/descendant concepts for the included concepts
1719+
Source source = getSourceRepository().findBySourceKey(sourceKey);
1720+
String tableQualifier = source.getTableQualifier(SourceDaimon.DaimonType.Vocabulary);
1721+
String ancestorSql = ResourceHelper.GetResourceAsString("/resources/vocabulary/sql/calculateAncestors.sql");
1722+
String allConceptsList = includedConcepts.stream().map(Object::toString).collect(Collectors.joining(", "));
1723+
1724+
ancestorSql = SqlRender.renderSql(ancestorSql, new String[]{"ancestors", "CDM_schema"}, new String[]{allConceptsList, tableQualifier});
1725+
ancestorSql = SqlTranslate.translateSql(ancestorSql, source.getSourceDialect());
1726+
List<Map<String, Object>> rows = getSourceJdbcTemplate(source).queryForList(ancestorSql);
1727+
1728+
// the candidate concepts are all ancestors from the query, and we add any
1729+
// descendants in the result to the collection of CandidateConcepts
1730+
Map<Long, Collection<Long>> ancestorMap = new HashMap<>();
1731+
for (Map rs : rows) {
1732+
final Long ancestorConceptId = Long.valueOf(rs.get("ancestor_id").toString());
1733+
ancestorMap.computeIfAbsent(ancestorConceptId,k -> new ArrayList<>())
1734+
.add(Long.valueOf(rs.get("descendant_id").toString()));
1735+
}
1736+
1737+
// use conceptSetCondenser to optimize concept set
1738+
ArrayList<ConceptSetCondenser.CandidateConcept> candidateConcepts = new ArrayList<>();
1739+
for (Long candidateConcept : ancestorMap.keySet()){
1740+
long[] candidateDescendants = ancestorMap.get(candidateConcept).stream().mapToLong(Long::longValue).toArray();
1741+
candidateConcepts.add(new ConceptSetCondenser.CandidateConcept(candidateConcept, candidateDescendants));
1742+
}
1743+
ConceptSetCondenser.CandidateConcept[] candidateConceptsArray = candidateConcepts.toArray(new ConceptSetCondenser.CandidateConcept[0]);
1744+
ConceptSetCondenser condenser = new ConceptSetCondenser(includedConceptsArray, candidateConceptsArray);
1745+
condenser.condense();
1746+
ConceptSetCondenser.ConceptExpression[] conceptExpressionArray = condenser.getConceptSetExpression();
1747+
1748+
// convert condensed concept set to a ConceptSetExpression
1749+
// 1. get lookup of Concept objects from the conceptExpression[] and make a map
1750+
Collection<Concept> concepts = executeIdentifierLookup(source, Arrays.stream(conceptExpressionArray).mapToLong(ce -> ce.conceptId).toArray());
1751+
Map<Long, Concept> conceptMap = concepts.stream().collect(Collectors.toMap(obj -> obj.conceptId, obj -> obj));
1752+
1753+
// 2. map conceptExpressionArray into an array of ConceptSetItem and put into the optimized ConceptSetExpression.
1754+
ConceptSetExpression optimizedCSE = new ConceptSetExpression();
1755+
optimizedCSE.items = Arrays.stream(conceptExpressionArray)
1756+
.map((ce -> {
1757+
ConceptSetExpression.ConceptSetItem csi = new ConceptSetExpression.ConceptSetItem();
1758+
csi.concept = conceptMap.get(ce.conceptId);
1759+
csi.includeDescendants = ce.descendants;
1760+
csi.isExcluded = ce.exclude;
1761+
return csi;
1762+
})).toArray(ConceptSetExpression.ConceptSetItem[]::new);
1763+
1764+
// Create the result and return to client
1765+
// 1. The condensed concept set is the optimized results
1766+
ConceptSetOptimizationResult result = new ConceptSetOptimizationResult();
1767+
result.optimizedConceptSet = optimizedCSE;
1768+
1769+
// 2. the removed items are those concepts + options (from the conceptSetExpression input)
1770+
// that don't match any in the optimized result
1771+
ConceptSetExpression.ConceptSetItem[] removedCsi = Arrays.stream(conceptSetExpression.items)
1772+
.filter(ci ->
1773+
Arrays.stream(optimizedCSE.items)
1774+
.noneMatch(oci -> Objects.equals(ci.concept.conceptId, oci.concept.conceptId) &&
1775+
ci.includeDescendants == oci.includeDescendants &&
1776+
ci.isExcluded == oci.isExcluded)
1777+
).toArray(ConceptSetExpression.ConceptSetItem[]::new);
1778+
ConceptSetExpression removedConceptSet = new ConceptSetExpression();
1779+
removedConceptSet.items = removedCsi;
1780+
result.removedConceptSet = removedConceptSet;
1781+
return result;
17731782

1774-
return optimizeConceptSet(defaultSourceKey, conceptSetExpression);
17751783
}
17761784

1785+
17771786
private String JoinArray(final long[] array) {
17781787
String result = "";
17791788

0 commit comments

Comments
 (0)