Skip to content

Commit 8175594

Browse files
committed
🆕 Add org.spdx.storage.listedlicense.SpdxListedLicenseWebStore.cacheCheckIntervalSecs for even better control over the cache
1 parent 39fa709 commit 8175594

1 file changed

Lines changed: 46 additions & 15 deletions

File tree

src/main/java/org/spdx/storage/listedlicense/SpdxListedLicenseWebStore.java

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.time.Instant;
3939
import java.time.ZoneOffset;
4040
import java.time.format.DateTimeFormatter;
41+
import java.time.temporal.ChronoUnit;
4142
import java.util.Arrays;
4243
import java.util.Base64;
4344
import java.util.Collections;
@@ -63,7 +64,8 @@ public class SpdxListedLicenseWebStore extends SpdxListedLicenseModelStore {
6364

6465
private static final int READ_TIMEOUT = 5000;
6566
private static final int IO_BUFFER_SIZE = 8192;
66-
67+
private static final long CACHE_CHECK_INTERVAL_SECS = 86400; // 24 hours, in seconds
68+
6769
static final List<String> WHITE_LIST = Collections.unmodifiableList(Arrays.asList(
6870
"spdx.org", "spdx.dev", "spdx.com", "spdx.info")); // Allowed host names for the SPDX listed licenses
6971

@@ -76,6 +78,7 @@ public class SpdxListedLicenseWebStore extends SpdxListedLicenseModelStore {
7678

7779
private final boolean cacheEnabled = Boolean.parseBoolean(
7880
System.getProperty("org.spdx.storage.listedlicense.SpdxListedLicenseWebStore.enableCache"));
81+
// Minimum age, in seconds, of a cache entry's "lastChecked" timestamp before the entry is re-validated
// against the server. Set once in the constructor from the system property
// org.spdx.storage.listedlicense.SpdxListedLicenseWebStore.cacheCheckIntervalSecs; falls back to
// CACHE_CHECK_INTERVAL_SECS when the property is missing or is not a valid long.
private final long cacheCheckIntervalSecs;
7982

8083
final DateTimeFormatter iso8601 = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.000'Z'").withZone(ZoneOffset.UTC);
8184

@@ -92,6 +95,14 @@ public SpdxListedLicenseWebStore() throws InvalidSPDXAnalysisException {
9295
throw new InvalidSPDXAnalysisException("Unable to create cache directory: " + cacheDir, ioe);
9396
}
9497
}
98+
long tmpCacheCheckIntervalSecs = CACHE_CHECK_INTERVAL_SECS;
99+
try {
100+
tmpCacheCheckIntervalSecs = Long.parseLong(
101+
System.getProperty("org.spdx.storage.listedlicense.SpdxListedLicenseWebStore.cacheCheckIntervalSecs"));
102+
} catch(NumberFormatException nfe) {
103+
// Ignore parse failures - in this case we use the default value of 24 hours
104+
}
105+
cacheCheckIntervalSecs = tmpCacheCheckIntervalSecs;
95106
}
96107

97108
/**
@@ -138,23 +149,14 @@ private InputStream getUrlInputStreamThroughCache(final URL url) throws IOExcept
138149
final HashMap<String,String> cachedMetadata = readMetadataFile(cachedMetadataFile);
139150

140151
if (cachedMetadata != null) {
141-
final String eTag = cachedMetadata.get("eTag");
142-
final HttpURLConnection connection = (HttpURLConnection) url.openConnection();
143-
connection.setReadTimeout(READ_TIMEOUT);
144-
connection.setRequestProperty("If-None-Match", eTag);
145-
final int status = connection.getResponseCode();
146-
if (status != HttpURLConnection.HTTP_NOT_MODIFIED) {
147-
cacheMiss(url, connection);
148-
} else {
149-
logger.debug("Cache hit for " + String.valueOf(url));
150-
}
152+
checkCache(cachedMetadataFile, cachedMetadata, url);
151153
} else {
152154
cacheMiss(url);
153155
}
154156
} catch (IOException ioe) {
155-
// We know we have a locally cached file here, so if we happen to get an error while making the ETag
156-
// request to check if it's up-to-date, we can safely ignore it and fall back on the (possibly stale)
157-
// cached content file. This makes the code more robust when the cache has previously been populated.
157+
// We know we have a locally cached file here, so if we happen to get an exception we can safely ignore
158+
// it and fall back on the (possibly stale) cached content file. This makes the code more robust in the
159+
// presence of network errors when the cache has previously been populated.
158160
}
159161
} else {
160162
cacheMiss(url);
@@ -196,7 +198,7 @@ private HashMap<String,String> readMetadataFile(final File metadataFile) {
196198
result = new Gson().fromJson(r, new TypeToken<HashMap<String, String>>(){}.getType());
197199
}
198200
catch (IOException ioe) {
199-
result = null; // Treat errors as a cache miss
201+
result = null; // Treat metadata read errors as a cache miss
200202
}
201203
return result;
202204
}
@@ -226,6 +228,34 @@ private void writeContentFile(final InputStream urlInputStream, final File cache
226228
}
227229
}
228230

231+
private void checkCache(final File cachedMetadataFile, final HashMap<String,String> cachedMetadata, final URL url) throws IOException {
232+
final Instant lastChecked = Instant.parse(cachedMetadata.get("lastChecked"));
233+
final long difference = Math.abs(ChronoUnit.SECONDS.between(Instant.now(), lastChecked));
234+
235+
if (difference > cacheCheckIntervalSecs) {
236+
// It's been a while since we checked the cached download of this URL for staleness, so make an ETag request
237+
logger.debug("Outside cache check interval; checking for updates to " + String.valueOf(url));
238+
final String eTag = cachedMetadata.get("eTag");
239+
final HttpURLConnection connection = (HttpURLConnection) url.openConnection();
240+
connection.setReadTimeout(READ_TIMEOUT);
241+
connection.setRequestProperty("If-None-Match", eTag);
242+
final int status = connection.getResponseCode();
243+
if (status != HttpURLConnection.HTTP_NOT_MODIFIED) {
244+
// The content of the URL has changed, which we handle the same as a cache miss (i.e. we re-download
245+
// the content, and write a new metadata file from scratch)
246+
cacheMiss(url, connection);
247+
} else {
248+
// The content hasn't changed, so just update the lastChecked metadata but otherwise do nothing
249+
logger.debug("Cache hit for " + String.valueOf(url));
250+
cachedMetadata.put("lastChecked", iso8601.format(Instant.now()));
251+
writeMetadataFile(cachedMetadataFile, cachedMetadata);
252+
}
253+
} else {
254+
// We checked recently, so don't need to do anything - the cached content will be used
255+
logger.debug("Within cache check interval; skipping check of updates to " + String.valueOf(url));
256+
}
257+
}
258+
229259
private void cacheMiss(URL url, HttpURLConnection connection) throws IOException {
230260
logger.debug("Cache miss for " + String.valueOf(url));
231261

@@ -243,6 +273,7 @@ private void cacheMiss(URL url, HttpURLConnection connection) throws IOException
243273
final HashMap<String, String> metadata = new HashMap<String, String>();
244274
metadata.put("eTag", connection.getHeaderField("ETag"));
245275
metadata.put("downloadedAt", iso8601.format(Instant.now()));
276+
metadata.put("lastChecked", iso8601.format(Instant.now()));
246277
metadata.put("sourceUrl", url.toString());
247278
writeMetadataFile(cachedMetadataFile, metadata);
248279
} else {

0 commit comments

Comments
 (0)