3838import java .time .Instant ;
3939import java .time .ZoneOffset ;
4040import java .time .format .DateTimeFormatter ;
41+ import java .time .temporal .ChronoUnit ;
4142import java .util .Arrays ;
4243import java .util .Base64 ;
4344import java .util .Collections ;
@@ -63,7 +64,8 @@ public class SpdxListedLicenseWebStore extends SpdxListedLicenseModelStore {
6364
6465 private static final int READ_TIMEOUT = 5000 ;
6566 private static final int IO_BUFFER_SIZE = 8192 ;
66-
67+ private static final long CACHE_CHECK_INTERVAL_SECS = 86400 ; // 24 hours, in seconds
68+
6769 static final List <String > WHITE_LIST = Collections .unmodifiableList (Arrays .asList (
6870 "spdx.org" , "spdx.dev" , "spdx.com" , "spdx.info" )); // Allowed host names for the SPDX listed licenses
6971
@@ -76,6 +78,7 @@ public class SpdxListedLicenseWebStore extends SpdxListedLicenseModelStore {
7678
7779 private final boolean cacheEnabled = Boolean .parseBoolean (
7880 System .getProperty ("org.spdx.storage.listedlicense.SpdxListedLicenseWebStore.enableCache" ));
81+ private final long cacheCheckIntervalSecs ;
7982
8083 final DateTimeFormatter iso8601 = DateTimeFormatter .ofPattern ("yyyy-MM-dd'T'HH:mm:ss.000'Z'" ).withZone (ZoneOffset .UTC );
8184
@@ -92,6 +95,14 @@ public SpdxListedLicenseWebStore() throws InvalidSPDXAnalysisException {
9295 throw new InvalidSPDXAnalysisException ("Unable to create cache directory: " + cacheDir , ioe );
9396 }
9497 }
98+ long tmpCacheCheckIntervalSecs = CACHE_CHECK_INTERVAL_SECS ;
99+ try {
100+ tmpCacheCheckIntervalSecs = Long .parseLong (
101+ System .getProperty ("org.spdx.storage.listedlicense.SpdxListedLicenseWebStore.cacheCheckIntervalSecs" ));
102+ } catch (NumberFormatException nfe ) {
103+ // Ignore parse failures - in this case we use the default value of 24 hours
104+ }
105+ cacheCheckIntervalSecs = tmpCacheCheckIntervalSecs ;
95106 }
96107
97108 /**
@@ -138,23 +149,14 @@ private InputStream getUrlInputStreamThroughCache(final URL url) throws IOExcept
138149 final HashMap <String ,String > cachedMetadata = readMetadataFile (cachedMetadataFile );
139150
140151 if (cachedMetadata != null ) {
141- final String eTag = cachedMetadata .get ("eTag" );
142- final HttpURLConnection connection = (HttpURLConnection ) url .openConnection ();
143- connection .setReadTimeout (READ_TIMEOUT );
144- connection .setRequestProperty ("If-None-Match" , eTag );
145- final int status = connection .getResponseCode ();
146- if (status != HttpURLConnection .HTTP_NOT_MODIFIED ) {
147- cacheMiss (url , connection );
148- } else {
149- logger .debug ("Cache hit for " + String .valueOf (url ));
150- }
152+ checkCache (cachedMetadataFile , cachedMetadata , url );
151153 } else {
152154 cacheMiss (url );
153155 }
154156 } catch (IOException ioe ) {
155- // We know we have a locally cached file here, so if we happen to get an error while making the ETag
156- // request to check if it's up-to-date, we can safely ignore it and fall back on the (possibly stale)
157- // cached content file. This makes the code more robust when the cache has previously been populated.
157+ // We know we have a locally cached file here, so if we happen to get an exception we can safely ignore
158+ // it and fall back on the (possibly stale) cached content file. This makes the code more robust in the
159+ // presence of network errors when the cache has previously been populated.
158160 }
159161 } else {
160162 cacheMiss (url );
@@ -196,7 +198,7 @@ private HashMap<String,String> readMetadataFile(final File metadataFile) {
196198 result = new Gson ().fromJson (r , new TypeToken <HashMap <String , String >>(){}.getType ());
197199 }
198200 catch (IOException ioe ) {
199- result = null ; // Treat errors as a cache miss
201+ result = null ; // Treat metadata read errors as a cache miss
200202 }
201203 return result ;
202204 }
@@ -226,6 +228,34 @@ private void writeContentFile(final InputStream urlInputStream, final File cache
226228 }
227229 }
228230
231+ private void checkCache (final File cachedMetadataFile , final HashMap <String ,String > cachedMetadata , final URL url ) throws IOException {
232+ final Instant lastChecked = Instant .parse (cachedMetadata .get ("lastChecked" ));
233+ final long difference = Math .abs (ChronoUnit .SECONDS .between (Instant .now (), lastChecked ));
234+
235+ if (difference > cacheCheckIntervalSecs ) {
236+ // It's been a while since we checked the cached download of this URL for staleness, so make an ETag request
237+ logger .debug ("Outside cache check interval; checking for updates to " + String .valueOf (url ));
238+ final String eTag = cachedMetadata .get ("eTag" );
239+ final HttpURLConnection connection = (HttpURLConnection ) url .openConnection ();
240+ connection .setReadTimeout (READ_TIMEOUT );
241+ connection .setRequestProperty ("If-None-Match" , eTag );
242+ final int status = connection .getResponseCode ();
243+ if (status != HttpURLConnection .HTTP_NOT_MODIFIED ) {
244+ // The content of the URL has changed, which we handle the same as a cache miss (i.e. we re-download
245+ // the content, and write a new metadata file from scratch)
246+ cacheMiss (url , connection );
247+ } else {
248+ // The content hasn't changed, so just update the lastChecked metadata but otherwise do nothing
249+ logger .debug ("Cache hit for " + String .valueOf (url ));
250+ cachedMetadata .put ("lastChecked" , iso8601 .format (Instant .now ()));
251+ writeMetadataFile (cachedMetadataFile , cachedMetadata );
252+ }
253+ } else {
254+ // We checked recently, so don't need to do anything - the cached content will be used
255+ logger .debug ("Within cache check interval; skipping check of updates to " + String .valueOf (url ));
256+ }
257+ }
258+
229259 private void cacheMiss (URL url , HttpURLConnection connection ) throws IOException {
230260 logger .debug ("Cache miss for " + String .valueOf (url ));
231261
@@ -243,6 +273,7 @@ private void cacheMiss(URL url, HttpURLConnection connection) throws IOException
243273 final HashMap <String , String > metadata = new HashMap <String , String >();
244274 metadata .put ("eTag" , connection .getHeaderField ("ETag" ));
245275 metadata .put ("downloadedAt" , iso8601 .format (Instant .now ()));
276+ metadata .put ("lastChecked" , iso8601 .format (Instant .now ()));
246277 metadata .put ("sourceUrl" , url .toString ());
247278 writeMetadataFile (cachedMetadataFile , metadata );
248279 } else {
0 commit comments