Skip to content

Commit ee1f1a9

Browse files
committed
🎨 Refactor download cache out into separate class, as it may be useful beyond the WebStore
1 parent e30f100 commit ee1f1a9

2 files changed

Lines changed: 472 additions & 288 deletions

File tree

src/main/java/org/spdx/storage/listedlicense/SpdxListedLicenseWebStore.java

Lines changed: 2 additions & 288 deletions
Original file line numberDiff line numberDiff line change
@@ -17,43 +17,15 @@
1717
*/
1818
package org.spdx.storage.listedlicense;
1919

20-
import java.io.BufferedInputStream;
21-
import java.io.BufferedOutputStream;
22-
import java.io.BufferedReader;
23-
import java.io.BufferedWriter;
24-
import java.io.File;
25-
import java.io.FileInputStream;
26-
import java.io.FileOutputStream;
27-
import java.io.FileReader;
28-
import java.io.FileWriter;
2920
import java.io.IOException;
3021
import java.io.InputStream;
31-
import java.io.OutputStream;
32-
import java.io.Reader;
33-
import java.io.Writer;
34-
import java.net.HttpURLConnection;
3522
import java.net.URL;
36-
import java.nio.charset.StandardCharsets;
37-
import java.nio.file.Files;
38-
import java.time.Instant;
39-
import java.time.ZoneOffset;
40-
import java.time.format.DateTimeFormatter;
41-
import java.time.format.DateTimeParseException;
42-
import java.time.temporal.ChronoUnit;
43-
import java.util.Arrays;
44-
import java.util.Base64;
45-
import java.util.Collections;
46-
import java.util.HashMap;
47-
import java.util.List;
48-
import java.util.Objects;
4923

50-
import com.google.gson.Gson;
51-
import com.google.gson.reflect.TypeToken;
5224
import org.slf4j.Logger;
5325
import org.slf4j.LoggerFactory;
54-
import org.spdx.Configuration;
5526
import org.spdx.library.InvalidSPDXAnalysisException;
5627
import org.spdx.library.SpdxConstants;
28+
import org.spdx.utility.DownloadCache;
5729

5830
/**
5931
* @author gary Original code
@@ -64,274 +36,16 @@ public class SpdxListedLicenseWebStore extends SpdxListedLicenseModelStore {
6436

6537
private static final Logger logger = LoggerFactory.getLogger(SpdxListedLicenseModelStore.class);
6638

67-
private static final int READ_TIMEOUT = 5000;
68-
private static final int IO_BUFFER_SIZE = 8192;
69-
private static final long DEFAULT_CACHE_CHECK_INTERVAL_SECS = 86400; // 24 hours, in seconds
70-
71-
static final List<String> WHITE_LIST = Collections.unmodifiableList(Arrays.asList(
72-
"spdx.org", "spdx.dev", "spdx.com", "spdx.info")); // Allowed host names for the SPDX listed licenses
73-
74-
// See https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
75-
private final String cacheDir = ((System.getenv("XDG_CACHE_HOME") == null ||
76-
System.getenv("XDG_CACHE_HOME").trim() == "") ?
77-
System.getProperty("user.home") + File.separator + ".cache" :
78-
System.getenv("XDG_CACHE_HOME")) +
79-
File.separator + "Spdx-Java-Library";
80-
81-
private final String CONFIG_PROPERTY_CACHE_ENABLED = "org.spdx.storage.listedlicense.enableCache";
82-
private final String CONFIG_PROPERTY_CACHE_CHECK_INTERVAL_SECS = "org.spdx.storage.listedlicense.cacheCheckIntervalSecs";
83-
private final boolean cacheEnabled;
84-
private final long cacheCheckIntervalSecs;
85-
86-
private final DateTimeFormatter iso8601 = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.000'Z'").withZone(ZoneOffset.UTC);
8739

8840
/**
8941
* @throws InvalidSPDXAnalysisException
9042
*/
9143
public SpdxListedLicenseWebStore() throws InvalidSPDXAnalysisException {
9244
super();
93-
94-
// Initialise cache
95-
boolean tmpCacheEnabled = Boolean.parseBoolean(Configuration.getInstance().getProperty(CONFIG_PROPERTY_CACHE_ENABLED, "false"));
96-
if (tmpCacheEnabled) {
97-
try {
98-
final File cacheDirectory = new File(cacheDir);
99-
Files.createDirectories(cacheDirectory.toPath());
100-
} catch (IOException ioe) {
101-
logger.warn("Unable to create cache directory '" + cacheDir + "'; continuing with cache disabled.", ioe);
102-
tmpCacheEnabled = false;
103-
}
104-
}
105-
cacheEnabled = tmpCacheEnabled;
106-
long tmpCacheCheckIntervalSecs = DEFAULT_CACHE_CHECK_INTERVAL_SECS;
107-
try {
108-
tmpCacheCheckIntervalSecs = Long.parseLong(Configuration.getInstance().getProperty(CONFIG_PROPERTY_CACHE_CHECK_INTERVAL_SECS));
109-
} catch(NumberFormatException nfe) {
110-
// Ignore parse failures - in this case we use the default value of 24 hours
111-
}
112-
cacheCheckIntervalSecs = tmpCacheCheckIntervalSecs;
113-
}
114-
115-
/**
116-
* @param s The String to BASE64 encode.
117-
* @return The BASE64 encoding of s (as UTF-8).
118-
*/
119-
private String base64Encode(final String s) {
120-
String result = null;
121-
if (s != null) {
122-
result = Base64.getEncoder().encodeToString(s.getBytes(StandardCharsets.UTF_8));
123-
}
124-
return result;
125-
}
126-
127-
/**
128-
* @param u The URL to BASE64 encode.
129-
* @return The BASE64 encoding of u (as a UTF-8 encoded String).
130-
*/
131-
private String base64Encode(final URL u) {
132-
String result = null;
133-
if (u != null) {
134-
result = base64Encode(u.toString());
135-
}
136-
return result;
13745
}
13846

13947
private InputStream getUrlInputStream(final URL url) throws IOException {
140-
InputStream result = null;
141-
if (cacheEnabled) {
142-
result = getUrlInputStreamThroughCache(url);
143-
} else {
144-
result = getUrlInputStreamDirect(url);
145-
}
146-
return result;
147-
}
148-
149-
private InputStream getUrlInputStreamThroughCache(final URL url) throws IOException {
150-
final String cacheKey = base64Encode(url);
151-
final File cachedFile = new File(cacheDir, cacheKey);
152-
final File cachedMetadataFile = new File(cacheDir, cacheKey + ".metadata.json");
153-
154-
if (cachedFile.exists() && cachedMetadataFile.exists()) {
155-
try {
156-
checkCache(url);
157-
} catch (IOException ioe) {
158-
// We know we have a locally cached file here, so if we happen to get an exception we can safely ignore
159-
// it and fall back on the (possibly stale) cached content file. This makes the code more robust in the
160-
// presence of network errors when the cache has previously been populated.
161-
}
162-
} else {
163-
cacheMiss(url);
164-
}
165-
166-
// At this point the cached file definitely exists
167-
return new BufferedInputStream(new FileInputStream(cachedFile));
168-
}
169-
170-
private URL processPossibleRedirect(final HttpURLConnection connection) throws IOException {
171-
URL result = null;
172-
final int status = connection.getResponseCode();
173-
if (status == HttpURLConnection.HTTP_MOVED_TEMP || status == HttpURLConnection.HTTP_MOVED_PERM
174-
|| status == HttpURLConnection.HTTP_SEE_OTHER) {
175-
// redirect
176-
final String redirectUrlStr = connection.getHeaderField("Location");
177-
if (Objects.isNull(redirectUrlStr) || redirectUrlStr.isEmpty()) {
178-
throw new IOException("Empty redirect URL response");
179-
}
180-
try {
181-
result = new URL(redirectUrlStr);
182-
} catch(Exception ex) {
183-
throw new IOException("Invalid redirect URL", ex);
184-
}
185-
if (!result.getProtocol().toLowerCase().startsWith("http")) {
186-
throw new IOException("Invalid redirect protocol");
187-
}
188-
if (!WHITE_LIST.contains(result.getHost())) {
189-
throw new IOException("Invalid redirect host - not on the allowed 'white list'");
190-
}
191-
}
192-
return result;
193-
}
194-
195-
private HashMap<String,String> readMetadataFile(final File metadataFile) {
196-
HashMap<String,String> result = null;
197-
try {
198-
final Reader r = new BufferedReader(new FileReader(metadataFile));
199-
result = new Gson().fromJson(r, new TypeToken<HashMap<String, String>>(){}.getType());
200-
}
201-
catch (IOException ioe) {
202-
result = null; // Treat metadata read errors as a cache miss
203-
}
204-
return result;
205-
}
206-
207-
private void writeMetadataFile(final File metadataFile, HashMap<String,String> metadata) throws IOException {
208-
final Writer w = new BufferedWriter(new FileWriter(metadataFile));
209-
try {
210-
new Gson().toJson(metadata, new TypeToken<HashMap<String, String>>(){}.getType(), w);
211-
} finally {
212-
w.flush();
213-
w.close();
214-
}
215-
}
216-
217-
private void writeContentFile(final InputStream urlInputStream, final File cachedFile) throws IOException {
218-
final OutputStream cacheFileOutputStream = new BufferedOutputStream(new FileOutputStream(cachedFile));
219-
try {
220-
byte[] ioBuffer = new byte[IO_BUFFER_SIZE];
221-
int length;
222-
while ((length = urlInputStream.read(ioBuffer)) != -1) {
223-
cacheFileOutputStream.write(ioBuffer, 0, length);
224-
}
225-
} finally {
226-
urlInputStream.close();
227-
cacheFileOutputStream.flush();
228-
cacheFileOutputStream.close();
229-
}
230-
}
231-
232-
/**
233-
* Attempts to parse s as if it were an ISO8601 formatted String.
234-
* @param s The string to attempt to parse.
235-
* @return The Instant for that ISO8601 value if parsing succeeded, or null if it didn't.
236-
*/
237-
private final Instant parseISO8601String(final String s) {
238-
Instant result = null;
239-
if (s != null) {
240-
try {
241-
result = Instant.parse(s);
242-
} catch (final DateTimeParseException dtpe) {
243-
result = null;
244-
}
245-
}
246-
return result;
247-
}
248-
249-
private void checkCache(final URL url) throws IOException {
250-
final String cacheKey = base64Encode(url);
251-
final File cachedMetadataFile = new File(cacheDir, cacheKey + ".metadata.json");
252-
final HashMap<String,String> cachedMetadata = readMetadataFile(cachedMetadataFile);
253-
254-
if (cachedMetadata != null) {
255-
final Instant lastChecked = parseISO8601String(cachedMetadata.get("lastChecked"));
256-
final long difference = lastChecked != null ? Math.abs(ChronoUnit.SECONDS.between(Instant.now(), lastChecked)) : Long.MAX_VALUE;
257-
258-
if (difference > cacheCheckIntervalSecs) {
259-
// It's been a while since we checked the cached download of this URL for staleness, so make an ETag request
260-
logger.debug("Cache check interval exceeded; checking for updates to " + String.valueOf(url));
261-
final String eTag = cachedMetadata.get("eTag");
262-
final HttpURLConnection connection = (HttpURLConnection) url.openConnection();
263-
connection.setReadTimeout(READ_TIMEOUT);
264-
connection.setRequestProperty("If-None-Match", eTag);
265-
final int status = connection.getResponseCode();
266-
if (status != HttpURLConnection.HTTP_NOT_MODIFIED) {
267-
// The content of the URL has changed, which we handle the same as a cache miss (i.e. we re-download
268-
// the content, and write a new metadata file from scratch)
269-
cacheMiss(url, connection);
270-
} else {
271-
// The content hasn't changed, so just update the lastChecked metadata but otherwise do nothing
272-
logger.debug("Cache hit for " + String.valueOf(url));
273-
cachedMetadata.put("lastChecked", iso8601.format(Instant.now()));
274-
writeMetadataFile(cachedMetadataFile, cachedMetadata);
275-
}
276-
} else {
277-
// We checked recently, so don't need to do anything - the cached content will be used
278-
logger.debug("Within cache check interval; skipping check of updates to " + String.valueOf(url));
279-
}
280-
} else {
281-
// Metadata doesn't exist - treat it as a cache miss
282-
cacheMiss(url);
283-
}
284-
}
285-
286-
private void cacheMiss(URL url, HttpURLConnection connection) throws IOException {
287-
logger.debug("Cache miss for " + String.valueOf(url));
288-
289-
final URL redirectUrl = processPossibleRedirect(connection);
290-
if (redirectUrl != null) {
291-
url = redirectUrl;
292-
connection = (HttpURLConnection)redirectUrl.openConnection();
293-
}
294-
final int status = connection.getResponseCode();
295-
if (status == HttpURLConnection.HTTP_OK) {
296-
final String cacheKey = base64Encode(url);
297-
final File cachedFile = new File(cacheDir, cacheKey);
298-
writeContentFile(connection.getInputStream(), cachedFile);
299-
final File cachedMetadataFile = new File(cacheDir, cacheKey + ".metadata.json");
300-
final HashMap<String, String> metadata = new HashMap<String, String>();
301-
metadata.put("eTag", connection.getHeaderField("ETag"));
302-
metadata.put("downloadedAt", iso8601.format(Instant.now()));
303-
metadata.put("lastChecked", iso8601.format(Instant.now()));
304-
metadata.put("sourceUrl", url.toString());
305-
writeMetadataFile(cachedMetadataFile, metadata);
306-
} else {
307-
throw new IOException("Unexpected HTTP status code from " + url.toString() + ": " + status);
308-
}
309-
}
310-
311-
private void cacheMiss(final URL url) throws IOException {
312-
final HttpURLConnection connection = (HttpURLConnection)url.openConnection();
313-
connection.setReadTimeout(READ_TIMEOUT);
314-
cacheMiss(url, connection);
315-
}
316-
317-
private InputStream getUrlInputStreamDirect(URL url) throws IOException {
318-
InputStream result = null;
319-
HttpURLConnection connection = (HttpURLConnection)url.openConnection();
320-
connection.setReadTimeout(READ_TIMEOUT);
321-
final URL redirectUrl = processPossibleRedirect(connection);
322-
323-
if (redirectUrl != null) {
324-
url = redirectUrl;
325-
connection = (HttpURLConnection)redirectUrl.openConnection();
326-
connection.setReadTimeout(READ_TIMEOUT);
327-
}
328-
final int status = connection.getResponseCode();
329-
if (status == HttpURLConnection.HTTP_OK) {
330-
result = connection.getInputStream();
331-
} else {
332-
throw new IOException("Unexpected HTTP status code from " + url.toString() + ": " + status);
333-
}
334-
return result;
48+
return DownloadCache.getInstance().getUrlInputStream(url);
33549
}
33650

33751
@Override

0 commit comments

Comments
 (0)