1717 */
1818package org .spdx .storage .listedlicense ;
1919
20- import java .io .BufferedInputStream ;
21- import java .io .BufferedOutputStream ;
22- import java .io .BufferedReader ;
23- import java .io .BufferedWriter ;
24- import java .io .File ;
25- import java .io .FileInputStream ;
26- import java .io .FileOutputStream ;
27- import java .io .FileReader ;
28- import java .io .FileWriter ;
2920import java .io .IOException ;
3021import java .io .InputStream ;
31- import java .io .OutputStream ;
32- import java .io .Reader ;
33- import java .io .Writer ;
34- import java .net .HttpURLConnection ;
3522import java .net .URL ;
36- import java .nio .charset .StandardCharsets ;
37- import java .nio .file .Files ;
38- import java .time .Instant ;
39- import java .time .ZoneOffset ;
40- import java .time .format .DateTimeFormatter ;
41- import java .time .format .DateTimeParseException ;
42- import java .time .temporal .ChronoUnit ;
43- import java .util .Arrays ;
44- import java .util .Base64 ;
45- import java .util .Collections ;
46- import java .util .HashMap ;
47- import java .util .List ;
48- import java .util .Objects ;
4923
50- import com .google .gson .Gson ;
51- import com .google .gson .reflect .TypeToken ;
5224import org .slf4j .Logger ;
5325import org .slf4j .LoggerFactory ;
54- import org .spdx .Configuration ;
5526import org .spdx .library .InvalidSPDXAnalysisException ;
5627import org .spdx .library .SpdxConstants ;
28+ import org .spdx .utility .DownloadCache ;
5729
5830/**
5931 * @author gary Original code
@@ -64,274 +36,16 @@ public class SpdxListedLicenseWebStore extends SpdxListedLicenseModelStore {
6436
6537 private static final Logger logger = LoggerFactory .getLogger (SpdxListedLicenseModelStore .class );
6638
67- private static final int READ_TIMEOUT = 5000 ;
68- private static final int IO_BUFFER_SIZE = 8192 ;
69- private static final long DEFAULT_CACHE_CHECK_INTERVAL_SECS = 86400 ; // 24 hours, in seconds
70-
71- static final List <String > WHITE_LIST = Collections .unmodifiableList (Arrays .asList (
72- "spdx.org" , "spdx.dev" , "spdx.com" , "spdx.info" )); // Allowed host names for the SPDX listed licenses
73-
74- // See https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
75- private final String cacheDir = ((System .getenv ("XDG_CACHE_HOME" ) == null ||
76- System .getenv ("XDG_CACHE_HOME" ).trim () == "" ) ?
77- System .getProperty ("user.home" ) + File .separator + ".cache" :
78- System .getenv ("XDG_CACHE_HOME" )) +
79- File .separator + "Spdx-Java-Library" ;
80-
81- private final String CONFIG_PROPERTY_CACHE_ENABLED = "org.spdx.storage.listedlicense.enableCache" ;
82- private final String CONFIG_PROPERTY_CACHE_CHECK_INTERVAL_SECS = "org.spdx.storage.listedlicense.cacheCheckIntervalSecs" ;
83- private final boolean cacheEnabled ;
84- private final long cacheCheckIntervalSecs ;
85-
86- private final DateTimeFormatter iso8601 = DateTimeFormatter .ofPattern ("yyyy-MM-dd'T'HH:mm:ss.000'Z'" ).withZone (ZoneOffset .UTC );
8739
8840 /**
8941 * @throws InvalidSPDXAnalysisException
9042 */
9143 public SpdxListedLicenseWebStore () throws InvalidSPDXAnalysisException {
9244 super ();
93-
94- // Initialise cache
95- boolean tmpCacheEnabled = Boolean .parseBoolean (Configuration .getInstance ().getProperty (CONFIG_PROPERTY_CACHE_ENABLED , "false" ));
96- if (tmpCacheEnabled ) {
97- try {
98- final File cacheDirectory = new File (cacheDir );
99- Files .createDirectories (cacheDirectory .toPath ());
100- } catch (IOException ioe ) {
101- logger .warn ("Unable to create cache directory '" + cacheDir + "'; continuing with cache disabled." , ioe );
102- tmpCacheEnabled = false ;
103- }
104- }
105- cacheEnabled = tmpCacheEnabled ;
106- long tmpCacheCheckIntervalSecs = DEFAULT_CACHE_CHECK_INTERVAL_SECS ;
107- try {
108- tmpCacheCheckIntervalSecs = Long .parseLong (Configuration .getInstance ().getProperty (CONFIG_PROPERTY_CACHE_CHECK_INTERVAL_SECS ));
109- } catch (NumberFormatException nfe ) {
110- // Ignore parse failures - in this case we use the default value of 24 hours
111- }
112- cacheCheckIntervalSecs = tmpCacheCheckIntervalSecs ;
113- }
114-
115- /**
116- * @param s The String to BASE64 encode.
117- * @return The BASE64 encoding of s (as UTF-8).
118- */
119- private String base64Encode (final String s ) {
120- String result = null ;
121- if (s != null ) {
122- result = Base64 .getEncoder ().encodeToString (s .getBytes (StandardCharsets .UTF_8 ));
123- }
124- return result ;
125- }
126-
127- /**
128- * @param u The URL to BASE64 encode.
129- * @return The BASE64 encoding of u (as a UTF-8 encoded String).
130- */
131- private String base64Encode (final URL u ) {
132- String result = null ;
133- if (u != null ) {
134- result = base64Encode (u .toString ());
135- }
136- return result ;
13745 }
13846
13947 private InputStream getUrlInputStream (final URL url ) throws IOException {
140- InputStream result = null ;
141- if (cacheEnabled ) {
142- result = getUrlInputStreamThroughCache (url );
143- } else {
144- result = getUrlInputStreamDirect (url );
145- }
146- return result ;
147- }
148-
149- private InputStream getUrlInputStreamThroughCache (final URL url ) throws IOException {
150- final String cacheKey = base64Encode (url );
151- final File cachedFile = new File (cacheDir , cacheKey );
152- final File cachedMetadataFile = new File (cacheDir , cacheKey + ".metadata.json" );
153-
154- if (cachedFile .exists () && cachedMetadataFile .exists ()) {
155- try {
156- checkCache (url );
157- } catch (IOException ioe ) {
158- // We know we have a locally cached file here, so if we happen to get an exception we can safely ignore
159- // it and fall back on the (possibly stale) cached content file. This makes the code more robust in the
160- // presence of network errors when the cache has previously been populated.
161- }
162- } else {
163- cacheMiss (url );
164- }
165-
166- // At this point the cached file definitely exists
167- return new BufferedInputStream (new FileInputStream (cachedFile ));
168- }
169-
170- private URL processPossibleRedirect (final HttpURLConnection connection ) throws IOException {
171- URL result = null ;
172- final int status = connection .getResponseCode ();
173- if (status == HttpURLConnection .HTTP_MOVED_TEMP || status == HttpURLConnection .HTTP_MOVED_PERM
174- || status == HttpURLConnection .HTTP_SEE_OTHER ) {
175- // redirect
176- final String redirectUrlStr = connection .getHeaderField ("Location" );
177- if (Objects .isNull (redirectUrlStr ) || redirectUrlStr .isEmpty ()) {
178- throw new IOException ("Empty redirect URL response" );
179- }
180- try {
181- result = new URL (redirectUrlStr );
182- } catch (Exception ex ) {
183- throw new IOException ("Invalid redirect URL" , ex );
184- }
185- if (!result .getProtocol ().toLowerCase ().startsWith ("http" )) {
186- throw new IOException ("Invalid redirect protocol" );
187- }
188- if (!WHITE_LIST .contains (result .getHost ())) {
189- throw new IOException ("Invalid redirect host - not on the allowed 'white list'" );
190- }
191- }
192- return result ;
193- }
194-
195- private HashMap <String ,String > readMetadataFile (final File metadataFile ) {
196- HashMap <String ,String > result = null ;
197- try {
198- final Reader r = new BufferedReader (new FileReader (metadataFile ));
199- result = new Gson ().fromJson (r , new TypeToken <HashMap <String , String >>(){}.getType ());
200- }
201- catch (IOException ioe ) {
202- result = null ; // Treat metadata read errors as a cache miss
203- }
204- return result ;
205- }
206-
207- private void writeMetadataFile (final File metadataFile , HashMap <String ,String > metadata ) throws IOException {
208- final Writer w = new BufferedWriter (new FileWriter (metadataFile ));
209- try {
210- new Gson ().toJson (metadata , new TypeToken <HashMap <String , String >>(){}.getType (), w );
211- } finally {
212- w .flush ();
213- w .close ();
214- }
215- }
216-
217- private void writeContentFile (final InputStream urlInputStream , final File cachedFile ) throws IOException {
218- final OutputStream cacheFileOutputStream = new BufferedOutputStream (new FileOutputStream (cachedFile ));
219- try {
220- byte [] ioBuffer = new byte [IO_BUFFER_SIZE ];
221- int length ;
222- while ((length = urlInputStream .read (ioBuffer )) != -1 ) {
223- cacheFileOutputStream .write (ioBuffer , 0 , length );
224- }
225- } finally {
226- urlInputStream .close ();
227- cacheFileOutputStream .flush ();
228- cacheFileOutputStream .close ();
229- }
230- }
231-
232- /**
233- * Attempts to parse s as if it were an ISO8601 formatted String.
234- * @param s The string to attempt to parse.
235- * @return The Instant for that ISO8601 value if parsing succeeded, or null if it didn't.
236- */
237- private final Instant parseISO8601String (final String s ) {
238- Instant result = null ;
239- if (s != null ) {
240- try {
241- result = Instant .parse (s );
242- } catch (final DateTimeParseException dtpe ) {
243- result = null ;
244- }
245- }
246- return result ;
247- }
248-
249- private void checkCache (final URL url ) throws IOException {
250- final String cacheKey = base64Encode (url );
251- final File cachedMetadataFile = new File (cacheDir , cacheKey + ".metadata.json" );
252- final HashMap <String ,String > cachedMetadata = readMetadataFile (cachedMetadataFile );
253-
254- if (cachedMetadata != null ) {
255- final Instant lastChecked = parseISO8601String (cachedMetadata .get ("lastChecked" ));
256- final long difference = lastChecked != null ? Math .abs (ChronoUnit .SECONDS .between (Instant .now (), lastChecked )) : Long .MAX_VALUE ;
257-
258- if (difference > cacheCheckIntervalSecs ) {
259- // It's been a while since we checked the cached download of this URL for staleness, so make an ETag request
260- logger .debug ("Cache check interval exceeded; checking for updates to " + String .valueOf (url ));
261- final String eTag = cachedMetadata .get ("eTag" );
262- final HttpURLConnection connection = (HttpURLConnection ) url .openConnection ();
263- connection .setReadTimeout (READ_TIMEOUT );
264- connection .setRequestProperty ("If-None-Match" , eTag );
265- final int status = connection .getResponseCode ();
266- if (status != HttpURLConnection .HTTP_NOT_MODIFIED ) {
267- // The content of the URL has changed, which we handle the same as a cache miss (i.e. we re-download
268- // the content, and write a new metadata file from scratch)
269- cacheMiss (url , connection );
270- } else {
271- // The content hasn't changed, so just update the lastChecked metadata but otherwise do nothing
272- logger .debug ("Cache hit for " + String .valueOf (url ));
273- cachedMetadata .put ("lastChecked" , iso8601 .format (Instant .now ()));
274- writeMetadataFile (cachedMetadataFile , cachedMetadata );
275- }
276- } else {
277- // We checked recently, so don't need to do anything - the cached content will be used
278- logger .debug ("Within cache check interval; skipping check of updates to " + String .valueOf (url ));
279- }
280- } else {
281- // Metadata doesn't exist - treat it as a cache miss
282- cacheMiss (url );
283- }
284- }
285-
286- private void cacheMiss (URL url , HttpURLConnection connection ) throws IOException {
287- logger .debug ("Cache miss for " + String .valueOf (url ));
288-
289- final URL redirectUrl = processPossibleRedirect (connection );
290- if (redirectUrl != null ) {
291- url = redirectUrl ;
292- connection = (HttpURLConnection )redirectUrl .openConnection ();
293- }
294- final int status = connection .getResponseCode ();
295- if (status == HttpURLConnection .HTTP_OK ) {
296- final String cacheKey = base64Encode (url );
297- final File cachedFile = new File (cacheDir , cacheKey );
298- writeContentFile (connection .getInputStream (), cachedFile );
299- final File cachedMetadataFile = new File (cacheDir , cacheKey + ".metadata.json" );
300- final HashMap <String , String > metadata = new HashMap <String , String >();
301- metadata .put ("eTag" , connection .getHeaderField ("ETag" ));
302- metadata .put ("downloadedAt" , iso8601 .format (Instant .now ()));
303- metadata .put ("lastChecked" , iso8601 .format (Instant .now ()));
304- metadata .put ("sourceUrl" , url .toString ());
305- writeMetadataFile (cachedMetadataFile , metadata );
306- } else {
307- throw new IOException ("Unexpected HTTP status code from " + url .toString () + ": " + status );
308- }
309- }
310-
311- private void cacheMiss (final URL url ) throws IOException {
312- final HttpURLConnection connection = (HttpURLConnection )url .openConnection ();
313- connection .setReadTimeout (READ_TIMEOUT );
314- cacheMiss (url , connection );
315- }
316-
317- private InputStream getUrlInputStreamDirect (URL url ) throws IOException {
318- InputStream result = null ;
319- HttpURLConnection connection = (HttpURLConnection )url .openConnection ();
320- connection .setReadTimeout (READ_TIMEOUT );
321- final URL redirectUrl = processPossibleRedirect (connection );
322-
323- if (redirectUrl != null ) {
324- url = redirectUrl ;
325- connection = (HttpURLConnection )redirectUrl .openConnection ();
326- connection .setReadTimeout (READ_TIMEOUT );
327- }
328- final int status = connection .getResponseCode ();
329- if (status == HttpURLConnection .HTTP_OK ) {
330- result = connection .getInputStream ();
331- } else {
332- throw new IOException ("Unexpected HTTP status code from " + url .toString () + ": " + status );
333- }
334- return result ;
48+ return DownloadCache .getInstance ().getUrlInputStream (url );
33549 }
33650
33751 @ Override
0 commit comments