Skip to content

Commit 90f854b

Browse files
committed
FINERACT-1794: Address improvement in file upload APIs
1 parent 27489a6 commit 90f854b

13 files changed

Lines changed: 389 additions & 13 deletions

File tree

buildSrc/src/main/groovy/org.apache.fineract.dependencies.gradle

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,38 @@ dependencyManagement {
6666
dependency 'com.github.spullara.mustache.java:compiler:0.9.10'
6767
dependency 'com.jayway.jsonpath:json-path:2.7.0'
6868
dependency 'org.apache.tika:tika-core:2.4.1'
69+
dependency ('org.apache.tika:tika-parser-microsoft-module:2.6.0') {
70+
exclude 'org.bouncycastle:bcprov-jdk15on'
71+
exclude 'org.bouncycastle:bcmail-jdk15on'
72+
exclude 'commons-logging:commons-logging'
73+
exclude 'org.apache.logging.log4j:log4j-api'
74+
exclude 'org.slf4j:slf4j-api'
75+
exclude 'commons-io:commons-io'
76+
exclude 'commons-codec:commons-codec'
77+
exclude 'org.apache.commons:commons-compress'
78+
exclude 'org.apache.commons:commons-lang3'
79+
exclude 'org.apache.poi:poi'
80+
exclude 'org.apache.poi:poi-scratchpad'
81+
exclude 'org.glassfish.jaxb:jaxb-runtime'
82+
exclude 'org.apache.commons:commons-compress'
83+
exclude 'xml-apis:xml-apis'
84+
}
85+
dependency ('org.apache.tika:tika-parser-miscoffice-module:2.6.0') {
86+
exclude 'org.bouncycastle:bcprov-jdk15on'
87+
exclude 'org.bouncycastle:bcmail-jdk15on'
88+
exclude 'commons-logging:commons-logging'
89+
exclude 'org.apache.logging.log4j:log4j-api'
90+
exclude 'org.slf4j:slf4j-api'
91+
exclude 'commons-io:commons-io'
92+
exclude 'commons-codec:commons-codec'
93+
exclude 'org.apache.commons:commons-compress'
94+
exclude 'org.apache.commons:commons-lang3'
95+
exclude 'org.apache.poi:poi'
96+
exclude 'org.apache.poi:poi-scratchpad'
97+
exclude 'org.glassfish.jaxb:jaxb-runtime'
98+
exclude 'org.apache.commons:commons-compress'
99+
exclude 'xml-apis:xml-apis'
100+
}
69101
dependency 'org.apache.httpcomponents:httpclient:4.5.13'
70102
dependency 'jakarta.management.j2ee:jakarta.management.j2ee-api:1.1.4'
71103
dependency 'jakarta.jms:jakarta.jms-api:2.0.3'

fineract-provider/dependencies.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ dependencies {
5353
'org.apache.poi:poi',
5454
'org.apache.poi:poi-ooxml',
5555
'org.apache.tika:tika-core',
56+
'org.apache.tika:tika-parser-microsoft-module',
57+
'org.apache.tika:tika-parser-miscoffice-module',
5658

5759
'org.liquibase:liquibase-core',
5860

fineract-provider/src/main/java/org/apache/fineract/infrastructure/bulkimport/service/BulkImportWorkbookServiceImpl.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.fineract.infrastructure.bulkimport.service;
2020

21+
import java.io.BufferedInputStream;
2122
import java.io.ByteArrayInputStream;
2223
import java.io.ByteArrayOutputStream;
2324
import java.io.File;
@@ -92,7 +93,7 @@ public Long importWorkbook(String entity, InputStream inputStream, FormDataConte
9293
IOUtils.copy(inputStream, baos);
9394
final byte[] bytes = baos.toByteArray();
9495
InputStream clonedInputStream = new ByteArrayInputStream(bytes);
95-
InputStream clonedInputStreamWorkbook = new ByteArrayInputStream(bytes);
96+
final BufferedInputStream bis = new BufferedInputStream(new ByteArrayInputStream(bytes));
9697
final Tika tika = new Tika();
9798
final TikaInputStream tikaInputStream = TikaInputStream.get(clonedInputStream);
9899
final String fileType = tika.detect(tikaInputStream);
@@ -104,7 +105,7 @@ public Long importWorkbook(String entity, InputStream inputStream, FormDataConte
104105
"Uploaded file extension is not recognized.");
105106

106107
}
107-
Workbook workbook = new HSSFWorkbook(clonedInputStreamWorkbook);
108+
Workbook workbook = new HSSFWorkbook(clonedInputStream);
108109
GlobalEntityType entityType = null;
109110
int primaryColumn = 0;
110111
if (entity.trim().equalsIgnoreCase(GlobalEntityType.CLIENTS_PERSON.toString())) {
@@ -169,7 +170,7 @@ public Long importWorkbook(String entity, InputStream inputStream, FormDataConte
169170
throw new GeneralPlatformDomainRuleException("error.msg.unable.to.find.resource", "Unable to find requested resource");
170171

171172
}
172-
return publishEvent(primaryColumn, fileDetail, clonedInputStreamWorkbook, entityType, workbook, locale, dateFormat);
173+
return publishEvent(primaryColumn, fileDetail, bis, entityType, workbook, locale, dateFormat);
173174
}
174175
throw new GeneralPlatformDomainRuleException("error.msg.null", "One or more of the given parameters not found");
175176
} catch (IOException e) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.fineract.infrastructure.documentmanagement.contentrepository;
20+
21+
import java.io.BufferedInputStream;
22+
23+
public interface ContentPathSanitizer {
24+
25+
String sanitize(String path);
26+
27+
String sanitize(String path, BufferedInputStream is);
28+
}

fineract-provider/src/main/java/org/apache/fineract/infrastructure/documentmanagement/contentrepository/ContentRepositoryFactory.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,20 @@ public class ContentRepositoryFactory {
3333
private final ApplicationContext applicationContext;
3434
private final ExternalServicesPropertiesReadPlatformService externalServicesReadPlatformService;
3535

36+
private final FileSystemContentPathSanitizer contentPathSanitizer;
37+
3638
public ContentRepository getRepository() {
3739
final ConfigurationDomainService configurationDomainServiceJpa = this.applicationContext.getBean("configurationDomainServiceJpa",
3840
ConfigurationDomainService.class);
3941
if (configurationDomainServiceJpa.isAmazonS3Enabled()) {
4042
return createS3DocumentStore();
4143
}
42-
return new FileSystemContentRepository();
44+
return new FileSystemContentRepository(contentPathSanitizer);
4345
}
4446

4547
public ContentRepository getRepository(final StorageType documentStoreType) {
4648
if (documentStoreType == StorageType.FILE_SYSTEM) {
47-
return new FileSystemContentRepository();
49+
return new FileSystemContentRepository(contentPathSanitizer);
4850
}
4951
return createS3DocumentStore();
5052
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.fineract.infrastructure.documentmanagement.contentrepository;
20+
21+
import java.io.BufferedInputStream;
22+
import java.nio.file.Path;
23+
import java.util.List;
24+
import java.util.regex.Pattern;
25+
import javax.annotation.PostConstruct;
26+
import lombok.RequiredArgsConstructor;
27+
import lombok.extern.slf4j.Slf4j;
28+
import org.apache.commons.io.FilenameUtils;
29+
import org.apache.commons.lang3.StringUtils;
30+
import org.apache.fineract.infrastructure.core.service.ThreadLocalContextUtil;
31+
import org.apache.fineract.infrastructure.documentmanagement.exception.ContentManagementException;
32+
import org.apache.tika.Tika;
33+
import org.apache.tika.io.TikaInputStream;
34+
import org.apache.tika.metadata.Metadata;
35+
import org.apache.tika.parser.AutoDetectParser;
36+
import org.apache.tika.sax.BodyContentHandler;
37+
import org.springframework.beans.factory.annotation.Value;
38+
import org.springframework.stereotype.Component;
39+
40+
@Slf4j
41+
@RequiredArgsConstructor
42+
@Component
43+
public class FileSystemContentPathSanitizer implements ContentPathSanitizer {
44+
45+
private static Pattern OVERWRITE_SIBLING_IMAGE = Pattern.compile(".*\\.\\./+[0-9]+/+.*");
46+
47+
@Value("${fineract.content.regex-whitelist-enabled}")
48+
private boolean isRegexWhitelistEnabled;
49+
@Value("${fineract.content.regex-whitelist}")
50+
private List<String> regexWhitelist;
51+
@Value("${fineract.content.mime-whitelist-enabled}")
52+
private boolean isMimeWhitelistEnabled;
53+
@Value("${fineract.content.mime-whitelist}")
54+
private List<String> mimeWhitelist;
55+
private List<Pattern> regexWhitelistPatterns;
56+
57+
@PostConstruct
58+
public void init() {
59+
regexWhitelistPatterns = regexWhitelist.stream().map(Pattern::compile).toList();
60+
}
61+
62+
@Override
63+
public String sanitize(String path) {
64+
return sanitize(path, null);
65+
}
66+
67+
@Override
68+
public String sanitize(String path, BufferedInputStream is) {
69+
try {
70+
if (OVERWRITE_SIBLING_IMAGE.matcher(path).matches()) {
71+
throw new RuntimeException(String.format("Trying to overwrite another resource's image: %s", path));
72+
}
73+
74+
String sanitizedPath = Path.of(path).normalize().toString();
75+
76+
String fileName = FilenameUtils.getName(sanitizedPath).toLowerCase();
77+
78+
if (log.isDebugEnabled()) {
79+
log.debug("Path: {} -> {} ({})", path, sanitizedPath, fileName);
80+
}
81+
82+
if (isRegexWhitelistEnabled) {
83+
boolean matches = regexWhitelistPatterns.stream().anyMatch(p -> p.matcher(fileName).matches());
84+
85+
if (!matches) {
86+
throw new RuntimeException(String.format("File name not allowed: %s", fileName));
87+
}
88+
}
89+
90+
if (is != null && isMimeWhitelistEnabled) {
91+
Tika tika = new Tika();
92+
String extensionMimeType = tika.detect(fileName);
93+
94+
if (StringUtils.isEmpty(extensionMimeType)) {
95+
throw new RuntimeException(String.format("Could not detect mime type for filename %s!", fileName));
96+
}
97+
98+
if (!mimeWhitelist.contains(extensionMimeType)) {
99+
throw new RuntimeException(
100+
String.format("Detected mime type %s for filename %s not allowed!", extensionMimeType, fileName));
101+
}
102+
103+
String contentMimeType = detectContentMimeType(is);
104+
105+
if (StringUtils.isEmpty(contentMimeType)) {
106+
throw new RuntimeException(String.format("Could not detect content mime type for %s!", fileName));
107+
}
108+
109+
if (!mimeWhitelist.contains(contentMimeType)) {
110+
throw new RuntimeException(
111+
String.format("Detected content mime type %s for %s not allowed!", contentMimeType, fileName));
112+
}
113+
114+
if (!contentMimeType.equalsIgnoreCase(extensionMimeType)) {
115+
throw new RuntimeException(String.format("Detected filename (%s) and content (%s) mime type do not match!",
116+
extensionMimeType, contentMimeType));
117+
}
118+
}
119+
120+
Path target = Path.of(sanitizedPath);
121+
Path rootFolder = Path.of(FileSystemContentRepository.FINERACT_BASE_DIR,
122+
ThreadLocalContextUtil.getTenant().getName().replaceAll(" ", "").trim());
123+
124+
if (!target.startsWith(rootFolder)) {
125+
throw new RuntimeException(String.format("Path traversal attempt: %s (%s)", target, rootFolder));
126+
}
127+
128+
return sanitizedPath;
129+
} catch (Exception e) {
130+
throw new ContentManagementException(path, e.getMessage(), e);
131+
}
132+
}
133+
134+
private String detectContentMimeType(BufferedInputStream bis) throws Exception {
135+
TikaInputStream tis = TikaInputStream.get(bis);
136+
AutoDetectParser parser = new AutoDetectParser();
137+
BodyContentHandler handler = new BodyContentHandler(-1);
138+
Metadata metadata = new Metadata();
139+
parser.parse(tis, handler, metadata);
140+
141+
return metadata.get("Content-Type");
142+
}
143+
}

fineract-provider/src/main/java/org/apache/fineract/infrastructure/documentmanagement/contentrepository/FileSystemContentRepository.java

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.fineract.infrastructure.documentmanagement.contentrepository;
2020

2121
import com.google.common.io.Files;
22+
import java.io.BufferedInputStream;
2223
import java.io.ByteArrayInputStream;
2324
import java.io.File;
2425
import java.io.IOException;
@@ -42,6 +43,12 @@ public class FileSystemContentRepository implements ContentRepository {
4243

4344
public static final String FINERACT_BASE_DIR = System.getProperty("user.home") + File.separator + ".fineract";
4445

46+
private final FileSystemContentPathSanitizer pathSanitizer;
47+
48+
public FileSystemContentRepository(final FileSystemContentPathSanitizer pathSanitizer) {
49+
this.pathSanitizer = pathSanitizer;
50+
}
51+
4552
@Override
4653
public String saveFile(final InputStream uploadedInputStream, final DocumentCommand documentCommand) {
4754
final String fileName = documentCommand.getFileName();
@@ -88,23 +95,29 @@ public void deleteFile(final String documentPath) {
8895
}
8996

9097
private void deleteFileInternal(final String documentPath) {
91-
final File fileToBeDeleted = new File(documentPath);
98+
String path = pathSanitizer.sanitize(documentPath);
99+
100+
final File fileToBeDeleted = new File(path);
92101
final boolean fileDeleted = fileToBeDeleted.delete();
93102
if (!fileDeleted) {
94103
// no need to throw an Error, what's a caller going to do about it, so simply log a warning
95-
LOG.warn("Unable to delete file {}", documentPath);
104+
LOG.warn("Unable to delete file {}", path);
96105
}
97106
}
98107

99108
@Override
100109
public FileData fetchFile(final DocumentData documentData) {
101-
final File file = new File(documentData.fileLocation());
102-
return new FileData(Files.asByteSource(file), documentData.fileName(), documentData.contentType());
110+
String path = pathSanitizer.sanitize(documentData.fileLocation());
111+
112+
final File file = new File(path);
113+
return new FileData(Files.asByteSource(file), file.getName(), documentData.contentType());
103114
}
104115

105116
@Override
106117
public FileData fetchImage(final ImageData imageData) {
107-
final File file = new File(imageData.location());
118+
String path = pathSanitizer.sanitize(imageData.location());
119+
120+
final File file = new File(path);
108121
return new FileData(Files.asByteSource(file), imageData.getEntityDisplayName(), imageData.contentType().getValue());
109122
}
110123

@@ -139,9 +152,10 @@ private void makeDirectories(final String uploadDocumentLocation) throws IOExcep
139152
}
140153

141154
private void writeFileToFileSystem(final String fileName, final InputStream uploadedInputStream, final String fileLocation) {
142-
try {
143-
makeDirectories(fileLocation);
144-
FileUtils.copyInputStreamToFile(uploadedInputStream, new File(fileLocation)); // NOSONAR
155+
try (BufferedInputStream bis = new BufferedInputStream(uploadedInputStream)) {
156+
String sanitizedPath = pathSanitizer.sanitize(fileLocation, bis);
157+
makeDirectories(sanitizedPath);
158+
FileUtils.copyInputStreamToFile(bis, new File(sanitizedPath)); // NOSONAR
145159
} catch (final IOException ioException) {
146160
LOG.warn("writeFileToFileSystem() IOException (logged because cause is not propagated in ContentManagementException)",
147161
ioException);

fineract-provider/src/main/resources/application.properties

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ fineract.mode.batch-manager-enabled=${FINERACT_MODE_BATCH_MANAGER_ENABLED:true}
4343
fineract.correlation.enabled=${FINERACT_LOGGING_HTTP_CORRELATION_ID_ENABLED:false}
4444
fineract.correlation.header-name=${FINERACT_LOGGING_HTTP_CORRELATION_ID_HEADER_NAME:X-Correlation-ID}
4545

46+
fineract.content.regex-whitelist-enabled=${FINERACT_CONTENT_REGEX_WHITELIST_ENABLED:true}
47+
fineract.content.regex-whitelist=${FINERACT_CONTENT_REGEX_WHITELIST:.*\\.pdf$,.*\\.doc,.*\\.docx,.*\\.xls,.*\\.xlsx,.*\\.jpg,.*\\.jpeg,.*\\.png}
48+
fineract.content.mime-whitelist-enabled=${FINERACT_CONTENT_MIME_WHITELIST_ENABLED:true}
49+
fineract.content.mime-whitelist=${FINERACT_CONTENT_MIME_WHITELIST:application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,image/jpeg,image/png}
50+
4651
# Logging pattern for the console
4752
logging.pattern.console=${CONSOLE_LOG_PATTERN:%clr(%d{yyyy-MM-dd HH:mm:ss.SSS}){faint} %clr(${LOG_LEVEL_PATTERN:-%5p}) %clr(${PID:- }){magenta} %clr(%replace([%X{correlationId}]){'\\[\\]', ''}) %clr(---){faint} %clr([%15.15t]){faint} %clr(%-40.40logger{39}){cyan} %clr(:){faint} %m%n${LOG_EXCEPTION_CONVERSION_WORD:%wEx}}
4853

fineract-provider/src/test/resources/application-test.properties

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,17 @@ fineract.mode.read-enabled=true
3737
fineract.mode.write-enabled=true
3838
fineract.mode.batch-enabled=true
3939

40+
fineract.content.regex-whitelist-enabled=true
41+
fineract.content.regex-whitelist=.*\\.pdf$,.*\\.doc,.*\\.docx,.*\\.xls,.*\\.xlsx,.*\\.jpg,.*\\.jpeg,.*\\.png
42+
fineract.content.mime-whitelist-enabled=true
43+
fineract.content.mime-whitelist=application/pdf,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,image/jpeg,image/png
44+
fineract.content.filesystem.enabled=true
45+
fineract.content.filesystem.rootFolder=${user.home}/.fineract
46+
fineract.content.s3.enabled=false
47+
fineract.content.s3.bucketName=
48+
fineract.content.s3.accessName=
49+
fineract.content.s3.secretName=
50+
4051
management.health.jms.enabled=false
4152

4253
# FINERACT 1296

0 commit comments

Comments
 (0)