Skip to content

Commit 5aad1e0

Browse files
committed
Redesign FilterTemplateOutputHandler to output optional regex
Signed-off-by: Gary O'Neall <gary@sourceauditor.com>
1 parent 18a80a0 commit 5aad1e0

7 files changed

Lines changed: 766 additions & 16 deletions

File tree

TestFiles/GPL-2.0-NL.txt

Lines changed: 339 additions & 0 deletions
Large diffs are not rendered by default.

TestFiles/GPL-2.0-only.template.txt

Lines changed: 127 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
THE POSSIBILITY OF SUCH DAMAGES.<<beginOptional>> END OF TERMS AND CONDITIONS
2+
3+
How to Apply These Terms to Your New Programs
4+
5+
If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.
6+
7+
To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.
8+
9+
<<beginOptional>><<<endOptional>>one line to give the program's name and <<var;name="ideaArticle";original="an";match="a brief|an">> idea of what it does.<<beginOptional>>><<endOptional>>
10+
11+
Copyright (C)<<beginOptional>><<<endOptional>> <<var;name="templateYear";original="yyyy";match="yyyy|year">><<beginOptional>>> <<endOptional>><<beginOptional>> <<<endOptional>>name of author<<beginOptional>>><<endOptional>>
12+
13+
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16+
17+
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301<<beginOptional>>, <<endOptional>> USA.
18+
19+
Also add information on how to contact you by electronic and paper mail.
20+
21+
If the program is interactive, make it output a short notice like this when it starts in an interactive mode:
22+
23+
Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.
24+
25+
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program.
26+
27+
You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names:
28+
29+
Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker.
30+
31+
<<beginOptional>><<<endOptional>>signature of Ty Coon<<beginOptional>> ><<endOptional>>, 1 April 1989 Ty Coon, President of Vice
32+
33+
<<endOptional>>

src/main/java/org/spdx/utility/compare/FilterTemplateOutputHandler.java

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
package org.spdx.utility.compare;
1717

1818
import java.util.ArrayList;
19+
import java.util.Arrays;
20+
import java.util.HashMap;
1921
import java.util.List;
22+
import java.util.Map;
23+
import java.util.regex.Pattern;
2024

2125
import org.spdx.licenseTemplate.ILicenseTemplateOutputHandler;
2226
import org.spdx.licenseTemplate.LicenseTemplateRule;
@@ -35,34 +39,53 @@ public enum VarTextHandling {
3539
REGEX, // Include the regex itself included by the REGEX_ESCAPE strings
3640
}
3741

42+
public enum OptionalTextHandling {
    /** Omit the optional text. */
    OMIT,
    /** Retain the optional text. */
    ORIGINAL,
    /** Create a regex for the optional text with the REGEX_ESCAPE string tokenizing the words. */
    REGEX_USING_TOKENS
}
47+
3848
private VarTextHandling varTextHandling;
49+
private OptionalTextHandling optionalTextHandling;
3950
private List<String> filteredText = new ArrayList<>();
4051
StringBuilder currentString = new StringBuilder();
4152
private int optionalDepth = 0; // depth of optional rules
53+
private Map<Integer, List<String>> optionalTokens = new HashMap<>(); // map of optional depth to a list of tokens for the optional text
4254

4355
/**
4456
* Creates a handler from the legacy boolean flag, always omitting optional text.
* @param includeVarText if true, variable text is handled as VarTextHandling.ORIGINAL; otherwise as VarTextHandling.OMIT
4557
* @deprecated use the constructor taking a VarTextHandling (and optionally an OptionalTextHandling) instead
*/
4658
@Deprecated
4759
public FilterTemplateOutputHandler(boolean includeVarText) {
48-
this(includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT);
60+
this(includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT, OptionalTextHandling.OMIT);
4961
}
5062

51-
5263
/**
 * Creates a handler that omits optional text.
 *
 * @param varTextHandling how "var" text is emitted: the original text, nothing,
 *                        or the regex itself (enclosed with "~~~")
 */
public FilterTemplateOutputHandler(VarTextHandling varTextHandling) {
    // Delegate to the full constructor with optional text omitted
    this(varTextHandling, OptionalTextHandling.OMIT);
}
69+
70+
/**
 * Creates a handler with explicit handling for both "var" text and optional text.
 *
 * @param varTextHandling      how "var" text is emitted: the original text, nothing,
 *                             or the regex itself (enclosed with "~~~")
 * @param optionalTextHandling whether optional text is retained, omitted, or
 *                             converted into a regex
 */
public FilterTemplateOutputHandler(VarTextHandling varTextHandling, OptionalTextHandling optionalTextHandling) {
    this.optionalTextHandling = optionalTextHandling;
    this.varTextHandling = varTextHandling;
}
5878

5979
/* (non-Javadoc)
6080
* @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#text(java.lang.String)
* Text outside any optional section, or any text when optional text is kept as ORIGINAL,
* is appended to currentString. In REGEX_USING_TOKENS mode, text inside an optional
* section is tokenized and queued under the current optional depth. Otherwise it is dropped.
6181
*/
6282
@Override
6383
public void text(String text) {
64-
if (optionalDepth <= 0) {
84+
if (optionalDepth <= 0 || OptionalTextHandling.ORIGINAL.equals(optionalTextHandling)) {
6585
// Emit the text as-is
currentString.append(text);
86+
} else if (OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) {
87+
// Queue the tokens; they are turned into a regex when this optional section ends or a nested one begins
optionalTokens.get(optionalDepth).addAll(Arrays.asList(
88+
LicenseCompareHelper.tokenizeLicenseText(text, new HashMap<Integer, LineColumn>())));
6689
}
6790
}
6891

@@ -73,10 +96,16 @@ public void text(String text) {
7396
public void variableRule(LicenseTemplateRule rule) {
7497
if (VarTextHandling.REGEX.equals(varTextHandling) && optionalDepth <= 0) {
7598
currentString.append(REGEX_ESCAPE);
99+
currentString.append('(');
76100
currentString.append(rule.getMatch());
101+
currentString.append(')');
77102
currentString.append(REGEX_ESCAPE);
78103
} else if (VarTextHandling.ORIGINAL.equals(varTextHandling) && optionalDepth <= 0) {
79104
currentString.append(rule.getOriginal());
105+
} else if (optionalDepth > 0 && OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) {
106+
currentString.append('(');
107+
currentString.append(rule.getMatch());
108+
currentString.append(')');
80109
} else {
81110
if (currentString.length() > 0) {
82111
filteredText.add(currentString.toString());
@@ -90,19 +119,63 @@ public void variableRule(LicenseTemplateRule rule) {
90119
*/
91120
@Override
92121
public void beginOptional(LicenseTemplateRule rule) {
122+
if (OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) {
123+
if (optionalDepth == 0) {
124+
if (currentString.length() > 0) {
125+
filteredText.add(currentString.toString());
126+
currentString.setLength(0);
127+
}
128+
currentString.append(REGEX_ESCAPE);
129+
} else {
130+
currentString.append(toTokenRegex(optionalTokens.get(optionalDepth)));
131+
optionalTokens.get(optionalDepth).clear();
132+
}
133+
currentString.append('(');
134+
} else if (currentString.length() > 0) {
135+
filteredText.add(currentString.toString());
136+
currentString.setLength(0);
137+
}
93138
optionalDepth++;
139+
optionalTokens.put(optionalDepth, new ArrayList<>());
94140
}
95141

142+
/**
143+
* @param tokens list of tokens
144+
* @return regular expression with quoted tokens
145+
*/
146+
private String toTokenRegex(List<String> tokens) {
147+
StringBuilder sb = new StringBuilder();
148+
for (String token:optionalTokens.get(optionalDepth)) {
149+
token = token.trim();
150+
if (LicenseCompareHelper.NORMALIZE_TOKENS.containsKey(token.toLowerCase())) {
151+
token = LicenseCompareHelper.NORMALIZE_TOKENS.get(token.toLowerCase());
152+
}
153+
sb.append(Pattern.quote(token));
154+
sb.append("\\s*");
155+
}
156+
return sb.toString();
157+
}
158+
159+
96160
/* (non-Javadoc)
97161
* @see org.spdx.licenseTemplate.ILicenseTemplateOutputHandler#endOptional(org.spdx.licenseTemplate.LicenseTemplateRule)
* In REGEX_USING_TOKENS mode this writes the tokens queued for the current depth, closes the
* group with ")?", and stores the finished line once the outermost optional section ends.
* In the other modes it stores any accumulated text. The depth is decremented last.
98162
*/
99163
@Override
100164
public void endOptional(LicenseTemplateRule rule) {
101-
optionalDepth--;
102-
if (optionalDepth == 0 && currentString.length() > 0) {
165+
if (OptionalTextHandling.REGEX_USING_TOKENS.equals(optionalTextHandling)) {
166+
// Write the tokens still queued for this depth, then close the optional group
currentString.append(toTokenRegex(optionalTokens.get(optionalDepth)));
167+
currentString.append(")?");
168+
if (optionalDepth == 1) {
169+
// Outermost optional section closed: terminate the escaped regex and store the line
currentString.append(REGEX_ESCAPE);
103170
filteredText.add(currentString.toString());
104171
currentString.setLength(0);
172+
}
173+
} else if (currentString.length() > 0) {
174+
// Non-regex modes: store whatever text has accumulated
filteredText.add(currentString.toString());
175+
currentString.setLength(0);
105176
}
177+
// Discard this depth's token list before leaving the section
optionalTokens.remove(optionalDepth);
178+
optionalDepth--;
106179
}
107180

108181
/* (non-Javadoc)

src/main/java/org/spdx/utility/compare/LicenseCompareHelper.java

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.spdx.licenseTemplate.LicenseTemplateRuleException;
5353
import org.spdx.licenseTemplate.SpdxLicenseTemplateHelper;
5454
import org.spdx.utility.compare.CompareTemplateOutputHandler.DifferenceDescription;
55+
import org.spdx.utility.compare.FilterTemplateOutputHandler.OptionalTextHandling;
5556
import org.spdx.utility.compare.FilterTemplateOutputHandler.VarTextHandling;
5657

5758
/**
@@ -646,18 +647,33 @@ private static boolean isLicenseSetsEqual(LicenseSet license1, LicenseSet licens
646647
@Deprecated
647648
public static List<String> getNonOptionalLicenseText(String licenseTemplate, boolean includeVarText) throws SpdxCompareException {
648649
// Map the legacy flag onto VarTextHandling; optional text is always omitted on this path
return getNonOptionalLicenseText(licenseTemplate,
649-
includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT);
650+
includeVarText ? VarTextHandling.ORIGINAL : VarTextHandling.OMIT,
651+
OptionalTextHandling.OMIT);
650652
}
651653

652654
/**
653-
* Get the text of a license minus any optional text - note: this include the default variable text
655+
* Get the text of a license minus any optional text
654656
* @param licenseTemplate license template containing optional and var tags
655657
* @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text
656658
* @return list of strings for all non-optional license text.
657659
* @throws SpdxCompareException
658660
*/
659-
public static List<String> getNonOptionalLicenseText(String licenseTemplate, VarTextHandling varTextHandling) throws SpdxCompareException {
660-
FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(varTextHandling);
661+
public static List<String> getNonOptionalLicenseText(String licenseTemplate,
662+
VarTextHandling varTextHandling) throws SpdxCompareException {
663+
// Delegate to the three-argument overload with optional text omitted
return getNonOptionalLicenseText(licenseTemplate, varTextHandling, OptionalTextHandling.OMIT);
664+
}
665+
666+
/**
667+
* Get the text of a license converting variable and optional text according to the options
668+
* @param licenseTemplate license template containing optional and var tags
669+
* @param varTextHandling include original, exclude, or include the regex (enclosed with "~~~") for "var" text
670+
* @param optionalTextHandling include optional text, exclude, or include a regex for the optional text
671+
* @return list of strings for the license text, with variable and optional text handled according to the options
672+
* @throws SpdxCompareException
673+
*/
674+
public static List<String> getNonOptionalLicenseText(String licenseTemplate,
675+
VarTextHandling varTextHandling, OptionalTextHandling optionalTextHandling) throws SpdxCompareException {
676+
FilterTemplateOutputHandler filteredOutput = new FilterTemplateOutputHandler(varTextHandling, optionalTextHandling);
661677
try {
662678
SpdxLicenseTemplateHelper.parseTemplate(licenseTemplate, filteredOutput);
663679
} catch (LicenseTemplateRuleException e) {
@@ -686,9 +702,9 @@ public static Pair<Pattern, Pattern> nonOptionalTextToPatterns(List<String> nonO
686702
String lastRegex = "";
687703
while (startWordCount < numberOfWords && startTextIndex < nonOptionalText.size()) {
688704
String line = nonOptionalText.get(startTextIndex++);
689-
if (startPatternBuilder.length() > 0 && line.trim().length() > 0 && !startPatternBuilder.toString().endsWith("}")) {
690-
startPatternBuilder.append(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign
691-
}
705+
// if (startPatternBuilder.length() > 0 && line.trim().length() > 0 && !startPatternBuilder.toString().endsWith("}")) {
706+
// startPatternBuilder.append(".{0").append(regexLimit); //TODO: Replace this with the optional text match itself - requires redesign
707+
// }
692708
String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE);
693709
boolean inRegex = false; // if it starts with a regex, it will start with a blank line
694710
for (String regexSplit:regexSplits) {
@@ -753,9 +769,9 @@ public static Pair<Pattern, Pattern> nonOptionalTextToPatterns(List<String> nonO
753769
(endTextIndex == lastProcessedStartLine && (numberOfWords - endWordCount) < (nonOptionalText.get(endTextIndex).length() - wordsInLastLine)))) { // Check to make sure we're not overlapping the start words
754770
List<String> nonEmptyTokens = new ArrayList<>();
755771
String line = nonOptionalText.get(endTextIndex);
756-
if (endTextReversePattern.size() > 0 && line.trim().length() > 0 && !endTextReversePattern.get(endTextReversePattern.size()-1).endsWith("}")) {
757-
endTextReversePattern.add(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign
758-
}
772+
// if (endTextReversePattern.size() > 0 && line.trim().length() > 0 && !endTextReversePattern.get(endTextReversePattern.size()-1).endsWith("}")) {
773+
// endTextReversePattern.add(".{0,50}"); //TODO: Replace this with the optional text match itself - requires redesign
774+
// }
759775
String[] regexSplits = line.trim().split(FilterTemplateOutputHandler.REGEX_ESCAPE);
760776
boolean inRegex = false;
761777
for (String regexSplit:regexSplits) {
@@ -953,7 +969,8 @@ private static String findTemplateWithinText(String text, String template) throw
953969
return null;
954970
}
955971

956-
List<String> templateNonOptionalText = getNonOptionalLicenseText(removeCommentChars(template), VarTextHandling.REGEX);
972+
List<String> templateNonOptionalText = getNonOptionalLicenseText(removeCommentChars(template),
973+
VarTextHandling.REGEX, OptionalTextHandling.REGEX_USING_TOKENS);
957974
if (templateNonOptionalText.size() > 0 && templateNonOptionalText.get(0).startsWith("~~~.")) {
958975
// Change to a non-greedy match
959976
String firstLine = templateNonOptionalText.get(0);

0 commit comments

Comments
 (0)