Skip to content

Commit 350c452

Browse files
committed
refactor: optimize code duplication detection and normalization process
1 parent 94662ee commit 350c452

2 files changed

Lines changed: 24 additions & 20 deletions

File tree

CodeLineCounter.Tests/CodeDuplicationCheckerTests.cs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,7 @@ public void AnotherTestMethod()
102102
// Act
103103
checker.DetectCodeDuplicationInSourceCode(file1, sourceCode1);
104104
checker.DetectCodeDuplicationInSourceCode(file2, sourceCode2);
105+
checker.UpdateDuplicationMap();
105106
var result = checker.GetCodeDuplicationMap();
106107

107108
// Assert

CodeLineCounter/Services/CodeDuplicationChecker.cs

Lines changed: 23 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -34,47 +34,57 @@ public void DetectCodeDuplicationInSourceCode(string normalizedPath, string sour
3434
{
3535
var tree = CSharpSyntaxTree.ParseText(sourceCode);
3636
var root = tree.GetRoot();
37+
// Get all method declarations
3738
var methods = root.DescendantNodes().OfType<MethodDeclarationSyntax>();
3839

3940
Parallel.ForEach(methods, method =>
4041
{
42+
// Extract code blocks from each method.
4143
var blocks = ExtractBlocks(method);
4244

4345
foreach (var block in blocks)
4446
{
45-
var code = NormalizeCode(block.ToFullString());
47+
// Optimize normalization using string.Concat filtering whitespace.
48+
var originalCode = block.ToFullString();
49+
var code = NormalizeCode(originalCode);
4650
var hash = HashUtils.ComputeHash(code);
47-
var location = block.GetLocation().GetLineSpan().StartLinePosition.Line;
48-
var nbLines = block.GetLocation().GetLineSpan().EndLinePosition.Line - location + 1;
51+
52+
// Compute start line and number of lines efficiently using location spans.
53+
var span = block.GetLocation().GetLineSpan();
54+
var startLine = span.StartLinePosition.Line;
55+
var nbLines = span.EndLinePosition.Line - startLine + 1;
4956

5057
var duplicationCode = new DuplicationCode
5158
{
5259
CodeHash = hash,
5360
FilePath = normalizedPath,
5461
MethodName = method.Identifier.Text,
55-
StartLine = location,
62+
StartLine = startLine,
5663
NbLines = nbLines
5764
};
5865

59-
hashMap.AddOrUpdate(hash, [duplicationCode],
60-
(key, set) =>
66+
// Update the hash map with the new duplication code using thread-safe operations.
67+
hashMap.AddOrUpdate(hash,
68+
key => new HashSet<DuplicationCode> { duplicationCode },
69+
(key, existingSet) =>
6170
{
62-
lock (set)
71+
lock (existingSet)
6372
{
64-
set.Add(duplicationCode);
73+
existingSet.Add(duplicationCode);
6574
}
66-
return set;
75+
return existingSet;
6776
});
6877
}
6978
});
7079

71-
UpdateDuplicationMap();
7280
}
7381

74-
private void UpdateDuplicationMap()
82+
public void UpdateDuplicationMap()
7583
{
7684
lock (duplicationLock)
7785
{
86+
// Clear previous results to avoid stale data.
87+
duplicationMap.Clear();
7888
foreach (var entry in hashMap)
7989
{
8090
if (entry.Value.Count > 1)
@@ -97,15 +107,8 @@ private static IEnumerable<BlockSyntax> ExtractBlocks(MethodDeclarationSyntax me
97107

98108
private static string NormalizeCode(string code)
99109
{
100-
var stringBuilder = new StringBuilder();
101-
foreach (char c in code)
102-
{
103-
if (!char.IsWhiteSpace(c))
104-
{
105-
stringBuilder.Append(c);
106-
}
107-
}
108-
return stringBuilder.ToString();
110+
// Use string.Concat with LINQ to filter out whitespace characters.
111+
return string.Concat(code.Where(c => !char.IsWhiteSpace(c)));
109112
}
110113
}
111114
}

0 commit comments

Comments
 (0)