diff --git a/embedding/embedding_test.go b/embedding/embedding_test.go index e498210..6687ac6 100644 --- a/embedding/embedding_test.go +++ b/embedding/embedding_test.go @@ -25,6 +25,7 @@ import ( "io" "os" "path/filepath" + "strings" "testing" "embed-code/embed-code-go/configuration" @@ -123,6 +124,27 @@ var _ = Describe("Embedding", func() { Expect(embedding.CheckUpToDate(config)).Should(ContainElement(docPath)) }) + It("should ignore embed-code samples inside markdown code fences", func() { + docPath := fmt.Sprintf("%s/embed-code-sample-in-fence.md", config.DocumentationRoot) + processor := embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + Expect(processor.IsUpToDate()).Should(BeTrue()) + }) + + It("should detect markdown fences by triple-or-more backticks only", func() { + docPath := fmt.Sprintf("%s/triple-backticks-only-fence.md", config.DocumentationRoot) + processor := embedding.NewProcessor(docPath, config) + + Expect(processor.Embed()).Error().ShouldNot(HaveOccurred()) + + docContent, err := os.ReadFile(docPath) + Expect(err).ShouldNot(HaveOccurred()) + Expect(strings.Count(string(docContent), "System.out.println(\"Hello world\");")). 
// isClosingCodeFence reports whether the line closes a code fence opened with the given marker.
//
// line — the candidate line; leading indentation is not skipped here, so a line that
// starts with any character other than the marker character is rejected.
//
// marker — the run of identical characters that opened the fence, for example "```".
//
// The line closes the fence when it starts with at least len(marker) repetitions of the
// marker character and the remainder contains nothing but whitespace. An empty line or an
// empty marker never closes a fence.
func isClosingCodeFence(line string, marker string) bool {
	// Without a marker there is no character to match, and marker[0] below would panic.
	if line == "" || marker == "" {
		return false
	}
	markerChar := marker[0]
	runLength := 0
	for runLength < len(line) && line[runLength] == markerChar {
		runLength++
	}
	// A closing fence must be at least as long as the opening one.
	if runLength < len(marker) {
		return false
	}

	// Anything other than trailing whitespace (e.g. an info string) means this is not a closing fence.
	return strings.TrimSpace(line[runLength:]) == ""
}
// codeFenceMarker returns the leading run of the character the line starts with.
//
// line — the text to inspect; for a fence line such as "```kotlin" the result is "```".
//
// The function does not check which character the line starts with: it returns the
// longest prefix made of repetitions of the first byte. An empty line yields an empty string.
func codeFenceMarker(line string) string {
	for position := 1; position < len(line); position++ {
		if line[position] != line[0] {
			return line[:position]
		}
	}

	// Either the line is empty or it consists of one repeated character.
	return line
}
diff --git a/embedding/parsing/instruction_token.go b/embedding/parsing/instruction_token.go index b3234de..7c65f7c 100644 --- a/embedding/parsing/instruction_token.go +++ b/embedding/parsing/instruction_token.go @@ -52,7 +52,10 @@ func (e InstructionParseError) Error() string { func (e EmbedInstructionTokenState) Recognize(context Context) bool { line := context.CurrentLine() isStatement := strings.HasPrefix(strings.TrimSpace(line), "<"+EmbeddingTag) - if context.EmbeddingInstruction == nil && !context.ReachedEOF() && isStatement { + if context.EmbeddingInstruction == nil && + !context.ReachedEOF() && + !context.MarkdownFenceStarted && + isStatement { return true } diff --git a/embedding/parsing/regular_line.go b/embedding/parsing/regular_line.go index bfe3110..2a3ec20 100644 --- a/embedding/parsing/regular_line.go +++ b/embedding/parsing/regular_line.go @@ -18,7 +18,11 @@ package parsing -import "embed-code/embed-code-go/configuration" +import ( + "strings" + + "embed-code/embed-code-go/configuration" +) // RegularLineState represents a regular line of a markdown. type RegularLineState struct{} @@ -33,8 +37,42 @@ func (r RegularLineState) Recognize(_ Context) bool { // context — a context of the parsing process. 
func (r RegularLineState) Accept(context *Context, _ configuration.Configuration) error { line := context.CurrentLine() + updateMarkdownFenceContext(context, line) context.Result = append(context.Result, line) context.ToNextLine() return nil } + +func updateMarkdownFenceContext(context *Context, line string) { + if context.EmbeddingInstruction != nil { + return + } + leadingSpaces := len(line) - len(strings.TrimLeft(line, " ")) + trimmedLine := strings.TrimSpace(line) + if !strings.HasPrefix(trimmedLine, "```") { + return + } + marker := codeFenceMarker(trimmedLine) + if len(marker) < 3 { + return + } + if !context.MarkdownFenceStarted { + context.MarkdownFenceStarted = true + context.MarkdownFenceMarker = marker + context.MarkdownFenceIndentation = leadingSpaces + return + } + if context.MarkdownFenceIndentation != leadingSpaces { + return + } + if marker[0] != context.MarkdownFenceMarker[0] || len(marker) < len(context.MarkdownFenceMarker) { + return + } + if strings.TrimSpace(trimmedLine[len(marker):]) != "" { + return + } + context.MarkdownFenceStarted = false + context.MarkdownFenceMarker = "" + context.MarkdownFenceIndentation = 0 +} diff --git a/test/resources/docs/embed-code-sample-in-fence.md b/test/resources/docs/embed-code-sample-in-fence.md new file mode 100644 index 0000000..6fa8960 --- /dev/null +++ b/test/resources/docs/embed-code-sample-in-fence.md @@ -0,0 +1,12 @@ +# Example with a literal `` sample + +````markdown + + +```kotlin +val validationVersion by extra("2.0.0-SNAPSHOT.419") +``` +```` diff --git a/test/resources/docs/triple-backticks-only-fence.md b/test/resources/docs/triple-backticks-only-fence.md new file mode 100644 index 0000000..ee3a3c2 --- /dev/null +++ b/test/resources/docs/triple-backticks-only-fence.md @@ -0,0 +1,9 @@ +### hello + +```kotlin +``` + +aaa hello + +```kotlin +```