Skip to content

Commit 16dc6db

Browse files
clean up
1 parent 11b4468 commit 16dc6db

3 files changed

Lines changed: 142 additions & 140 deletions

File tree

apps/google-docs/functions/agents/documentParserAgent/documentParser.agent.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { FinalEntriesResultSchema, FinalEntriesResult } from './schema';
1818
*/
1919
export interface DocumentParserConfig {
2020
openAiApiKey: string;
21+
// TODO: Update this when we have OAuth working
2122
document: unknown; // JSON document from Google Docs API or test data
2223
contentTypes: ContentTypeProps[];
2324
locale?: string;
@@ -109,6 +110,7 @@ EXTRACTION GUIDELINES:
109110
/**
110111
* Extracts plain text content from Google Docs JSON structure
111112
*/
113+
// TODO: Update this to be more robust and bulletproof
112114
function extractTextFromGoogleDocsJson(document: unknown): string {
113115
if (!document || typeof document !== 'object') {
114116
return '';

apps/google-docs/functions/service/entryService.ts

Lines changed: 1 addition & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { PlainClientAPI, EntryProps, ContentTypeProps } from 'contentful-management';
22
import { EntryToCreate } from '../agents/documentParserAgent/schema';
3+
import { markdownToRichText } from './utils/richtext';
34

45
/**
56
* INTEG-3264: Service for creating entries in Contentful using the Contentful Management API
@@ -17,146 +18,6 @@ export interface EntryCreationResult {
1718
}>;
1819
}
1920

20-
function createTextNode(value: string, marks: Array<{ type: 'bold' | 'italic' | 'underline' }>) {
21-
return {
22-
nodeType: 'text',
23-
value,
24-
marks,
25-
data: {},
26-
};
27-
}
28-
29-
function createParagraph(children: any[]) {
30-
return {
31-
nodeType: 'paragraph',
32-
data: {},
33-
content: children,
34-
};
35-
}
36-
37-
function createHeading(level: number, children: any[]) {
38-
const clamped = Math.min(6, Math.max(1, level));
39-
return {
40-
nodeType: `heading-${clamped}`,
41-
data: {},
42-
content: children,
43-
};
44-
}
45-
46-
function markdownToRichText(markdown: string) {
47-
// Normalize simple HTML tags to markdown-like markers we support
48-
// Bold: <strong> or <b> -> **
49-
// Italic: <em> or <i> -> *
50-
// Underline: <u> -> _
51-
let normalized = markdown;
52-
try {
53-
normalized = normalized
54-
.replace(/<strong>([\s\S]*?)<\/strong>/gi, '**$1**')
55-
.replace(/<b>([\s\S]*?)<\/b>/gi, '**$1**')
56-
.replace(/<em>([\s\S]*?)<\/em>/gi, '*$1*')
57-
.replace(/<i>([\s\S]*?)<\/i>/gi, '*$1*')
58-
.replace(/<u>([\s\S]*?)<\/u>/gi, '_$1_');
59-
} catch {
60-
// If any regex fails, fall back to original string
61-
normalized = markdown;
62-
}
63-
64-
// Basic Markdown to Contentful Rich Text for bold (**text**) and italics (*text*)
65-
// Splits into paragraphs by newlines
66-
const lines = normalized.split(/\r?\n/);
67-
const documentChildren: any[] = [];
68-
69-
for (const rawLine of lines) {
70-
if (!rawLine.trim()) {
71-
continue;
72-
}
73-
74-
const nodes: any[] = [];
75-
let buffer = '';
76-
let i = 0;
77-
let bold = false;
78-
let italic = false;
79-
let underline = false;
80-
81-
// Detect Markdown heading at start of line
82-
const headingMatch = rawLine.match(/^\s*(#{1,6})\s+(.*)$/);
83-
// Heuristic: treat lines that are entirely bold as H2 (e.g., **Heading**)
84-
const boldOnlyMatch = headingMatch
85-
? null
86-
: rawLine.match(/^\s*(\*\*|__)\s*([\s\S]*?)\s*\1\s*$/);
87-
const isHeading = Boolean(headingMatch || boldOnlyMatch);
88-
const headingLevel = headingMatch ? (headingMatch[1].length as number) : boldOnlyMatch ? 2 : 0;
89-
const line = headingMatch ? headingMatch[2] : boldOnlyMatch ? boldOnlyMatch[2] : rawLine;
90-
91-
const flushBuffer = () => {
92-
if (buffer.length === 0) return;
93-
const marks: Array<{ type: 'bold' | 'italic' | 'underline' }> = [];
94-
if (bold) marks.push({ type: 'bold' });
95-
if (italic) marks.push({ type: 'italic' });
96-
if (underline) marks.push({ type: 'underline' });
97-
nodes.push(createTextNode(buffer, marks));
98-
buffer = '';
99-
};
100-
101-
while (i < line.length) {
102-
// Toggle bold on '**'
103-
if (line.startsWith('**', i)) {
104-
flushBuffer();
105-
bold = !bold;
106-
i += 2;
107-
continue;
108-
}
109-
// Toggle italic on '*'
110-
if (line[i] === '*') {
111-
// Avoid treating '**' case here
112-
if (!(i + 1 < line.length && line[i + 1] === '*')) {
113-
flushBuffer();
114-
italic = !italic;
115-
i += 1;
116-
continue;
117-
}
118-
}
119-
// Toggle underline on '__' or single '_'
120-
if (line.startsWith('__', i)) {
121-
flushBuffer();
122-
underline = !underline;
123-
i += 2;
124-
continue;
125-
}
126-
if (line[i] === '_') {
127-
// Avoid treating '__' case here
128-
if (!(i + 1 < line.length && line[i + 1] === '_')) {
129-
flushBuffer();
130-
underline = !underline;
131-
i += 1;
132-
continue;
133-
}
134-
}
135-
buffer += line[i];
136-
i += 1;
137-
}
138-
flushBuffer();
139-
140-
if (nodes.length === 0) {
141-
nodes.push(createTextNode('', []));
142-
}
143-
144-
if (isHeading) {
145-
documentChildren.push(createHeading(headingLevel, nodes));
146-
} else {
147-
documentChildren.push(createParagraph(nodes));
148-
}
149-
}
150-
151-
return {
152-
nodeType: 'document',
153-
data: {},
154-
content: documentChildren.length
155-
? documentChildren
156-
: [createParagraph([createTextNode('', [])])],
157-
};
158-
}
159-
16021
function transformFieldsForContentType(
16122
fields: Record<string, Record<string, unknown>>,
16223
contentType: ContentTypeProps | undefined
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/** Inline marks this converter can put on a text node. */
type MarkType = 'bold' | 'italic' | 'underline';

/** Order in which active marks are listed on a text node. */
const MARK_ORDER: readonly MarkType[] = ['bold', 'italic', 'underline'];

/** Heading node types, indexed by `level - 1`. */
const HEADING_NODE_TYPES = [
  'heading-1',
  'heading-2',
  'heading-3',
  'heading-4',
  'heading-5',
  'heading-6',
] as const;

type HeadingNodeType = (typeof HEADING_NODE_TYPES)[number];

/** Leaf node carrying a run of text and the marks applied to it. */
interface RichTextTextNode {
  nodeType: 'text';
  value: string;
  marks: Array<{ type: MarkType }>;
  data: Record<string, never>;
}

/** Top-level block (paragraph or heading) holding inline text nodes. */
interface RichTextBlockNode {
  nodeType: 'paragraph' | HeadingNodeType;
  data: Record<string, never>;
  content: RichTextTextNode[];
}

/** Root node returned by {@link markdownToRichText}. */
interface RichTextDocumentNode {
  nodeType: 'document';
  data: Record<string, never>;
  content: RichTextBlockNode[];
}

/** A piece of a line: either literal text or a formatting marker. */
type InlineToken =
  | { kind: 'text'; raw: string }
  | { kind: 'marker'; raw: string; mark: MarkType; role: 'toggle' | 'open' | 'close' };

/** `# Heading` through `###### Heading`; group 1 is the hashes, group 2 the text. */
const ATX_HEADING_LINE = /^\s*(#{1,6})\s+(.*)$/;

/**
 * A line that is one single bold span, e.g. `**Heading**`.
 * The negative lookahead keeps the delimiter out of the captured text, so a line
 * such as `**Note:** see **this**` (two separate bold spans) does not match.
 */
const BOLD_ONLY_LINE = /^\s*(\*\*|__)\s*((?:(?!\1)[\s\S])*?)\s*\1\s*$/;

/** Candidate inline markers; alternation order gives `**` / `__` priority over `*` / `_`. */
const INLINE_MARKER = /\*\*|\*|__|_|<\/?u>/gi;

/** Letter or digit, used to detect underscores that sit inside a word. */
const WORD_CHARACTER = /[\p{L}\p{N}]/u;

/** Builds a text leaf node. */
function createTextNode(
  value: string,
  marks: Array<{ type: 'bold' | 'italic' | 'underline' }>
): RichTextTextNode {
  return { nodeType: 'text', value, marks, data: {} };
}

/** Builds a paragraph block around the given inline nodes. */
function createParagraph(children: RichTextTextNode[]): RichTextBlockNode {
  return { nodeType: 'paragraph', data: {}, content: children };
}

/**
 * Builds a heading block. `level` is truncated to an integer and clamped to 1..6,
 * so the node type is always one of `heading-1` .. `heading-6`.
 */
function createHeading(level: number, children: RichTextTextNode[]): RichTextBlockNode {
  // `|| 1` covers NaN and 0, which would otherwise produce an invalid node type.
  const clamped = Math.min(6, Math.max(1, Math.trunc(level) || 1));
  return {
    nodeType: HEADING_NODE_TYPES[clamped - 1] ?? 'heading-1',
    data: {},
    content: children,
  };
}

/** Turns one regex hit into a marker token, or into literal text when it is not a marker. */
function classifyMarker(raw: string, line: string, start: number): InlineToken {
  switch (raw.toLowerCase()) {
    case '**':
      return { kind: 'marker', raw, mark: 'bold', role: 'toggle' };
    case '*':
      return { kind: 'marker', raw, mark: 'italic', role: 'toggle' };
    case '__':
      return { kind: 'marker', raw, mark: 'underline', role: 'toggle' };
    case '<u>':
      return { kind: 'marker', raw, mark: 'underline', role: 'open' };
    case '</u>':
      return { kind: 'marker', raw, mark: 'underline', role: 'close' };
    case '_': {
      // A single underscore between two word characters (snake_case) is literal text.
      // charAt returns '' out of range, which never matches WORD_CHARACTER.
      const insideWord =
        WORD_CHARACTER.test(line.charAt(start - 1)) && WORD_CHARACTER.test(line.charAt(start + 1));
      return insideWord
        ? { kind: 'text', raw }
        : { kind: 'marker', raw, mark: 'underline', role: 'toggle' };
    }
    default:
      return { kind: 'text', raw };
  }
}

/** Splits a line into literal-text and marker tokens, preserving every character. */
function tokenizeInline(line: string): InlineToken[] {
  const tokens: InlineToken[] = [];
  let cursor = 0;
  for (const match of line.matchAll(INLINE_MARKER)) {
    const raw = match[0];
    const start = match.index ?? cursor;
    if (start > cursor) {
      tokens.push({ kind: 'text', raw: line.slice(cursor, start) });
    }
    tokens.push(classifyMarker(raw, line, start));
    cursor = start + raw.length;
  }
  if (cursor < line.length) {
    tokens.push({ kind: 'text', raw: line.slice(cursor) });
  }
  return tokens;
}

/**
 * Converts the inline content of one line into text nodes.
 *
 * A marker only starts formatting when a matching closer appears later on the same
 * line; otherwise its characters are kept as literal text (so `5 * 3` stays intact).
 */
function parseInline(line: string): RichTextTextNode[] {
  const tokens = tokenizeInline(line);

  // closerFollows[i] is true when some later token can close the mark of token i.
  const closerFollows = new Array<boolean>(tokens.length).fill(false);
  const closableLater = new Set<MarkType>();
  for (let i = tokens.length - 1; i >= 0; i -= 1) {
    const token = tokens[i];
    if (token === undefined || token.kind !== 'marker') continue;
    closerFollows[i] = closableLater.has(token.mark);
    if (token.role !== 'open') closableLater.add(token.mark);
  }

  const nodes: RichTextTextNode[] = [];
  const active = new Set<MarkType>();
  let pending = '';

  const flush = (): void => {
    if (pending.length === 0) return;
    const marks = MARK_ORDER.filter((mark) => active.has(mark)).map((type) => ({ type }));
    nodes.push(createTextNode(pending, marks));
    pending = '';
  };

  tokens.forEach((token, index) => {
    if (token.kind === 'text') {
      pending += token.raw;
      return;
    }
    const isActive = active.has(token.mark);
    if (isActive && token.role !== 'open') {
      flush();
      active.delete(token.mark);
      return;
    }
    if (!isActive && token.role !== 'close' && closerFollows[index]) {
      flush();
      active.add(token.mark);
      return;
    }
    // Unpaired or misplaced marker: keep its characters instead of dropping them.
    pending += token.raw;
  });
  flush();

  return nodes;
}

/**
 * Converts a small Markdown subset into a Contentful-style rich text document.
 *
 * Supported per line (blank lines are skipped; every other line becomes one block):
 * - `#` .. `######` headings, and a line that is a single `**bold**` / `__bold__`
 *   span, which is treated as a level-2 heading;
 * - `**bold**`, `*italic*`, `_underline_` / `__underline__`, and the HTML tags
 *   `<strong>`, `<b>`, `<em>`, `<i>`, `<u>`.
 *
 * Emphasis does not span lines: a marker without a closing partner on the same
 * line is emitted as literal text, as are underscores inside words.
 *
 * @param markdown - Source text. A non-string value yields an empty document.
 * @returns A document node that always contains at least one block.
 */
export function markdownToRichText(markdown: string): RichTextDocumentNode {
  const blocks: RichTextBlockNode[] = [];

  // Guard against untyped callers passing null/undefined rather than throwing.
  if (typeof markdown === 'string') {
    // Bold/italic tags are rewritten to Markdown so the heading heuristic sees them.
    // <u> is left alone and recognised by the tokenizer, because rewriting it to `_`
    // would make it indistinguishable from literal underscores in identifiers.
    const normalized = markdown
      .replace(/<strong>([\s\S]*?)<\/strong>/gi, '**$1**')
      .replace(/<b>([\s\S]*?)<\/b>/gi, '**$1**')
      .replace(/<em>([\s\S]*?)<\/em>/gi, '*$1*')
      .replace(/<i>([\s\S]*?)<\/i>/gi, '*$1*');

    for (const rawLine of normalized.split(/\r?\n/)) {
      if (!rawLine.trim()) continue;

      const atxHeading = ATX_HEADING_LINE.exec(rawLine);
      const boldOnly = atxHeading ? null : BOLD_ONLY_LINE.exec(rawLine);

      let headingLevel = 0;
      let text = rawLine;
      if (atxHeading) {
        headingLevel = (atxHeading[1] ?? '').length;
        text = atxHeading[2] ?? '';
      } else if (boldOnly) {
        headingLevel = 2;
        text = boldOnly[2] ?? '';
      }

      const inline = parseInline(text);
      // Blocks must not be empty, so fall back to a single empty text node.
      const children = inline.length > 0 ? inline : [createTextNode('', [])];
      blocks.push(
        headingLevel > 0 ? createHeading(headingLevel, children) : createParagraph(children)
      );
    }
  }

  return {
    nodeType: 'document',
    data: {},
    content: blocks.length > 0 ? blocks : [createParagraph([createTextNode('', [])])],
  };
}

0 commit comments

Comments
 (0)