Skip to content

Commit 47b0e7f

Browse files
Added the contents
1 parent 70853d2 commit 47b0e7f

2 files changed

Lines changed: 129 additions & 36 deletions

File tree

Document-Processing/Data-Extraction/Smart-Data-Extractor/NET/Features.md

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,66 @@ using (FileStream stream = new FileStream("Image.png", FileMode.Open, FileAccess
129129

130130
{% endtabs %}
131131

132+
## Extract Data as Stream
133+
134+
To extract structured data from a PDF document and return the output as a PDF stream, use the **ExtractDataAsPdfStream** method of the **DataExtractor** class. Refer to the following code example:
135+
136+
{% tabs %}
137+
138+
{% highlight c# tabtitle="C# [Cross-platform]" %}
139+
140+
using System.IO;
141+
using Syncfusion.SmartDataExtractor;
142+
143+
//Open the input PDF file as a stream.
144+
using (FileStream inputStream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read, FileShare.Read))
145+
{
146+
//Initialize the Smart Data Extractor.
147+
DataExtractor extractor = new DataExtractor();
148+
extractor.EnableFormDetection = true;
149+
extractor.EnableTableDetection = true;
150+
extractor.ConfidenceThreshold = 0.6;
151+
152+
//Extract data and return as a PDF stream.
153+
Stream pdfStream = extractor.ExtractDataAsPdfStream(inputStream);
154+
155+
//Save the extracted PDF stream into an output file.
156+
using (FileStream outputStream = new FileStream("Output.pdf", FileMode.Create, FileAccess.Write))
157+
{
158+
pdfStream.CopyTo(outputStream);
159+
}
160+
}
161+
162+
{% endhighlight %}
163+
164+
{% highlight c# tabtitle="C# [Windows-specific]" %}
165+
166+
using System.IO;
167+
using Syncfusion.SmartDataExtractor;
168+
169+
//Open the input PDF file as a stream.
170+
using (FileStream inputStream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read, FileShare.Read))
171+
{
172+
//Initialize the Smart Data Extractor.
173+
DataExtractor extractor = new DataExtractor();
174+
extractor.EnableFormDetection = true;
175+
extractor.EnableTableDetection = true;
176+
extractor.ConfidenceThreshold = 0.6;
177+
178+
//Extract data and return as a PDF stream.
179+
Stream pdfStream = extractor.ExtractDataAsPdfStream(inputStream);
180+
181+
//Save the extracted PDF stream into an output file.
182+
using (FileStream outputStream = new FileStream("Output.pdf", FileMode.Create, FileAccess.Write))
183+
{
184+
pdfStream.CopyTo(outputStream);
185+
}
186+
}
187+
188+
{% endhighlight %}
189+
190+
{% endtabs %}
191+
132192
## Extract Form Data as JSON
133193

134194
To extract form fields across a PDF document using the **ExtractDataAsJson** method of the **DataExtractor** class with form recognition options, refer to the following code example:

Document-Processing/Data-Extraction/Smart-Table-Extractor/NET/Features.md

Lines changed: 69 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,19 @@ using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess
2727
// Initialize the Smart Table Extractor
2828
TableExtractor extractor = new TableExtractor();
2929

30-
// Set all three options together
30+
//Configure table extraction options such as border-less table detection, page range, and confidence threshold.
3131
TableExtractionOptions options = new TableExtractionOptions();
3232
options.DetectBorderlessTables = true;
3333
options.PageRange = new int[,] { { 1, 5 } };
3434
options.ConfidenceThreshold = 0.75;
3535

36+
//Assign the configured options to the extractor.
3637
extractor.TableExtractionOptions = options;
3738

38-
// Extract and save
39+
//Extract table data from the PDF document as a JSON string.
3940
string data = extractor.ExtractTableAsJson(stream);
41+
42+
//Save the extracted JSON data into an output file.
4043
File.WriteAllText("Output.json", data, Encoding.UTF8);
4144
}
4245

@@ -51,19 +54,22 @@ using Syncfusion.SmartTableExtractor;
5154
//Open the input PDF file as a stream.
5255
using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read))
5356
{
54-
// Initialize the Smart Table Extractor
57+
//Initialize the Smart Table Extractor.
5558
TableExtractor extractor = new TableExtractor();
5659

57-
// Set all three options together
60+
//Configure table extraction options such as border-less table detection, page range, and confidence threshold.
5861
TableExtractionOptions options = new TableExtractionOptions();
5962
options.DetectBorderlessTables = true;
6063
options.PageRange = new int[,] { { 1, 5 } };
6164
options.ConfidenceThreshold = 0.75;
6265

66+
//Assign the configured options to the extractor.
6367
extractor.TableExtractionOptions = options;
6468

65-
// Extract and save
69+
//Extract table data from the PDF document as a JSON string.
6670
string data = extractor.ExtractTableAsJson(stream);
71+
72+
//Save the extracted JSON data into an output file.
6773
File.WriteAllText("Output.json", data, Encoding.UTF8);
6874
}
6975

@@ -83,19 +89,23 @@ using System.IO;
8389
using System.Text;
8490
using Syncfusion.SmartTableExtractor;
8591

92+
//Open the input PDF file as a stream.
8693
using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read))
8794
{
88-
// Initialize the Smart Table Extractor
95+
//Initialize the Smart Table Extractor.
8996
TableExtractor extractor = new TableExtractor();
9097

91-
// Set DetectBorderlessTables
98+
//Configure the table extraction option to detect border-less tables in the document.
9299
TableExtractionOptions options = new TableExtractionOptions();
93100
options.DetectBorderlessTables = true;
94101

102+
//Assign the configured options to the extractor.
95103
extractor.TableExtractionOptions = options;
96104

97-
// Extract and save
105+
//Extract table data from the PDF document as a JSON string.
98106
string data = extractor.ExtractTableAsJson(stream);
107+
108+
//Save the extracted JSON data into an output file.
99109
File.WriteAllText("Output.json", data, Encoding.UTF8);
100110
}
101111

@@ -107,27 +117,31 @@ using System.IO;
107117
using System.Text;
108118
using Syncfusion.SmartTableExtractor;
109119

120+
//Open the input PDF file as a stream.
110121
using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read))
111122
{
112-
// Initialize the Smart Table Extractor
123+
//Initialize the Smart Table Extractor.
113124
TableExtractor extractor = new TableExtractor();
114125

115-
// Set DetectBorderlessTables
126+
//Configure the table extraction option to detect border-less tables in the document.
116127
TableExtractionOptions options = new TableExtractionOptions();
117128
options.DetectBorderlessTables = true;
118129

130+
//Assign the configured options to the extractor.
119131
extractor.TableExtractionOptions = options;
120132

121-
// Extract and save
133+
//Extract table data from the PDF document as a JSON string.
122134
string data = extractor.ExtractTableAsJson(stream);
135+
136+
//Save the extracted JSON data into an output file.
123137
File.WriteAllText("Output.json", data, Encoding.UTF8);
124138
}
125139

126140
{% endhighlight %}
127141

128142
{% endtabs %}
129143

130-
## Extract Tables Within a Specific Page Range
144+
## Extract Tables within a Specific Page Range
131145

132146
To extract structured table data from a specific range of pages in a PDF document using the **ExtractTableAsJson** method of the **TableExtractor** class, refer to the following code example:
133147

@@ -139,19 +153,23 @@ using System.IO;
139153
using System.Text;
140154
using Syncfusion.SmartTableExtractor;
141155

156+
//Open the input PDF file as a stream.
142157
using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read))
143158
{
144-
// Initialize the Smart Table Extractor
159+
//Initialize the Smart Table Extractor.
145160
TableExtractor extractor = new TableExtractor();
146161

147-
// Set the PageRange
162+
//Configure table extraction options to specify the page range for detection.
148163
TableExtractionOptions options = new TableExtractionOptions();
149164
options.PageRange = new int[,] { { 2, 4 } };
150165

166+
//Assign the configured options to the extractor.
151167
extractor.TableExtractionOptions = options;
152168

153-
// Extract and save
169+
//Extract table data from the specified page range as a JSON string.
154170
string data = extractor.ExtractTableAsJson(stream);
171+
172+
//Save the extracted JSON data into an output file.
155173
File.WriteAllText("Output.json", data, Encoding.UTF8);
156174
}
157175
@@ -163,19 +181,23 @@ using System.IO;
163181
using System.Text;
164182
using Syncfusion.SmartTableExtractor;
165183

184+
//Open the input PDF file as a stream.
166185
using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read))
167186
{
168-
// Initialize the Smart Table Extractor
187+
//Initialize the Smart Table Extractor.
169188
TableExtractor extractor = new TableExtractor();
170189

171-
// Set only PageRange
190+
//Configure table extraction options to specify the page range for detection.
172191
TableExtractionOptions options = new TableExtractionOptions();
173192
options.PageRange = new int[,] { { 2, 4 } };
174193

194+
//Assign the configured options to the extractor.
175195
extractor.TableExtractionOptions = options;
176196

177-
// Extract and save
197+
//Extract table data from the specified page range as a JSON string.
178198
string data = extractor.ExtractTableAsJson(stream);
199+
200+
//Save the extracted JSON data into an output file.
179201
File.WriteAllText("Output.json", data, Encoding.UTF8);
180202
}
181203

@@ -195,19 +217,23 @@ using System.IO;
195217
using System.Text;
196218
using Syncfusion.SmartTableExtractor;
197219

220+
//Open the input PDF file as a stream.
198221
using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read))
199222
{
200-
// Initialize the Smart Table Extractor
223+
//Initialize the Smart Table Extractor.
201224
TableExtractor extractor = new TableExtractor();
202225

203-
// Set ConfidenceThreshold
226+
//Configure table extraction options to set the confidence threshold for detection.
204227
TableExtractionOptions options = new TableExtractionOptions();
205228
options.ConfidenceThreshold = 0.6;
206229

230+
//Assign the configured options to the extractor.
207231
extractor.TableExtractionOptions = options;
208232

209-
// Extract and save
233+
//Extract table data from the PDF document as a JSON string.
210234
string data = extractor.ExtractTableAsJson(stream);
235+
236+
//Save the extracted JSON data into an output file.
211237
File.WriteAllText("Output.json", data, Encoding.UTF8);
212238
}
213239

@@ -219,19 +245,23 @@ using System.IO;
219245
using System.Text;
220246
using Syncfusion.SmartTableExtractor;
221247

248+
//Open the input PDF file as a stream.
222249
using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read))
223250
{
224-
// Initialize the Smart Table Extractor
251+
//Initialize the Smart Table Extractor.
225252
TableExtractor extractor = new TableExtractor();
226253

227-
// Set ConfidenceThreshold
254+
//Configure table extraction options to set the confidence threshold for detection.
228255
TableExtractionOptions options = new TableExtractionOptions();
229256
options.ConfidenceThreshold = 0.6;
230257

258+
//Assign the configured options to the extractor.
231259
extractor.TableExtractionOptions = options;
232260

233-
// Extract and save
261+
//Extract table data from the PDF document as a JSON string.
234262
string data = extractor.ExtractTableAsJson(stream);
263+
264+
//Save the extracted JSON data into an output file.
235265
File.WriteAllText("Output.json", data, Encoding.UTF8);
236266
}
237267

@@ -252,27 +282,29 @@ using System.Text;
252282
using System.Threading;
253283
using Syncfusion.SmartTableExtractor;
254284

285+
//Open the input PDF file as a stream.
255286
using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read))
256287
{
257-
// Declare and configure the extractor and options
288+
//Declare and configure the table extraction options with border-less table detection and confidence threshold.
258289
TableExtractionOptions extractionOptions = new TableExtractionOptions();
259290
extractionOptions.DetectBorderlessTables = true;
260291
extractionOptions.ConfidenceThreshold = 0.6;
261292

293+
//Initialize the Smart Table Extractor.
262294
TableExtractor tableExtractor = new TableExtractor();
295+
//Assign the configured table extraction options to the extractor.
263296
tableExtractor.TableExtractionOptions = extractionOptions;
264297

265-
// Create cancellation token with timeout
266-
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
298+
//Create a cancellation token with a timeout of 30 seconds to control the async operation.
299+
CancellationTokenSource cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
267300

268-
// Call the async extraction API
301+
//Call the asynchronous extraction API to extract table data as a JSON string.
269302
string data = await tableExtractor.ExtractTableAsJsonAsync(stream, cts.Token);
270303

271-
// Save the extracted data as JSON
304+
//Save the extracted JSON data into an output file.
272305
File.WriteAllText("Output.json", data, Encoding.UTF8);
273306
}
274307

275-
276308
{% endhighlight %}
277309

278310
{% highlight c# tabtitle="C# [Windows-specific]" %}
@@ -282,27 +314,28 @@ using System.Text;
282314
using System.Threading;
283315
using Syncfusion.SmartTableExtractor;
284316

317+
//Open the input PDF file as a stream.
285318
using (FileStream stream = new FileStream("Input.pdf", FileMode.Open, FileAccess.Read))
286319
{
287-
// Declare and configure the extractor and options
320+
//Declare and configure the table extraction options with border-less table detection and confidence threshold.
288321
TableExtractionOptions extractionOptions = new TableExtractionOptions();
289322
extractionOptions.DetectBorderlessTables = true;
290323
extractionOptions.ConfidenceThreshold = 0.6;
291324

325+
//Initialize the Smart Table Extractor and assign the configured options.
292326
TableExtractor tableExtractor = new TableExtractor();
293327
tableExtractor.TableExtractionOptions = extractionOptions;
294328

295-
// Create cancellation token with timeout
296-
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
329+
//Create a cancellation token with a timeout of 30 seconds to control the async operation.
330+
CancellationTokenSource cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
297331

298-
// Call the async extraction API
332+
//Call the asynchronous extraction API to extract table data as a JSON string.
299333
string data = await tableExtractor.ExtractTableAsJsonAsync(stream, cts.Token);
300334

301-
// Save the extracted data as JSON
335+
//Save the extracted JSON data into an output file.
302336
File.WriteAllText("Output.json", data, Encoding.UTF8);
303337
}
304338

305-
306339
{% endhighlight %}
307340

308341
{% endtabs %}

0 commit comments

Comments
 (0)