diff --git a/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX.sln b/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX.sln new file mode 100644 index 00000000..6164b807 --- /dev/null +++ b/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.14.37216.2 d17.14 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Extract_Text_From_PPTX", "Extract_Text_From_PPTX\Extract_Text_From_PPTX.csproj", "{FABFAE4D-0DBF-CD07-6C7B-19144855B5A1}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {FABFAE4D-0DBF-CD07-6C7B-19144855B5A1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FABFAE4D-0DBF-CD07-6C7B-19144855B5A1}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FABFAE4D-0DBF-CD07-6C7B-19144855B5A1}.Release|Any CPU.ActiveCfg = Release|Any CPU + {FABFAE4D-0DBF-CD07-6C7B-19144855B5A1}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {8780B438-B8DC-4862-828A-A75A14D338CB} + EndGlobalSection +EndGlobal diff --git a/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX/Data/Input.pptx b/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX/Data/Input.pptx new file mode 100644 index 00000000..39fc4bff Binary files /dev/null and b/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX/Data/Input.pptx differ diff --git a/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX/Extract_Text_From_PPTX.csproj 
b/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX/Extract_Text_From_PPTX.csproj new file mode 100644 index 00000000..f6e5bd28 --- /dev/null +++ b/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX/Extract_Text_From_PPTX.csproj @@ -0,0 +1,23 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + + + Always + + + Always + + + + diff --git a/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX/Output/Sample.txt b/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX/Output/Sample.txt new file mode 100644 index 00000000..004b5085 --- /dev/null +++ b/PowerPoint-Presentation/Extract_Text_From_PPTX/.NET/Extract_Text_From_PPTX/Output/Sample.txt @@ -0,0 +1,107 @@ +--- Slide 1 --- +Company History +IMN Solutions PVT LTD is the software company, established in 1987, by George Milton. The company has been listed as the trusted partner for many high-profile organizations since 1988 and got awards for quality production from reputed organizations. +The company product acquired the MCY corporation for 20 billion dollars and became the top revenue maker for the year 2015. +The company is participating in top open source projects in automation industry. +IMN + +--- Slide 2 --- +Product Overview +Adventure Works Cycles, the fictitious company on which the Adventure Works sample databases are based, is a large, multinational manufacturing company. +The company manufactures and sells metal and composite bicycles to North American, European and Asian commercial markets. While its base operation is located in Bothell, Washington with 290 employees, several regional sales teams are located throughout their market base. +In 2000, Adventure Works Cycles bought a small manufacturing plant, Importadores Neptuno, located in Mexico. Importadores Neptuno manufactures several critical subcomponents for the Adventure Works Cycles production line. 
using Syncfusion.Presentation;
using System.Text;

namespace Extract_Text_From_PPTX
{
    /// <summary>
    /// Console sample that collects the text of every slide in a PowerPoint
    /// presentation (slide shapes, notes, layout and master shapes) and writes
    /// it to a plain-text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Opens <c>Data/Input.pptx</c>, extracts its text slide by slide and
        /// writes the result to <c>Output/Sample.txt</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Load the PowerPoint presentation. The using declaration releases it
            // when Main exits, including when extraction or the file write throws.
            using IPresentation presentation = Presentation.Open("../../../Data/Input.pptx");

            // Accumulates the extracted text of the whole presentation.
            StringBuilder textBuilder = new StringBuilder();

            // Extract text from all slides.
            for (int i = 0; i < presentation.Slides.Count; i++)
            {
                ISlide slide = presentation.Slides[i];
                textBuilder.AppendLine($"--- Slide {i + 1} ---");

                // Text from all shapes in the slide.
                ExtractText(slide.Shapes, textBuilder);

                var notesSlide = slide.NotesSlide;
                if (notesSlide != null)
                {
                    // Text from the slide notes body.
                    if (notesSlide.NotesTextBody != null)
                    {
                        foreach (IParagraph paragraph in notesSlide.NotesTextBody.Paragraphs)
                        {
                            textBuilder.AppendLine(paragraph.Text);
                        }
                    }

                    // Text from the slide notes shapes (a null collection is skipped by ExtractText).
                    ExtractText(notesSlide.Shapes, textBuilder);
                }

                var layoutSlide = slide.LayoutSlide;
                if (layoutSlide?.Shapes != null)
                {
                    // Text from the layout slide shapes; placeholders are ignored.
                    ExtractText(layoutSlide.Shapes, textBuilder, true);

                    // Text from the master slide shapes; placeholders are ignored.
                    ExtractText(layoutSlide.MasterSlide?.Shapes, textBuilder, true);
                }

                textBuilder.AppendLine();
            }

            // Write the collected text to a text file.
            string extractedText = textBuilder.ToString();
            System.IO.File.WriteAllText("../../../Output/Sample.txt", extractedText);
        }

        /// <summary>
        /// Appends the text of every shape in <paramref name="shapes"/> to
        /// <paramref name="textBuilder"/>, dispatching on the shape kind
        /// (table, SmartArt, group shape, or any other shape).
        /// </summary>
        /// <param name="shapes">Shape collection to read; a null collection is ignored.</param>
        /// <param name="textBuilder">Builder that receives the extracted text.</param>
        /// <param name="ignorePlaceHolder">
        /// When true, shapes whose slide item type is placeholder are skipped.
        /// Applies to plain shapes, including those nested in group shapes.
        /// </param>
        public static void ExtractText(IShapes shapes, StringBuilder textBuilder, bool ignorePlaceHolder = false)
        {
            if (shapes is null)
            {
                return;
            }

            foreach (IShape shape in shapes)
            {
                // Case order mirrors the original if/else chain: table, SmartArt, group, other.
                switch (shape)
                {
                    case ITable:
                        ExtractTextInTable(shape, textBuilder);
                        break;
                    case ISmartArt:
                        ExtractTextInSmartArt(shape, textBuilder);
                        break;
                    case IGroupShape groupShape:
                        // Group shapes contain their own shape collection; recurse into it.
                        ExtractText(groupShape.Shapes, textBuilder, ignorePlaceHolder);
                        break;
                    default:
                        ExtractTextInShape(shape, textBuilder, ignorePlaceHolder);
                        break;
                }
            }
        }

        /// <summary>
        /// Appends the text of every node of a SmartArt shape, including child nodes.
        /// </summary>
        /// <param name="shape">Shape to read; ignored when it is not an <see cref="ISmartArt"/>.</param>
        /// <param name="textBuilder">Builder that receives the extracted text.</param>
        public static void ExtractTextInSmartArt(IShape shape, StringBuilder textBuilder)
        {
            if (shape is not ISmartArt smartArt)
            {
                return;
            }

            foreach (ISmartArtNode node in smartArt.Nodes)
            {
                ExtractTextInSmartArtNode(node, textBuilder);
            }
        }

        /// <summary>
        /// Appends each paragraph of a shape's text body on its own line.
        /// </summary>
        /// <param name="shape">Shape to read; shapes without a text body are ignored.</param>
        /// <param name="textBuilder">Builder that receives the extracted text.</param>
        /// <param name="ignorePlaceHolder">When true, placeholder shapes are skipped.</param>
        public static void ExtractTextInShape(IShape shape, StringBuilder textBuilder, bool ignorePlaceHolder)
        {
            if (shape.TextBody == null)
            {
                return;
            }

            // The property pattern is null-safe, unlike dereferencing an unchecked 'as' cast.
            if (ignorePlaceHolder && shape is ISlideItem { SlideItemType: SlideItemType.Placeholder })
            {
                return;
            }

            foreach (IParagraph paragraph in shape.TextBody.Paragraphs)
            {
                textBuilder.AppendLine(paragraph.Text);
            }
        }

        /// <summary>
        /// Appends the text of every cell of a table, row by row, one cell per line.
        /// </summary>
        /// <param name="shape">Shape to read; ignored when it is not an <see cref="ITable"/>.</param>
        /// <param name="textBuilder">Builder that receives the extracted text.</param>
        public static void ExtractTextInTable(IShape shape, StringBuilder textBuilder)
        {
            if (shape is not ITable table)
            {
                return;
            }

            foreach (IRow row in table.Rows)
            {
                foreach (ICell cell in row.Cells)
                {
                    // Guard the text body like the shape and SmartArt helpers do.
                    if (cell.TextBody == null)
                    {
                        continue;
                    }

                    textBuilder.AppendLine(cell.TextBody.Text);
                }
            }
        }

        /// <summary>
        /// Appends the paragraphs of a SmartArt node, then recurses into its child nodes.
        /// </summary>
        /// <param name="node">SmartArt node to read.</param>
        /// <param name="textBuilder">Builder that receives the extracted text.</param>
        public static void ExtractTextInSmartArtNode(ISmartArtNode node, StringBuilder textBuilder)
        {
            if (node.TextBody != null)
            {
                foreach (IParagraph paragraph in node.TextBody.Paragraphs)
                {
                    textBuilder.AppendLine(paragraph.Text);
                }
            }

            // Recursively extract text from child nodes.
            foreach (ISmartArtNode childNode in node.ChildNodes)
            {
                ExtractTextInSmartArtNode(childNode, textBuilder);
            }
        }
    }
}