diff --git a/src/FSharp.Data.Html.Core/FSharp.Data.Html.Core.fsproj b/src/FSharp.Data.Html.Core/FSharp.Data.Html.Core.fsproj index 5f7a92c5d..4d7523229 100644 --- a/src/FSharp.Data.Html.Core/FSharp.Data.Html.Core.fsproj +++ b/src/FSharp.Data.Html.Core/FSharp.Data.Html.Core.fsproj @@ -17,6 +17,7 @@ + diff --git a/src/FSharp.Data.Html.Core/HtmlDocumentOperations.fs b/src/FSharp.Data.Html.Core/HtmlDocumentOperations.fs new file mode 100644 index 000000000..3b22217e7 --- /dev/null +++ b/src/FSharp.Data.Html.Core/HtmlDocumentOperations.fs @@ -0,0 +1,315 @@ +namespace FSharp.Data + +open FSharp.Data +open System.Runtime.CompilerServices + +[] +module private DocumentUtils = + + let inline toLower (s: string) = s.ToLowerInvariant() + let inline getNameSet names = names |> Seq.map toLower |> Set.ofSeq + +// -------------------------------------------------------------------------------------- + +[] +/// Module with operations on HTML documents +module HtmlDocument = + + /// Returns the doctype of the document + let docType doc = + match doc with + | HtmlDocument(docType = docType) -> docType + + /// Gets all of the root elements of the document + let elements doc = + match doc with + | HtmlDocument(elements = elements) -> elements + + /// + /// Returns all of the root elements of the document that match the set of names + /// + /// The set of names to match + /// The given document + let inline elementsNamed names doc = + let nameSet = getNameSet names + + doc |> elements |> List.filter (HtmlNode.name >> nameSet.Contains) + + /// + /// Gets all of the descendants of this document that satisfy the given predicate + /// + /// If a match is found continues down the tree matching child elements + /// The predicate by which to match the nodes to return + /// The given document + let inline descendants recurseOnMatch predicate doc = + doc + |> elements + |> Seq.collect (HtmlNode.descendantsAndSelf recurseOnMatch predicate) + + /// + /// Finds all of the descendant nodes of 
this document that match the given set of names + /// + /// If a match is found continues down the tree matching child elements + /// The set of names to match + /// The given document + let inline descendantsNamed recurseOnMatch names doc = + let nameSet = getNameSet names + + doc |> descendants recurseOnMatch (HtmlNode.name >> nameSet.Contains) + + /// + /// Gets all of the descendants of this document that satisfy the given predicate + /// + /// If a match is found continues down the tree matching child elements + /// The predicate by which to match the nodes to return + /// The given document + let inline descendantsWithPath recurseOnMatch predicate doc = + doc + |> elements + |> Seq.collect (HtmlNode.descendantsAndSelfWithPath recurseOnMatch predicate) + + /// + /// Finds all of the descendant nodes of this document that match the given set of names + /// + /// If a match is found continues down the tree matching child elements + /// The set of names to match + /// The given document + let inline descendantsNamedWithPath recurseOnMatch names doc = + let nameSet = getNameSet names + + doc |> descendantsWithPath recurseOnMatch (HtmlNode.name >> nameSet.Contains) + + /// + /// Finds the body element of the given document, + /// this throws an exception if no body element exists. + /// + /// The given document + let inline body (x: HtmlDocument) = + match List.ofSeq <| descendantsNamed false [ "body" ] x with + | [] -> failwith "No element body found!" + | body :: _ -> body + + /// + /// Tries to find the body element of the given document. + /// + /// The given document + let inline tryGetBody (x: HtmlDocument) = + match List.ofSeq <| descendantsNamed false [ "body" ] x with + | [] -> None + | body :: _ -> Some body + + /// + /// Finds the html element of the given document, + /// this throws an exception if no html element exists. 
+ /// + /// The given document + let inline html (x: HtmlDocument) = + match List.ofSeq <| descendantsNamed false [ "html" ] x with + | [] -> failwith "No element html found!" + | html :: _ -> html + + /// + /// Tries to find the html element of the given document. + /// + /// The given document + let inline tryGetHtml (x: HtmlDocument) = + match List.ofSeq <| descendantsNamed false [ "html" ] x with + | [] -> None + | html :: _ -> Some html + + +[] +/// Extension methods with operations on HTML documents +type HtmlDocumentExtensions = + + /// + /// Returns all of the root elements of the current document + /// + /// The given document + [] + static member Elements(doc: HtmlDocument) = HtmlDocument.elements doc + + /// + /// Returns all of the root elements in the current document that match the set of names + /// + /// The given document + /// The set of names to match + [] + static member Elements(doc: HtmlDocument, names: seq) = HtmlDocument.elementsNamed names doc + + /// + /// Returns all of the root elements in the current document that match the name + /// + /// The given document + /// The name to match + [] + static member Elements(doc: HtmlDocument, name: string) = HtmlDocument.elementsNamed [ name ] doc + + /// + /// Gets all of the descendants of this document that satisfy the given predicate + /// + /// The given document + /// The predicate by which to match the nodes to return + /// If a match is found continues down the tree matching child elements + [] + static member Descendants(doc: HtmlDocument, predicate, recurseOnMatch) = + HtmlDocument.descendants recurseOnMatch predicate doc + + /// + /// Gets all of the descendants of this document that satisfy the given predicate + /// Recurses on match + /// + /// The given document + /// The predicate by which to match the nodes to return + [] + static member Descendants(doc: HtmlDocument, predicate) = + let recurseOnMatch = true + HtmlDocument.descendants recurseOnMatch predicate doc + + /// Gets all 
of the descendants of this document + /// Recurses on match + [] + static member Descendants(doc: HtmlDocument) = + let recurseOnMatch = true + let predicate = fun _ -> true + HtmlDocument.descendants recurseOnMatch predicate doc + + /// + /// Finds all of the descendant nodes of this document that match the given set of names + /// + /// The given document + /// The set of names to match + /// If a match is found continues down the tree matching child elements + [] + static member Descendants(doc: HtmlDocument, names: seq, recurseOnMatch) = + HtmlDocument.descendantsNamed recurseOnMatch names doc + + /// + /// Finds all of the descendant nodes of this document that match the given set of names + /// Recurses on match + /// + /// The given document + /// The set of names to match + [] + static member Descendants(doc: HtmlDocument, names: seq) = + let recurseOnMatch = true + HtmlDocument.descendantsNamed recurseOnMatch names doc + + /// + /// Finds all of the descendant nodes of this document that match the given name + /// + /// The given document + /// The name to match + /// If a match is found continues down the tree matching child elements + [] + static member Descendants(doc: HtmlDocument, name: string, recurseOnMatch) = + HtmlDocument.descendantsNamed recurseOnMatch [ name ] doc + + /// + /// Finds all of the descendant nodes of this document that match the given name + /// Recurses on match + /// + /// The given document + /// The name to match + [] + static member Descendants(doc: HtmlDocument, name: string) = + let recurseOnMatch = true + HtmlDocument.descendantsNamed recurseOnMatch [ name ] doc + + /// + /// Gets all of the descendants of this document that satisfy the given predicate + /// + /// The given document + /// The predicate by which to match the nodes to return + /// If a match is found continues down the tree matching child elements + [] + static member DescendantsWithPath(doc: HtmlDocument, predicate, recurseOnMatch) = + 
HtmlDocument.descendantsWithPath recurseOnMatch predicate doc + + /// + /// Gets all of the descendants of this document that satisfy the given predicate + /// Recurses on match + /// + /// The given document + /// The predicate by which to match the nodes to return + [] + static member DescendantsWithPath(doc: HtmlDocument, predicate) = + let recurseOnMatch = true + HtmlDocument.descendantsWithPath recurseOnMatch predicate doc + + /// Gets all of the descendants of this document + /// Recurses on match + [] + static member DescendantsWithPath(doc: HtmlDocument) = + let recurseOnMatch = true + let predicate = fun _ -> true + HtmlDocument.descendantsWithPath recurseOnMatch predicate doc + + /// + /// Finds all of the descendant nodes of this document that match the given set of names + /// + /// The given document + /// The set of names to match + /// If a match is found continues down the tree matching child elements + [] + static member DescendantsWithPath(doc: HtmlDocument, names: seq, recurseOnMatch) = + HtmlDocument.descendantsNamedWithPath recurseOnMatch names doc + + /// + /// Finds all of the descendant nodes of this document that match the given set of names + /// Recurses on match + /// + /// The given document + /// The set of names to match + [] + static member DescendantsWithPath(doc: HtmlDocument, names: seq) = + let recurseOnMatch = true + HtmlDocument.descendantsNamedWithPath recurseOnMatch names doc + + /// + /// Finds all of the descendant nodes of this document that match the given name + /// + /// The given document + /// The name to match + /// If a match is found continues down the tree matching child elements + [] + static member DescendantsWithPath(doc: HtmlDocument, name: string, recurseOnMatch) = + HtmlDocument.descendantsNamedWithPath recurseOnMatch [ name ] doc + + /// + /// Finds all of the descendant nodes of this document that match the given name + /// Recurses on match + /// + /// The given document + /// The name to match + [] + 
static member DescendantsWithPath(doc: HtmlDocument, name: string) = + let recurseOnMatch = true + HtmlDocument.descendantsNamedWithPath recurseOnMatch [ name ] doc + + /// Finds the body element of the given document, + /// this throws an exception if no body element exists. + [] + static member Body(doc: HtmlDocument) = HtmlDocument.body doc + + /// Tries to find the body element of the given document. + [] + static member TryGetBody(doc: HtmlDocument) = HtmlDocument.tryGetBody doc + + /// Finds the html element of the given document, + /// this throws an exception if no html element exists. + [] + static member Html(doc: HtmlDocument) = HtmlDocument.html doc + + /// Tries to find the html element of the given document. + [] + static member TryGetHtml(doc: HtmlDocument) = HtmlDocument.tryGetHtml doc + +// -------------------------------------------------------------------------------------- + +[] +/// Provides the dynamic operator for getting attribute values from HTML elements +module HtmlExtensions = + + /// Gets the value of an attribute from an HTML element + let (?) 
(node: HtmlNode) name = HtmlNode.attributeValue name node diff --git a/src/FSharp.Data.Html.Core/HtmlOperations.fs b/src/FSharp.Data.Html.Core/HtmlOperations.fs index 34e12c92b..7a172ca33 100644 --- a/src/FSharp.Data.Html.Core/HtmlOperations.fs +++ b/src/FSharp.Data.Html.Core/HtmlOperations.fs @@ -862,308 +862,3 @@ type HtmlNodeExtensions = /// Returns the direct inner text of the current node [] static member DirectInnerText(n: HtmlNode) = HtmlNode.directInnerText n - -// -------------------------------------------------------------------------------------- - -[] -/// Module with operations on HTML documents -module HtmlDocument = - - /// Returns the doctype of the document - let docType doc = - match doc with - | HtmlDocument(docType = docType) -> docType - - //// Gets all of the root elements of the document - let elements doc = - match doc with - | HtmlDocument(elements = elements) -> elements - - /// - /// Returns all of the root elements of the document that match the set of names - /// - /// The set of names to match - /// The given document - let inline elementsNamed names doc = - let nameSet = getNameSet names - - doc |> elements |> List.filter (HtmlNode.name >> nameSet.Contains) - - /// - /// Gets all of the descendants of this document that statisfy the given predicate - /// - /// If a match is found continues down the tree matching child elements - /// The predicate by which to match the nodes to return - /// The given document - let inline descendants recurseOnMatch predicate doc = - doc - |> elements - |> Seq.collect (HtmlNode.descendantsAndSelf recurseOnMatch predicate) - - /// - /// Finds all of the descendant nodes of this document that match the given set of names - /// - /// If a match is found continues down the tree matching child elements - /// The set of names to match - /// The given document - let inline descendantsNamed recurseOnMatch names doc = - let nameSet = getNameSet names - - doc |> descendants recurseOnMatch (HtmlNode.name >> 
nameSet.Contains) - - /// - /// Gets all of the descendants of this document that statisfy the given predicate - /// - /// If a match is found continues down the tree matching child elements - /// The predicate by which to match the nodes to return - /// The given document - let inline descendantsWithPath recurseOnMatch predicate doc = - doc - |> elements - |> Seq.collect (HtmlNode.descendantsAndSelfWithPath recurseOnMatch predicate) - - /// - /// Finds all of the descendant nodes of this document that match the given set of names - /// - /// If a match is found continues down the tree matching child elements - /// The set of names to match - /// The given document - let inline descendantsNamedWithPath recurseOnMatch names doc = - let nameSet = getNameSet names - - doc |> descendantsWithPath recurseOnMatch (HtmlNode.name >> nameSet.Contains) - - /// - /// Finds the body element of the given document, - /// this throws an exception if no body element exists. - /// - /// The given document - let inline body (x: HtmlDocument) = - match List.ofSeq <| descendantsNamed false [ "body" ] x with - | [] -> failwith "No element body found!" - | body :: _ -> body - - /// - /// Tries to find the body element of the given document. - /// - /// The given document - let inline tryGetBody (x: HtmlDocument) = - match List.ofSeq <| descendantsNamed false [ "body" ] x with - | [] -> None - | body :: _ -> Some body - - /// - /// Finds the html element of the given document, - /// this throws an exception if no html element exists. - /// - /// The given document - let inline html (x: HtmlDocument) = - match List.ofSeq <| descendantsNamed false [ "html" ] x with - | [] -> failwith "No element html found!" - | html :: _ -> html - - /// - /// Tries to find the html element of the given document. 
- /// - /// The given document - let inline tryGetHtml (x: HtmlDocument) = - match List.ofSeq <| descendantsNamed false [ "html" ] x with - | [] -> None - | html :: _ -> Some html - - -[] -/// Extension methods with operations on HTML documents -type HtmlDocumentExtensions = - - /// - /// Returns all of the root elements of the current document - /// - /// The given document - [] - static member Elements(doc: HtmlDocument) = HtmlDocument.elements doc - - /// - /// Returns all of the root elements in the current document that match the set of names - /// - /// The given document - /// The set of names to match - [] - static member Elements(doc: HtmlDocument, names: seq) = HtmlDocument.elementsNamed names doc - - /// - /// Returns all of the root elements in the current document that match the name - /// - /// The given document - /// The name to match - [] - static member Elements(doc: HtmlDocument, name: string) = HtmlDocument.elementsNamed [ name ] doc - - /// - /// Gets all of the descendants of this document that statisfy the given predicate - /// - /// The given document - /// The predicate by which to match the nodes to return - /// If a match is found continues down the tree matching child elements - [] - static member Descendants(doc: HtmlDocument, predicate, recurseOnMatch) = - HtmlDocument.descendants recurseOnMatch predicate doc - - /// - /// Gets all of the descendants of this document that statisfy the given predicate - /// Recurses on match - /// - /// The given document - /// The predicate by which to match the nodes to return - [] - static member Descendants(doc: HtmlDocument, predicate) = - let recurseOnMatch = true - HtmlDocument.descendants recurseOnMatch predicate doc - - /// Gets all of the descendants of this document - /// Recurses on match - [] - static member Descendants(doc: HtmlDocument) = - let recurseOnMatch = true - let predicate = fun _ -> true - HtmlDocument.descendants recurseOnMatch predicate doc - - /// - /// Finds all of the 
descendant nodes of this document that match the given set of names - /// - /// The given document - /// The set of names to match - /// If a match is found continues down the tree matching child elements - [] - static member Descendants(doc: HtmlDocument, names: seq, recurseOnMatch) = - HtmlDocument.descendantsNamed recurseOnMatch names doc - - /// - /// Finds all of the descendant nodes of this document that match the given set of names - /// Recurses on match - /// - /// The given document - /// The set of names to match - [] - static member Descendants(doc: HtmlDocument, names: seq) = - let recurseOnMatch = true - HtmlDocument.descendantsNamed recurseOnMatch names doc - - /// - /// Finds all of the descendant nodes of this document that match the given name - /// - /// The given document - /// The name to match - /// If a match is found continues down the tree matching child elements - [] - static member Descendants(doc: HtmlDocument, name: string, recurseOnMatch) = - HtmlDocument.descendantsNamed recurseOnMatch [ name ] doc - - /// - /// Finds all of the descendant nodes of this document that match the given name - /// Recurses on match - /// - /// The given document - /// The name to match - [] - static member Descendants(doc: HtmlDocument, name: string) = - let recurseOnMatch = true - HtmlDocument.descendantsNamed recurseOnMatch [ name ] doc - - /// - /// Gets all of the descendants of this document that statisfy the given predicate - /// - /// The given document - /// The predicate by which to match the nodes to return - /// If a match is found continues down the tree matching child elements - [] - static member DescendantsWithPath(doc: HtmlDocument, predicate, recurseOnMatch) = - HtmlDocument.descendantsWithPath recurseOnMatch predicate doc - - /// - /// Gets all of the descendants of this document that statisfy the given predicate - /// Recurses on match - /// - /// The given document - /// The predicate by which to match the nodes to return - [] - static 
member DescendantsWithPath(doc: HtmlDocument, predicate) = - let recurseOnMatch = true - HtmlDocument.descendantsWithPath recurseOnMatch predicate doc - - /// Gets all of the descendants of this document - /// Recurses on match - [] - static member DescendantsWithPath(doc: HtmlDocument) = - let recurseOnMatch = true - let predicate = fun _ -> true - HtmlDocument.descendantsWithPath recurseOnMatch predicate doc - - /// - /// Finds all of the descendant nodes of this document that match the given set of names - /// - /// The given document - /// The set of names to match - /// If a match is found continues down the tree matching child elements - [] - static member DescendantsWithPath(doc: HtmlDocument, names: seq, recurseOnMatch) = - HtmlDocument.descendantsNamedWithPath recurseOnMatch names doc - - /// - /// Finds all of the descendant nodes of this document that match the given set of names - /// Recurses on match - /// - /// The given document - /// The set of names to match - [] - static member DescendantsWithPath(doc: HtmlDocument, names: seq) = - let recurseOnMatch = true - HtmlDocument.descendantsNamedWithPath recurseOnMatch names doc - - /// - /// Finds all of the descendant nodes of this document that match the given name - /// - /// The given document - /// The name to match - /// If a match is found continues down the tree matching child elements - [] - static member DescendantsWithPath(doc: HtmlDocument, name: string, recurseOnMatch) = - HtmlDocument.descendantsNamedWithPath recurseOnMatch [ name ] doc - - /// - /// Finds all of the descendant nodes of this document that match the given name - /// Recurses on match - /// - /// The given document - /// The name to match - [] - static member DescendantsWithPath(doc: HtmlDocument, name: string) = - let recurseOnMatch = true - HtmlDocument.descendantsNamedWithPath recurseOnMatch [ name ] doc - - /// Finds the body element of the given document, - /// this throws an exception if no body element exists. 
- [] - static member Body(doc: HtmlDocument) = HtmlDocument.body doc - - /// Tries to find the body element of the given document. - [] - static member TryGetBody(doc: HtmlDocument) = HtmlDocument.tryGetBody doc - - /// Finds the html element of the given document, - /// this throws an exception if no html element exists. - [] - static member Html(doc: HtmlDocument) = HtmlDocument.html doc - - /// Tries to find the html element of the given document. - [] - static member TryGetHtml(doc: HtmlDocument) = HtmlDocument.tryGetHtml doc - -// -------------------------------------------------------------------------------------- - -[] -/// Provides the dynamic operator for getting attribute values from HTML elements -module HtmlExtensions = - - /// Gets the value of an attribute from an HTML element - let (?) (node: HtmlNode) name = HtmlNode.attributeValue name node