73 lines
1.5 KiB
OCaml
73 lines
1.5 KiB
OCaml
open Printf
|
|
open Nethttp_client
|
|
open Nethtml
|
|
open Netencoding;;
|
|
|
|
|
|
(* Runs the GnuTLS initialisation once, when this module is loaded.
   NOTE(review): presumably this makes a TLS provider available so that
   https URLs can be fetched later on; confirm against the Ocamlnet docs. *)
Nettls_gnutls.init();;
|
|
|
|
(** [extract_string_value document] returns [Some text] when [document] is a
    [Data] node carrying [text], and [None] when it is an [Element]. *)
let extract_string_value = function
  | Data text -> Some text
  | Element _ -> None
|
|
|
|
|
|
(** [find_string_value candidate] is [true] when [candidate] is [Some _] and
    [false] when it is [None].  It serves as the predicate handed to
    [List.find] when looking for the first present value in a list of
    options. *)
let find_string_value candidate =
  match candidate with
  | None -> false
  | Some _ -> true
|
|
|
|
|
|
(** [get_title_element_from_list doc_list] walks the nodes of [doc_list] in
    order, descending into the children of each element, and returns the
    first title text found by {!get_title_element}.  Returns [None] when no
    node yields a title.

    The search stops at the first hit instead of mapping over every sibling
    first, so the rest of the document is not traversed once a title has
    been found. *)
let rec get_title_element_from_list doc_list =
  match doc_list with
  | [] -> None
  | node :: rest ->
    (match get_title_element node with
     | Some _ as title -> title
     | None -> get_title_element_from_list rest)

(** [get_title_element document] returns the text of the first [Data] node
    that is a direct child of a ["title"] element, looking at [document]
    itself and then, depth-first, at its descendants.

    - A ["title"] element yields its first direct [Data] child, or [None]
      if it has none (its own sub-elements are not searched).
    - Any other element delegates to {!get_title_element_from_list} on its
      children.
    - A bare [Data] node yields [None]. *)
and get_title_element document =
  match document with
  | Element ("title", _, sub) ->
    (* Only direct text children count; nested elements are skipped. *)
    let rec first_data = function
      | [] -> None
      | Data text :: _ -> Some text
      | Element _ :: rest -> first_data rest
    in
    first_data sub
  | Element (_, _, sub) -> get_title_element_from_list sub
  | Data _ -> None
|
|
|
|
|
|
(** [print_document document] prints [document] and all of its descendants
    to standard output, one line per node, in depth-first order: an
    ["Element: <name>"] line for each element followed by its children, and
    a ["Data: <text>"] line for each text node. *)
let rec print_document = function
  | Data text -> Printf.printf "Data: %s\n" text
  | Element (name, _, children) ->
    Printf.printf "Element: %s\n" name;
    List.iter print_document children
|
|
|
|
|
|
(** [get_http_document body_str] feeds the string [body_str] to
    [Nethtml.parse] through an in-memory [Netchannels.input_string] channel
    and returns whatever the parser produces. *)
let get_http_document body_str =
  Nethtml.parse (new Netchannels.input_string body_str)
|
|
|
|
(** [decode_esc_char str] passes [str] through [Netencoding.Html.decode]
    with UTF-8 as both the input and the output encoding and the [`Html]
    entity base, and returns the decoded string. *)
let decode_esc_char str =
  Netencoding.Html.decode
    ~in_enc:`Enc_utf8 ~out_enc:`Enc_utf8 ~entity_base:`Html
    ()
    str
|
|
|
|
(** [get_http_title body] parses the HTML text [body], looks up the document
    title with [get_title_element_from_list], and returns it with HTML
    escapes decoded by [decode_esc_char].  Returns [None] when no title is
    found. *)
let get_http_title body =
  match get_title_element_from_list (get_http_document body) with
  | None -> None
  | Some raw_title -> Some (decode_esc_char raw_title)
|
|
|
|
|
|
(* TODO: Log errors *)
(** [get_body url] fetches [url] with [Convenience.http_get] and returns the
    result.  If the request raises [Http_error] or [Failure], a fixed
    placeholder string is returned instead of the exception being
    propagated; any other exception escapes unchanged.

    NOTE(review): the placeholder is an ordinary string, so callers cannot
    tell it apart from a genuinely fetched body. *)
let get_body url =
  match Convenience.http_get url with
  | body -> body
  | exception Http_error _ -> "http error /o\\"
  | exception Failure _ -> "http fail lol"
|