209c9cf911
Convert “&…” codes to corresponding characters.
80 lines
1.7 KiB
OCaml
80 lines
1.7 KiB
OCaml
open Printf
|
|
open Http_client
|
|
open Https_client
|
|
open Nethtml
|
|
open Netencoding;;
|
|
|
|
(* Program start-up side effects: initialise the SSL library, then hand
   [Convenience.configure_pipeline] a callback that configures the
   [https_cb_id] transport on the pipeline it receives, using a client-side
   SSL context.
   NOTE(review): the context is pinned to [Ssl.TLSv1] -- confirm this is
   still the protocol version the target servers should be contacted with. *)
let () =
  Ssl.init ();
  Convenience.configure_pipeline
    (fun pipeline ->
       let ssl_context = Ssl.create_context Ssl.TLSv1 Ssl.Client_context in
       pipeline # configure_transport
         https_cb_id
         (https_transport_channel_type ssl_context))
|
|
|
|
|
|
(* [extract_string_value node] returns [Some text] when [node] is a [Data]
   node carrying [text], and [None] for an [Element] node. *)
let extract_string_value = function
  | Data text -> Some text
  | Element _ -> None
|
|
|
|
|
|
(* [find_string_value candidate] is [true] exactly when [candidate] is
   [Some _]. Used as the predicate when looking for the first available
   value in a list of options. *)
let find_string_value candidate =
  match candidate with
  | None -> false
  | Some _ -> true
|
|
|
|
|
|
(* [first_some f items] applies [f] to the items in order and returns the
   first [Some _] result, or [None] if every application yields [None].
   It stops at the first hit, so the remaining items are never visited. *)
let rec first_some f = function
  | [] -> None
  | item :: rest ->
    (match f item with
     | Some _ as hit -> hit
     | None -> first_some f rest)

(* [get_title_element_from_list doc_list] searches the nodes of [doc_list]
   in order (descending into each node's children before moving on to the
   next sibling) and returns the first title text found by
   [get_title_element], or [None] if there is none. *)
let rec get_title_element_from_list doc_list =
  first_some get_title_element doc_list

(* [get_title_element document] looks for title text in a single node:
   - an element named exactly "title" yields the text of its first direct
     [Data] child, or [None] if it has no [Data] child (its child elements
     are not searched);
   - any other element is searched recursively through its children;
   - a bare [Data] node yields [None]. *)
and get_title_element document =
  match document with
  | Element ("title", _, sub) -> first_some extract_string_value sub
  | Element (_, _, sub) -> get_title_element_from_list sub
  | Data _ -> None
|
|
|
|
|
|
(* Debugging aid: prints [document] and all of its descendants to stdout,
   one line per node, parents before children. Elements are shown by name
   only; data nodes are shown with their text. *)
let rec print_document = function
  | Element (tag, _, children) ->
    printf "Element: %s\n" tag;
    List.iter print_document children
  | Data text ->
    printf "Data: %s\n" text
|
|
|
|
|
|
(* [get_http_document body_str] wraps [body_str] in an in-memory input
   channel and returns whatever [Nethtml.parse] produces for it. *)
let get_http_document body_str =
  Nethtml.parse (new Netchannels.input_string body_str)
|
|
|
|
(* [decode_esc_char str] runs [Html.decode] over [str] with the [`Html]
   entity base, treating both the input and the output as UTF-8.
   NOTE(review): what happens on unknown entities or malformed input is
   decided by [Html.decode]'s defaults -- confirm whether it can raise. *)
let decode_esc_char str =
  let utf8 = `Enc_utf8 in
  Html.decode ~entity_base:`Html ~in_enc:utf8 ~out_enc:utf8 () str
|
|
|
|
(* [get_http_title body] parses [body] as HTML and returns the text of the
   first title found by [get_title_element_from_list], with HTML entities
   decoded by [decode_esc_char]. Returns [None] when no title text is found.
   Parsing goes through [get_http_document] so that there is a single place
   where a body string is turned into a document. *)
let get_http_title body =
  match get_title_element_from_list (get_http_document body) with
  | None -> None
  | Some raw_title -> Some (decode_esc_char raw_title)
|
|
|
|
|
|
(* [get_body url] fetches [url] with [Convenience.http_get] and returns the
   response body.
   [Http_error] and [Failure] are not propagated: each is reported on stderr
   and replaced by a fixed placeholder string, so the caller always receives
   a string. Any other exception is left to propagate.
   NOTE(review): callers cannot distinguish a placeholder from a real body;
   consider an option/result-returning variant if that matters. *)
let get_body url =
  try Convenience.http_get url with
  | Http_error (status, _) ->
    Printf.eprintf "get_body: HTTP error %d for %s\n%!" status url;
    "http error /o\\"
  | Failure reason ->
    Printf.eprintf "get_body: failure for %s: %s\n%!" url reason;
    "http fail lol"
|