2014-03-01 21:50:46 +01:00
|
|
|
open Printf
|
2014-03-01 22:32:28 +01:00
|
|
|
open Http_client
|
2014-03-01 21:50:46 +01:00
|
|
|
open Https_client
|
2014-12-21 17:10:22 +01:00
|
|
|
open Nethtml
|
|
|
|
open Netencoding;;
|
2014-03-01 21:50:46 +01:00
|
|
|
|
|
|
|
Ssl.init();
|
2014-03-01 22:32:28 +01:00
|
|
|
Convenience.configure_pipeline
|
2014-03-01 21:50:46 +01:00
|
|
|
(fun p ->
|
|
|
|
let ctx = Ssl.create_context Ssl.TLSv1 Ssl.Client_context in
|
2014-03-01 22:32:28 +01:00
|
|
|
let tct = https_transport_channel_type ctx in
|
|
|
|
p # configure_transport https_cb_id tct
|
2014-03-01 21:50:46 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
let extract_string_value document =
|
|
|
|
match document with
|
2014-03-03 23:33:21 +01:00
|
|
|
| Data(s) -> Some s
|
|
|
|
| _ -> None
|
2014-03-01 21:50:46 +01:00
|
|
|
|
|
|
|
|
2014-03-03 23:33:21 +01:00
|
|
|
let find_string_value = function
|
2014-12-21 17:10:22 +01:00
|
|
|
| Some s -> true
|
2014-03-03 23:33:21 +01:00
|
|
|
| None -> false
|
|
|
|
|
|
|
|
|
|
|
|
let rec get_title_element_from_list doc_list =
|
|
|
|
|
|
|
|
try
|
|
|
|
List.find find_string_value (List.map get_title_element doc_list)
|
|
|
|
with
|
|
|
|
Not_found -> None
|
|
|
|
|
|
|
|
and get_title_element document =
|
|
|
|
|
2014-03-01 21:50:46 +01:00
|
|
|
match document with
|
2014-03-03 23:33:21 +01:00
|
|
|
| Element("title", args, sub) ->
|
|
|
|
(
|
|
|
|
let title_candidates = List.map extract_string_value sub in
|
|
|
|
try
|
|
|
|
List.find find_string_value title_candidates
|
|
|
|
with
|
|
|
|
Not_found -> None
|
|
|
|
)
|
|
|
|
| Element(e, args, sub) -> get_title_element_from_list sub
|
|
|
|
| Data(s) -> None
|
|
|
|
|
|
|
|
|
|
|
|
let rec print_document document =
|
|
|
|
match document with
|
|
|
|
| Element(e, args, sub) ->
|
|
|
|
printf "Element: %s\n" e;
|
|
|
|
List.iter print_document sub
|
|
|
|
| Data(s) -> printf "Data: %s\n" s
|
|
|
|
|
|
|
|
|
|
|
|
let get_http_document body_str =
|
|
|
|
let ch = new Netchannels.input_string body_str in
|
|
|
|
Nethtml.parse ch
|
2014-03-01 21:50:46 +01:00
|
|
|
|
2014-12-21 17:10:22 +01:00
|
|
|
let decode_esc_char str =
|
|
|
|
Html.decode ~in_enc:`Enc_utf8
|
|
|
|
~out_enc:`Enc_utf8
|
|
|
|
~entity_base:`Html () str
|
2014-03-01 21:50:46 +01:00
|
|
|
|
|
|
|
let get_http_title body =
|
|
|
|
let ch = new Netchannels.input_string body in
|
|
|
|
let doc = Nethtml.parse ch in
|
2014-12-21 17:10:22 +01:00
|
|
|
let title = get_title_element_from_list doc in
|
|
|
|
match title with
|
|
|
|
| Some s -> Some (decode_esc_char s)
|
|
|
|
| None -> None
|
2014-03-01 21:50:46 +01:00
|
|
|
|
|
|
|
|
|
|
|
(* TODO: Log errors *)
|
|
|
|
let get_body url =
|
2014-03-01 22:32:28 +01:00
|
|
|
try Convenience.http_get url with
|
|
|
|
| Http_error e -> "http error /o\\"
|
2014-03-01 21:50:46 +01:00
|
|
|
| Failure f -> "http fail lol"
|