Handle HTML escape characters.
Convert “&…” codes to corresponding characters.
This commit is contained in:
parent
0046f46bbd
commit
209c9cf911
|
@ -1,7 +1,8 @@
|
|||
open Printf
|
||||
open Http_client
|
||||
open Https_client
|
||||
open Nethtml;;
|
||||
open Nethtml
|
||||
open Netencoding;;
|
||||
|
||||
Ssl.init();
|
||||
Convenience.configure_pipeline
|
||||
|
@ -57,11 +58,18 @@ let get_http_document body_str =
|
|||
let ch = new Netchannels.input_string body_str in
|
||||
Nethtml.parse ch
|
||||
|
||||
let decode_esc_char str =
|
||||
Html.decode ~in_enc:`Enc_utf8
|
||||
~out_enc:`Enc_utf8
|
||||
~entity_base:`Html () str
|
||||
|
||||
let get_http_title body =
|
||||
let ch = new Netchannels.input_string body in
|
||||
let doc = Nethtml.parse ch in
|
||||
get_title_element_from_list doc
|
||||
let title = get_title_element_from_list doc in
|
||||
match title with
|
||||
| Some s -> Some (decode_esc_char s)
|
||||
| None -> None
|
||||
|
||||
|
||||
(* TODO: Log errors *)
|
||||
|
|
|
@ -15,6 +15,7 @@ let test_get_title test_ctxt =
|
|||
let title = SimpleHttp.get_http_title body in
|
||||
assert_equal (Some "Fabien") title
|
||||
|
||||
|
||||
let test_remove_spaces test_ctxt =
|
||||
let str = "
|
||||
|
||||
|
@ -26,11 +27,18 @@ let test_remove_spaces test_ctxt =
|
|||
assert_equal (Some "foo bar baz") cleaned_str
|
||||
|
||||
|
||||
let test_escape_characters text_ctx =
|
||||
let html_str = "I'll" in
|
||||
let expected = "I'll" in
|
||||
assert_equal expected (SimpleHttp.decode_esc_char html_str)
|
||||
|
||||
|
||||
(* Name the test cases and group them together *)
|
||||
let suite =
|
||||
"suite">:::
|
||||
["test get_title">:: test_get_title;
|
||||
"test remove_spaces">:: test_remove_spaces]
|
||||
"test remove_spaces">:: test_remove_spaces;
|
||||
"test HTML character escaping">:: test_escape_characters]
|
||||
|
||||
let () =
|
||||
run_test_tt_main suite
|
||||
|
|
Loading…
Reference in a new issue