7#ifndef LEXBOR_HTML_TOKENIZER_STATE_H
8#define LEXBOR_HTML_TOKENIZER_STATE_H
16#define lxb_html_tokenizer_state_begin_set(tkz, v_data) \
/*
 * Calls lxb_html_tokenizer_temp_append_data(tkz, v_data) and enters a branch
 * when the call returns a non-zero value.
 *
 * NOTE(review): the body of that branch is not part of this excerpt; a
 * non-zero result presumably signals failure -- confirm against the full
 * header.
 */
19#define lxb_html_tokenizer_state_append_data_m(tkz, v_data) \
21 if (lxb_html_tokenizer_temp_append_data(tkz, v_data)) { \
/*
 * Calls lxb_html_tokenizer_temp_append() with v_data cast to
 * (const lxb_char_t *) and tests the result in an if condition.
 *
 * NOTE(review): the remaining call arguments (presumably `size`) and the
 * branch body are not part of this excerpt -- confirm against the full header.
 */
27#define lxb_html_tokenizer_state_append_m(tkz, v_data, size) \
29 if (lxb_html_tokenizer_temp_append(tkz, (const lxb_char_t *) (v_data), \
/*
 * Calls lxb_html_tokenizer_temp_append() with
 * lexbor_str_res_ansi_replacement_character as the data and
 * sizeof(lexbor_str_res_ansi_replacement_character) - 1 as the length, and
 * tests the result in an if condition.
 *
 * NOTE(review): the "- 1" presumably drops a trailing NUL from the constant;
 * the branch body is not part of this excerpt -- confirm both.
 */
37#define lxb_html_tokenizer_state_append_replace_m(tkz) \
39 if (lxb_html_tokenizer_temp_append(tkz, \
40 lexbor_str_res_ansi_replacement_character, \
41 sizeof(lexbor_str_res_ansi_replacement_character) - 1))\
/*
 * Resolves the tag for the byte range [_start, _end) and stores its id on the
 * current token.
 *
 * Visible steps:
 *   - declares a local `tag` and fills it from
 *     lxb_tag_append_lower(tkz->tags, _start, _end - _start);
 *   - sets tkz->status to LXB_STATUS_ERROR_MEMORY_ALLOCATION;
 *   - copies tag->tag_id into tkz->token->tag_id.
 *
 * NOTE(review): the condition guarding the status assignment (presumably
 * tag == NULL) and what follows it are not part of this excerpt -- confirm.
 * The macro declares a local named `tag`, so it needs its own scope at the
 * expansion site.
 */
48#define lxb_html_tokenizer_state_set_tag_m(tkz, _start, _end) \
50 const lxb_tag_data_t *tag; \
51 tag = lxb_tag_append_lower(tkz->tags, (_start), (_end) - (_start)); \
53 tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; \
56 tkz->token->tag_id = tag->tag_id; \
/*
 * Registers the attribute name spanning tkz->start .. tkz->pos and attaches
 * it to the token's last attribute.
 *
 * Visible steps:
 *   - declares a local `data` and fills it from
 *     lxb_dom_attr_local_name_append(tkz->attrs, tkz->start,
 *                                    tkz->pos - tkz->start);
 *   - sets tkz->status to LXB_STATUS_ERROR_MEMORY_ALLOCATION;
 *   - stores `data` in tkz->token->attr_last->name.
 *
 * NOTE(review): the condition guarding the status assignment (presumably
 * data == NULL) and what follows it are not part of this excerpt -- confirm.
 */
60#define lxb_html_tokenizer_state_set_name_m(tkz) \
62 lxb_dom_attr_data_t *data; \
63 data = lxb_dom_attr_local_name_append(tkz->attrs, tkz->start, \
64 tkz->pos - tkz->start); \
66 tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; \
69 tkz->token->attr_last->name = data; \
/*
 * Copies the bytes between tkz->start and tkz->pos into a newly allocated,
 * zero-terminated value for the token's last attribute.
 *
 * Visible steps:
 *   - attr = tkz->token->attr_last;
 *   - attr->value_size = tkz->pos - tkz->start (cast to size_t);
 *   - attr->value is allocated with lexbor_mraw_alloc(tkz->attrs_mraw, ...)
 *     for value_size + 1 bytes; the extra byte holds the terminator;
 *   - if the allocation returns NULL, tkz->status is set to
 *     LXB_STATUS_ERROR_MEMORY_ALLOCATION;
 *   - value_size bytes are copied from tkz->start and 0x00 is written at
 *     index value_size.
 *
 * NOTE(review): how the failure branch ends (presumably an early return) is
 * not part of this excerpt -- confirm against the full header.
 */
73#define lxb_html_tokenizer_state_set_value_m(tkz) \
75 lxb_html_token_attr_t *attr = tkz->token->attr_last; \
77 attr->value_size = (size_t) (tkz->pos - tkz->start); \
79 attr->value = lexbor_mraw_alloc(tkz->attrs_mraw, attr->value_size + 1);\
80 if (attr->value == NULL) { \
81 tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; \
84 memcpy(attr->value, tkz->start, attr->value_size); \
85 attr->value[attr->value_size] = 0x00; \
/*
 * Marks the beginning of the current token.
 *
 * Resets tkz->pos to tkz->start, stores v_begin in tkz->token->begin, and
 * records tkz->current_line / tkz->current_column in the token's line and
 * column fields.
 *
 * NOTE(review): `tkz` and `v_begin` are used unparenthesized in the
 * expansion; pass simple expressions.
 */
89#define lxb_html_tokenizer_state_token_set_begin(tkz, v_begin) \
91 tkz->pos = tkz->start; \
92 tkz->token->begin = v_begin; \
93 tkz->token->line = tkz->current_line; \
94 tkz->token->column = tkz->current_column; \
/*
 * Stores v_end in tkz->token->end. Expands to a single parenthesized
 * assignment expression.
 *
 * NOTE(review): `tkz` and `v_end` are used unparenthesized inside the
 * expansion; pass simple expressions.
 */
98#define lxb_html_tokenizer_state_token_set_end(tkz, v_end) \
99 (tkz->token->end = v_end)
/*
 * Sets tkz->token->end to the result of
 * lexbor_in_node_pos_down(tkz->incoming_node, NULL, ...).
 *
 * NOTE(review): the remaining call arguments (presumably v_end and offset)
 * are not part of this excerpt -- confirm against the full header.
 */
101#define lxb_html_tokenizer_state_token_set_end_down(tkz, v_end, offset) \
103 tkz->token->end = lexbor_in_node_pos_down(tkz->incoming_node, NULL, \
/*
 * Stores tkz->last in tkz->token->end. Expands to a single parenthesized
 * assignment expression.
 *
 * NOTE(review): "oef" in the name is presumably a transposition of "eof";
 * the spelling is part of the public macro name and is kept as is.
 */
108#define lxb_html_tokenizer_state_token_set_end_oef(tkz) \
109 (tkz->token->end = tkz->last)
/*
 * Appends a new attribute to the current token.
 *
 * Assigns the result of
 * lxb_html_token_attr_append(tkz->token, tkz->dobj_token_attr) to `attr`
 * (which must therefore be an assignable lvalue at the expansion site) and,
 * when that result is NULL, sets tkz->status to
 * LXB_STATUS_ERROR_MEMORY_ALLOCATION.
 *
 * NOTE(review): the use of `v_return` is not part of this excerpt; it is
 * presumably the value returned from the failure branch -- confirm.
 */
111#define lxb_html_tokenizer_state_token_attr_add_m(tkz, attr, v_return) \
113 attr = lxb_html_token_attr_append(tkz->token, tkz->dobj_token_attr); \
114 if (attr == NULL) { \
115 tkz->status = LXB_STATUS_ERROR_MEMORY_ALLOCATION; \
/*
 * Marks the beginning of the last attribute's name: resets tkz->pos to
 * tkz->start and stores v_begin in tkz->token->attr_last->name_begin.
 */
121#define lxb_html_tokenizer_state_token_attr_set_name_begin(tkz, v_begin) \
123 tkz->pos = tkz->start; \
124 tkz->token->attr_last->name_begin = v_begin; \
/*
 * Stores v_end in tkz->token->attr_last->name_end. Expands to a single
 * parenthesized assignment expression.
 */
128#define lxb_html_tokenizer_state_token_attr_set_name_end(tkz, v_end) \
129 (tkz->token->attr_last->name_end = v_end)
/*
 * Stores tkz->last in tkz->token->attr_last->name_end. Expands to a single
 * parenthesized assignment expression.
 */
131#define lxb_html_tokenizer_state_token_attr_set_name_end_oef(tkz) \
132 (tkz->token->attr_last->name_end = tkz->last)
/*
 * Marks the beginning of the last attribute's value: resets tkz->pos to
 * tkz->start and stores v_begin in tkz->token->attr_last->value_begin.
 */
134#define lxb_html_tokenizer_state_token_attr_set_value_begin(tkz, v_begin) \
136 tkz->pos = tkz->start; \
137 tkz->token->attr_last->value_begin = v_begin; \
/*
 * Stores v_end in tkz->token->attr_last->value_end. Expands to a single
 * parenthesized assignment expression.
 */
141#define lxb_html_tokenizer_state_token_attr_set_value_end(tkz, v_end) \
142 (tkz->token->attr_last->value_end = v_end)
/*
 * Stores tkz->last in tkz->token->attr_last->value_end. Expands to a single
 * parenthesized assignment expression.
 */
144#define lxb_html_tokenizer_state_token_attr_set_value_end_oef(tkz) \
145 (tkz->token->attr_last->value_end = tkz->last)
/*
 * Shared helper expanded by the token_done_m, token_done_wo_check_m and
 * token_emit_text_not_empty_m macros in this header.
 *
 * Hands the current token to tkz->callback_token_done(tkz, tkz->token,
 * tkz->callback_token_ctx) and replaces tkz->token with the callback's
 * return value. If that value is NULL while tkz->status is still
 * LXB_STATUS_OK, the status is set to LXB_STATUS_ERROR, so a NULL token is
 * never left with an OK status.
 *
 * `v_end` does not appear in the visible part of the expansion.
 *
 * NOTE(review): the end of the NULL branch is not part of this excerpt;
 * `v_end` is presumably used there -- confirm against the full header.
 */
147#define _lxb_html_tokenizer_state_token_done_m(tkz, v_end) \
148 tkz->token = tkz->callback_token_done(tkz, tkz->token, \
149 tkz->callback_token_ctx); \
150 if (tkz->token == NULL) { \
151 if (tkz->status == LXB_STATUS_OK) { \
152 tkz->status = LXB_STATUS_ERROR; \
/*
 * Finishes the current token when it is non-empty.
 *
 * When tkz->token->begin differs from tkz->token->end, expands
 * _lxb_html_tokenizer_state_token_done_m(tkz, v_end). The visible lines also
 * call lxb_html_token_clean(tkz->token) and reset tkz->pos to tkz->start.
 *
 * NOTE(review): where the conditional closes is not part of this excerpt, so
 * it cannot be told from here whether the clean/reset steps run
 * unconditionally -- confirm against the full header.
 */
157#define lxb_html_tokenizer_state_token_done_m(tkz, v_end) \
159 if (tkz->token->begin != tkz->token->end) { \
160 _lxb_html_tokenizer_state_token_done_m(tkz, v_end) \
162 lxb_html_token_clean(tkz->token); \
163 tkz->pos = tkz->start; \
/*
 * Finishes the current token without the begin/end comparison used by
 * lxb_html_tokenizer_state_token_done_m: expands
 * _lxb_html_tokenizer_state_token_done_m(tkz, v_end) and then calls
 * lxb_html_token_clean(tkz->token).
 *
 * NOTE(review): any lines after the clean call are not part of this excerpt.
 */
167#define lxb_html_tokenizer_state_token_done_wo_check_m(tkz, v_end) \
169 _lxb_html_tokenizer_state_token_done_m(tkz, v_end) \
170 lxb_html_token_clean(tkz->token); \
/*
 * Records the token's text range: text_start is set to tkz->start and
 * text_end to tkz->pos.
 */
174#define lxb_html_tokenizer_state_set_text(tkz) \
176 tkz->token->text_start = tkz->start; \
177 tkz->token->text_end = tkz->pos; \
/*
 * Emits the current token as a text token when it is non-empty.
 *
 * When tkz->token->begin differs from tkz->token->end, the visible lines:
 *   - set tkz->token->tag_id to LXB_TAG__TEXT;
 *   - record the text range via lxb_html_tokenizer_state_set_text(tkz);
 *   - expand _lxb_html_tokenizer_state_token_done_m(tkz, v_end);
 *   - call lxb_html_token_clean(tkz->token).
 *
 * NOTE(review): the closing of the conditional and any trailing lines are
 * not part of this excerpt -- confirm against the full header.
 */
181#define lxb_html_tokenizer_state_token_emit_text_not_empty_m(tkz, v_end) \
183 if (tkz->token->begin != tkz->token->end) { \
184 tkz->token->tag_id = LXB_TAG__TEXT; \
186 lxb_html_tokenizer_state_set_text(tkz); \
187 _lxb_html_tokenizer_state_token_done_m(tkz, v_end) \
188 lxb_html_token_clean(tkz->token); \
struct lxb_html_tokenizer lxb_html_tokenizer_t
/*
 * Exported (LXB_API) tokenizer state functions. All six share one signature:
 * they take the tokenizer `tkz` and a pair of const lxb_char_t pointers,
 * `data` and `end`, and return a const lxb_char_t pointer.
 *
 * NOTE(review): presumably `data`/`end` delimit the input still to be
 * consumed and the return value is the position to continue from -- the
 * implementations are not visible here; confirm before relying on it.
 */
LXB_API const lxb_char_t * lxb_html_tokenizer_state_self_closing_start_tag(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
LXB_API const lxb_char_t * lxb_html_tokenizer_state_char_ref(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
LXB_API const lxb_char_t * lxb_html_tokenizer_state_data_before(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
LXB_API const lxb_char_t * lxb_html_tokenizer_state_cr(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
LXB_API const lxb_char_t * lxb_html_tokenizer_state_before_attribute_name(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
LXB_API const lxb_char_t * lxb_html_tokenizer_state_plaintext_before(lxb_html_tokenizer_t *tkz, const lxb_char_t *data, const lxb_char_t *end)
unsigned const char * end