#include <rtgui/rtgui_xml.h>
#include <rtgui/rtgui_system.h>

/* Parser states. The values are stored in the 8-bit state fields of the
   transition table and in the parser object, so each one is spelled out. */
enum {
	/* base state; a zero-filled parser object starts here */
	STAT_START             = 0,
	/* inside character data between tags */
	STAT_TEXT              = 1,
	/* just after '<' */
	STAT_START_TAG         = 2,
	/* inside the name of a start tag */
	STAT_START_TAGNAME     = 3,
	/* after the name of a start tag (attributes may follow) */
	STAT_START_TAGNAME_END = 4,
	/* just after "</" */
	STAT_END_TAG           = 5,
	/* inside the name of an end tag */
	STAT_END_TAGNAME       = 6,
	/* after the name of an end tag, before '>' */
	STAT_END_TAGNAME_END   = 7,
	/* after the '/' of a self-closing tag */
	STAT_EMPTY_TAG         = 8,
	/* inside a run of linear whitespace */
	STAT_SPACE             = 9,
	/* inside an attribute name */
	STAT_ATTR_NAME         = 10,
	/* after an attribute name, before '=' */
	STAT_ATTR_NAME_END     = 11,
	/* after '=', before the opening quote of the value */
	STAT_ATTR_VAL          = 12,
	/* inside a quoted attribute value */
	STAT_ATTR_VAL2         = 13,
	/* error state; also terminates the transition table */
	STAT_ERROR             = 14
};

/* Character classes a transition can match on. Deliberately small; extend
   only if the grammar really needs it. */
enum {
	/* matches nothing; used by the end-of-table marker */
	CLASS_TYPE_NONE        = 0,
	/* '<' */
	CLASS_TYPE_LEFT_ANGLE  = 1,
	/* '/' */
	CLASS_TYPE_SLASH       = 2,
	/* '>' */
	CLASS_TYPE_RIGHT_ANGLE = 3,
	/* '=' */
	CLASS_TYPE_EQUALS      = 4,
	/* '"' (double quote only) */
	CLASS_TYPE_QUOTE       = 5,
	/* a-z, A-Z, 0-9 and '.' */
	CLASS_TYPE_LETTERS     = 6,
	/* TAB, LF, VT, FF, CR and ' ' */
	CLASS_TYPE_SPACE       = 7,
	/* any character at all, including every class above */
	CLASS_TYPE_ANY         = 8
};

/* One row of the xml state transition table. The table initialisers are
   positional, so the field order below must not change. */
struct rtgui_xml_state
{
	/* state this row applies to (STAT_*) */
	rt_uint8_t state;
	/* character class the input must belong to (CLASS_TYPE_*) */
	rt_uint8_t class_type;
	/* state entered when the row matches (STAT_*) */
	rt_uint8_t next_state;
	/* event raised when the row matches (EVENT_*) */
	rt_uint8_t event;
};

/* Transition table consumed by rtgui_xml_parse().
 *
 * Each row reads: while in `state`, if the input character belongs to
 * `class_type`, raise `event` and move to `next_state`. The parser scans the
 * table from the top and takes the FIRST row whose state and class both
 * match, so the order of rows inside a state group is significant (a
 * CLASS_TYPE_ANY row has to be the last row of its group). A character that
 * matches no row of the current state is skipped: the state is unchanged and
 * the character is not copied.
 *
 * EVENT_COPY turns copying into the text buffer on, starting with the
 * matched character itself. EVENT_NONE leaves the copy flag as it is.
 *
 * Note: States must be grouped in match order AND grouped together! */
static const struct rtgui_xml_state RTGUI_XML_STATES [] = {
   /* [0-2] starting state, which also serves as the default state in case
	  of error */
   { STAT_START,         CLASS_TYPE_SPACE,        STAT_SPACE,             EVENT_NONE },
   { STAT_START,         CLASS_TYPE_LEFT_ANGLE,   STAT_START_TAG,         EVENT_NONE },
   { STAT_START,         CLASS_TYPE_ANY,          STAT_TEXT,              EVENT_COPY },

   /* [3-5] space state handles linear white space */
   { STAT_SPACE,         CLASS_TYPE_SPACE,        STAT_SPACE,             EVENT_NONE },
   { STAT_SPACE,         CLASS_TYPE_LEFT_ANGLE,   STAT_START_TAG,         EVENT_TEXT },
   { STAT_SPACE,         CLASS_TYPE_ANY,          STAT_TEXT,              EVENT_COPY },

   /* [6-8] handle start tag */
   { STAT_START_TAG,     CLASS_TYPE_LETTERS,      STAT_START_TAGNAME,     EVENT_COPY },
   /* "</" opens an end tag; copying starts at the '/', so the slash itself
	  is stored in the buffer ahead of the tag name */
   { STAT_START_TAG,     CLASS_TYPE_SLASH,        STAT_END_TAG,           EVENT_COPY },
   /* below added since some individuals get a little carried away with
	  spacing around tag names, e.g. < tag > */
   { STAT_START_TAG,     CLASS_TYPE_SPACE,        STAT_START_TAG,         EVENT_NONE },

   /* [9-12] handle start tag name */
   { STAT_START_TAGNAME, CLASS_TYPE_LETTERS,      STAT_START_TAGNAME,     EVENT_NONE },
   { STAT_START_TAGNAME, CLASS_TYPE_SPACE,        STAT_START_TAGNAME_END, EVENT_START },
   /* below added for tags without any space between tag and ending
	  slash, e.g., <br/> */
   { STAT_START_TAGNAME, CLASS_TYPE_SLASH,        STAT_EMPTY_TAG,         EVENT_END },
   { STAT_START_TAGNAME, CLASS_TYPE_RIGHT_ANGLE,  STAT_START,             EVENT_START },

   /* [13-16] handle start tag name end */
   { STAT_START_TAGNAME_END,  CLASS_TYPE_LETTERS, STAT_ATTR_NAME,         EVENT_COPY },
   /* below added to handle additional space in between attribute value
	  pairs in start tags, e.g., <tag attr="2" attr2="test" > */
   { STAT_START_TAGNAME_END,  CLASS_TYPE_SPACE,   STAT_START_TAGNAME_END, EVENT_NONE },
   { STAT_START_TAGNAME_END,  CLASS_TYPE_RIGHT_ANGLE, STAT_START,         EVENT_START },
   /* below supports tags that are self-closing, e.g., <br /> */
   { STAT_START_TAGNAME_END,  CLASS_TYPE_SLASH,   STAT_EMPTY_TAG,         EVENT_COPY },

   /* [17] handle empty tags, e.g., <br /> */
   { STAT_EMPTY_TAG,     CLASS_TYPE_RIGHT_ANGLE,  STAT_START,             EVENT_END },

   /* [18] handle end tag, e.g., </tag> */
   { STAT_END_TAG,       CLASS_TYPE_LETTERS,      STAT_END_TAGNAME,       EVENT_NONE },

   /* [19-21] handle end tag name */
   { STAT_END_TAGNAME,   CLASS_TYPE_LETTERS,      STAT_END_TAGNAME,       EVENT_NONE },
   { STAT_END_TAGNAME,   CLASS_TYPE_RIGHT_ANGLE,  STAT_START,             EVENT_END },
   /* below adds support for spaces at the end of an end tag (before
	  closing bracket) */
   { STAT_END_TAGNAME,   CLASS_TYPE_SPACE,        STAT_END_TAGNAME_END,   EVENT_END },

   /* [22-23] handle ending of end tag name */
   { STAT_END_TAGNAME_END, CLASS_TYPE_SPACE,      STAT_END_TAGNAME_END,   EVENT_NONE },
   { STAT_END_TAGNAME_END, CLASS_TYPE_RIGHT_ANGLE,STAT_START,             EVENT_NONE },

   /* [24-26] handle text */
   { STAT_TEXT,          CLASS_TYPE_SPACE,        STAT_SPACE,             EVENT_NONE },
   { STAT_TEXT,          CLASS_TYPE_LEFT_ANGLE,   STAT_START_TAG,         EVENT_TEXT },
   { STAT_TEXT,          CLASS_TYPE_ANY,          STAT_TEXT,              EVENT_NONE },

   /* [27-29] handle attribute names */
   { STAT_ATTR_NAME,     CLASS_TYPE_LETTERS,      STAT_ATTR_NAME,         EVENT_COPY },
   /* below add support for space before the equals sign, e.g, <tag
	  attr ="2"> */
   { STAT_ATTR_NAME,     CLASS_TYPE_SPACE,        STAT_ATTR_NAME_END,     EVENT_NAME },
   { STAT_ATTR_NAME,     CLASS_TYPE_EQUALS,       STAT_ATTR_VAL,          EVENT_NAME },

   /* [30-32] attribute name end */
   { STAT_ATTR_NAME_END, CLASS_TYPE_SPACE,        STAT_ATTR_NAME_END,     EVENT_NONE },
   { STAT_ATTR_NAME_END, CLASS_TYPE_LETTERS,      STAT_ATTR_NAME,         EVENT_COPY },
   { STAT_ATTR_NAME_END, CLASS_TYPE_EQUALS,       STAT_ATTR_VAL,          EVENT_NONE },

   /* [33-34] handle attribute values, initial quote and spaces */
   { STAT_ATTR_VAL,      CLASS_TYPE_QUOTE,        STAT_ATTR_VAL2,         EVENT_NONE },
   /* below handles initial spaces before quoted attribute value */
   { STAT_ATTR_VAL,      CLASS_TYPE_SPACE,        STAT_ATTR_VAL,          EVENT_NONE },

   /* [35-37] handle actual attribute values.
	  NOTE(review): only '"', CLASS_TYPE_LETTERS and '/' have rows here, so
	  any other character inside a value (a space, '-', '_', ...) is skipped
	  by the parser - confirm this is intended. */
   { STAT_ATTR_VAL2,     CLASS_TYPE_QUOTE,        STAT_START_TAGNAME_END, EVENT_VAL  },
   { STAT_ATTR_VAL2,     CLASS_TYPE_LETTERS,      STAT_ATTR_VAL2,         EVENT_COPY },
   /* a '/' is stored only when copying was already switched on by an
	  earlier CLASS_TYPE_LETTERS character of the same value */
   { STAT_ATTR_VAL2,     CLASS_TYPE_SLASH,        STAT_ATTR_VAL2,         EVENT_NONE },

   /* End of table marker */
   { STAT_ERROR,         CLASS_TYPE_NONE,         STAT_ERROR,             EVENT_NONE }
};

/* Parser instance, allocated by rtgui_xml_create(). */
struct rtgui_xml
{
	/* callback invoked for every reported event, and the opaque pointer
	   handed back to it as its last argument */
	rtgui_xml_event_handler_t event_handler;
	void *user;

	/* scratch buffer collecting the text of the event being assembled */
	char *buffer;
	/* capacity of buffer in bytes, including room for the trailing nul */
	rt_size_t buffer_size;
	/* number of bytes currently stored in buffer */
	rt_size_t position;

	/* current STAT_* state of the state machine */
	rt_uint16_t state;
	/* most recent event passed to the handler */
	rt_uint16_t event;

	/* RT_TRUE while input characters are being copied into buffer */
	rt_bool_t copy;
	/* set once the handler returned 0; parsing stops from then on */
	rt_bool_t halt;
};

/*
 * Allocate and initialise an XML parser.
 *
 * buffer_size  capacity, in bytes, of the internal text buffer; one byte is
 *              always reserved for the trailing nul, so at most
 *              buffer_size-1 characters of a name/value/text run are kept.
 *              A size of 0 is raised to 1 so the nul terminator always fits.
 * handler      callback invoked by rtgui_xml_parse() for every event
 * user         opaque pointer passed back to handler unchanged
 *
 * Returns the new parser, or RT_NULL when either allocation fails. The
 * parser must be released with rtgui_xml_destroy().
 */
rtgui_xml_t* rtgui_xml_create(rt_size_t buffer_size, rtgui_xml_event_handler_t handler,
	void* user)
{
	rtgui_xml_t* xml;

	/* the parser computes buffer_size-1; never let that wrap around */
	if(buffer_size == 0)
		buffer_size = 1;

	xml = (rtgui_xml_t*) rtgui_malloc(sizeof(struct rtgui_xml));
	if(xml == RT_NULL)
		return RT_NULL;

	/* zero fill: state becomes STAT_START, position 0, copy/halt false */
	rt_memset(xml, 0, sizeof(rtgui_xml_t));

	xml->event_handler = handler;
	xml->user = user;

	/* create buffer */
	xml->buffer_size = buffer_size;
	xml->buffer = (char*)rtgui_malloc(xml->buffer_size);
	if(xml->buffer == RT_NULL)
	{
		/* do not hand out a parser that would crash on its first event */
		rtgui_free(xml);
		return RT_NULL;
	}

	return xml;
}

/*
 * Release a parser: frees its text buffer first, then the parser object
 * itself. Passing a null pointer is allowed and does nothing.
 */
void rtgui_xml_destroy(rtgui_xml_t* xml)
{
	if(!xml)
		return;

	rtgui_free(xml->buffer);
	rtgui_free(xml);
}

/*
 * Map an event code to a short human readable name.
 *
 * Returns a pointer to a string literal; the caller must not free it.
 * Codes that have no case below (EVENT_NONE and EVENT_COPY among them)
 * yield "err".
 */
const char* rtgui_xml_event_str(rt_uint8_t event)
{
	const char* name = "err";

	switch(event)
	{
	case EVENT_START:
		name = "start tag";
		break;
	case EVENT_END:
		name = "end tag";
		break;
	case EVENT_TEXT:
		name = "text";
		break;
	case EVENT_NAME:
		name = "attr name";
		break;
	case EVENT_VAL:
		name = "attr val";
		break;
	case EVENT_END_DOC:
		name = "end document";
		break;
	default:
		/* keep the "err" fallback */
		break;
	}

	return name;
}

int rtgui_xml_parse(rtgui_xml_t* xml, const char* buf, rt_size_t len)
{
	int i, j, c, match;

#define is_space(ch)	\
	((rt_uint32_t)(ch - 9) < 5u  ||  ch == ' ')
#define is_alpha(ch)	\
	((rt_uint32_t)((ch | 0x20) - 'a') < 26u)
#define is_digit(ch)	\
	((rt_uint32_t)(ch - '0') < 10u)
#define is_letters(ch)	\
	(is_alpha(ch) || is_digit(ch) || (ch == '.'))

	for(i=0; i<len; i++)
	{
		if(xml->halt) break;

		c = buf[i] & 0xff;

		/* search in state table */
		for(j=0, match = 0; RTGUI_XML_STATES[j].state != STAT_ERROR; j++)
		{
			if(RTGUI_XML_STATES[j].state != xml->state)
				continue;

			switch(RTGUI_XML_STATES[j].class_type)
			{
			case CLASS_TYPE_LETTERS:
				match = is_letters(c);
				break;
			case CLASS_TYPE_LEFT_ANGLE:
				match = (c == '<');
				break;
			case CLASS_TYPE_SLASH:
				match = (c == '/');
				break;
			case CLASS_TYPE_RIGHT_ANGLE:
				match = (c == '>');
				break;
			case CLASS_TYPE_EQUALS:
				match = (c == '=');
				break;
			case CLASS_TYPE_QUOTE:
				match = (c == '"');
				break;
			case CLASS_TYPE_SPACE:
				match = is_space(c);
				break;
			case CLASS_TYPE_ANY:
				match = 1;
				break;
			default:
				break;
			}

			/* we matched a character class */
			if(match)
			{
				if(RTGUI_XML_STATES[j].event == EVENT_COPY)
				{
					xml->copy = RT_TRUE;
				}
				else if(RTGUI_XML_STATES[j].event != EVENT_NONE)
				{
					if(xml->copy == RT_TRUE)
					{
						/* basically we are guaranteed never to have an event of
						   type EVENT_COPY or EVENT_NONE here. */
						xml->event = RTGUI_XML_STATES[j].event;
						xml->buffer[xml->position] = 0; /* make a string */

						if(!xml->event_handler(RTGUI_XML_STATES[j].event,
											  xml->buffer, xml->position ,
											  xml->user))
						{
							xml->halt = 1; /* stop parsing from here out */
						}
						xml->position = 0;
						xml->copy = RT_FALSE;
					}
				}
				if(xml->copy == RT_TRUE)
				{
					/* check to see if we have room; one less for trailing
					   nul */
					if(xml->position < xml->buffer_size-1)
					{
						xml->buffer[xml->position] = buf[i];
						xml->position++;
					}
				}
				xml->state = RTGUI_XML_STATES[j].next_state; /* change state */
				break; /* break out of loop though state search */
			}
		}
	}

	return !xml->halt;
}