Mercurial > minori
comparison dep/pugixml/src/pugixml.cpp @ 55:d10b6c6b432e
add xml lib, we will need to use it eventually
| author | Paper <mrpapersonic@gmail.com> |
|---|---|
| date | Tue, 26 Sep 2023 12:37:08 -0400 |
| parents | |
| children | a45edd073f9e |
comparison
equal
deleted
inserted
replaced
| 54:466ac9870df9 | 55:d10b6c6b432e |
|---|---|
| 1 /** | |
| 2 * pugixml parser - version 1.13 | |
| 3 * -------------------------------------------------------- | |
| 4 * Copyright (C) 2006-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) | |
| 5 * Report bugs and download new versions at https://pugixml.org/ | |
| 6 * | |
| 7 * This library is distributed under the MIT License. See notice at the end | |
| 8 * of this file. | |
| 9 * | |
| 10 * This work is based on the pugxml parser, which is: | |
| 11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) | |
| 12 */ | |
| 13 | |
| 14 #ifndef SOURCE_PUGIXML_CPP | |
| 15 #define SOURCE_PUGIXML_CPP | |
| 16 | |
| 17 #include "pugixml.hpp" | |
| 18 | |
| 19 #include <stdlib.h> | |
| 20 #include <stdio.h> | |
| 21 #include <string.h> | |
| 22 #include <assert.h> | |
| 23 #include <limits.h> | |
| 24 | |
| 25 #ifdef PUGIXML_WCHAR_MODE | |
| 26 # include <wchar.h> | |
| 27 #endif | |
| 28 | |
| 29 #ifndef PUGIXML_NO_XPATH | |
| 30 # include <math.h> | |
| 31 # include <float.h> | |
| 32 #endif | |
| 33 | |
| 34 #ifndef PUGIXML_NO_STL | |
| 35 # include <istream> | |
| 36 # include <ostream> | |
| 37 # include <string> | |
| 38 #endif | |
| 39 | |
| 40 // For placement new | |
| 41 #include <new> | |
| 42 | |
| 43 #ifdef _MSC_VER | |
| 44 # pragma warning(push) | |
| 45 # pragma warning(disable: 4127) // conditional expression is constant | |
| 46 # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) | |
| 47 # pragma warning(disable: 4702) // unreachable code | |
| 48 # pragma warning(disable: 4996) // this function or variable may be unsafe | |
| 49 #endif | |
| 50 | |
| 51 #if defined(_MSC_VER) && defined(__c2__) | |
| 52 # pragma clang diagnostic push | |
| 53 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe | |
| 54 #endif | |
| 55 | |
| 56 #ifdef __INTEL_COMPILER | |
| 57 # pragma warning(disable: 177) // function was declared but never referenced | |
| 58 # pragma warning(disable: 279) // controlling expression is constant | |
| 59 # pragma warning(disable: 1478 1786) // function was declared "deprecated" | |
| 60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type | |
| 61 #endif | |
| 62 | |
| 63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY) | |
| 64 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away | |
| 65 #endif | |
| 66 | |
| 67 #ifdef __BORLANDC__ | |
| 68 # pragma option push | |
| 69 # pragma warn -8008 // condition is always false | |
| 70 # pragma warn -8066 // unreachable code | |
| 71 #endif | |
| 72 | |
| 73 #ifdef __SNC__ | |
| 74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug | |
| 75 # pragma diag_suppress=178 // function was declared but never referenced | |
| 76 # pragma diag_suppress=237 // controlling expression is constant | |
| 77 #endif | |
| 78 | |
| 79 #ifdef __TI_COMPILER_VERSION__ | |
| 80 # pragma diag_suppress 179 // function was declared but never referenced | |
| 81 #endif | |
| 82 | |
| 83 // Inlining controls | |
| 84 #if defined(_MSC_VER) && _MSC_VER >= 1300 | |
| 85 # define PUGI__NO_INLINE __declspec(noinline) | |
| 86 #elif defined(__GNUC__) | |
| 87 # define PUGI__NO_INLINE __attribute__((noinline)) | |
| 88 #else | |
| 89 # define PUGI__NO_INLINE | |
| 90 #endif | |
| 91 | |
| 92 // Branch weight controls | |
| 93 #if defined(__GNUC__) && !defined(__c2__) | |
| 94 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0) | |
| 95 #else | |
| 96 # define PUGI__UNLIKELY(cond) (cond) | |
| 97 #endif | |
| 98 | |
| 99 // Simple static assertion | |
| 100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } | |
| 101 | |
| 102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack | |
| 103 #ifdef __DMC__ | |
| 104 # define PUGI__DMC_VOLATILE volatile | |
| 105 #else | |
| 106 # define PUGI__DMC_VOLATILE | |
| 107 #endif | |
| 108 | |
| 109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings | |
| 110 #if defined(__clang__) && defined(__has_attribute) | |
| 111 # if __has_attribute(no_sanitize) | |
| 112 # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow"))) | |
| 113 # else | |
| 114 # define PUGI__UNSIGNED_OVERFLOW | |
| 115 # endif | |
| 116 #else | |
| 117 # define PUGI__UNSIGNED_OVERFLOW | |
| 118 #endif | |
| 119 | |
| 120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) | |
| 121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) | |
| 122 using std::memcpy; | |
| 123 using std::memmove; | |
| 124 using std::memset; | |
| 125 #endif | |
| 126 | |
| 127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations | |
| 128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX) | |
| 129 # define LLONG_MIN (-LLONG_MAX - 1LL) | |
| 130 # define LLONG_MAX __LONG_LONG_MAX__ | |
| 131 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL) | |
| 132 #endif | |
| 133 | |
| 134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features | |
| 135 #if defined(_MSC_VER) && !defined(__S3E__) && !defined(_WIN32_WCE) | |
| 136 # define PUGI__MSVC_CRT_VERSION _MSC_VER | |
| 137 #elif defined(_WIN32_WCE) | |
| 138 # define PUGI__MSVC_CRT_VERSION 1310 // MSVC7.1 | |
| 139 #endif | |
| 140 | |
| 141 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size. | |
| 142 #if __cplusplus >= 201103 | |
| 143 # define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__) | |
| 144 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 | |
| 145 # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__) | |
| 146 #else | |
| 147 # define PUGI__SNPRINTF sprintf | |
| 148 #endif | |
| 149 | |
| 150 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat. | |
| 151 #ifdef PUGIXML_HEADER_ONLY | |
| 152 # define PUGI__NS_BEGIN namespace pugi { namespace impl { | |
| 153 # define PUGI__NS_END } } | |
| 154 # define PUGI__FN inline | |
| 155 # define PUGI__FN_NO_INLINE inline | |
| 156 #else | |
| 157 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces | |
| 158 # define PUGI__NS_BEGIN namespace pugi { namespace impl { | |
| 159 # define PUGI__NS_END } } | |
| 160 # else | |
| 161 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace { | |
| 162 # define PUGI__NS_END } } } | |
| 163 # endif | |
| 164 # define PUGI__FN | |
| 165 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE | |
| 166 #endif | |
| 167 | |
| 168 // uintptr_t | |
| 169 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561) | |
| 170 namespace pugi | |
| 171 { | |
| 172 # ifndef _UINTPTR_T_DEFINED | |
| 173 typedef size_t uintptr_t; | |
| 174 # endif | |
| 175 | |
| 176 typedef unsigned __int8 uint8_t; | |
| 177 typedef unsigned __int16 uint16_t; | |
| 178 typedef unsigned __int32 uint32_t; | |
| 179 } | |
| 180 #else | |
| 181 # include <stdint.h> | |
| 182 #endif | |
| 183 | |
| 184 // Memory allocation | |
| 185 PUGI__NS_BEGIN | |
| 186 PUGI__FN void* default_allocate(size_t size) | |
| 187 { | |
| 188 return malloc(size); | |
| 189 } | |
| 190 | |
| 191 PUGI__FN void default_deallocate(void* ptr) | |
| 192 { | |
| 193 free(ptr); | |
| 194 } | |
| 195 | |
| 196 template <typename T> | |
| 197 struct xml_memory_management_function_storage | |
| 198 { | |
| 199 static allocation_function allocate; | |
| 200 static deallocation_function deallocate; | |
| 201 }; | |
| 202 | |
| 203 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them | |
| 204 // Without a template<> we'll get multiple definitions of the same static | |
| 205 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; | |
| 206 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; | |
| 207 | |
| 208 typedef xml_memory_management_function_storage<int> xml_memory; | |
| 209 PUGI__NS_END | |
| 210 | |
| 211 // String utilities | |
| 212 PUGI__NS_BEGIN | |
| 213 // Get string length | |
| 214 PUGI__FN size_t strlength(const char_t* s) | |
| 215 { | |
| 216 assert(s); | |
| 217 | |
| 218 #ifdef PUGIXML_WCHAR_MODE | |
| 219 return wcslen(s); | |
| 220 #else | |
| 221 return strlen(s); | |
| 222 #endif | |
| 223 } | |
| 224 | |
| 225 // Compare two strings | |
| 226 PUGI__FN bool strequal(const char_t* src, const char_t* dst) | |
| 227 { | |
| 228 assert(src && dst); | |
| 229 | |
| 230 #ifdef PUGIXML_WCHAR_MODE | |
| 231 return wcscmp(src, dst) == 0; | |
| 232 #else | |
| 233 return strcmp(src, dst) == 0; | |
| 234 #endif | |
| 235 } | |
| 236 | |
| 237 // Compare lhs with [rhs_begin, rhs_end) | |
| 238 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) | |
| 239 { | |
| 240 for (size_t i = 0; i < count; ++i) | |
| 241 if (lhs[i] != rhs[i]) | |
| 242 return false; | |
| 243 | |
| 244 return lhs[count] == 0; | |
| 245 } | |
| 246 | |
| 247 // Get length of wide string, even if CRT lacks wide character support | |
| 248 PUGI__FN size_t strlength_wide(const wchar_t* s) | |
| 249 { | |
| 250 assert(s); | |
| 251 | |
| 252 #ifdef PUGIXML_WCHAR_MODE | |
| 253 return wcslen(s); | |
| 254 #else | |
| 255 const wchar_t* end = s; | |
| 256 while (*end) end++; | |
| 257 return static_cast<size_t>(end - s); | |
| 258 #endif | |
| 259 } | |
| 260 PUGI__NS_END | |
| 261 | |
| 262 // auto_ptr-like object for exception recovery | |
| 263 PUGI__NS_BEGIN | |
// Scope guard that owns a raw pointer together with the function used to dispose of it.
// The deleter runs from the destructor unless ownership was handed back through release().
template <typename T> struct auto_deleter
{
	// signature of the cleanup function
	typedef void (*D)(T*);

	T* data;
	D deleter;

	auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
	{
	}

	~auto_deleter()
	{
		// nothing to clean up when the pointer is null (never set, or already released)
		if (data != 0)
			deleter(data);
	}

	// Give up ownership: returns the held pointer and leaves the guard empty
	T* release()
	{
		T* const owned = data;

		data = 0;

		return owned;
	}
};
| 287 PUGI__NS_END | |
| 288 | |
| 289 #ifdef PUGIXML_COMPACT | |
| 290 PUGI__NS_BEGIN | |
| 291 class compact_hash_table | |
| 292 { | |
| 293 public: | |
| 294 compact_hash_table(): _items(0), _capacity(0), _count(0) | |
| 295 { | |
| 296 } | |
| 297 | |
| 298 void clear() | |
| 299 { | |
| 300 if (_items) | |
| 301 { | |
| 302 xml_memory::deallocate(_items); | |
| 303 _items = 0; | |
| 304 _capacity = 0; | |
| 305 _count = 0; | |
| 306 } | |
| 307 } | |
| 308 | |
| 309 void* find(const void* key) | |
| 310 { | |
| 311 if (_capacity == 0) return 0; | |
| 312 | |
| 313 item_t* item = get_item(key); | |
| 314 assert(item); | |
| 315 assert(item->key == key || (item->key == 0 && item->value == 0)); | |
| 316 | |
| 317 return item->value; | |
| 318 } | |
| 319 | |
| 320 void insert(const void* key, void* value) | |
| 321 { | |
| 322 assert(_capacity != 0 && _count < _capacity - _capacity / 4); | |
| 323 | |
| 324 item_t* item = get_item(key); | |
| 325 assert(item); | |
| 326 | |
| 327 if (item->key == 0) | |
| 328 { | |
| 329 _count++; | |
| 330 item->key = key; | |
| 331 } | |
| 332 | |
| 333 item->value = value; | |
| 334 } | |
| 335 | |
| 336 bool reserve(size_t extra = 16) | |
| 337 { | |
| 338 if (_count + extra >= _capacity - _capacity / 4) | |
| 339 return rehash(_count + extra); | |
| 340 | |
| 341 return true; | |
| 342 } | |
| 343 | |
| 344 private: | |
| 345 struct item_t | |
| 346 { | |
| 347 const void* key; | |
| 348 void* value; | |
| 349 }; | |
| 350 | |
| 351 item_t* _items; | |
| 352 size_t _capacity; | |
| 353 | |
| 354 size_t _count; | |
| 355 | |
| 356 bool rehash(size_t count); | |
| 357 | |
| 358 item_t* get_item(const void* key) | |
| 359 { | |
| 360 assert(key); | |
| 361 assert(_capacity > 0); | |
| 362 | |
| 363 size_t hashmod = _capacity - 1; | |
| 364 size_t bucket = hash(key) & hashmod; | |
| 365 | |
| 366 for (size_t probe = 0; probe <= hashmod; ++probe) | |
| 367 { | |
| 368 item_t& probe_item = _items[bucket]; | |
| 369 | |
| 370 if (probe_item.key == key || probe_item.key == 0) | |
| 371 return &probe_item; | |
| 372 | |
| 373 // hash collision, quadratic probing | |
| 374 bucket = (bucket + probe + 1) & hashmod; | |
| 375 } | |
| 376 | |
| 377 assert(false && "Hash table is full"); // unreachable | |
| 378 return 0; | |
| 379 } | |
| 380 | |
| 381 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key) | |
| 382 { | |
| 383 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff); | |
| 384 | |
| 385 // MurmurHash3 32-bit finalizer | |
| 386 h ^= h >> 16; | |
| 387 h *= 0x85ebca6bu; | |
| 388 h ^= h >> 13; | |
| 389 h *= 0xc2b2ae35u; | |
| 390 h ^= h >> 16; | |
| 391 | |
| 392 return h; | |
| 393 } | |
| 394 }; | |
| 395 | |
| 396 PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count) | |
| 397 { | |
| 398 size_t capacity = 32; | |
| 399 while (count >= capacity - capacity / 4) | |
| 400 capacity *= 2; | |
| 401 | |
| 402 compact_hash_table rt; | |
| 403 rt._capacity = capacity; | |
| 404 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity)); | |
| 405 | |
| 406 if (!rt._items) | |
| 407 return false; | |
| 408 | |
| 409 memset(rt._items, 0, sizeof(item_t) * capacity); | |
| 410 | |
| 411 for (size_t i = 0; i < _capacity; ++i) | |
| 412 if (_items[i].key) | |
| 413 rt.insert(_items[i].key, _items[i].value); | |
| 414 | |
| 415 if (_items) | |
| 416 xml_memory::deallocate(_items); | |
| 417 | |
| 418 _capacity = capacity; | |
| 419 _items = rt._items; | |
| 420 | |
| 421 assert(_count == rt._count); | |
| 422 | |
| 423 return true; | |
| 424 } | |
| 425 | |
| 426 PUGI__NS_END | |
| 427 #endif | |
| 428 | |
| 429 PUGI__NS_BEGIN | |
| 430 #ifdef PUGIXML_COMPACT | |
| 431 static const uintptr_t xml_memory_block_alignment = 4; | |
| 432 #else | |
| 433 static const uintptr_t xml_memory_block_alignment = sizeof(void*); | |
| 434 #endif | |
| 435 | |
// extra metadata bits
// these are single-bit flags (64, 32, 16) that sit above the four type bits covered by xml_memory_page_type_mask
// NOTE(review): where the three flag bits are set and tested is not visible in this part of the file
static const uintptr_t xml_memory_page_contents_shared_mask = 64;
static const uintptr_t xml_memory_page_name_allocated_mask = 32;
static const uintptr_t xml_memory_page_value_allocated_mask = 16;
// PUGI__NODETYPE applies this mask to an object's header to recover its xml_node_type
static const uintptr_t xml_memory_page_type_mask = 15;

// combined masks for string uniqueness
// each is the bitwise OR of one "allocated" flag with the "contents shared" flag
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
| 445 | |
| 446 #ifdef PUGIXML_COMPACT | |
| 447 #define PUGI__GETHEADER_IMPL(object, page, flags) // unused | |
| 448 #define PUGI__GETPAGE_IMPL(header) (header).get_page() | |
| 449 #else | |
| 450 #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags)) | |
| 451 // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings | |
| 452 #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8)))) | |
| 453 #endif | |
| 454 | |
| 455 #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) | |
| 456 #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask) | |
| 457 | |
| 458 struct xml_allocator; | |
| 459 | |
| 460 struct xml_memory_page | |
| 461 { | |
| 462 static xml_memory_page* construct(void* memory) | |
| 463 { | |
| 464 xml_memory_page* result = static_cast<xml_memory_page*>(memory); | |
| 465 | |
| 466 result->allocator = 0; | |
| 467 result->prev = 0; | |
| 468 result->next = 0; | |
| 469 result->busy_size = 0; | |
| 470 result->freed_size = 0; | |
| 471 | |
| 472 #ifdef PUGIXML_COMPACT | |
| 473 result->compact_string_base = 0; | |
| 474 result->compact_shared_parent = 0; | |
| 475 result->compact_page_marker = 0; | |
| 476 #endif | |
| 477 | |
| 478 return result; | |
| 479 } | |
| 480 | |
| 481 xml_allocator* allocator; | |
| 482 | |
| 483 xml_memory_page* prev; | |
| 484 xml_memory_page* next; | |
| 485 | |
| 486 size_t busy_size; | |
| 487 size_t freed_size; | |
| 488 | |
| 489 #ifdef PUGIXML_COMPACT | |
| 490 char_t* compact_string_base; | |
| 491 void* compact_shared_parent; | |
| 492 uint32_t* compact_page_marker; | |
| 493 #endif | |
| 494 }; | |
| 495 | |
// Payload capacity of a regular page in bytes: the configured page size
// (PUGIXML_MEMORY_PAGE_SIZE when defined, 32768 otherwise) minus the page header
static const size_t xml_memory_page_size =
#ifdef PUGIXML_MEMORY_PAGE_SIZE
	(PUGIXML_MEMORY_PAGE_SIZE)
#else
	32768
#endif
	- sizeof(xml_memory_page);
| 503 | |
// Bookkeeping record that xml_allocator::allocate_string writes immediately before the characters of a string.
// Both fields are stored in units of xml_memory_block_alignment so that they fit in 16 bits.
struct xml_memory_string_header
{
	uint16_t page_offset; // offset of this header from the start of the page payload (the bytes right after xml_memory_page)
	uint16_t full_size; // size of header + string rounded up to the block alignment; 0 if string occupies whole page
};
| 509 | |
| 510 struct xml_allocator | |
| 511 { | |
| 512 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) | |
| 513 { | |
| 514 #ifdef PUGIXML_COMPACT | |
| 515 _hash = 0; | |
| 516 #endif | |
| 517 } | |
| 518 | |
| 519 xml_memory_page* allocate_page(size_t data_size) | |
| 520 { | |
| 521 size_t size = sizeof(xml_memory_page) + data_size; | |
| 522 | |
| 523 // allocate block with some alignment, leaving memory for worst-case padding | |
| 524 void* memory = xml_memory::allocate(size); | |
| 525 if (!memory) return 0; | |
| 526 | |
| 527 // prepare page structure | |
| 528 xml_memory_page* page = xml_memory_page::construct(memory); | |
| 529 assert(page); | |
| 530 | |
| 531 assert(this == _root->allocator); | |
| 532 page->allocator = this; | |
| 533 | |
| 534 return page; | |
| 535 } | |
| 536 | |
| 537 static void deallocate_page(xml_memory_page* page) | |
| 538 { | |
| 539 xml_memory::deallocate(page); | |
| 540 } | |
| 541 | |
| 542 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); | |
| 543 | |
| 544 void* allocate_memory(size_t size, xml_memory_page*& out_page) | |
| 545 { | |
| 546 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) | |
| 547 return allocate_memory_oob(size, out_page); | |
| 548 | |
| 549 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; | |
| 550 | |
| 551 _busy_size += size; | |
| 552 | |
| 553 out_page = _root; | |
| 554 | |
| 555 return buf; | |
| 556 } | |
| 557 | |
| 558 #ifdef PUGIXML_COMPACT | |
| 559 void* allocate_object(size_t size, xml_memory_page*& out_page) | |
| 560 { | |
| 561 void* result = allocate_memory(size + sizeof(uint32_t), out_page); | |
| 562 if (!result) return 0; | |
| 563 | |
| 564 // adjust for marker | |
| 565 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); | |
| 566 | |
| 567 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) | |
| 568 { | |
| 569 // insert new marker | |
| 570 uint32_t* marker = static_cast<uint32_t*>(result); | |
| 571 | |
| 572 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); | |
| 573 out_page->compact_page_marker = marker; | |
| 574 | |
| 575 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block | |
| 576 // this will make sure deallocate_memory correctly tracks the size | |
| 577 out_page->freed_size += sizeof(uint32_t); | |
| 578 | |
| 579 return marker + 1; | |
| 580 } | |
| 581 else | |
| 582 { | |
| 583 // roll back uint32_t part | |
| 584 _busy_size -= sizeof(uint32_t); | |
| 585 | |
| 586 return result; | |
| 587 } | |
| 588 } | |
| 589 #else | |
| 590 void* allocate_object(size_t size, xml_memory_page*& out_page) | |
| 591 { | |
| 592 return allocate_memory(size, out_page); | |
| 593 } | |
| 594 #endif | |
| 595 | |
| 596 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) | |
| 597 { | |
| 598 if (page == _root) page->busy_size = _busy_size; | |
| 599 | |
| 600 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size); | |
| 601 (void)!ptr; | |
| 602 | |
| 603 page->freed_size += size; | |
| 604 assert(page->freed_size <= page->busy_size); | |
| 605 | |
| 606 if (page->freed_size == page->busy_size) | |
| 607 { | |
| 608 if (page->next == 0) | |
| 609 { | |
| 610 assert(_root == page); | |
| 611 | |
| 612 // top page freed, just reset sizes | |
| 613 page->busy_size = 0; | |
| 614 page->freed_size = 0; | |
| 615 | |
| 616 #ifdef PUGIXML_COMPACT | |
| 617 // reset compact state to maximize efficiency | |
| 618 page->compact_string_base = 0; | |
| 619 page->compact_shared_parent = 0; | |
| 620 page->compact_page_marker = 0; | |
| 621 #endif | |
| 622 | |
| 623 _busy_size = 0; | |
| 624 } | |
| 625 else | |
| 626 { | |
| 627 assert(_root != page); | |
| 628 assert(page->prev); | |
| 629 | |
| 630 // remove from the list | |
| 631 page->prev->next = page->next; | |
| 632 page->next->prev = page->prev; | |
| 633 | |
| 634 // deallocate | |
| 635 deallocate_page(page); | |
| 636 } | |
| 637 } | |
| 638 } | |
| 639 | |
| 640 char_t* allocate_string(size_t length) | |
| 641 { | |
| 642 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; | |
| 643 | |
| 644 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); | |
| 645 | |
| 646 // allocate memory for string and header block | |
| 647 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); | |
| 648 | |
| 649 // round size up to block alignment boundary | |
| 650 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); | |
| 651 | |
| 652 xml_memory_page* page; | |
| 653 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); | |
| 654 | |
| 655 if (!header) return 0; | |
| 656 | |
| 657 // setup header | |
| 658 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); | |
| 659 | |
| 660 assert(page_offset % xml_memory_block_alignment == 0); | |
| 661 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset); | |
| 662 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); | |
| 663 | |
| 664 // full_size == 0 for large strings that occupy the whole page | |
| 665 assert(full_size % xml_memory_block_alignment == 0); | |
| 666 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); | |
| 667 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0); | |
| 668 | |
| 669 // round-trip through void* to avoid 'cast increases required alignment of target type' warning | |
| 670 // header is guaranteed a pointer-sized alignment, which should be enough for char_t | |
| 671 return static_cast<char_t*>(static_cast<void*>(header + 1)); | |
| 672 } | |
| 673 | |
| 674 void deallocate_string(char_t* string) | |
| 675 { | |
| 676 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings | |
| 677 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string | |
| 678 | |
| 679 // get header | |
| 680 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1; | |
| 681 assert(header); | |
| 682 | |
| 683 // deallocate | |
| 684 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; | |
| 685 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); | |
| 686 | |
| 687 // if full_size == 0 then this string occupies the whole page | |
| 688 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment; | |
| 689 | |
| 690 deallocate_memory(header, full_size, page); | |
| 691 } | |
| 692 | |
| 693 bool reserve() | |
| 694 { | |
| 695 #ifdef PUGIXML_COMPACT | |
| 696 return _hash->reserve(); | |
| 697 #else | |
| 698 return true; | |
| 699 #endif | |
| 700 } | |
| 701 | |
| 702 xml_memory_page* _root; | |
| 703 size_t _busy_size; | |
| 704 | |
| 705 #ifdef PUGIXML_COMPACT | |
| 706 compact_hash_table* _hash; | |
| 707 #endif | |
| 708 }; | |
| 709 | |
| 710 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) | |
| 711 { | |
| 712 const size_t large_allocation_threshold = xml_memory_page_size / 4; | |
| 713 | |
| 714 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); | |
| 715 out_page = page; | |
| 716 | |
| 717 if (!page) return 0; | |
| 718 | |
| 719 if (size <= large_allocation_threshold) | |
| 720 { | |
| 721 _root->busy_size = _busy_size; | |
| 722 | |
| 723 // insert page at the end of linked list | |
| 724 page->prev = _root; | |
| 725 _root->next = page; | |
| 726 _root = page; | |
| 727 | |
| 728 _busy_size = size; | |
| 729 } | |
| 730 else | |
| 731 { | |
| 732 // insert page before the end of linked list, so that it is deleted as soon as possible | |
| 733 // the last page is not deleted even if it's empty (see deallocate_memory) | |
| 734 assert(_root->prev); | |
| 735 | |
| 736 page->prev = _root->prev; | |
| 737 page->next = _root; | |
| 738 | |
| 739 _root->prev->next = page; | |
| 740 _root->prev = page; | |
| 741 | |
| 742 page->busy_size = size; | |
| 743 } | |
| 744 | |
| 745 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); | |
| 746 } | |
| 747 PUGI__NS_END | |
| 748 | |
| 749 #ifdef PUGIXML_COMPACT | |
| 750 PUGI__NS_BEGIN | |
| 751 static const uintptr_t compact_alignment_log2 = 2; | |
| 752 static const uintptr_t compact_alignment = 1 << compact_alignment_log2; | |
| 753 | |
| 754 class compact_header | |
| 755 { | |
| 756 public: | |
| 757 compact_header(xml_memory_page* page, unsigned int flags) | |
| 758 { | |
| 759 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); | |
| 760 | |
| 761 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); | |
| 762 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); | |
| 763 | |
| 764 _page = static_cast<unsigned char>(offset >> compact_alignment_log2); | |
| 765 _flags = static_cast<unsigned char>(flags); | |
| 766 } | |
| 767 | |
| 768 void operator&=(uintptr_t mod) | |
| 769 { | |
| 770 _flags &= static_cast<unsigned char>(mod); | |
| 771 } | |
| 772 | |
| 773 void operator|=(uintptr_t mod) | |
| 774 { | |
| 775 _flags |= static_cast<unsigned char>(mod); | |
| 776 } | |
| 777 | |
| 778 uintptr_t operator&(uintptr_t mod) const | |
| 779 { | |
| 780 return _flags & mod; | |
| 781 } | |
| 782 | |
| 783 xml_memory_page* get_page() const | |
| 784 { | |
| 785 // round-trip through void* to silence 'cast increases required alignment of target type' warnings | |
| 786 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); | |
| 787 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); | |
| 788 | |
| 789 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page))); | |
| 790 } | |
| 791 | |
| 792 private: | |
| 793 unsigned char _page; | |
| 794 unsigned char _flags; | |
| 795 }; | |
| 796 | |
| 797 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) | |
| 798 { | |
| 799 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); | |
| 800 | |
| 801 return header->get_page(); | |
| 802 } | |
| 803 | |
| 804 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) | |
| 805 { | |
| 806 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object)); | |
| 807 } | |
| 808 | |
| 809 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) | |
| 810 { | |
| 811 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value); | |
| 812 } | |
| 813 | |
| 814 template <typename T, int header_offset, int start = -126> class compact_pointer | |
| 815 { | |
| 816 public: | |
| 817 compact_pointer(): _data(0) | |
| 818 { | |
| 819 } | |
| 820 | |
| 821 void operator=(const compact_pointer& rhs) | |
| 822 { | |
| 823 *this = rhs + 0; | |
| 824 } | |
| 825 | |
| 826 void operator=(T* value) | |
| 827 { | |
| 828 if (value) | |
| 829 { | |
| 830 // value is guaranteed to be compact-aligned; 'this' is not | |
| 831 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) | |
| 832 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to | |
| 833 // compensate for arithmetic shift rounding for negative values | |
| 834 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); | |
| 835 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; | |
| 836 | |
| 837 if (static_cast<uintptr_t>(offset) <= 253) | |
| 838 _data = static_cast<unsigned char>(offset + 1); | |
| 839 else | |
| 840 { | |
| 841 compact_set_value<header_offset>(this, value); | |
| 842 | |
| 843 _data = 255; | |
| 844 } | |
| 845 } | |
| 846 else | |
| 847 _data = 0; | |
| 848 } | |
| 849 | |
| 850 operator T*() const | |
| 851 { | |
| 852 if (_data) | |
| 853 { | |
| 854 if (_data < 255) | |
| 855 { | |
| 856 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); | |
| 857 | |
| 858 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment); | |
| 859 } | |
| 860 else | |
| 861 return compact_get_value<header_offset, T>(this); | |
| 862 } | |
| 863 else | |
| 864 return 0; | |
| 865 } | |
| 866 | |
| 867 T* operator->() const | |
| 868 { | |
| 869 return *this; | |
| 870 } | |
| 871 | |
| 872 private: | |
| 873 unsigned char _data; | |
| 874 }; | |
| 875 | |
| 876 template <typename T, int header_offset> class compact_pointer_parent | |
| 877 { | |
| 878 public: | |
| 879 compact_pointer_parent(): _data(0) | |
| 880 { | |
| 881 } | |
| 882 | |
| 883 void operator=(const compact_pointer_parent& rhs) | |
| 884 { | |
| 885 *this = rhs + 0; | |
| 886 } | |
| 887 | |
| 888 void operator=(T* value) | |
| 889 { | |
| 890 if (value) | |
| 891 { | |
| 892 // value is guaranteed to be compact-aligned; 'this' is not | |
| 893 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) | |
| 894 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to | |
| 895 // compensate for arithmetic shift behavior for negative values | |
| 896 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); | |
| 897 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; | |
| 898 | |
| 899 if (static_cast<uintptr_t>(offset) <= 65533) | |
| 900 { | |
| 901 _data = static_cast<unsigned short>(offset + 1); | |
| 902 } | |
| 903 else | |
| 904 { | |
| 905 xml_memory_page* page = compact_get_page(this, header_offset); | |
| 906 | |
| 907 if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) | |
| 908 page->compact_shared_parent = value; | |
| 909 | |
| 910 if (page->compact_shared_parent == value) | |
| 911 { | |
| 912 _data = 65534; | |
| 913 } | |
| 914 else | |
| 915 { | |
| 916 compact_set_value<header_offset>(this, value); | |
| 917 | |
| 918 _data = 65535; | |
| 919 } | |
| 920 } | |
| 921 } | |
| 922 else | |
| 923 { | |
| 924 _data = 0; | |
| 925 } | |
| 926 } | |
| 927 | |
| 928 operator T*() const | |
| 929 { | |
| 930 if (_data) | |
| 931 { | |
| 932 if (_data < 65534) | |
| 933 { | |
| 934 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); | |
| 935 | |
| 936 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment); | |
| 937 } | |
| 938 else if (_data == 65534) | |
| 939 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); | |
| 940 else | |
| 941 return compact_get_value<header_offset, T>(this); | |
| 942 } | |
| 943 else | |
| 944 return 0; | |
| 945 } | |
| 946 | |
| 947 T* operator->() const | |
| 948 { | |
| 949 return *this; | |
| 950 } | |
| 951 | |
| 952 private: | |
| 953 uint16_t _data; | |
| 954 }; | |
| 955 | |
| 956 template <int header_offset, int base_offset> class compact_string | |
| 957 { | |
| 958 public: | |
| 959 compact_string(): _data(0) | |
| 960 { | |
| 961 } | |
| 962 | |
| 963 void operator=(const compact_string& rhs) | |
| 964 { | |
| 965 *this = rhs + 0; | |
| 966 } | |
| 967 | |
| 968 void operator=(char_t* value) | |
| 969 { | |
| 970 if (value) | |
| 971 { | |
| 972 xml_memory_page* page = compact_get_page(this, header_offset); | |
| 973 | |
| 974 if (PUGI__UNLIKELY(page->compact_string_base == 0)) | |
| 975 page->compact_string_base = value; | |
| 976 | |
| 977 ptrdiff_t offset = value - page->compact_string_base; | |
| 978 | |
| 979 if (static_cast<uintptr_t>(offset) < (65535 << 7)) | |
| 980 { | |
| 981 // round-trip through void* to silence 'cast increases required alignment of target type' warnings | |
| 982 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); | |
| 983 | |
| 984 if (*base == 0) | |
| 985 { | |
| 986 *base = static_cast<uint16_t>((offset >> 7) + 1); | |
| 987 _data = static_cast<unsigned char>((offset & 127) + 1); | |
| 988 } | |
| 989 else | |
| 990 { | |
| 991 ptrdiff_t remainder = offset - ((*base - 1) << 7); | |
| 992 | |
| 993 if (static_cast<uintptr_t>(remainder) <= 253) | |
| 994 { | |
| 995 _data = static_cast<unsigned char>(remainder + 1); | |
| 996 } | |
| 997 else | |
| 998 { | |
| 999 compact_set_value<header_offset>(this, value); | |
| 1000 | |
| 1001 _data = 255; | |
| 1002 } | |
| 1003 } | |
| 1004 } | |
| 1005 else | |
| 1006 { | |
| 1007 compact_set_value<header_offset>(this, value); | |
| 1008 | |
| 1009 _data = 255; | |
| 1010 } | |
| 1011 } | |
| 1012 else | |
| 1013 { | |
| 1014 _data = 0; | |
| 1015 } | |
| 1016 } | |
| 1017 | |
| 1018 operator char_t*() const | |
| 1019 { | |
| 1020 if (_data) | |
| 1021 { | |
| 1022 if (_data < 255) | |
| 1023 { | |
| 1024 xml_memory_page* page = compact_get_page(this, header_offset); | |
| 1025 | |
| 1026 // round-trip through void* to silence 'cast increases required alignment of target type' warnings | |
| 1027 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset)); | |
| 1028 assert(*base); | |
| 1029 | |
| 1030 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); | |
| 1031 | |
| 1032 return page->compact_string_base + offset; | |
| 1033 } | |
| 1034 else | |
| 1035 { | |
| 1036 return compact_get_value<header_offset, char_t>(this); | |
| 1037 } | |
| 1038 } | |
| 1039 else | |
| 1040 return 0; | |
| 1041 } | |
| 1042 | |
| 1043 private: | |
| 1044 unsigned char _data; | |
| 1045 }; | |
| 1046 PUGI__NS_END | |
| 1047 #endif | |
| 1048 | |
| 1049 #ifdef PUGIXML_COMPACT | |
| 1050 namespace pugi | |
| 1051 { | |
| 1052 struct xml_attribute_struct | |
| 1053 { | |
| 1054 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) | |
| 1055 { | |
| 1056 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); | |
| 1057 } | |
| 1058 | |
| 1059 impl::compact_header header; | |
| 1060 | |
| 1061 uint16_t namevalue_base; | |
| 1062 | |
| 1063 impl::compact_string<4, 2> name; | |
| 1064 impl::compact_string<5, 3> value; | |
| 1065 | |
| 1066 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; | |
| 1067 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; | |
| 1068 }; | |
| 1069 | |
| 1070 struct xml_node_struct | |
| 1071 { | |
| 1072 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0) | |
| 1073 { | |
| 1074 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); | |
| 1075 } | |
| 1076 | |
| 1077 impl::compact_header header; | |
| 1078 | |
| 1079 uint16_t namevalue_base; | |
| 1080 | |
| 1081 impl::compact_string<4, 2> name; | |
| 1082 impl::compact_string<5, 3> value; | |
| 1083 | |
| 1084 impl::compact_pointer_parent<xml_node_struct, 6> parent; | |
| 1085 | |
| 1086 impl::compact_pointer<xml_node_struct, 8, 0> first_child; | |
| 1087 | |
| 1088 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; | |
| 1089 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; | |
| 1090 | |
| 1091 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; | |
| 1092 }; | |
| 1093 } | |
| 1094 #else | |
| 1095 namespace pugi | |
| 1096 { | |
| 1097 struct xml_attribute_struct | |
| 1098 { | |
| 1099 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0) | |
| 1100 { | |
| 1101 header = PUGI__GETHEADER_IMPL(this, page, 0); | |
| 1102 } | |
| 1103 | |
| 1104 uintptr_t header; | |
| 1105 | |
| 1106 char_t* name; | |
| 1107 char_t* value; | |
| 1108 | |
| 1109 xml_attribute_struct* prev_attribute_c; | |
| 1110 xml_attribute_struct* next_attribute; | |
| 1111 }; | |
| 1112 | |
| 1113 struct xml_node_struct | |
| 1114 { | |
| 1115 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) | |
| 1116 { | |
| 1117 header = PUGI__GETHEADER_IMPL(this, page, type); | |
| 1118 } | |
| 1119 | |
| 1120 uintptr_t header; | |
| 1121 | |
| 1122 char_t* name; | |
| 1123 char_t* value; | |
| 1124 | |
| 1125 xml_node_struct* parent; | |
| 1126 | |
| 1127 xml_node_struct* first_child; | |
| 1128 | |
| 1129 xml_node_struct* prev_sibling_c; | |
| 1130 xml_node_struct* next_sibling; | |
| 1131 | |
| 1132 xml_attribute_struct* first_attribute; | |
| 1133 }; | |
| 1134 } | |
| 1135 #endif | |
| 1136 | |
| 1137 PUGI__NS_BEGIN | |
| 1138 struct xml_extra_buffer | |
| 1139 { | |
| 1140 char_t* buffer; | |
| 1141 xml_extra_buffer* next; | |
| 1142 }; | |
| 1143 | |
| 1144 struct xml_document_struct: public xml_node_struct, public xml_allocator | |
| 1145 { | |
| 1146 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) | |
| 1147 { | |
| 1148 } | |
| 1149 | |
| 1150 const char_t* buffer; | |
| 1151 | |
| 1152 xml_extra_buffer* extra_buffers; | |
| 1153 | |
| 1154 #ifdef PUGIXML_COMPACT | |
| 1155 compact_hash_table hash; | |
| 1156 #endif | |
| 1157 }; | |
| 1158 | |
| 1159 template <typename Object> inline xml_allocator& get_allocator(const Object* object) | |
| 1160 { | |
| 1161 assert(object); | |
| 1162 | |
| 1163 return *PUGI__GETPAGE(object)->allocator; | |
| 1164 } | |
| 1165 | |
| 1166 template <typename Object> inline xml_document_struct& get_document(const Object* object) | |
| 1167 { | |
| 1168 assert(object); | |
| 1169 | |
| 1170 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); | |
| 1171 } | |
| 1172 PUGI__NS_END | |
| 1173 | |
| 1174 // Low-level DOM operations | |
| 1175 PUGI__NS_BEGIN | |
| 1176 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) | |
| 1177 { | |
| 1178 xml_memory_page* page; | |
| 1179 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); | |
| 1180 if (!memory) return 0; | |
| 1181 | |
| 1182 return new (memory) xml_attribute_struct(page); | |
| 1183 } | |
| 1184 | |
| 1185 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) | |
| 1186 { | |
| 1187 xml_memory_page* page; | |
| 1188 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); | |
| 1189 if (!memory) return 0; | |
| 1190 | |
| 1191 return new (memory) xml_node_struct(page, type); | |
| 1192 } | |
| 1193 | |
| 1194 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) | |
| 1195 { | |
| 1196 if (a->header & impl::xml_memory_page_name_allocated_mask) | |
| 1197 alloc.deallocate_string(a->name); | |
| 1198 | |
| 1199 if (a->header & impl::xml_memory_page_value_allocated_mask) | |
| 1200 alloc.deallocate_string(a->value); | |
| 1201 | |
| 1202 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); | |
| 1203 } | |
| 1204 | |
| 1205 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) | |
| 1206 { | |
| 1207 if (n->header & impl::xml_memory_page_name_allocated_mask) | |
| 1208 alloc.deallocate_string(n->name); | |
| 1209 | |
| 1210 if (n->header & impl::xml_memory_page_value_allocated_mask) | |
| 1211 alloc.deallocate_string(n->value); | |
| 1212 | |
| 1213 for (xml_attribute_struct* attr = n->first_attribute; attr; ) | |
| 1214 { | |
| 1215 xml_attribute_struct* next = attr->next_attribute; | |
| 1216 | |
| 1217 destroy_attribute(attr, alloc); | |
| 1218 | |
| 1219 attr = next; | |
| 1220 } | |
| 1221 | |
| 1222 for (xml_node_struct* child = n->first_child; child; ) | |
| 1223 { | |
| 1224 xml_node_struct* next = child->next_sibling; | |
| 1225 | |
| 1226 destroy_node(child, alloc); | |
| 1227 | |
| 1228 child = next; | |
| 1229 } | |
| 1230 | |
| 1231 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); | |
| 1232 } | |
| 1233 | |
| 1234 inline void append_node(xml_node_struct* child, xml_node_struct* node) | |
| 1235 { | |
| 1236 child->parent = node; | |
| 1237 | |
| 1238 xml_node_struct* head = node->first_child; | |
| 1239 | |
| 1240 if (head) | |
| 1241 { | |
| 1242 xml_node_struct* tail = head->prev_sibling_c; | |
| 1243 | |
| 1244 tail->next_sibling = child; | |
| 1245 child->prev_sibling_c = tail; | |
| 1246 head->prev_sibling_c = child; | |
| 1247 } | |
| 1248 else | |
| 1249 { | |
| 1250 node->first_child = child; | |
| 1251 child->prev_sibling_c = child; | |
| 1252 } | |
| 1253 } | |
| 1254 | |
| 1255 inline void prepend_node(xml_node_struct* child, xml_node_struct* node) | |
| 1256 { | |
| 1257 child->parent = node; | |
| 1258 | |
| 1259 xml_node_struct* head = node->first_child; | |
| 1260 | |
| 1261 if (head) | |
| 1262 { | |
| 1263 child->prev_sibling_c = head->prev_sibling_c; | |
| 1264 head->prev_sibling_c = child; | |
| 1265 } | |
| 1266 else | |
| 1267 child->prev_sibling_c = child; | |
| 1268 | |
| 1269 child->next_sibling = head; | |
| 1270 node->first_child = child; | |
| 1271 } | |
| 1272 | |
| 1273 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) | |
| 1274 { | |
| 1275 xml_node_struct* parent = node->parent; | |
| 1276 | |
| 1277 child->parent = parent; | |
| 1278 | |
| 1279 xml_node_struct* next = node->next_sibling; | |
| 1280 | |
| 1281 if (next) | |
| 1282 next->prev_sibling_c = child; | |
| 1283 else | |
| 1284 parent->first_child->prev_sibling_c = child; | |
| 1285 | |
| 1286 child->next_sibling = next; | |
| 1287 child->prev_sibling_c = node; | |
| 1288 | |
| 1289 node->next_sibling = child; | |
| 1290 } | |
| 1291 | |
| 1292 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) | |
| 1293 { | |
| 1294 xml_node_struct* parent = node->parent; | |
| 1295 | |
| 1296 child->parent = parent; | |
| 1297 | |
| 1298 xml_node_struct* prev = node->prev_sibling_c; | |
| 1299 | |
| 1300 if (prev->next_sibling) | |
| 1301 prev->next_sibling = child; | |
| 1302 else | |
| 1303 parent->first_child = child; | |
| 1304 | |
| 1305 child->prev_sibling_c = prev; | |
| 1306 child->next_sibling = node; | |
| 1307 | |
| 1308 node->prev_sibling_c = child; | |
| 1309 } | |
| 1310 | |
| 1311 inline void remove_node(xml_node_struct* node) | |
| 1312 { | |
| 1313 xml_node_struct* parent = node->parent; | |
| 1314 | |
| 1315 xml_node_struct* next = node->next_sibling; | |
| 1316 xml_node_struct* prev = node->prev_sibling_c; | |
| 1317 | |
| 1318 if (next) | |
| 1319 next->prev_sibling_c = prev; | |
| 1320 else | |
| 1321 parent->first_child->prev_sibling_c = prev; | |
| 1322 | |
| 1323 if (prev->next_sibling) | |
| 1324 prev->next_sibling = next; | |
| 1325 else | |
| 1326 parent->first_child = next; | |
| 1327 | |
| 1328 node->parent = 0; | |
| 1329 node->prev_sibling_c = 0; | |
| 1330 node->next_sibling = 0; | |
| 1331 } | |
| 1332 | |
| 1333 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) | |
| 1334 { | |
| 1335 xml_attribute_struct* head = node->first_attribute; | |
| 1336 | |
| 1337 if (head) | |
| 1338 { | |
| 1339 xml_attribute_struct* tail = head->prev_attribute_c; | |
| 1340 | |
| 1341 tail->next_attribute = attr; | |
| 1342 attr->prev_attribute_c = tail; | |
| 1343 head->prev_attribute_c = attr; | |
| 1344 } | |
| 1345 else | |
| 1346 { | |
| 1347 node->first_attribute = attr; | |
| 1348 attr->prev_attribute_c = attr; | |
| 1349 } | |
| 1350 } | |
| 1351 | |
| 1352 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) | |
| 1353 { | |
| 1354 xml_attribute_struct* head = node->first_attribute; | |
| 1355 | |
| 1356 if (head) | |
| 1357 { | |
| 1358 attr->prev_attribute_c = head->prev_attribute_c; | |
| 1359 head->prev_attribute_c = attr; | |
| 1360 } | |
| 1361 else | |
| 1362 attr->prev_attribute_c = attr; | |
| 1363 | |
| 1364 attr->next_attribute = head; | |
| 1365 node->first_attribute = attr; | |
| 1366 } | |
| 1367 | |
| 1368 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) | |
| 1369 { | |
| 1370 xml_attribute_struct* next = place->next_attribute; | |
| 1371 | |
| 1372 if (next) | |
| 1373 next->prev_attribute_c = attr; | |
| 1374 else | |
| 1375 node->first_attribute->prev_attribute_c = attr; | |
| 1376 | |
| 1377 attr->next_attribute = next; | |
| 1378 attr->prev_attribute_c = place; | |
| 1379 place->next_attribute = attr; | |
| 1380 } | |
| 1381 | |
| 1382 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) | |
| 1383 { | |
| 1384 xml_attribute_struct* prev = place->prev_attribute_c; | |
| 1385 | |
| 1386 if (prev->next_attribute) | |
| 1387 prev->next_attribute = attr; | |
| 1388 else | |
| 1389 node->first_attribute = attr; | |
| 1390 | |
| 1391 attr->prev_attribute_c = prev; | |
| 1392 attr->next_attribute = place; | |
| 1393 place->prev_attribute_c = attr; | |
| 1394 } | |
| 1395 | |
| 1396 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) | |
| 1397 { | |
| 1398 xml_attribute_struct* next = attr->next_attribute; | |
| 1399 xml_attribute_struct* prev = attr->prev_attribute_c; | |
| 1400 | |
| 1401 if (next) | |
| 1402 next->prev_attribute_c = prev; | |
| 1403 else | |
| 1404 node->first_attribute->prev_attribute_c = prev; | |
| 1405 | |
| 1406 if (prev->next_attribute) | |
| 1407 prev->next_attribute = next; | |
| 1408 else | |
| 1409 node->first_attribute = next; | |
| 1410 | |
| 1411 attr->prev_attribute_c = 0; | |
| 1412 attr->next_attribute = 0; | |
| 1413 } | |
| 1414 | |
| 1415 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) | |
| 1416 { | |
| 1417 if (!alloc.reserve()) return 0; | |
| 1418 | |
| 1419 xml_node_struct* child = allocate_node(alloc, type); | |
| 1420 if (!child) return 0; | |
| 1421 | |
| 1422 append_node(child, node); | |
| 1423 | |
| 1424 return child; | |
| 1425 } | |
| 1426 | |
| 1427 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) | |
| 1428 { | |
| 1429 if (!alloc.reserve()) return 0; | |
| 1430 | |
| 1431 xml_attribute_struct* attr = allocate_attribute(alloc); | |
| 1432 if (!attr) return 0; | |
| 1433 | |
| 1434 append_attribute(attr, node); | |
| 1435 | |
| 1436 return attr; | |
| 1437 } | |
| 1438 PUGI__NS_END | |
| 1439 | |
| 1440 // Helper classes for code generation | |
| 1441 PUGI__NS_BEGIN | |
// Compile-time "false" tag: opt_false::value is 0.
struct opt_false
{
	enum
	{
		value = 0
	};
};
| 1446 | |
// Compile-time "true" tag: opt_true::value is 1.
struct opt_true
{
	enum
	{
		value = 1
	};
};
| 1451 PUGI__NS_END | |
| 1452 | |
| 1453 // Unicode utilities | |
| 1454 PUGI__NS_BEGIN | |
// Swaps the two bytes of a 16-bit value.
inline uint16_t endian_swap(uint16_t value)
{
	const unsigned int high_byte = value >> 8;
	const unsigned int low_byte = value & 0xff;

	return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
| 1459 | |
// Reverses the order of the four bytes of a 32-bit value.
inline uint32_t endian_swap(uint32_t value)
{
	const uint32_t byte0 = value & 0xff;
	const uint32_t byte1 = (value >> 8) & 0xff;
	const uint32_t byte2 = (value >> 16) & 0xff;
	const uint32_t byte3 = value >> 24;

	return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
| 1464 | |
// Traits that count how many UTF-8 bytes a sequence of code points needs.
struct utf8_counter
{
	typedef size_t value_type;

	// Code points below U+10000 take 1 (U+0000..U+007F), 2 (U+0080..U+07FF) or 3 (U+0800..U+FFFF) bytes.
	static value_type low(value_type result, uint32_t ch)
	{
		size_t length = 3;

		if (ch < 0x80)
			length = 1;
		else if (ch < 0x800)
			length = 2;

		return result + length;
	}

	// U+10000..U+10FFFF always take 4 bytes.
	static value_type high(value_type result, uint32_t)
	{
		return result + 4;
	}
};
| 1485 | |
// Traits that encode code points as UTF-8 into a byte buffer.
// Each function writes at 'result' and returns the position just past the written bytes.
struct utf8_writer
{
	typedef uint8_t* value_type;

	// Encodes a code point below U+10000 using 1, 2 or 3 bytes.
	static value_type low(value_type result, uint32_t ch)
	{
		if (ch >= 0x800)
		{
			// U+0800..U+FFFF
			result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
			result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
			result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

			return result + 3;
		}

		if (ch >= 0x80)
		{
			// U+0080..U+07FF
			result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
			result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

			return result + 2;
		}

		// U+0000..U+007F
		result[0] = static_cast<uint8_t>(ch);

		return result + 1;
	}

	// Encodes a code point in U+10000..U+10FFFF using 4 bytes.
	static value_type high(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
		result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
		result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
		result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

		return result + 4;
	}

	// Dispatches to low() or high() depending on the code point value.
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000)
			return low(result, ch);

		return high(result, ch);
	}
};
| 1530 | |
// Traits that count how many UTF-16 code units a sequence of code points needs.
struct utf16_counter
{
	typedef size_t value_type;

	// one code unit
	static value_type low(value_type result, uint32_t)
	{
		return 1 + result;
	}

	// two code units
	static value_type high(value_type result, uint32_t)
	{
		return 2 + result;
	}
};
| 1545 | |
// Traits that encode code points as UTF-16 into a buffer of 16-bit units.
// Each function writes at 'result' and returns the position just past the written units.
struct utf16_writer
{
	typedef uint16_t* value_type;

	// Writes the code point as a single unit (truncated to 16 bits).
	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint16_t>(ch);

		return result + 1;
	}

	// Writes the code point as a pair: 0xD800 + upper bits, then 0xDC00 + lower 10 bits of (ch - 0x10000).
	static value_type high(value_type result, uint32_t ch)
	{
		const uint32_t shifted = static_cast<uint32_t>(ch - 0x10000);

		result[0] = static_cast<uint16_t>(0xD800 + (shifted >> 10));
		result[1] = static_cast<uint16_t>(0xDC00 + (shifted & 0x3ff));

		return result + 2;
	}

	// Dispatches to low() or high() depending on the code point value.
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000)
			return low(result, ch);

		return high(result, ch);
	}
};
| 1573 | |
// Traits that count UTF-32 units: every code point takes exactly one.
struct utf32_counter
{
	typedef size_t value_type;

	static value_type low(value_type result, uint32_t)
	{
		return 1 + result;
	}

	static value_type high(value_type result, uint32_t)
	{
		return 1 + result;
	}
};
| 1588 | |
// Traits that store code points as UTF-32 units.
// All three entry points store the code point unchanged and advance by one unit.
struct utf32_writer
{
	typedef uint32_t* value_type;

	static value_type low(value_type result, uint32_t ch)
	{
		return any(result, ch);
	}

	static value_type high(value_type result, uint32_t ch)
	{
		return any(result, ch);
	}

	static value_type any(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}
};
| 1614 | |
// Traits that write code points as single Latin-1 bytes; anything above 255 becomes '?'.
struct latin1_writer
{
	typedef uint8_t* value_type;

	static value_type low(value_type result, uint32_t ch)
	{
		uint8_t out = '?';

		if (ch <= 255)
			out = static_cast<uint8_t>(ch);

		result[0] = out;

		return result + 1;
	}

	// Code points routed here are never representable, so the replacement is written unconditionally.
	static value_type high(value_type result, uint32_t)
	{
		result[0] = '?';

		return result + 1;
	}
};
| 1635 | |
// Decodes a UTF-8 byte range and feeds each code point to Traits::low (below U+10000, by lead byte class)
// or Traits::high (four-byte sequences). Bytes that do not start a complete sequence are skipped one at a time.
struct utf8_decoder
{
	typedef uint8_t type;

	// True for a byte of the form 10xxxxxx.
	static inline bool is_continuation(uint8_t byte)
	{
		return (byte & 0xc0) == 0x80;
	}

	// data/size: input bytes; result: initial accumulator; the last argument only selects Traits.
	// Returns the accumulator after all input has been consumed.
	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		const uint8_t utf8_byte_mask = 0x3f;

		while (size)
		{
			uint8_t lead = *data;

			// 0xxxxxxx -> U+0000..U+007F
			if (lead < 0x80)
			{
				result = Traits::low(result, lead);
				data += 1;
				size -= 1;

				// once the pointer is 4-byte aligned, consume whole words that contain only ascii
				if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
				{
					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
					while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
					{
						for (int i = 0; i < 4; ++i)
							result = Traits::low(result, data[i]);

						data += 4;
						size -= 4;
					}
				}

				continue;
			}

			// 110xxxxx -> U+0080..U+07FF
			if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && is_continuation(data[1]))
			{
				result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
				data += 2;
				size -= 2;
				continue;
			}

			// 1110xxxx -> U+0800-U+FFFF
			if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && is_continuation(data[1]) && is_continuation(data[2]))
			{
				result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
				data += 3;
				size -= 3;
				continue;
			}

			// 11110xxx -> U+10000..U+10FFFF
			if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && is_continuation(data[1]) && is_continuation(data[2]) && is_continuation(data[3]))
			{
				result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
				data += 4;
				size -= 4;
				continue;
			}

			// 10xxxxxx or 11111xxx, or a truncated/malformed sequence -> skip one byte
			data += 1;
			size -= 1;
		}

		return result;
	}
};
| 1702 | |
| 1703 template <typename opt_swap> struct utf16_decoder | |
| 1704 { | |
| 1705 typedef uint16_t type; | |
| 1706 | |
| 1707 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) | |
| 1708 { | |
| 1709 while (size) | |
| 1710 { | |
| 1711 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; | |
| 1712 | |
| 1713 // U+0000..U+D7FF | |
| 1714 if (lead < 0xD800) | |
| 1715 { | |
| 1716 result = Traits::low(result, lead); | |
| 1717 data += 1; | |
| 1718 size -= 1; | |
| 1719 } | |
| 1720 // U+E000..U+FFFF | |
| 1721 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) | |
| 1722 { | |
| 1723 result = Traits::low(result, lead); | |
| 1724 data += 1; | |
| 1725 size -= 1; | |
| 1726 } | |
| 1727 // surrogate pair lead | |
| 1728 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) | |
| 1729 { | |
| 1730 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; | |
| 1731 | |
| 1732 if (static_cast<unsigned int>(next - 0xDC00) < 0x400) | |
| 1733 { | |
| 1734 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); | |
| 1735 data += 2; | |
| 1736 size -= 2; | |
| 1737 } | |
| 1738 else | |
| 1739 { | |
| 1740 data += 1; | |
| 1741 size -= 1; | |
| 1742 } | |
| 1743 } | |
| 1744 else | |
| 1745 { | |
| 1746 data += 1; | |
| 1747 size -= 1; | |
| 1748 } | |
| 1749 } | |
| 1750 | |
| 1751 return result; | |
| 1752 } | |
| 1753 }; | |
| 1754 | |
| 1755 template <typename opt_swap> struct utf32_decoder | |
| 1756 { | |
| 1757 typedef uint32_t type; | |
| 1758 | |
| 1759 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) | |
| 1760 { | |
| 1761 while (size) | |
| 1762 { | |
| 1763 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; | |
| 1764 | |
| 1765 // U+0000..U+FFFF | |
| 1766 if (lead < 0x10000) | |
| 1767 { | |
| 1768 result = Traits::low(result, lead); | |
| 1769 data += 1; | |
| 1770 size -= 1; | |
| 1771 } | |
| 1772 // U+10000..U+10FFFF | |
| 1773 else | |
| 1774 { | |
| 1775 result = Traits::high(result, lead); | |
| 1776 data += 1; | |
| 1777 size -= 1; | |
| 1778 } | |
| 1779 } | |
| 1780 | |
| 1781 return result; | |
| 1782 } | |
| 1783 }; | |
| 1784 | |
// Feeds every input byte, unchanged, to Traits::low.
struct latin1_decoder
{
	typedef uint8_t type;

	// data/size: input bytes; result: initial accumulator; the last argument only selects Traits.
	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		for (; size; ++data, --size)
			result = Traits::low(result, *data);

		return result;
	}
};
| 1801 | |
| 1802 template <size_t size> struct wchar_selector; | |
| 1803 | |
| 1804 template <> struct wchar_selector<2> | |
| 1805 { | |
| 1806 typedef uint16_t type; | |
| 1807 typedef utf16_counter counter; | |
| 1808 typedef utf16_writer writer; | |
| 1809 typedef utf16_decoder<opt_false> decoder; | |
| 1810 }; | |
| 1811 | |
| 1812 template <> struct wchar_selector<4> | |
| 1813 { | |
| 1814 typedef uint32_t type; | |
| 1815 typedef utf32_counter counter; | |
| 1816 typedef utf32_writer writer; | |
| 1817 typedef utf32_decoder<opt_false> decoder; | |
| 1818 }; | |
| 1819 | |
| 1820 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter; | |
| 1821 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; | |
| 1822 | |
| 1823 struct wchar_decoder | |
| 1824 { | |
| 1825 typedef wchar_t type; | |
| 1826 | |
| 1827 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) | |
| 1828 { | |
| 1829 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder; | |
| 1830 | |
| 1831 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); | |
| 1832 } | |
| 1833 }; | |
| 1834 | |
| 1835 #ifdef PUGIXML_WCHAR_MODE | |
| 1836 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) | |
| 1837 { | |
| 1838 for (size_t i = 0; i < length; ++i) | |
| 1839 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); | |
| 1840 } | |
| 1841 #endif | |
| 1842 PUGI__NS_END | |
| 1843 | |
| 1844 PUGI__NS_BEGIN | |
// Bit flags stored per character in chartype_table.
enum chartype_t
{
	ct_parse_pcdata = 1 << 0,	// \0, &, \r, <
	ct_parse_attr = 1 << 1,		// \0, &, \r, ', "
	ct_parse_attr_ws = 1 << 2,	// \0, &, \r, ', ", \n, tab
	ct_space = 1 << 3,			// \r, \n, space, tab
	ct_parse_cdata = 1 << 4,	// \0, ], >, \r
	ct_parse_comment = 1 << 5,	// \0, -, >, \r
	ct_symbol = 1 << 6,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
	ct_start_symbol = 1 << 7	// Any symbol > 127, a-z, A-Z, _, :
};
| 1856 | |
// Per-character chartype_t flag combinations, indexed by character code (0..255).
// Every entry from 128 upwards is ct_symbol | ct_start_symbol (192).
static const unsigned char chartype_table[256] =
{
	// control characters: \0 at 0, tab at 9, \n at 10, \r at 13
	55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
	0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
	// space, punctuation, digits
	8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
	64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
	// upper-case letters
	0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
	// lower-case letters
	0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   0,   0,   0,   // 112-127

	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
};
| 1877 | |
// Bit flags stored per character in chartypex_table.
enum chartypex_t
{
	ctx_special_pcdata = 1 << 0,	// Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
	ctx_special_attr = 1 << 1,		// Any symbol >= 0 and < 32, &, <, ", '
	ctx_start_symbol = 1 << 2,		// Any symbol > 127, a-z, A-Z, _
	ctx_digit = 1 << 3,				// 0-9
	ctx_symbol = 1 << 4				// Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
| 1886 | |
// Per-character extended flags: entry i holds the OR of the chartypex_t bits that apply to character code i.
static const unsigned char chartypex_table[256] =
{
	// 0-15: control characters = 3 (special_pcdata, special_attr); tab, \n and \r = 2 (special_attr only)
	3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  3,  3,  2,  3,  3,
	// 16-31: control characters = 3
	3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
	// 32-47: " and ' = 2, & = 3, - and . = 16 (symbol)
	0,  0,  2,  0,  0,  0,  3,  2,  0,  0,  0,  0,  0,  16, 16, 0,
	// 48-63: digits = 24 (digit, symbol), < = 3, > = 1
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0,  0,  3,  0,  1,  0,

	// 64-79: @ = 0, A-O = 20 (start_symbol, symbol)
	0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	// 80-95: P-Z = 20, _ = 20
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  20,
	// 96-111: ` = 0, a-o = 20
	0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	// 112-127: p-z = 20, remaining entries have no flags
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  0,

	// 128-255: every entry is 20 (start_symbol, symbol)
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
};
| 1908 | |
// Character classification helpers.
// PUGI__IS_CHARTYPE_IMPL(c, ct, table) evaluates to a non-zero value when the table entry selected by
// character c has at least one of the bits in ct set, and to zero otherwise.
#ifdef PUGIXML_WCHAR_MODE
	// wide characters: codes below 128 index the table directly, every other code uses the entry at index 128
#	define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned int>(c) < 128 ? static_cast<unsigned int>(c) : 128] & (ct))
#else
	// narrow characters: the character is converted to unsigned char and used as the table index
#	define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif

// Classification against chartype_table (ct_* flags) and chartypex_table (ctx_* flags) respectively
#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
| 1917 | |
| 1918 PUGI__FN bool is_little_endian() | |
| 1919 { | |
| 1920 unsigned int ui = 1; | |
| 1921 | |
| 1922 return *reinterpret_cast<unsigned char*>(&ui) == 1; | |
| 1923 } | |
| 1924 | |
| 1925 PUGI__FN xml_encoding get_wchar_encoding() | |
| 1926 { | |
| 1927 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); | |
| 1928 | |
| 1929 if (sizeof(wchar_t) == 2) | |
| 1930 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
| 1931 else | |
| 1932 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
| 1933 } | |
| 1934 | |
| 1935 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length) | |
| 1936 { | |
| 1937 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; } | |
| 1938 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; } | |
| 1939 | |
| 1940 // check if we have a non-empty XML declaration | |
| 1941 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space))) | |
| 1942 return false; | |
| 1943 | |
| 1944 // scan XML declaration until the encoding field | |
| 1945 for (size_t i = 6; i + 1 < size; ++i) | |
| 1946 { | |
| 1947 // declaration can not contain ? in quoted values | |
| 1948 if (data[i] == '?') | |
| 1949 return false; | |
| 1950 | |
| 1951 if (data[i] == 'e' && data[i + 1] == 'n') | |
| 1952 { | |
| 1953 size_t offset = i; | |
| 1954 | |
| 1955 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed | |
| 1956 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o'); | |
| 1957 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g'); | |
| 1958 | |
| 1959 // S? = S? | |
| 1960 PUGI__SCANCHARTYPE(ct_space); | |
| 1961 PUGI__SCANCHAR('='); | |
| 1962 PUGI__SCANCHARTYPE(ct_space); | |
| 1963 | |
| 1964 // the only two valid delimiters are ' and " | |
| 1965 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\''; | |
| 1966 | |
| 1967 PUGI__SCANCHAR(delimiter); | |
| 1968 | |
| 1969 size_t start = offset; | |
| 1970 | |
| 1971 out_encoding = data + offset; | |
| 1972 | |
| 1973 PUGI__SCANCHARTYPE(ct_symbol); | |
| 1974 | |
| 1975 out_length = offset - start; | |
| 1976 | |
| 1977 PUGI__SCANCHAR(delimiter); | |
| 1978 | |
| 1979 return true; | |
| 1980 } | |
| 1981 } | |
| 1982 | |
| 1983 return false; | |
| 1984 | |
| 1985 #undef PUGI__SCANCHAR | |
| 1986 #undef PUGI__SCANCHARTYPE | |
| 1987 } | |
| 1988 | |
| 1989 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) | |
| 1990 { | |
| 1991 // skip encoding autodetection if input buffer is too small | |
| 1992 if (size < 4) return encoding_utf8; | |
| 1993 | |
| 1994 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; | |
| 1995 | |
| 1996 // look for BOM in first few bytes | |
| 1997 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; | |
| 1998 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; | |
| 1999 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; | |
| 2000 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; | |
| 2001 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; | |
| 2002 | |
| 2003 // look for <, <? or <?xm in various encodings | |
| 2004 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be; | |
| 2005 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; | |
| 2006 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; | |
| 2007 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; | |
| 2008 | |
| 2009 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) | |
| 2010 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; | |
| 2011 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; | |
| 2012 | |
| 2013 // no known BOM detected; parse declaration | |
| 2014 const uint8_t* enc = 0; | |
| 2015 size_t enc_length = 0; | |
| 2016 | |
| 2017 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length)) | |
| 2018 { | |
| 2019 // iso-8859-1 (case-insensitive) | |
| 2020 if (enc_length == 10 | |
| 2021 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o' | |
| 2022 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9' | |
| 2023 && enc[8] == '-' && enc[9] == '1') | |
| 2024 return encoding_latin1; | |
| 2025 | |
| 2026 // latin1 (case-insensitive) | |
| 2027 if (enc_length == 6 | |
| 2028 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't' | |
| 2029 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n' | |
| 2030 && enc[5] == '1') | |
| 2031 return encoding_latin1; | |
| 2032 } | |
| 2033 | |
| 2034 return encoding_utf8; | |
| 2035 } | |
| 2036 | |
| 2037 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) | |
| 2038 { | |
| 2039 // replace wchar encoding with utf implementation | |
| 2040 if (encoding == encoding_wchar) return get_wchar_encoding(); | |
| 2041 | |
| 2042 // replace utf16 encoding with utf16 with specific endianness | |
| 2043 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
| 2044 | |
| 2045 // replace utf32 encoding with utf32 with specific endianness | |
| 2046 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
| 2047 | |
| 2048 // only do autodetection if no explicit encoding is requested | |
| 2049 if (encoding != encoding_auto) return encoding; | |
| 2050 | |
| 2051 // try to guess encoding (based on XML specification, Appendix F.1) | |
| 2052 const uint8_t* data = static_cast<const uint8_t*>(contents); | |
| 2053 | |
| 2054 return guess_buffer_encoding(data, size); | |
| 2055 } | |
| 2056 | |
| 2057 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | |
| 2058 { | |
| 2059 size_t length = size / sizeof(char_t); | |
| 2060 | |
| 2061 if (is_mutable) | |
| 2062 { | |
| 2063 out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); | |
| 2064 out_length = length; | |
| 2065 } | |
| 2066 else | |
| 2067 { | |
| 2068 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
| 2069 if (!buffer) return false; | |
| 2070 | |
| 2071 if (contents) | |
| 2072 memcpy(buffer, contents, length * sizeof(char_t)); | |
| 2073 else | |
| 2074 assert(length == 0); | |
| 2075 | |
| 2076 buffer[length] = 0; | |
| 2077 | |
| 2078 out_buffer = buffer; | |
| 2079 out_length = length + 1; | |
| 2080 } | |
| 2081 | |
| 2082 return true; | |
| 2083 } | |
| 2084 | |
| 2085 #ifdef PUGIXML_WCHAR_MODE | |
| 2086 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) | |
| 2087 { | |
| 2088 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || | |
| 2089 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); | |
| 2090 } | |
| 2091 | |
| 2092 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | |
| 2093 { | |
| 2094 const char_t* data = static_cast<const char_t*>(contents); | |
| 2095 size_t length = size / sizeof(char_t); | |
| 2096 | |
| 2097 if (is_mutable) | |
| 2098 { | |
| 2099 char_t* buffer = const_cast<char_t*>(data); | |
| 2100 | |
| 2101 convert_wchar_endian_swap(buffer, data, length); | |
| 2102 | |
| 2103 out_buffer = buffer; | |
| 2104 out_length = length; | |
| 2105 } | |
| 2106 else | |
| 2107 { | |
| 2108 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
| 2109 if (!buffer) return false; | |
| 2110 | |
| 2111 convert_wchar_endian_swap(buffer, data, length); | |
| 2112 buffer[length] = 0; | |
| 2113 | |
| 2114 out_buffer = buffer; | |
| 2115 out_length = length + 1; | |
| 2116 } | |
| 2117 | |
| 2118 return true; | |
| 2119 } | |
| 2120 | |
| 2121 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) | |
| 2122 { | |
| 2123 const typename D::type* data = static_cast<const typename D::type*>(contents); | |
| 2124 size_t data_length = size / sizeof(typename D::type); | |
| 2125 | |
| 2126 // first pass: get length in wchar_t units | |
| 2127 size_t length = D::process(data, data_length, 0, wchar_counter()); | |
| 2128 | |
| 2129 // allocate buffer of suitable length | |
| 2130 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
| 2131 if (!buffer) return false; | |
| 2132 | |
| 2133 // second pass: convert utf16 input to wchar_t | |
| 2134 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer); | |
| 2135 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); | |
| 2136 | |
| 2137 assert(oend == obegin + length); | |
| 2138 *oend = 0; | |
| 2139 | |
| 2140 out_buffer = buffer; | |
| 2141 out_length = length + 1; | |
| 2142 | |
| 2143 return true; | |
| 2144 } | |
| 2145 | |
| 2146 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) | |
| 2147 { | |
| 2148 // get native encoding | |
| 2149 xml_encoding wchar_encoding = get_wchar_encoding(); | |
| 2150 | |
| 2151 // fast path: no conversion required | |
| 2152 if (encoding == wchar_encoding) | |
| 2153 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | |
| 2154 | |
| 2155 // only endian-swapping is required | |
| 2156 if (need_endian_swap_utf(encoding, wchar_encoding)) | |
| 2157 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); | |
| 2158 | |
| 2159 // source encoding is utf8 | |
| 2160 if (encoding == encoding_utf8) | |
| 2161 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); | |
| 2162 | |
| 2163 // source encoding is utf16 | |
| 2164 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
| 2165 { | |
| 2166 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
| 2167 | |
| 2168 return (native_encoding == encoding) ? | |
| 2169 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : | |
| 2170 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); | |
| 2171 } | |
| 2172 | |
| 2173 // source encoding is utf32 | |
| 2174 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
| 2175 { | |
| 2176 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
| 2177 | |
| 2178 return (native_encoding == encoding) ? | |
| 2179 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : | |
| 2180 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); | |
| 2181 } | |
| 2182 | |
| 2183 // source encoding is latin1 | |
| 2184 if (encoding == encoding_latin1) | |
| 2185 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); | |
| 2186 | |
| 2187 assert(false && "Invalid encoding"); // unreachable | |
| 2188 return false; | |
| 2189 } | |
| 2190 #else | |
| 2191 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) | |
| 2192 { | |
| 2193 const typename D::type* data = static_cast<const typename D::type*>(contents); | |
| 2194 size_t data_length = size / sizeof(typename D::type); | |
| 2195 | |
| 2196 // first pass: get length in utf8 units | |
| 2197 size_t length = D::process(data, data_length, 0, utf8_counter()); | |
| 2198 | |
| 2199 // allocate buffer of suitable length | |
| 2200 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
| 2201 if (!buffer) return false; | |
| 2202 | |
| 2203 // second pass: convert utf16 input to utf8 | |
| 2204 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); | |
| 2205 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); | |
| 2206 | |
| 2207 assert(oend == obegin + length); | |
| 2208 *oend = 0; | |
| 2209 | |
| 2210 out_buffer = buffer; | |
| 2211 out_length = length + 1; | |
| 2212 | |
| 2213 return true; | |
| 2214 } | |
| 2215 | |
| 2216 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) | |
| 2217 { | |
| 2218 for (size_t i = 0; i < size; ++i) | |
| 2219 if (data[i] > 127) | |
| 2220 return i; | |
| 2221 | |
| 2222 return size; | |
| 2223 } | |
| 2224 | |
| 2225 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | |
| 2226 { | |
| 2227 const uint8_t* data = static_cast<const uint8_t*>(contents); | |
| 2228 size_t data_length = size; | |
| 2229 | |
| 2230 // get size of prefix that does not need utf8 conversion | |
| 2231 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); | |
| 2232 assert(prefix_length <= data_length); | |
| 2233 | |
| 2234 const uint8_t* postfix = data + prefix_length; | |
| 2235 size_t postfix_length = data_length - prefix_length; | |
| 2236 | |
| 2237 // if no conversion is needed, just return the original buffer | |
| 2238 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | |
| 2239 | |
| 2240 // first pass: get length in utf8 units | |
| 2241 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); | |
| 2242 | |
| 2243 // allocate buffer of suitable length | |
| 2244 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
| 2245 if (!buffer) return false; | |
| 2246 | |
| 2247 // second pass: convert latin1 input to utf8 | |
| 2248 memcpy(buffer, data, prefix_length); | |
| 2249 | |
| 2250 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); | |
| 2251 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); | |
| 2252 | |
| 2253 assert(oend == obegin + length); | |
| 2254 *oend = 0; | |
| 2255 | |
| 2256 out_buffer = buffer; | |
| 2257 out_length = length + 1; | |
| 2258 | |
| 2259 return true; | |
| 2260 } | |
| 2261 | |
| 2262 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) | |
| 2263 { | |
| 2264 // fast path: no conversion required | |
| 2265 if (encoding == encoding_utf8) | |
| 2266 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | |
| 2267 | |
| 2268 // source encoding is utf16 | |
| 2269 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
| 2270 { | |
| 2271 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
| 2272 | |
| 2273 return (native_encoding == encoding) ? | |
| 2274 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : | |
| 2275 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); | |
| 2276 } | |
| 2277 | |
| 2278 // source encoding is utf32 | |
| 2279 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
| 2280 { | |
| 2281 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
| 2282 | |
| 2283 return (native_encoding == encoding) ? | |
| 2284 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : | |
| 2285 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); | |
| 2286 } | |
| 2287 | |
| 2288 // source encoding is latin1 | |
| 2289 if (encoding == encoding_latin1) | |
| 2290 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); | |
| 2291 | |
| 2292 assert(false && "Invalid encoding"); // unreachable | |
| 2293 return false; | |
| 2294 } | |
| 2295 #endif | |
| 2296 | |
| 2297 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) | |
| 2298 { | |
| 2299 // get length in utf8 characters | |
| 2300 return wchar_decoder::process(str, length, 0, utf8_counter()); | |
| 2301 } | |
| 2302 | |
| 2303 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) | |
| 2304 { | |
| 2305 // convert to utf8 | |
| 2306 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); | |
| 2307 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); | |
| 2308 | |
| 2309 assert(begin + size == end); | |
| 2310 (void)!end; | |
| 2311 (void)!size; | |
| 2312 } | |
| 2313 | |
| 2314 #ifndef PUGIXML_NO_STL | |
| 2315 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) | |
| 2316 { | |
| 2317 // first pass: get length in utf8 characters | |
| 2318 size_t size = as_utf8_begin(str, length); | |
| 2319 | |
| 2320 // allocate resulting string | |
| 2321 std::string result; | |
| 2322 result.resize(size); | |
| 2323 | |
| 2324 // second pass: convert to utf8 | |
| 2325 if (size > 0) as_utf8_end(&result[0], size, str, length); | |
| 2326 | |
| 2327 return result; | |
| 2328 } | |
| 2329 | |
| 2330 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) | |
| 2331 { | |
| 2332 const uint8_t* data = reinterpret_cast<const uint8_t*>(str); | |
| 2333 | |
| 2334 // first pass: get length in wchar_t units | |
| 2335 size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); | |
| 2336 | |
| 2337 // allocate resulting string | |
| 2338 std::basic_string<wchar_t> result; | |
| 2339 result.resize(length); | |
| 2340 | |
| 2341 // second pass: convert to wchar_t | |
| 2342 if (length > 0) | |
| 2343 { | |
| 2344 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); | |
| 2345 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); | |
| 2346 | |
| 2347 assert(begin + length == end); | |
| 2348 (void)!end; | |
| 2349 } | |
| 2350 | |
| 2351 return result; | |
| 2352 } | |
| 2353 #endif | |
| 2354 | |
| 2355 template <typename Header> | |
| 2356 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) | |
| 2357 { | |
| 2358 // never reuse shared memory | |
| 2359 if (header & xml_memory_page_contents_shared_mask) return false; | |
| 2360 | |
| 2361 size_t target_length = strlength(target); | |
| 2362 | |
| 2363 // always reuse document buffer memory if possible | |
| 2364 if ((header & header_mask) == 0) return target_length >= length; | |
| 2365 | |
| 2366 // reuse heap memory if waste is not too great | |
| 2367 const size_t reuse_threshold = 32; | |
| 2368 | |
| 2369 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); | |
| 2370 } | |
| 2371 | |
| 2372 template <typename String, typename Header> | |
| 2373 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length) | |
| 2374 { | |
| 2375 if (source_length == 0) | |
| 2376 { | |
| 2377 // empty string and null pointer are equivalent, so just deallocate old memory | |
| 2378 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; | |
| 2379 | |
| 2380 if (header & header_mask) alloc->deallocate_string(dest); | |
| 2381 | |
| 2382 // mark the string as not allocated | |
| 2383 dest = 0; | |
| 2384 header &= ~header_mask; | |
| 2385 | |
| 2386 return true; | |
| 2387 } | |
| 2388 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) | |
| 2389 { | |
| 2390 // we can reuse old buffer, so just copy the new data (including zero terminator) | |
| 2391 memcpy(dest, source, source_length * sizeof(char_t)); | |
| 2392 dest[source_length] = 0; | |
| 2393 | |
| 2394 return true; | |
| 2395 } | |
| 2396 else | |
| 2397 { | |
| 2398 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; | |
| 2399 | |
| 2400 if (!alloc->reserve()) return false; | |
| 2401 | |
| 2402 // allocate new buffer | |
| 2403 char_t* buf = alloc->allocate_string(source_length + 1); | |
| 2404 if (!buf) return false; | |
| 2405 | |
| 2406 // copy the string (including zero terminator) | |
| 2407 memcpy(buf, source, source_length * sizeof(char_t)); | |
| 2408 buf[source_length] = 0; | |
| 2409 | |
| 2410 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) | |
| 2411 if (header & header_mask) alloc->deallocate_string(dest); | |
| 2412 | |
| 2413 // the string is now allocated, so set the flag | |
| 2414 dest = buf; | |
| 2415 header |= header_mask; | |
| 2416 | |
| 2417 return true; | |
| 2418 } | |
| 2419 } | |
| 2420 | |
| 2421 struct gap | |
| 2422 { | |
| 2423 char_t* end; | |
| 2424 size_t size; | |
| 2425 | |
| 2426 gap(): end(0), size(0) | |
| 2427 { | |
| 2428 } | |
| 2429 | |
| 2430 // Push new gap, move s count bytes further (skipping the gap). | |
| 2431 // Collapse previous gap. | |
| 2432 void push(char_t*& s, size_t count) | |
| 2433 { | |
| 2434 if (end) // there was a gap already; collapse it | |
| 2435 { | |
| 2436 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) | |
| 2437 assert(s >= end); | |
| 2438 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); | |
| 2439 } | |
| 2440 | |
| 2441 s += count; // end of current gap | |
| 2442 | |
| 2443 // "merge" two gaps | |
| 2444 end = s; | |
| 2445 size += count; | |
| 2446 } | |
| 2447 | |
| 2448 // Collapse all gaps, return past-the-end pointer | |
| 2449 char_t* flush(char_t* s) | |
| 2450 { | |
| 2451 if (end) | |
| 2452 { | |
| 2453 // Move [old_gap_end, current_pos) to [old_gap_start, ...) | |
| 2454 assert(s >= end); | |
| 2455 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); | |
| 2456 | |
| 2457 return s - size; | |
| 2458 } | |
| 2459 else return s; | |
| 2460 } | |
| 2461 }; | |
| 2462 | |
| 2463 PUGI__FN char_t* strconv_escape(char_t* s, gap& g) | |
| 2464 { | |
| 2465 char_t* stre = s + 1; | |
| 2466 | |
| 2467 switch (*stre) | |
| 2468 { | |
| 2469 case '#': // &#... | |
| 2470 { | |
| 2471 unsigned int ucsc = 0; | |
| 2472 | |
| 2473 if (stre[1] == 'x') // &#x... (hex code) | |
| 2474 { | |
| 2475 stre += 2; | |
| 2476 | |
| 2477 char_t ch = *stre; | |
| 2478 | |
| 2479 if (ch == ';') return stre; | |
| 2480 | |
| 2481 for (;;) | |
| 2482 { | |
| 2483 if (static_cast<unsigned int>(ch - '0') <= 9) | |
| 2484 ucsc = 16 * ucsc + (ch - '0'); | |
| 2485 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) | |
| 2486 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); | |
| 2487 else if (ch == ';') | |
| 2488 break; | |
| 2489 else // cancel | |
| 2490 return stre; | |
| 2491 | |
| 2492 ch = *++stre; | |
| 2493 } | |
| 2494 | |
| 2495 ++stre; | |
| 2496 } | |
| 2497 else // &#... (dec code) | |
| 2498 { | |
| 2499 char_t ch = *++stre; | |
| 2500 | |
| 2501 if (ch == ';') return stre; | |
| 2502 | |
| 2503 for (;;) | |
| 2504 { | |
| 2505 if (static_cast<unsigned int>(ch - '0') <= 9) | |
| 2506 ucsc = 10 * ucsc + (ch - '0'); | |
| 2507 else if (ch == ';') | |
| 2508 break; | |
| 2509 else // cancel | |
| 2510 return stre; | |
| 2511 | |
| 2512 ch = *++stre; | |
| 2513 } | |
| 2514 | |
| 2515 ++stre; | |
| 2516 } | |
| 2517 | |
| 2518 #ifdef PUGIXML_WCHAR_MODE | |
| 2519 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc)); | |
| 2520 #else | |
| 2521 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc)); | |
| 2522 #endif | |
| 2523 | |
| 2524 g.push(s, stre - s); | |
| 2525 return stre; | |
| 2526 } | |
| 2527 | |
| 2528 case 'a': // &a | |
| 2529 { | |
| 2530 ++stre; | |
| 2531 | |
| 2532 if (*stre == 'm') // &am | |
| 2533 { | |
| 2534 if (*++stre == 'p' && *++stre == ';') // & | |
| 2535 { | |
| 2536 *s++ = '&'; | |
| 2537 ++stre; | |
| 2538 | |
| 2539 g.push(s, stre - s); | |
| 2540 return stre; | |
| 2541 } | |
| 2542 } | |
| 2543 else if (*stre == 'p') // &ap | |
| 2544 { | |
| 2545 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' | |
| 2546 { | |
| 2547 *s++ = '\''; | |
| 2548 ++stre; | |
| 2549 | |
| 2550 g.push(s, stre - s); | |
| 2551 return stre; | |
| 2552 } | |
| 2553 } | |
| 2554 break; | |
| 2555 } | |
| 2556 | |
| 2557 case 'g': // &g | |
| 2558 { | |
| 2559 if (*++stre == 't' && *++stre == ';') // > | |
| 2560 { | |
| 2561 *s++ = '>'; | |
| 2562 ++stre; | |
| 2563 | |
| 2564 g.push(s, stre - s); | |
| 2565 return stre; | |
| 2566 } | |
| 2567 break; | |
| 2568 } | |
| 2569 | |
| 2570 case 'l': // &l | |
| 2571 { | |
| 2572 if (*++stre == 't' && *++stre == ';') // < | |
| 2573 { | |
| 2574 *s++ = '<'; | |
| 2575 ++stre; | |
| 2576 | |
| 2577 g.push(s, stre - s); | |
| 2578 return stre; | |
| 2579 } | |
| 2580 break; | |
| 2581 } | |
| 2582 | |
| 2583 case 'q': // &q | |
| 2584 { | |
| 2585 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " | |
| 2586 { | |
| 2587 *s++ = '"'; | |
| 2588 ++stre; | |
| 2589 | |
| 2590 g.push(s, stre - s); | |
| 2591 return stre; | |
| 2592 } | |
| 2593 break; | |
| 2594 } | |
| 2595 | |
| 2596 default: | |
| 2597 break; | |
| 2598 } | |
| 2599 | |
| 2600 return stre; | |
| 2601 } | |
| 2602 | |
// Parser utilities
// These macros expand to code that refers to names from the scope they are used in
// (s, endch, optmsk, cursor, alloc, ch, error_offset, error_status).

// true if c equals e, or if c is zero and endch equals e
#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
// advance s past characters flagged ct_space
#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
// non-zero if any bit of OPT is set in optmsk
#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
// append a node of TYPE under cursor and make it the new cursor; fails with status_out_of_memory
#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
// move cursor back to its parent
#define PUGI__POPNODE()             { cursor = cursor->parent; }
// advance s until X holds or the terminating zero is reached
#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
// advance s for as long as X holds
#define PUGI__SCANWHILE(X)          { while (X) ++s; }
// same as PUGI__SCANWHILE, but X is written in terms of ss (the character under test)
// and four characters are examined per loop iteration
#define PUGI__SCANWHILE_UNROLL(X) \
	{ \
		for (;;) \
		{ \
			char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } \
			ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } \
			ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } \
			ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } \
			s += 4; \
		} \
	}
// remember the character at s in ch, overwrite it with a terminator and step past it
#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
// record the error position and status, then return a null pointer from the enclosing function
#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
// fail with err at position m if s has reached the terminating zero
#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
| 2615 | |
| 2616 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) | |
| 2617 { | |
| 2618 gap g; | |
| 2619 | |
| 2620 while (true) | |
| 2621 { | |
| 2622 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); | |
| 2623 | |
| 2624 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | |
| 2625 { | |
| 2626 *s++ = '\n'; // replace first one with 0x0a | |
| 2627 | |
| 2628 if (*s == '\n') g.push(s, 1); | |
| 2629 } | |
| 2630 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here | |
| 2631 { | |
| 2632 *g.flush(s) = 0; | |
| 2633 | |
| 2634 return s + (s[2] == '>' ? 3 : 2); | |
| 2635 } | |
| 2636 else if (*s == 0) | |
| 2637 { | |
| 2638 return 0; | |
| 2639 } | |
| 2640 else ++s; | |
| 2641 } | |
| 2642 } | |
| 2643 | |
| 2644 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) | |
| 2645 { | |
| 2646 gap g; | |
| 2647 | |
| 2648 while (true) | |
| 2649 { | |
| 2650 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); | |
| 2651 | |
| 2652 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | |
| 2653 { | |
| 2654 *s++ = '\n'; // replace first one with 0x0a | |
| 2655 | |
| 2656 if (*s == '\n') g.push(s, 1); | |
| 2657 } | |
| 2658 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here | |
| 2659 { | |
| 2660 *g.flush(s) = 0; | |
| 2661 | |
| 2662 return s + 1; | |
| 2663 } | |
| 2664 else if (*s == 0) | |
| 2665 { | |
| 2666 return 0; | |
| 2667 } | |
| 2668 else ++s; | |
| 2669 } | |
| 2670 } | |
| 2671 | |
| 2672 typedef char_t* (*strconv_pcdata_t)(char_t*); | |
| 2673 | |
| 2674 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl | |
| 2675 { | |
| 2676 static char_t* parse(char_t* s) | |
| 2677 { | |
| 2678 gap g; | |
| 2679 | |
| 2680 char_t* begin = s; | |
| 2681 | |
| 2682 while (true) | |
| 2683 { | |
| 2684 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); | |
| 2685 | |
| 2686 if (*s == '<') // PCDATA ends here | |
| 2687 { | |
| 2688 char_t* end = g.flush(s); | |
| 2689 | |
| 2690 if (opt_trim::value) | |
| 2691 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) | |
| 2692 --end; | |
| 2693 | |
| 2694 *end = 0; | |
| 2695 | |
| 2696 return s + 1; | |
| 2697 } | |
| 2698 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | |
| 2699 { | |
| 2700 *s++ = '\n'; // replace first one with 0x0a | |
| 2701 | |
| 2702 if (*s == '\n') g.push(s, 1); | |
| 2703 } | |
| 2704 else if (opt_escape::value && *s == '&') | |
| 2705 { | |
| 2706 s = strconv_escape(s, g); | |
| 2707 } | |
| 2708 else if (*s == 0) | |
| 2709 { | |
| 2710 char_t* end = g.flush(s); | |
| 2711 | |
| 2712 if (opt_trim::value) | |
| 2713 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) | |
| 2714 --end; | |
| 2715 | |
| 2716 *end = 0; | |
| 2717 | |
| 2718 return s; | |
| 2719 } | |
| 2720 else ++s; | |
| 2721 } | |
| 2722 } | |
| 2723 }; | |
| 2724 | |
| 2725 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) | |
| 2726 { | |
| 2727 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); | |
| 2728 | |
| 2729 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above | |
| 2730 { | |
| 2731 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse; | |
| 2732 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse; | |
| 2733 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse; | |
| 2734 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse; | |
| 2735 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse; | |
| 2736 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse; | |
| 2737 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse; | |
| 2738 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse; | |
| 2739 default: assert(false); return 0; // unreachable | |
| 2740 } | |
| 2741 } | |
| 2742 | |
| 2743 typedef char_t* (*strconv_attribute_t)(char_t*, char_t); | |
| 2744 | |
| 2745 template <typename opt_escape> struct strconv_attribute_impl | |
| 2746 { | |
| 2747 static char_t* parse_wnorm(char_t* s, char_t end_quote) | |
| 2748 { | |
| 2749 gap g; | |
| 2750 | |
| 2751 // trim leading whitespaces | |
| 2752 if (PUGI__IS_CHARTYPE(*s, ct_space)) | |
| 2753 { | |
| 2754 char_t* str = s; | |
| 2755 | |
| 2756 do ++str; | |
| 2757 while (PUGI__IS_CHARTYPE(*str, ct_space)); | |
| 2758 | |
| 2759 g.push(s, str - s); | |
| 2760 } | |
| 2761 | |
| 2762 while (true) | |
| 2763 { | |
| 2764 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); | |
| 2765 | |
| 2766 if (*s == end_quote) | |
| 2767 { | |
| 2768 char_t* str = g.flush(s); | |
| 2769 | |
| 2770 do *str-- = 0; | |
| 2771 while (PUGI__IS_CHARTYPE(*str, ct_space)); | |
| 2772 | |
| 2773 return s + 1; | |
| 2774 } | |
| 2775 else if (PUGI__IS_CHARTYPE(*s, ct_space)) | |
| 2776 { | |
| 2777 *s++ = ' '; | |
| 2778 | |
| 2779 if (PUGI__IS_CHARTYPE(*s, ct_space)) | |
| 2780 { | |
| 2781 char_t* str = s + 1; | |
| 2782 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; | |
| 2783 | |
| 2784 g.push(s, str - s); | |
| 2785 } | |
| 2786 } | |
| 2787 else if (opt_escape::value && *s == '&') | |
| 2788 { | |
| 2789 s = strconv_escape(s, g); | |
| 2790 } | |
| 2791 else if (!*s) | |
| 2792 { | |
| 2793 return 0; | |
| 2794 } | |
| 2795 else ++s; | |
| 2796 } | |
| 2797 } | |
| 2798 | |
| 2799 static char_t* parse_wconv(char_t* s, char_t end_quote) | |
| 2800 { | |
| 2801 gap g; | |
| 2802 | |
| 2803 while (true) | |
| 2804 { | |
| 2805 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); | |
| 2806 | |
| 2807 if (*s == end_quote) | |
| 2808 { | |
| 2809 *g.flush(s) = 0; | |
| 2810 | |
| 2811 return s + 1; | |
| 2812 } | |
| 2813 else if (PUGI__IS_CHARTYPE(*s, ct_space)) | |
| 2814 { | |
| 2815 if (*s == '\r') | |
| 2816 { | |
| 2817 *s++ = ' '; | |
| 2818 | |
| 2819 if (*s == '\n') g.push(s, 1); | |
| 2820 } | |
| 2821 else *s++ = ' '; | |
| 2822 } | |
| 2823 else if (opt_escape::value && *s == '&') | |
| 2824 { | |
| 2825 s = strconv_escape(s, g); | |
| 2826 } | |
| 2827 else if (!*s) | |
| 2828 { | |
| 2829 return 0; | |
| 2830 } | |
| 2831 else ++s; | |
| 2832 } | |
| 2833 } | |
| 2834 | |
| 2835 static char_t* parse_eol(char_t* s, char_t end_quote) | |
| 2836 { | |
| 2837 gap g; | |
| 2838 | |
| 2839 while (true) | |
| 2840 { | |
| 2841 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); | |
| 2842 | |
| 2843 if (*s == end_quote) | |
| 2844 { | |
| 2845 *g.flush(s) = 0; | |
| 2846 | |
| 2847 return s + 1; | |
| 2848 } | |
| 2849 else if (*s == '\r') | |
| 2850 { | |
| 2851 *s++ = '\n'; | |
| 2852 | |
| 2853 if (*s == '\n') g.push(s, 1); | |
| 2854 } | |
| 2855 else if (opt_escape::value && *s == '&') | |
| 2856 { | |
| 2857 s = strconv_escape(s, g); | |
| 2858 } | |
| 2859 else if (!*s) | |
| 2860 { | |
| 2861 return 0; | |
| 2862 } | |
| 2863 else ++s; | |
| 2864 } | |
| 2865 } | |
| 2866 | |
| 2867 static char_t* parse_simple(char_t* s, char_t end_quote) | |
| 2868 { | |
| 2869 gap g; | |
| 2870 | |
| 2871 while (true) | |
| 2872 { | |
| 2873 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); | |
| 2874 | |
| 2875 if (*s == end_quote) | |
| 2876 { | |
| 2877 *g.flush(s) = 0; | |
| 2878 | |
| 2879 return s + 1; | |
| 2880 } | |
| 2881 else if (opt_escape::value && *s == '&') | |
| 2882 { | |
| 2883 s = strconv_escape(s, g); | |
| 2884 } | |
| 2885 else if (!*s) | |
| 2886 { | |
| 2887 return 0; | |
| 2888 } | |
| 2889 else ++s; | |
| 2890 } | |
| 2891 } | |
| 2892 }; | |
| 2893 | |
| 2894 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) | |
| 2895 { | |
| 2896 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); | |
| 2897 | |
| 2898 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above | |
| 2899 { | |
| 2900 case 0: return strconv_attribute_impl<opt_false>::parse_simple; | |
| 2901 case 1: return strconv_attribute_impl<opt_true>::parse_simple; | |
| 2902 case 2: return strconv_attribute_impl<opt_false>::parse_eol; | |
| 2903 case 3: return strconv_attribute_impl<opt_true>::parse_eol; | |
| 2904 case 4: return strconv_attribute_impl<opt_false>::parse_wconv; | |
| 2905 case 5: return strconv_attribute_impl<opt_true>::parse_wconv; | |
| 2906 case 6: return strconv_attribute_impl<opt_false>::parse_wconv; | |
| 2907 case 7: return strconv_attribute_impl<opt_true>::parse_wconv; | |
| 2908 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
| 2909 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
| 2910 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
| 2911 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
| 2912 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
| 2913 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
| 2914 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
| 2915 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
| 2916 default: assert(false); return 0; // unreachable | |
| 2917 } | |
| 2918 } | |
| 2919 | |
| 2920 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) | |
| 2921 { | |
| 2922 xml_parse_result result; | |
| 2923 result.status = status; | |
| 2924 result.offset = offset; | |
| 2925 | |
| 2926 return result; | |
| 2927 } | |
| 2928 | |
| 2929 struct xml_parser | |
| 2930 { | |
| 2931 xml_allocator* alloc; | |
| 2932 char_t* error_offset; | |
| 2933 xml_parse_status error_status; | |
| 2934 | |
// Stores the allocator the parser will use and starts with no recorded error
// (null error offset, status_ok).
xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
{
}
| 2938 | |
| 2939 // DOCTYPE consists of nested sections of the following possible types: | |
| 2940 // <!-- ... -->, <? ... ?>, "...", '...' | |
| 2941 // <![...]]> | |
| 2942 // <!...> | |
| 2943 // First group can not contain nested groups | |
| 2944 // Second group can contain nested groups of the same type | |
| 2945 // Third group can contain all other groups | |
| 2946 char_t* parse_doctype_primitive(char_t* s) | |
| 2947 { | |
| 2948 if (*s == '"' || *s == '\'') | |
| 2949 { | |
| 2950 // quoted string | |
| 2951 char_t ch = *s++; | |
| 2952 PUGI__SCANFOR(*s == ch); | |
| 2953 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); | |
| 2954 | |
| 2955 s++; | |
| 2956 } | |
| 2957 else if (s[0] == '<' && s[1] == '?') | |
| 2958 { | |
| 2959 // <? ... ?> | |
| 2960 s += 2; | |
| 2961 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype | |
| 2962 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); | |
| 2963 | |
| 2964 s += 2; | |
| 2965 } | |
| 2966 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') | |
| 2967 { | |
| 2968 s += 4; | |
| 2969 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype | |
| 2970 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); | |
| 2971 | |
| 2972 s += 3; | |
| 2973 } | |
| 2974 else PUGI__THROW_ERROR(status_bad_doctype, s); | |
| 2975 | |
| 2976 return s; | |
| 2977 } | |
| 2978 | |
| 2979 char_t* parse_doctype_ignore(char_t* s) | |
| 2980 { | |
| 2981 size_t depth = 0; | |
| 2982 | |
| 2983 assert(s[0] == '<' && s[1] == '!' && s[2] == '['); | |
| 2984 s += 3; | |
| 2985 | |
| 2986 while (*s) | |
| 2987 { | |
| 2988 if (s[0] == '<' && s[1] == '!' && s[2] == '[') | |
| 2989 { | |
| 2990 // nested ignore section | |
| 2991 s += 3; | |
| 2992 depth++; | |
| 2993 } | |
| 2994 else if (s[0] == ']' && s[1] == ']' && s[2] == '>') | |
| 2995 { | |
| 2996 // ignore section end | |
| 2997 s += 3; | |
| 2998 | |
| 2999 if (depth == 0) | |
| 3000 return s; | |
| 3001 | |
| 3002 depth--; | |
| 3003 } | |
| 3004 else s++; | |
| 3005 } | |
| 3006 | |
| 3007 PUGI__THROW_ERROR(status_bad_doctype, s); | |
| 3008 } | |
| 3009 | |
| 3010 char_t* parse_doctype_group(char_t* s, char_t endch) | |
| 3011 { | |
| 3012 size_t depth = 0; | |
| 3013 | |
| 3014 assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); | |
| 3015 s += 2; | |
| 3016 | |
| 3017 while (*s) | |
| 3018 { | |
| 3019 if (s[0] == '<' && s[1] == '!' && s[2] != '-') | |
| 3020 { | |
| 3021 if (s[2] == '[') | |
| 3022 { | |
| 3023 // ignore | |
| 3024 s = parse_doctype_ignore(s); | |
| 3025 if (!s) return s; | |
| 3026 } | |
| 3027 else | |
| 3028 { | |
| 3029 // some control group | |
| 3030 s += 2; | |
| 3031 depth++; | |
| 3032 } | |
| 3033 } | |
| 3034 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') | |
| 3035 { | |
| 3036 // unknown tag (forbidden), or some primitive group | |
| 3037 s = parse_doctype_primitive(s); | |
| 3038 if (!s) return s; | |
| 3039 } | |
| 3040 else if (*s == '>') | |
| 3041 { | |
| 3042 if (depth == 0) | |
| 3043 return s; | |
| 3044 | |
| 3045 depth--; | |
| 3046 s++; | |
| 3047 } | |
| 3048 else s++; | |
| 3049 } | |
| 3050 | |
| 3051 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); | |
| 3052 | |
| 3053 return s; | |
| 3054 } | |
| 3055 | |
// Parses a construct that starts with '<!': a comment, a CDATA section or a DOCTYPE.
// s points at the '!' (the caller has already stepped over '<').
// cursor is taken by value, so changes made to it here do not reach the caller.
// optmsk and endch are not always named in the statements below; the PUGI__ macros
// presumably read them (PUGI__OPTSET / PUGI__ENDSWITH) - confirm against their definitions.
// Returns the position after the construct; callers treat a null result as failure.
char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
{
	// parse node contents, starting with exclamation mark
	++s;

	if (*s == '-') // '<!-...'
	{
		++s;

		if (*s == '-') // '<!--...'
		{
			++s;

			if (PUGI__OPTSET(parse_comments))
			{
				PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
				cursor->value = s; // Save the offset.
			}

			// end-of-line conversion is only needed when the comment text is actually kept
			if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
			{
				s = strconv_comment(s, endch);

				if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
			}
			else
			{
				// Scan for terminating '-->'.
				PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
				PUGI__CHECK_ERROR(status_bad_comment, s);

				if (PUGI__OPTSET(parse_comments))
					*s = 0; // Zero-terminate this segment at the first terminating '-'.

				s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
			}
		}
		else PUGI__THROW_ERROR(status_bad_comment, s);
	}
	else if (*s == '[')
	{
		// '<![CDATA[...'
		if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
		{
			++s;

			if (PUGI__OPTSET(parse_cdata))
			{
				PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
				cursor->value = s; // Save the offset.

				if (PUGI__OPTSET(parse_eol))
				{
					s = strconv_cdata(s, endch);

					if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
				}
				else
				{
					// Scan for terminating ']]>'.
					PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
					PUGI__CHECK_ERROR(status_bad_cdata, s);

					*s++ = 0; // Zero-terminate this segment.
				}
			}
			else // Flagged for discard, but we still have to scan for the terminator.
			{
				// Scan for terminating ']]>'.
				PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
				PUGI__CHECK_ERROR(status_bad_cdata, s);

				++s;
			}

			// every path above leaves s on the second ']' of the terminator
			s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
		}
		else PUGI__THROW_ERROR(status_bad_cdata, s);
	}
	else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
	{
		// go back to the '<' that opened the tag; parse_doctype_group expects to start there
		s -= 2;

		// DOCTYPE is rejected when the current node has a parent
		if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);

		// "<!DOCTYPE" is 9 characters long, so mark is the first character after the keyword
		char_t* mark = s + 9;

		s = parse_doctype_group(s, endch);
		if (!s) return s;

		assert((*s == 0 && endch == '>') || *s == '>');
		// terminate the doctype text at the closing '>' and step over it (nothing to do at end of buffer)
		if (*s) *s++ = 0;

		if (PUGI__OPTSET(parse_doctype))
		{
			while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;

			PUGI__PUSHNODE(node_doctype);

			cursor->value = mark;
		}
	}
	// the buffer ended right after '<!': pick the error from the character that was cut off the end
	else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
	else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
	else PUGI__THROW_ERROR(status_unrecognized_tag, s);

	return s;
}
| 3164 | |
// Parses a construct that starts with '<?': an XML declaration or a processing instruction.
// s points at the '?' (the caller has already stepped over '<').
// ref_cursor is read on entry and written back on the success path; for a declaration that has
// content after its target, the cursor is left on the new node_declaration node so the caller
// continues with attribute parsing.
// Returns the position to resume from; callers treat a null result as failure.
char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
{
	// load into registers
	xml_node_struct* cursor = ref_cursor;
	char_t ch = 0;

	// parse node contents, starting with question mark
	++s;

	// read PI target
	char_t* target = s;

	if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);

	PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
	PUGI__CHECK_ERROR(status_bad_pi, s);

	// determine node type; stricmp / strcasecmp is not portable
	// OR-ing with ' ' (0x20) folds ASCII uppercase letters to lowercase, so any casing of "xml" matches;
	// target + 3 == s requires the target to be exactly three characters long
	bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

	// build a node only if the option for this kind of construct is enabled
	if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
	{
		if (declaration)
		{
			// disallow non top-level declarations
			if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);

			PUGI__PUSHNODE(node_declaration);
		}
		else
		{
			PUGI__PUSHNODE(node_pi);
		}

		cursor->name = target;

		// terminates the target name; the character that followed it is what 'ch' is tested for below
		PUGI__ENDSEG();

		// parse value/attributes
		if (ch == '?')
		{
			// empty node
			if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
			s += (*s == '>');

			PUGI__POPNODE();
		}
		else if (PUGI__IS_CHARTYPE(ch, ct_space))
		{
			PUGI__SKIPWS();

			// scan for tag end
			char_t* value = s;

			PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
			PUGI__CHECK_ERROR(status_bad_pi, s);

			if (declaration)
			{
				// replace ending ? with / so that 'element' terminates properly
				*s = '/';

				// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
				s = value;
			}
			else
			{
				// store value and step over >
				cursor->value = value;

				PUGI__POPNODE();

				PUGI__ENDSEG();

				s += (*s == '>');
			}
		}
		else PUGI__THROW_ERROR(status_bad_pi, s);
	}
	else
	{
		// the construct is not kept: just find its end and step over it
		// scan for tag end
		PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
		PUGI__CHECK_ERROR(status_bad_pi, s);

		s += (s[1] == '>' ? 2 : 1);
	}

	// store from registers
	ref_cursor = cursor;

	return s;
}
| 3258 | |
// Main parsing loop: walks the zero-terminated buffer starting at s and builds nodes under root.
// optmsk holds the parse options; endch is the character that originally ended the buffer
// (parse() replaces it with the terminator before calling this function).
// The PUGI__ macros used here rely on the local names s, ch, cursor, optmsk and endch
// (their definitions are outside this function - do not rename these locals without checking them).
// Returns the final position; a null result means parsing stopped on an error.
char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
{
	// select the attribute and PCDATA converters once, based on the options
	strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
	strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

	char_t ch = 0;
	xml_node_struct* cursor = root;
	char_t* mark = s;

	while (*s != 0)
	{
		if (*s == '<')
		{
			++s;

		// also entered by goto from the PCDATA branch once it has consumed a '<'
		LOC_TAG:
			if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
			{
				PUGI__PUSHNODE(node_element); // Append a new node to the tree.

				cursor->name = s;

				PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
				PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

				if (ch == '>')
				{
					// end of tag
				}
				else if (PUGI__IS_CHARTYPE(ch, ct_space))
				{
				// also entered by goto after parse_question leaves the cursor on a declaration node
				LOC_ATTRIBUTES:
					while (true)
					{
						PUGI__SKIPWS(); // Eat any whitespace.

						if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
						{
							xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
							if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);

							a->name = s; // Save the offset.

							PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
							PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

							// whitespace is allowed between the attribute name and '='
							if (PUGI__IS_CHARTYPE(ch, ct_space))
							{
								PUGI__SKIPWS(); // Eat any whitespace.

								ch = *s;
								++s;
							}

							if (ch == '=') // '<... #=...'
							{
								PUGI__SKIPWS(); // Eat any whitespace.

								if (*s == '"' || *s == '\'') // '<... #="...'
								{
									ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
									++s; // Step over the quote.
									a->value = s; // Save the offset.

									s = strconv_attribute(s, ch);

									if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);

									// After this line the loop continues from the start;
									// Whitespaces, / and > are ok, symbols and EOF are wrong,
									// everything else will be detected
									if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
								}
								else PUGI__THROW_ERROR(status_bad_attribute, s);
							}
							else PUGI__THROW_ERROR(status_bad_attribute, s);
						}
						else if (*s == '/')
						{
							++s;

							if (*s == '>')
							{
								PUGI__POPNODE();
								s++;
								break;
							}
							// the closing '>' was the last character of the buffer and is held in endch
							else if (*s == 0 && endch == '>')
							{
								PUGI__POPNODE();
								break;
							}
							else PUGI__THROW_ERROR(status_bad_start_element, s);
						}
						else if (*s == '>')
						{
							++s;

							break;
						}
						// the closing '>' was the last character of the buffer and is held in endch
						else if (*s == 0 && endch == '>')
						{
							break;
						}
						else PUGI__THROW_ERROR(status_bad_start_element, s);
					}

					// !!!
				}
				else if (ch == '/') // '<#.../'
				{
					if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);

					PUGI__POPNODE(); // Pop.

					s += (*s == '>');
				}
				else if (ch == 0)
				{
					// we stepped over null terminator, backtrack & handle closing tag
					--s;

					if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
				}
				else PUGI__THROW_ERROR(status_bad_start_element, s);
			}
			else if (*s == '/')
			{
				++s;

				mark = s;

				// the closing tag has to repeat the name of the node the cursor is on
				char_t* name = cursor->name;
				if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);

				while (PUGI__IS_CHARTYPE(*s, ct_symbol))
				{
					if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
				}

				// the tag name ended before the node name did
				if (*name)
				{
					if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
					else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
				}

				PUGI__POPNODE(); // Pop.

				PUGI__SKIPWS();

				if (*s == 0)
				{
					if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
				}
				else
				{
					if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
					++s;
				}
			}
			else if (*s == '?') // '<?...'
			{
				s = parse_question(s, cursor, optmsk, endch);
				if (!s) return s;

				assert(cursor);
				if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
			}
			else if (*s == '!') // '<!...'
			{
				s = parse_exclamation(s, cursor, optmsk, endch);
				if (!s) return s;
			}
			else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
			else PUGI__THROW_ERROR(status_unrecognized_tag, s);
		}
		else
		{
			mark = s; // Save this offset while searching for a terminator.

			PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.

			if (*s == '<' || !*s)
			{
				// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
				assert(mark != s);

				// whitespace-only text: decide from the options whether it is kept as a node
				if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
				{
					continue;
				}
				else if (PUGI__OPTSET(parse_ws_pcdata_single))
				{
					// kept only when it is directly followed by a closing tag and the node has no children yet
					if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
				}
			}

			// unless trimming is requested, the text starts at the leading whitespace that was skipped
			if (!PUGI__OPTSET(parse_trim_pcdata))
				s = mark;

			if (cursor->parent || PUGI__OPTSET(parse_fragment))
			{
				if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
				{
					cursor->value = s; // Save the offset.
				}
				else
				{
					PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.

					cursor->value = s; // Save the offset.

					PUGI__POPNODE(); // Pop since this is a standalone.
				}

				s = strconv_pcdata(s);

				if (!*s) break;
			}
			else
			{
				// text at document level outside fragment mode is not stored: skip to the next tag
				PUGI__SCANFOR(*s == '<'); // '...<'
				if (!*s) break;

				++s;
			}

			// We're after '<'
			goto LOC_TAG;
		}
	}

	// check that last tag is closed
	if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);

	return s;
}
| 3496 | |
| 3497 #ifdef PUGIXML_WCHAR_MODE | |
| 3498 static char_t* parse_skip_bom(char_t* s) | |
| 3499 { | |
| 3500 unsigned int bom = 0xfeff; | |
| 3501 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s; | |
| 3502 } | |
| 3503 #else | |
| 3504 static char_t* parse_skip_bom(char_t* s) | |
| 3505 { | |
| 3506 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s; | |
| 3507 } | |
| 3508 #endif | |
| 3509 | |
| 3510 static bool has_element_node_siblings(xml_node_struct* node) | |
| 3511 { | |
| 3512 while (node) | |
| 3513 { | |
| 3514 if (PUGI__NODETYPE(node) == node_element) return true; | |
| 3515 | |
| 3516 node = node->next_sibling; | |
| 3517 } | |
| 3518 | |
| 3519 return false; | |
| 3520 } | |
| 3521 | |
| 3522 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) | |
| 3523 { | |
| 3524 // early-out for empty documents | |
| 3525 if (length == 0) | |
| 3526 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); | |
| 3527 | |
| 3528 // get last child of the root before parsing | |
| 3529 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; | |
| 3530 | |
| 3531 // create parser on stack | |
| 3532 xml_parser parser(static_cast<xml_allocator*>(xmldoc)); | |
| 3533 | |
| 3534 // save last character and make buffer zero-terminated (speeds up parsing) | |
| 3535 char_t endch = buffer[length - 1]; | |
| 3536 buffer[length - 1] = 0; | |
| 3537 | |
| 3538 // skip BOM to make sure it does not end up as part of parse output | |
| 3539 char_t* buffer_data = parse_skip_bom(buffer); | |
| 3540 | |
| 3541 // perform actual parsing | |
| 3542 parser.parse_tree(buffer_data, root, optmsk, endch); | |
| 3543 | |
| 3544 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); | |
| 3545 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); | |
| 3546 | |
| 3547 if (result) | |
| 3548 { | |
| 3549 // since we removed last character, we have to handle the only possible false positive (stray <) | |
| 3550 if (endch == '<') | |
| 3551 return make_parse_result(status_unrecognized_tag, length - 1); | |
| 3552 | |
| 3553 // check if there are any element nodes parsed | |
| 3554 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0; | |
| 3555 | |
| 3556 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) | |
| 3557 return make_parse_result(status_no_document_element, length - 1); | |
| 3558 } | |
| 3559 else | |
| 3560 { | |
| 3561 // roll back offset if it occurs on a null terminator in the source buffer | |
| 3562 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0) | |
| 3563 result.offset--; | |
| 3564 } | |
| 3565 | |
| 3566 return result; | |
| 3567 } | |
| 3568 }; | |
| 3569 | |
| 3570 // Output facilities | |
| 3571 PUGI__FN xml_encoding get_write_native_encoding() | |
| 3572 { | |
| 3573 #ifdef PUGIXML_WCHAR_MODE | |
| 3574 return get_wchar_encoding(); | |
| 3575 #else | |
| 3576 return encoding_utf8; | |
| 3577 #endif | |
| 3578 } | |
| 3579 | |
| 3580 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) | |
| 3581 { | |
| 3582 // replace wchar encoding with utf implementation | |
| 3583 if (encoding == encoding_wchar) return get_wchar_encoding(); | |
| 3584 | |
| 3585 // replace utf16 encoding with utf16 with specific endianness | |
| 3586 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
| 3587 | |
| 3588 // replace utf32 encoding with utf32 with specific endianness | |
| 3589 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
| 3590 | |
| 3591 // only do autodetection if no explicit encoding is requested | |
| 3592 if (encoding != encoding_auto) return encoding; | |
| 3593 | |
| 3594 // assume utf8 encoding | |
| 3595 return encoding_utf8; | |
| 3596 } | |
| 3597 | |
| 3598 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) | |
| 3599 { | |
| 3600 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); | |
| 3601 | |
| 3602 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); | |
| 3603 | |
| 3604 return static_cast<size_t>(end - dest) * sizeof(*dest); | |
| 3605 } | |
| 3606 | |
| 3607 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) | |
| 3608 { | |
| 3609 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); | |
| 3610 | |
| 3611 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); | |
| 3612 | |
| 3613 if (opt_swap) | |
| 3614 { | |
| 3615 for (typename T::value_type i = dest; i != end; ++i) | |
| 3616 *i = endian_swap(*i); | |
| 3617 } | |
| 3618 | |
| 3619 return static_cast<size_t>(end - dest) * sizeof(*dest); | |
| 3620 } | |
| 3621 | |
| 3622 #ifdef PUGIXML_WCHAR_MODE | |
| 3623 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) | |
| 3624 { | |
| 3625 if (length < 1) return 0; | |
| 3626 | |
| 3627 // discard last character if it's the lead of a surrogate pair | |
| 3628 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; | |
| 3629 } | |
| 3630 | |
| 3631 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) | |
| 3632 { | |
| 3633 // only endian-swapping is required | |
| 3634 if (need_endian_swap_utf(encoding, get_wchar_encoding())) | |
| 3635 { | |
| 3636 convert_wchar_endian_swap(r_char, data, length); | |
| 3637 | |
| 3638 return length * sizeof(char_t); | |
| 3639 } | |
| 3640 | |
| 3641 // convert to utf8 | |
| 3642 if (encoding == encoding_utf8) | |
| 3643 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); | |
| 3644 | |
| 3645 // convert to utf16 | |
| 3646 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
| 3647 { | |
| 3648 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
| 3649 | |
| 3650 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); | |
| 3651 } | |
| 3652 | |
| 3653 // convert to utf32 | |
| 3654 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
| 3655 { | |
| 3656 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
| 3657 | |
| 3658 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); | |
| 3659 } | |
| 3660 | |
| 3661 // convert to latin1 | |
| 3662 if (encoding == encoding_latin1) | |
| 3663 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); | |
| 3664 | |
| 3665 assert(false && "Invalid encoding"); // unreachable | |
| 3666 return 0; | |
| 3667 } | |
| 3668 #else | |
| 3669 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) | |
| 3670 { | |
| 3671 if (length < 5) return 0; | |
| 3672 | |
| 3673 for (size_t i = 1; i <= 4; ++i) | |
| 3674 { | |
| 3675 uint8_t ch = static_cast<uint8_t>(data[length - i]); | |
| 3676 | |
| 3677 // either a standalone character or a leading one | |
| 3678 if ((ch & 0xc0) != 0x80) return length - i; | |
| 3679 } | |
| 3680 | |
| 3681 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk | |
| 3682 return length; | |
| 3683 } | |
| 3684 | |
| 3685 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) | |
| 3686 { | |
| 3687 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
| 3688 { | |
| 3689 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
| 3690 | |
| 3691 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); | |
| 3692 } | |
| 3693 | |
| 3694 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
| 3695 { | |
| 3696 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
| 3697 | |
| 3698 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); | |
| 3699 } | |
| 3700 | |
| 3701 if (encoding == encoding_latin1) | |
| 3702 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); | |
| 3703 | |
| 3704 assert(false && "Invalid encoding"); // unreachable | |
| 3705 return 0; | |
| 3706 } | |
| 3707 #endif | |
| 3708 | |
| 3709 class xml_buffered_writer | |
| 3710 { | |
| 3711 xml_buffered_writer(const xml_buffered_writer&); | |
| 3712 xml_buffered_writer& operator=(const xml_buffered_writer&); | |
| 3713 | |
| 3714 public: | |
| 3715 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) | |
| 3716 { | |
| 3717 PUGI__STATIC_ASSERT(bufcapacity >= 8); | |
| 3718 } | |
| 3719 | |
| 3720 size_t flush() | |
| 3721 { | |
| 3722 flush(buffer, bufsize); | |
| 3723 bufsize = 0; | |
| 3724 return 0; | |
| 3725 } | |
| 3726 | |
| 3727 void flush(const char_t* data, size_t size) | |
| 3728 { | |
| 3729 if (size == 0) return; | |
| 3730 | |
| 3731 // fast path, just write data | |
| 3732 if (encoding == get_write_native_encoding()) | |
| 3733 writer.write(data, size * sizeof(char_t)); | |
| 3734 else | |
| 3735 { | |
| 3736 // convert chunk | |
| 3737 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); | |
| 3738 assert(result <= sizeof(scratch)); | |
| 3739 | |
| 3740 // write data | |
| 3741 writer.write(scratch.data_u8, result); | |
| 3742 } | |
| 3743 } | |
| 3744 | |
| 3745 void write_direct(const char_t* data, size_t length) | |
| 3746 { | |
| 3747 // flush the remaining buffer contents | |
| 3748 flush(); | |
| 3749 | |
| 3750 // handle large chunks | |
| 3751 if (length > bufcapacity) | |
| 3752 { | |
| 3753 if (encoding == get_write_native_encoding()) | |
| 3754 { | |
| 3755 // fast path, can just write data chunk | |
| 3756 writer.write(data, length * sizeof(char_t)); | |
| 3757 return; | |
| 3758 } | |
| 3759 | |
| 3760 // need to convert in suitable chunks | |
| 3761 while (length > bufcapacity) | |
| 3762 { | |
| 3763 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer | |
| 3764 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) | |
| 3765 size_t chunk_size = get_valid_length(data, bufcapacity); | |
| 3766 assert(chunk_size); | |
| 3767 | |
| 3768 // convert chunk and write | |
| 3769 flush(data, chunk_size); | |
| 3770 | |
| 3771 // iterate | |
| 3772 data += chunk_size; | |
| 3773 length -= chunk_size; | |
| 3774 } | |
| 3775 | |
| 3776 // small tail is copied below | |
| 3777 bufsize = 0; | |
| 3778 } | |
| 3779 | |
| 3780 memcpy(buffer + bufsize, data, length * sizeof(char_t)); | |
| 3781 bufsize += length; | |
| 3782 } | |
| 3783 | |
| 3784 void write_buffer(const char_t* data, size_t length) | |
| 3785 { | |
| 3786 size_t offset = bufsize; | |
| 3787 | |
| 3788 if (offset + length <= bufcapacity) | |
| 3789 { | |
| 3790 memcpy(buffer + offset, data, length * sizeof(char_t)); | |
| 3791 bufsize = offset + length; | |
| 3792 } | |
| 3793 else | |
| 3794 { | |
| 3795 write_direct(data, length); | |
| 3796 } | |
| 3797 } | |
| 3798 | |
| 3799 void write_string(const char_t* data) | |
| 3800 { | |
| 3801 // write the part of the string that fits in the buffer | |
| 3802 size_t offset = bufsize; | |
| 3803 | |
| 3804 while (*data && offset < bufcapacity) | |
| 3805 buffer[offset++] = *data++; | |
| 3806 | |
| 3807 // write the rest | |
| 3808 if (offset < bufcapacity) | |
| 3809 { | |
| 3810 bufsize = offset; | |
| 3811 } | |
| 3812 else | |
| 3813 { | |
| 3814 // backtrack a bit if we have split the codepoint | |
| 3815 size_t length = offset - bufsize; | |
| 3816 size_t extra = length - get_valid_length(data - length, length); | |
| 3817 | |
| 3818 bufsize = offset - extra; | |
| 3819 | |
| 3820 write_direct(data - extra, strlength(data) + extra); | |
| 3821 } | |
| 3822 } | |
| 3823 | |
| 3824 void write(char_t d0) | |
| 3825 { | |
| 3826 size_t offset = bufsize; | |
| 3827 if (offset > bufcapacity - 1) offset = flush(); | |
| 3828 | |
| 3829 buffer[offset + 0] = d0; | |
| 3830 bufsize = offset + 1; | |
| 3831 } | |
| 3832 | |
| 3833 void write(char_t d0, char_t d1) | |
| 3834 { | |
| 3835 size_t offset = bufsize; | |
| 3836 if (offset > bufcapacity - 2) offset = flush(); | |
| 3837 | |
| 3838 buffer[offset + 0] = d0; | |
| 3839 buffer[offset + 1] = d1; | |
| 3840 bufsize = offset + 2; | |
| 3841 } | |
| 3842 | |
| 3843 void write(char_t d0, char_t d1, char_t d2) | |
| 3844 { | |
| 3845 size_t offset = bufsize; | |
| 3846 if (offset > bufcapacity - 3) offset = flush(); | |
| 3847 | |
| 3848 buffer[offset + 0] = d0; | |
| 3849 buffer[offset + 1] = d1; | |
| 3850 buffer[offset + 2] = d2; | |
| 3851 bufsize = offset + 3; | |
| 3852 } | |
| 3853 | |
| 3854 void write(char_t d0, char_t d1, char_t d2, char_t d3) | |
| 3855 { | |
| 3856 size_t offset = bufsize; | |
| 3857 if (offset > bufcapacity - 4) offset = flush(); | |
| 3858 | |
| 3859 buffer[offset + 0] = d0; | |
| 3860 buffer[offset + 1] = d1; | |
| 3861 buffer[offset + 2] = d2; | |
| 3862 buffer[offset + 3] = d3; | |
| 3863 bufsize = offset + 4; | |
| 3864 } | |
| 3865 | |
| 3866 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) | |
| 3867 { | |
| 3868 size_t offset = bufsize; | |
| 3869 if (offset > bufcapacity - 5) offset = flush(); | |
| 3870 | |
| 3871 buffer[offset + 0] = d0; | |
| 3872 buffer[offset + 1] = d1; | |
| 3873 buffer[offset + 2] = d2; | |
| 3874 buffer[offset + 3] = d3; | |
| 3875 buffer[offset + 4] = d4; | |
| 3876 bufsize = offset + 5; | |
| 3877 } | |
| 3878 | |
| 3879 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) | |
| 3880 { | |
| 3881 size_t offset = bufsize; | |
| 3882 if (offset > bufcapacity - 6) offset = flush(); | |
| 3883 | |
| 3884 buffer[offset + 0] = d0; | |
| 3885 buffer[offset + 1] = d1; | |
| 3886 buffer[offset + 2] = d2; | |
| 3887 buffer[offset + 3] = d3; | |
| 3888 buffer[offset + 4] = d4; | |
| 3889 buffer[offset + 5] = d5; | |
| 3890 bufsize = offset + 6; | |
| 3891 } | |
| 3892 | |
| 3893 // utf8 maximum expansion: x4 (-> utf32) | |
| 3894 // utf16 maximum expansion: x2 (-> utf32) | |
| 3895 // utf32 maximum expansion: x1 | |
| 3896 enum | |
| 3897 { | |
| 3898 bufcapacitybytes = | |
| 3899 #ifdef PUGIXML_MEMORY_OUTPUT_STACK | |
| 3900 PUGIXML_MEMORY_OUTPUT_STACK | |
| 3901 #else | |
| 3902 10240 | |
| 3903 #endif | |
| 3904 , | |
| 3905 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4) | |
| 3906 }; | |
| 3907 | |
| 3908 char_t buffer[bufcapacity]; | |
| 3909 | |
| 3910 union | |
| 3911 { | |
| 3912 uint8_t data_u8[4 * bufcapacity]; | |
| 3913 uint16_t data_u16[2 * bufcapacity]; | |
| 3914 uint32_t data_u32[bufcapacity]; | |
| 3915 char_t data_char[bufcapacity]; | |
| 3916 } scratch; | |
| 3917 | |
| 3918 xml_writer& writer; | |
| 3919 size_t bufsize; | |
| 3920 xml_encoding encoding; | |
| 3921 }; | |
| 3922 | |
| 3923 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) | |
| 3924 { | |
| 3925 while (*s) | |
| 3926 { | |
| 3927 const char_t* prev = s; | |
| 3928 | |
| 3929 // While *s is a usual symbol | |
| 3930 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); | |
| 3931 | |
| 3932 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
| 3933 | |
| 3934 switch (*s) | |
| 3935 { | |
| 3936 case 0: break; | |
| 3937 case '&': | |
| 3938 writer.write('&', 'a', 'm', 'p', ';'); | |
| 3939 ++s; | |
| 3940 break; | |
| 3941 case '<': | |
| 3942 writer.write('&', 'l', 't', ';'); | |
| 3943 ++s; | |
| 3944 break; | |
| 3945 case '>': | |
| 3946 writer.write('&', 'g', 't', ';'); | |
| 3947 ++s; | |
| 3948 break; | |
| 3949 case '"': | |
| 3950 if (flags & format_attribute_single_quote) | |
| 3951 writer.write('"'); | |
| 3952 else | |
| 3953 writer.write('&', 'q', 'u', 'o', 't', ';'); | |
| 3954 ++s; | |
| 3955 break; | |
| 3956 case '\'': | |
| 3957 if (flags & format_attribute_single_quote) | |
| 3958 writer.write('&', 'a', 'p', 'o', 's', ';'); | |
| 3959 else | |
| 3960 writer.write('\''); | |
| 3961 ++s; | |
| 3962 break; | |
| 3963 default: // s is not a usual symbol | |
| 3964 { | |
| 3965 unsigned int ch = static_cast<unsigned int>(*s++); | |
| 3966 assert(ch < 32); | |
| 3967 | |
| 3968 if (!(flags & format_skip_control_chars)) | |
| 3969 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';'); | |
| 3970 } | |
| 3971 } | |
| 3972 } | |
| 3973 } | |
| 3974 | |
| 3975 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) | |
| 3976 { | |
| 3977 if (flags & format_no_escapes) | |
| 3978 writer.write_string(s); | |
| 3979 else | |
| 3980 text_output_escaped(writer, s, type, flags); | |
| 3981 } | |
| 3982 | |
| 3983 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) | |
| 3984 { | |
| 3985 do | |
| 3986 { | |
| 3987 writer.write('<', '!', '[', 'C', 'D'); | |
| 3988 writer.write('A', 'T', 'A', '['); | |
| 3989 | |
| 3990 const char_t* prev = s; | |
| 3991 | |
| 3992 // look for ]]> sequence - we can't output it as is since it terminates CDATA | |
| 3993 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; | |
| 3994 | |
| 3995 // skip ]] if we stopped at ]]>, > will go to the next CDATA section | |
| 3996 if (*s) s += 2; | |
| 3997 | |
| 3998 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
| 3999 | |
| 4000 writer.write(']', ']', '>'); | |
| 4001 } | |
| 4002 while (*s); | |
| 4003 } | |
| 4004 | |
| 4005 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) | |
| 4006 { | |
| 4007 switch (indent_length) | |
| 4008 { | |
| 4009 case 1: | |
| 4010 { | |
| 4011 for (unsigned int i = 0; i < depth; ++i) | |
| 4012 writer.write(indent[0]); | |
| 4013 break; | |
| 4014 } | |
| 4015 | |
| 4016 case 2: | |
| 4017 { | |
| 4018 for (unsigned int i = 0; i < depth; ++i) | |
| 4019 writer.write(indent[0], indent[1]); | |
| 4020 break; | |
| 4021 } | |
| 4022 | |
| 4023 case 3: | |
| 4024 { | |
| 4025 for (unsigned int i = 0; i < depth; ++i) | |
| 4026 writer.write(indent[0], indent[1], indent[2]); | |
| 4027 break; | |
| 4028 } | |
| 4029 | |
| 4030 case 4: | |
| 4031 { | |
| 4032 for (unsigned int i = 0; i < depth; ++i) | |
| 4033 writer.write(indent[0], indent[1], indent[2], indent[3]); | |
| 4034 break; | |
| 4035 } | |
| 4036 | |
| 4037 default: | |
| 4038 { | |
| 4039 for (unsigned int i = 0; i < depth; ++i) | |
| 4040 writer.write_buffer(indent, indent_length); | |
| 4041 } | |
| 4042 } | |
| 4043 } | |
| 4044 | |
| 4045 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) | |
| 4046 { | |
| 4047 writer.write('<', '!', '-', '-'); | |
| 4048 | |
| 4049 while (*s) | |
| 4050 { | |
| 4051 const char_t* prev = s; | |
| 4052 | |
| 4053 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body | |
| 4054 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; | |
| 4055 | |
| 4056 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
| 4057 | |
| 4058 if (*s) | |
| 4059 { | |
| 4060 assert(*s == '-'); | |
| 4061 | |
| 4062 writer.write('-', ' '); | |
| 4063 ++s; | |
| 4064 } | |
| 4065 } | |
| 4066 | |
| 4067 writer.write('-', '-', '>'); | |
| 4068 } | |
| 4069 | |
| 4070 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) | |
| 4071 { | |
| 4072 while (*s) | |
| 4073 { | |
| 4074 const char_t* prev = s; | |
| 4075 | |
| 4076 // look for ?> sequence - we can't output it since ?> terminates PI | |
| 4077 while (*s && !(s[0] == '?' && s[1] == '>')) ++s; | |
| 4078 | |
| 4079 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
| 4080 | |
| 4081 if (*s) | |
| 4082 { | |
| 4083 assert(s[0] == '?' && s[1] == '>'); | |
| 4084 | |
| 4085 writer.write('?', ' ', '>'); | |
| 4086 s += 2; | |
| 4087 } | |
| 4088 } | |
| 4089 } | |
| 4090 | |
| 4091 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) | |
| 4092 { | |
| 4093 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
| 4094 const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"'; | |
| 4095 | |
| 4096 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) | |
| 4097 { | |
| 4098 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) | |
| 4099 { | |
| 4100 writer.write('\n'); | |
| 4101 | |
| 4102 text_output_indent(writer, indent, indent_length, depth + 1); | |
| 4103 } | |
| 4104 else | |
| 4105 { | |
| 4106 writer.write(' '); | |
| 4107 } | |
| 4108 | |
| 4109 writer.write_string(a->name ? a->name + 0 : default_name); | |
| 4110 writer.write('=', enquotation_char); | |
| 4111 | |
| 4112 if (a->value) | |
| 4113 text_output(writer, a->value, ctx_special_attr, flags); | |
| 4114 | |
| 4115 writer.write(enquotation_char); | |
| 4116 } | |
| 4117 } | |
| 4118 | |
| 4119 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) | |
| 4120 { | |
| 4121 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
| 4122 const char_t* name = node->name ? node->name + 0 : default_name; | |
| 4123 | |
| 4124 writer.write('<'); | |
| 4125 writer.write_string(name); | |
| 4126 | |
| 4127 if (node->first_attribute) | |
| 4128 node_output_attributes(writer, node, indent, indent_length, flags, depth); | |
| 4129 | |
| 4130 // element nodes can have value if parse_embed_pcdata was used | |
| 4131 if (!node->value) | |
| 4132 { | |
| 4133 if (!node->first_child) | |
| 4134 { | |
| 4135 if (flags & format_no_empty_element_tags) | |
| 4136 { | |
| 4137 writer.write('>', '<', '/'); | |
| 4138 writer.write_string(name); | |
| 4139 writer.write('>'); | |
| 4140 | |
| 4141 return false; | |
| 4142 } | |
| 4143 else | |
| 4144 { | |
| 4145 if ((flags & format_raw) == 0) | |
| 4146 writer.write(' '); | |
| 4147 | |
| 4148 writer.write('/', '>'); | |
| 4149 | |
| 4150 return false; | |
| 4151 } | |
| 4152 } | |
| 4153 else | |
| 4154 { | |
| 4155 writer.write('>'); | |
| 4156 | |
| 4157 return true; | |
| 4158 } | |
| 4159 } | |
| 4160 else | |
| 4161 { | |
| 4162 writer.write('>'); | |
| 4163 | |
| 4164 text_output(writer, node->value, ctx_special_pcdata, flags); | |
| 4165 | |
| 4166 if (!node->first_child) | |
| 4167 { | |
| 4168 writer.write('<', '/'); | |
| 4169 writer.write_string(name); | |
| 4170 writer.write('>'); | |
| 4171 | |
| 4172 return false; | |
| 4173 } | |
| 4174 else | |
| 4175 { | |
| 4176 return true; | |
| 4177 } | |
| 4178 } | |
| 4179 } | |
| 4180 | |
| 4181 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) | |
| 4182 { | |
| 4183 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
| 4184 const char_t* name = node->name ? node->name + 0 : default_name; | |
| 4185 | |
| 4186 writer.write('<', '/'); | |
| 4187 writer.write_string(name); | |
| 4188 writer.write('>'); | |
| 4189 } | |
| 4190 | |
| 4191 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) | |
| 4192 { | |
| 4193 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
| 4194 | |
| 4195 switch (PUGI__NODETYPE(node)) | |
| 4196 { | |
| 4197 case node_pcdata: | |
| 4198 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); | |
| 4199 break; | |
| 4200 | |
| 4201 case node_cdata: | |
| 4202 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); | |
| 4203 break; | |
| 4204 | |
| 4205 case node_comment: | |
| 4206 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); | |
| 4207 break; | |
| 4208 | |
| 4209 case node_pi: | |
| 4210 writer.write('<', '?'); | |
| 4211 writer.write_string(node->name ? node->name + 0 : default_name); | |
| 4212 | |
| 4213 if (node->value) | |
| 4214 { | |
| 4215 writer.write(' '); | |
| 4216 node_output_pi_value(writer, node->value); | |
| 4217 } | |
| 4218 | |
| 4219 writer.write('?', '>'); | |
| 4220 break; | |
| 4221 | |
| 4222 case node_declaration: | |
| 4223 writer.write('<', '?'); | |
| 4224 writer.write_string(node->name ? node->name + 0 : default_name); | |
| 4225 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); | |
| 4226 writer.write('?', '>'); | |
| 4227 break; | |
| 4228 | |
| 4229 case node_doctype: | |
| 4230 writer.write('<', '!', 'D', 'O', 'C'); | |
| 4231 writer.write('T', 'Y', 'P', 'E'); | |
| 4232 | |
| 4233 if (node->value) | |
| 4234 { | |
| 4235 writer.write(' '); | |
| 4236 writer.write_string(node->value); | |
| 4237 } | |
| 4238 | |
| 4239 writer.write('>'); | |
| 4240 break; | |
| 4241 | |
| 4242 default: | |
| 4243 assert(false && "Invalid node type"); // unreachable | |
| 4244 } | |
| 4245 } | |
| 4246 | |
// Bit flags describing what has to be written before the next node.
enum indent_flags_t
{
	indent_newline = 1 << 0, // write a line break first
	indent_indent = 1 << 1   // write the indentation first
};
| 4252 | |
| 4253 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) | |
| 4254 { | |
| 4255 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; | |
| 4256 unsigned int indent_flags = indent_indent; | |
| 4257 | |
| 4258 xml_node_struct* node = root; | |
| 4259 | |
| 4260 do | |
| 4261 { | |
| 4262 assert(node); | |
| 4263 | |
| 4264 // begin writing current node | |
| 4265 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) | |
| 4266 { | |
| 4267 node_output_simple(writer, node, flags); | |
| 4268 | |
| 4269 indent_flags = 0; | |
| 4270 } | |
| 4271 else | |
| 4272 { | |
| 4273 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) | |
| 4274 writer.write('\n'); | |
| 4275 | |
| 4276 if ((indent_flags & indent_indent) && indent_length) | |
| 4277 text_output_indent(writer, indent, indent_length, depth); | |
| 4278 | |
| 4279 if (PUGI__NODETYPE(node) == node_element) | |
| 4280 { | |
| 4281 indent_flags = indent_newline | indent_indent; | |
| 4282 | |
| 4283 if (node_output_start(writer, node, indent, indent_length, flags, depth)) | |
| 4284 { | |
| 4285 // element nodes can have value if parse_embed_pcdata was used | |
| 4286 if (node->value) | |
| 4287 indent_flags = 0; | |
| 4288 | |
| 4289 node = node->first_child; | |
| 4290 depth++; | |
| 4291 continue; | |
| 4292 } | |
| 4293 } | |
| 4294 else if (PUGI__NODETYPE(node) == node_document) | |
| 4295 { | |
| 4296 indent_flags = indent_indent; | |
| 4297 | |
| 4298 if (node->first_child) | |
| 4299 { | |
| 4300 node = node->first_child; | |
| 4301 continue; | |
| 4302 } | |
| 4303 } | |
| 4304 else | |
| 4305 { | |
| 4306 node_output_simple(writer, node, flags); | |
| 4307 | |
| 4308 indent_flags = indent_newline | indent_indent; | |
| 4309 } | |
| 4310 } | |
| 4311 | |
| 4312 // continue to the next node | |
| 4313 while (node != root) | |
| 4314 { | |
| 4315 if (node->next_sibling) | |
| 4316 { | |
| 4317 node = node->next_sibling; | |
| 4318 break; | |
| 4319 } | |
| 4320 | |
| 4321 node = node->parent; | |
| 4322 | |
| 4323 // write closing node | |
| 4324 if (PUGI__NODETYPE(node) == node_element) | |
| 4325 { | |
| 4326 depth--; | |
| 4327 | |
| 4328 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) | |
| 4329 writer.write('\n'); | |
| 4330 | |
| 4331 if ((indent_flags & indent_indent) && indent_length) | |
| 4332 text_output_indent(writer, indent, indent_length, depth); | |
| 4333 | |
| 4334 node_output_end(writer, node); | |
| 4335 | |
| 4336 indent_flags = indent_newline | indent_indent; | |
| 4337 } | |
| 4338 } | |
| 4339 } | |
| 4340 while (node != root); | |
| 4341 | |
| 4342 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) | |
| 4343 writer.write('\n'); | |
| 4344 } | |
| 4345 | |
| 4346 PUGI__FN bool has_declaration(xml_node_struct* node) | |
| 4347 { | |
| 4348 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) | |
| 4349 { | |
| 4350 xml_node_type type = PUGI__NODETYPE(child); | |
| 4351 | |
| 4352 if (type == node_declaration) return true; | |
| 4353 if (type == node_element) return false; | |
| 4354 } | |
| 4355 | |
| 4356 return false; | |
| 4357 } | |
| 4358 | |
| 4359 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) | |
| 4360 { | |
| 4361 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) | |
| 4362 if (a == attr) | |
| 4363 return true; | |
| 4364 | |
| 4365 return false; | |
| 4366 } | |
| 4367 | |
| 4368 PUGI__FN bool allow_insert_attribute(xml_node_type parent) | |
| 4369 { | |
| 4370 return parent == node_element || parent == node_declaration; | |
| 4371 } | |
| 4372 | |
| 4373 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) | |
| 4374 { | |
| 4375 if (parent != node_document && parent != node_element) return false; | |
| 4376 if (child == node_document || child == node_null) return false; | |
| 4377 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; | |
| 4378 | |
| 4379 return true; | |
| 4380 } | |
| 4381 | |
| 4382 PUGI__FN bool allow_move(xml_node parent, xml_node child) | |
| 4383 { | |
| 4384 // check that child can be a child of parent | |
| 4385 if (!allow_insert_child(parent.type(), child.type())) | |
| 4386 return false; | |
| 4387 | |
| 4388 // check that node is not moved between documents | |
| 4389 if (parent.root() != child.root()) | |
| 4390 return false; | |
| 4391 | |
| 4392 // check that new parent is not in the child subtree | |
| 4393 xml_node cur = parent; | |
| 4394 | |
| 4395 while (cur) | |
| 4396 { | |
| 4397 if (cur == child) | |
| 4398 return false; | |
| 4399 | |
| 4400 cur = cur.parent(); | |
| 4401 } | |
| 4402 | |
| 4403 return true; | |
| 4404 } | |
| 4405 | |
| 4406 template <typename String, typename Header> | |
| 4407 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) | |
| 4408 { | |
| 4409 assert(!dest && (header & header_mask) == 0); | |
| 4410 | |
| 4411 if (source) | |
| 4412 { | |
| 4413 if (alloc && (source_header & header_mask) == 0) | |
| 4414 { | |
| 4415 dest = source; | |
| 4416 | |
| 4417 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared | |
| 4418 header |= xml_memory_page_contents_shared_mask; | |
| 4419 source_header |= xml_memory_page_contents_shared_mask; | |
| 4420 } | |
| 4421 else | |
| 4422 strcpy_insitu(dest, header, header_mask, source, strlength(source)); | |
| 4423 } | |
| 4424 } | |
| 4425 | |
| 4426 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) | |
| 4427 { | |
| 4428 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); | |
| 4429 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); | |
| 4430 | |
| 4431 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) | |
| 4432 { | |
| 4433 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); | |
| 4434 | |
| 4435 if (da) | |
| 4436 { | |
| 4437 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); | |
| 4438 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); | |
| 4439 } | |
| 4440 } | |
| 4441 } | |
| 4442 | |
| 4443 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) | |
| 4444 { | |
| 4445 xml_allocator& alloc = get_allocator(dn); | |
| 4446 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0; | |
| 4447 | |
| 4448 node_copy_contents(dn, sn, shared_alloc); | |
| 4449 | |
| 4450 xml_node_struct* dit = dn; | |
| 4451 xml_node_struct* sit = sn->first_child; | |
| 4452 | |
| 4453 while (sit && sit != sn) | |
| 4454 { | |
| 4455 // loop invariant: dit is inside the subtree rooted at dn | |
| 4456 assert(dit); | |
| 4457 | |
| 4458 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop | |
| 4459 if (sit != dn) | |
| 4460 { | |
| 4461 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); | |
| 4462 | |
| 4463 if (copy) | |
| 4464 { | |
| 4465 node_copy_contents(copy, sit, shared_alloc); | |
| 4466 | |
| 4467 if (sit->first_child) | |
| 4468 { | |
| 4469 dit = copy; | |
| 4470 sit = sit->first_child; | |
| 4471 continue; | |
| 4472 } | |
| 4473 } | |
| 4474 } | |
| 4475 | |
| 4476 // continue to the next node | |
| 4477 do | |
| 4478 { | |
| 4479 if (sit->next_sibling) | |
| 4480 { | |
| 4481 sit = sit->next_sibling; | |
| 4482 break; | |
| 4483 } | |
| 4484 | |
| 4485 sit = sit->parent; | |
| 4486 dit = dit->parent; | |
| 4487 | |
| 4488 // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn | |
| 4489 assert(sit == sn || dit); | |
| 4490 } | |
| 4491 while (sit != sn); | |
| 4492 } | |
| 4493 | |
| 4494 assert(!sit || dit == dn->parent); | |
| 4495 } | |
| 4496 | |
| 4497 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) | |
| 4498 { | |
| 4499 xml_allocator& alloc = get_allocator(da); | |
| 4500 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0; | |
| 4501 | |
| 4502 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); | |
| 4503 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); | |
| 4504 } | |
| 4505 | |
| 4506 inline bool is_text_node(xml_node_struct* node) | |
| 4507 { | |
| 4508 xml_node_type type = PUGI__NODETYPE(node); | |
| 4509 | |
| 4510 return type == node_pcdata || type == node_cdata; | |
| 4511 } | |
| 4512 | |
| 4513 // get value with conversion functions | |
| 4514 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv) | |
| 4515 { | |
| 4516 U result = 0; | |
| 4517 const char_t* s = value; | |
| 4518 | |
| 4519 while (PUGI__IS_CHARTYPE(*s, ct_space)) | |
| 4520 s++; | |
| 4521 | |
| 4522 bool negative = (*s == '-'); | |
| 4523 | |
| 4524 s += (*s == '+' || *s == '-'); | |
| 4525 | |
| 4526 bool overflow = false; | |
| 4527 | |
| 4528 if (s[0] == '0' && (s[1] | ' ') == 'x') | |
| 4529 { | |
| 4530 s += 2; | |
| 4531 | |
| 4532 // since overflow detection relies on length of the sequence skip leading zeros | |
| 4533 while (*s == '0') | |
| 4534 s++; | |
| 4535 | |
| 4536 const char_t* start = s; | |
| 4537 | |
| 4538 for (;;) | |
| 4539 { | |
| 4540 if (static_cast<unsigned>(*s - '0') < 10) | |
| 4541 result = result * 16 + (*s - '0'); | |
| 4542 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6) | |
| 4543 result = result * 16 + ((*s | ' ') - 'a' + 10); | |
| 4544 else | |
| 4545 break; | |
| 4546 | |
| 4547 s++; | |
| 4548 } | |
| 4549 | |
| 4550 size_t digits = static_cast<size_t>(s - start); | |
| 4551 | |
| 4552 overflow = digits > sizeof(U) * 2; | |
| 4553 } | |
| 4554 else | |
| 4555 { | |
| 4556 // since overflow detection relies on length of the sequence skip leading zeros | |
| 4557 while (*s == '0') | |
| 4558 s++; | |
| 4559 | |
| 4560 const char_t* start = s; | |
| 4561 | |
| 4562 for (;;) | |
| 4563 { | |
| 4564 if (static_cast<unsigned>(*s - '0') < 10) | |
| 4565 result = result * 10 + (*s - '0'); | |
| 4566 else | |
| 4567 break; | |
| 4568 | |
| 4569 s++; | |
| 4570 } | |
| 4571 | |
| 4572 size_t digits = static_cast<size_t>(s - start); | |
| 4573 | |
| 4574 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); | |
| 4575 | |
| 4576 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; | |
| 4577 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; | |
| 4578 const size_t high_bit = sizeof(U) * 8 - 1; | |
| 4579 | |
| 4580 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); | |
| 4581 } | |
| 4582 | |
| 4583 if (negative) | |
| 4584 { | |
| 4585 // Workaround for crayc++ CC-3059: Expected no overflow in routine. | |
| 4586 #ifdef _CRAYC | |
| 4587 return (overflow || result > ~minv + 1) ? minv : ~result + 1; | |
| 4588 #else | |
| 4589 return (overflow || result > 0 - minv) ? minv : 0 - result; | |
| 4590 #endif | |
| 4591 } | |
| 4592 else | |
| 4593 return (overflow || result > maxv) ? maxv : result; | |
| 4594 } | |
| 4595 | |
| 4596 PUGI__FN int get_value_int(const char_t* value) | |
| 4597 { | |
| 4598 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX); | |
| 4599 } | |
| 4600 | |
| 4601 PUGI__FN unsigned int get_value_uint(const char_t* value) | |
| 4602 { | |
| 4603 return string_to_integer<unsigned int>(value, 0, UINT_MAX); | |
| 4604 } | |
| 4605 | |
| 4606 PUGI__FN double get_value_double(const char_t* value) | |
| 4607 { | |
| 4608 #ifdef PUGIXML_WCHAR_MODE | |
| 4609 return wcstod(value, 0); | |
| 4610 #else | |
| 4611 return strtod(value, 0); | |
| 4612 #endif | |
| 4613 } | |
| 4614 | |
| 4615 PUGI__FN float get_value_float(const char_t* value) | |
| 4616 { | |
| 4617 #ifdef PUGIXML_WCHAR_MODE | |
| 4618 return static_cast<float>(wcstod(value, 0)); | |
| 4619 #else | |
| 4620 return static_cast<float>(strtod(value, 0)); | |
| 4621 #endif | |
| 4622 } | |
| 4623 | |
| 4624 PUGI__FN bool get_value_bool(const char_t* value) | |
| 4625 { | |
| 4626 // only look at first char | |
| 4627 char_t first = *value; | |
| 4628 | |
| 4629 // 1*, t* (true), T* (True), y* (yes), Y* (YES) | |
| 4630 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); | |
| 4631 } | |
| 4632 | |
#ifdef PUGIXML_HAS_LONG_LONG
// Parses value as a signed long long; limits are passed to string_to_integer as unsigned bit patterns.
PUGI__FN long long get_value_llong(const char_t* value)
{
	const unsigned long long lowest = static_cast<unsigned long long>(LLONG_MIN);
	const unsigned long long highest = LLONG_MAX;

	return string_to_integer<unsigned long long>(value, lowest, highest);
}

// Parses value as an unsigned long long using the range [0, ULLONG_MAX].
PUGI__FN unsigned long long get_value_ullong(const char_t* value)
{
	const unsigned long long lowest = 0;
	const unsigned long long highest = ULLONG_MAX;

	return string_to_integer<unsigned long long>(value, lowest, highest);
}
#endif
| 4644 | |
// Formats an integer as decimal text, filling the [begin, end) buffer backwards from its end.
// value carries the number in the unsigned type U; when negative is true it is first negated
// (0 - value, in unsigned arithmetic) and a leading '-' becomes part of the result.
// Returns a pointer to the first character of the text; the text extends up to end and no
// zero terminator is written.
template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
{
	char_t* result = end - 1;
	U rest = negative ? 0 - value : value;

	// emit digits from least to most significant; do/while produces one digit even for zero
	do
	{
		*result-- = static_cast<char_t>('0' + (rest % 10));
		rest /= 10;
	}
	while (rest);

	// result now points one slot before the first digit; that slot must still be inside the buffer
	assert(result >= begin);
	(void)begin;

	// the sign is stored unconditionally; for non-negative values the returned pointer skips it
	*result = '-';

	return result + !negative;
}
| 4664 | |
| 4665 // set value with conversion functions | |
| 4666 template <typename String, typename Header> | |
| 4667 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) | |
| 4668 { | |
| 4669 #ifdef PUGIXML_WCHAR_MODE | |
| 4670 char_t wbuf[128]; | |
| 4671 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); | |
| 4672 | |
| 4673 size_t offset = 0; | |
| 4674 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; | |
| 4675 | |
| 4676 return strcpy_insitu(dest, header, header_mask, wbuf, offset); | |
| 4677 #else | |
| 4678 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); | |
| 4679 #endif | |
| 4680 } | |
| 4681 | |
| 4682 template <typename U, typename String, typename Header> | |
| 4683 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative) | |
| 4684 { | |
| 4685 char_t buf[64]; | |
| 4686 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); | |
| 4687 char_t* begin = integer_to_string(buf, end, value, negative); | |
| 4688 | |
| 4689 return strcpy_insitu(dest, header, header_mask, begin, end - begin); | |
| 4690 } | |
| 4691 | |
| 4692 template <typename String, typename Header> | |
| 4693 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision) | |
| 4694 { | |
| 4695 char buf[128]; | |
| 4696 PUGI__SNPRINTF(buf, "%.*g", precision, double(value)); | |
| 4697 | |
| 4698 return set_value_ascii(dest, header, header_mask, buf); | |
| 4699 } | |
| 4700 | |
| 4701 template <typename String, typename Header> | |
| 4702 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision) | |
| 4703 { | |
| 4704 char buf[128]; | |
| 4705 PUGI__SNPRINTF(buf, "%.*g", precision, value); | |
| 4706 | |
| 4707 return set_value_ascii(dest, header, header_mask, buf); | |
| 4708 } | |
| 4709 | |
| 4710 template <typename String, typename Header> | |
| 4711 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value) | |
| 4712 { | |
| 4713 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5); | |
| 4714 } | |
| 4715 | |
// Converts contents to the internal encoding (if needed) and parses the result into root.
// contents/size: input bytes; a null contents combined with a non-zero size yields status_io_error.
// encoding: requested encoding; the encoding actually used is reported in the returned result.
// is_mutable: forwarded to convert_buffer.
// own: when true, contents is released with xml_memory::deallocate once it is no longer needed
//      (on conversion failure, or after a conversion that produced a separate buffer).
// out_buffer: receives the parse buffer when own is set or when conversion produced a new buffer.
PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
{
	// check input buffer
	if (!contents && size) return make_parse_result(status_io_error);

	// get actual encoding
	xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);

	// if convert_buffer below throws bad_alloc, we still need to deallocate contents if we own it
	auto_deleter<void> contents_guard(own ? contents : 0, xml_memory::deallocate);

	// get private buffer
	char_t* buffer = 0;
	size_t length = 0;

	// coverity[var_deref_model]
	if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);

	// after this we either deallocate contents (below) or hold on to it via doc->buffer, so we don't need to guard it
	contents_guard.release();

	// delete original buffer if we performed a conversion
	if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);

	// grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
	if (own || buffer != contents) *out_buffer = buffer;

	// store buffer for offset_debug
	doc->buffer = buffer;

	// parse
	xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);

	// remember encoding
	res.encoding = buffer_encoding;

	return res;
}
| 4754 | |
// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
// Seeks to the end of file, reads the position, then seeks back to the start of the file.
// On success stores the length in out_result and returns status_ok.
// Returns status_io_error when the reported length is negative and status_out_of_memory when
// the length does not round-trip through size_t.
// NOTE(review): the return values of the seek calls are not checked; failures are only
// detected through the value reported by the tell call.
PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
{
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
	// there are 64-bit versions of fseek/ftell, let's use them
	typedef __int64 length_type;

	_fseeki64(file, 0, SEEK_END);
	length_type length = _ftelli64(file);
	_fseeki64(file, 0, SEEK_SET);
#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
	// there are 64-bit versions of fseek/ftell, let's use them
	typedef off64_t length_type;

	fseeko64(file, 0, SEEK_END);
	length_type length = ftello64(file);
	fseeko64(file, 0, SEEK_SET);
#else
	// if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
	typedef long length_type;

	fseek(file, 0, SEEK_END);
	length_type length = ftell(file);
	fseek(file, 0, SEEK_SET);
#endif

	// check for I/O errors
	if (length < 0) return status_io_error;

	// check for overflow
	size_t result = static_cast<size_t>(length);

	// if converting back does not reproduce the original value, size_t is too narrow for this file
	if (static_cast<length_type>(result) != length) return status_out_of_memory;

	// finalize
	out_result = result;

	return status_ok;
}
| 4794 | |
| 4795 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size | |
| 4796 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) | |
| 4797 { | |
| 4798 // We only need to zero-terminate if encoding conversion does not do it for us | |
| 4799 #ifdef PUGIXML_WCHAR_MODE | |
| 4800 xml_encoding wchar_encoding = get_wchar_encoding(); | |
| 4801 | |
| 4802 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) | |
| 4803 { | |
| 4804 size_t length = size / sizeof(char_t); | |
| 4805 | |
| 4806 static_cast<char_t*>(buffer)[length] = 0; | |
| 4807 return (length + 1) * sizeof(char_t); | |
| 4808 } | |
| 4809 #else | |
| 4810 if (encoding == encoding_utf8) | |
| 4811 { | |
| 4812 static_cast<char*>(buffer)[size] = 0; | |
| 4813 return size + 1; | |
| 4814 } | |
| 4815 #endif | |
| 4816 | |
| 4817 return size; | |
| 4818 } | |
| 4819 | |
| 4820 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) | |
| 4821 { | |
| 4822 if (!file) return make_parse_result(status_file_not_found); | |
| 4823 | |
| 4824 // get file size (can result in I/O errors) | |
| 4825 size_t size = 0; | |
| 4826 xml_parse_status size_status = get_file_size(file, size); | |
| 4827 if (size_status != status_ok) return make_parse_result(size_status); | |
| 4828 | |
| 4829 size_t max_suffix_size = sizeof(char_t); | |
| 4830 | |
| 4831 // allocate buffer for the whole file | |
| 4832 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size)); | |
| 4833 if (!contents) return make_parse_result(status_out_of_memory); | |
| 4834 | |
| 4835 // read file in memory | |
| 4836 size_t read_size = fread(contents, 1, size, file); | |
| 4837 | |
| 4838 if (read_size != size) | |
| 4839 { | |
| 4840 xml_memory::deallocate(contents); | |
| 4841 return make_parse_result(status_io_error); | |
| 4842 } | |
| 4843 | |
| 4844 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); | |
| 4845 | |
| 4846 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); | |
| 4847 } | |
| 4848 | |
| 4849 PUGI__FN void close_file(FILE* file) | |
| 4850 { | |
| 4851 fclose(file); | |
| 4852 } | |
| 4853 | |
| 4854 #ifndef PUGIXML_NO_STL | |
| 4855 template <typename T> struct xml_stream_chunk | |
| 4856 { | |
| 4857 static xml_stream_chunk* create() | |
| 4858 { | |
| 4859 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); | |
| 4860 if (!memory) return 0; | |
| 4861 | |
| 4862 return new (memory) xml_stream_chunk(); | |
| 4863 } | |
| 4864 | |
| 4865 static void destroy(xml_stream_chunk* chunk) | |
| 4866 { | |
| 4867 // free chunk chain | |
| 4868 while (chunk) | |
| 4869 { | |
| 4870 xml_stream_chunk* next_ = chunk->next; | |
| 4871 | |
| 4872 xml_memory::deallocate(chunk); | |
| 4873 | |
| 4874 chunk = next_; | |
| 4875 } | |
| 4876 } | |
| 4877 | |
| 4878 xml_stream_chunk(): next(0), size(0) | |
| 4879 { | |
| 4880 } | |
| 4881 | |
| 4882 xml_stream_chunk* next; | |
| 4883 size_t size; | |
| 4884 | |
| 4885 T data[xml_memory_page_size / sizeof(T)]; | |
| 4886 }; | |
| 4887 | |
// Reads stream until end-of-file without seeking: data is gathered in a linked list of
// xml_stream_chunk buffers and then copied into one contiguous allocation.
// On success *out_buffer holds the data (allocated with xml_memory::allocate, with
// sizeof(char_t) spare bytes after it) and *out_size its length in bytes.
// Returns status_out_of_memory when an allocation fails or the total size overflows, and
// status_io_error when the stream reports an error other than reaching end-of-file.
template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
{
	// the chunk list is registered with xml_stream_chunk<T>::destroy as its deleter
	auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);

	// read file to a chunk list
	size_t total = 0;
	xml_stream_chunk<T>* last = 0;

	while (!stream.eof())
	{
		// allocate new chunk
		xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
		if (!chunk) return status_out_of_memory;

		// append chunk to list
		if (last) last = last->next = chunk;
		else chunks.data = last = chunk;

		// read data to chunk
		stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
		chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);

		// read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
		if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;

		// guard against huge files (chunk size is small enough to make this overflow check work)
		if (total + chunk->size < total) return status_out_of_memory;
		total += chunk->size;
	}

	// extra room after the data, sized for one char_t
	size_t max_suffix_size = sizeof(char_t);

	// copy chunk list to a contiguous buffer
	char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
	if (!buffer) return status_out_of_memory;

	char* write = buffer;

	for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
	{
		assert(write + chunk->size <= buffer + total);
		memcpy(write, chunk->data, chunk->size);
		write += chunk->size;
	}

	assert(write == buffer + total);

	// return buffer
	*out_buffer = buffer;
	*out_size = total;

	return status_ok;
}
| 4941 | |
| 4942 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) | |
| 4943 { | |
| 4944 // get length of remaining data in stream | |
| 4945 typename std::basic_istream<T>::pos_type pos = stream.tellg(); | |
| 4946 stream.seekg(0, std::ios::end); | |
| 4947 std::streamoff length = stream.tellg() - pos; | |
| 4948 stream.seekg(pos); | |
| 4949 | |
| 4950 if (stream.fail() || pos < 0) return status_io_error; | |
| 4951 | |
| 4952 // guard against huge files | |
| 4953 size_t read_length = static_cast<size_t>(length); | |
| 4954 | |
| 4955 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory; | |
| 4956 | |
| 4957 size_t max_suffix_size = sizeof(char_t); | |
| 4958 | |
| 4959 // read stream data into memory (guard against stream exceptions with buffer holder) | |
| 4960 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); | |
| 4961 if (!buffer.data) return status_out_of_memory; | |
| 4962 | |
| 4963 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length)); | |
| 4964 | |
| 4965 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors | |
| 4966 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; | |
| 4967 | |
| 4968 // return buffer | |
| 4969 size_t actual_length = static_cast<size_t>(stream.gcount()); | |
| 4970 assert(actual_length <= read_length); | |
| 4971 | |
| 4972 *out_buffer = buffer.release(); | |
| 4973 *out_size = actual_length * sizeof(T); | |
| 4974 | |
| 4975 return status_ok; | |
| 4976 } | |
| 4977 | |
| 4978 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) | |
| 4979 { | |
| 4980 void* buffer = 0; | |
| 4981 size_t size = 0; | |
| 4982 xml_parse_status status = status_ok; | |
| 4983 | |
| 4984 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) | |
| 4985 if (stream.fail()) return make_parse_result(status_io_error); | |
| 4986 | |
| 4987 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) | |
| 4988 if (stream.tellg() < 0) | |
| 4989 { | |
| 4990 stream.clear(); // clear error flags that could be set by a failing tellg | |
| 4991 status = load_stream_data_noseek(stream, &buffer, &size); | |
| 4992 } | |
| 4993 else | |
| 4994 status = load_stream_data_seek(stream, &buffer, &size); | |
| 4995 | |
| 4996 if (status != status_ok) return make_parse_result(status); | |
| 4997 | |
| 4998 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); | |
| 4999 | |
| 5000 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); | |
| 5001 } | |
| 5002 #endif | |
| 5003 | |
| 5004 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) | |
// Opens the file at a wide-character path using the CRT's wide open functions.
// Returns the opened FILE*, or null when _wfopen_s reports an error (newer MSVC CRT);
// on the other toolchains covered by this branch the result of _wfopen is returned as is.
PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
{
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
	FILE* file = 0;
	return _wfopen_s(&file, path, mode) == 0 ? file : 0;
#else
	return _wfopen(path, mode);
#endif
}
| 5014 #else | |
| 5015 PUGI__FN char* convert_path_heap(const wchar_t* str) | |
| 5016 { | |
| 5017 assert(str); | |
| 5018 | |
| 5019 // first pass: get length in utf8 characters | |
| 5020 size_t length = strlength_wide(str); | |
| 5021 size_t size = as_utf8_begin(str, length); | |
| 5022 | |
| 5023 // allocate resulting string | |
| 5024 char* result = static_cast<char*>(xml_memory::allocate(size + 1)); | |
| 5025 if (!result) return 0; | |
| 5026 | |
| 5027 // second pass: convert to utf8 | |
| 5028 as_utf8_end(result, size, str, length); | |
| 5029 | |
| 5030 // zero-terminate | |
| 5031 result[size] = 0; | |
| 5032 | |
| 5033 return result; | |
| 5034 } | |
| 5035 | |
| 5036 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) | |
| 5037 { | |
| 5038 // there is no standard function to open wide paths, so our best bet is to try utf8 path | |
| 5039 char* path_utf8 = convert_path_heap(path); | |
| 5040 if (!path_utf8) return 0; | |
| 5041 | |
| 5042 // convert mode to ASCII (we mirror _wfopen interface) | |
| 5043 char mode_ascii[4] = {0}; | |
| 5044 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]); | |
| 5045 | |
| 5046 // try to open the utf8 path | |
| 5047 FILE* result = fopen(path_utf8, mode_ascii); | |
| 5048 | |
| 5049 // free dummy buffer | |
| 5050 xml_memory::deallocate(path_utf8); | |
| 5051 | |
| 5052 return result; | |
| 5053 } | |
| 5054 #endif | |
| 5055 | |
// Opens the file at path with the given fopen-style mode.
// With a newer MSVC CRT, fopen_s is used and null is returned when it reports an error;
// elsewhere the result of fopen is returned as is.
PUGI__FN FILE* open_file(const char* path, const char* mode)
{
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
	FILE* file = 0;
	return fopen_s(&file, path, mode) == 0 ? file : 0;
#else
	return fopen(path, mode);
#endif
}
| 5065 | |
| 5066 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) | |
| 5067 { | |
| 5068 if (!file) return false; | |
| 5069 | |
| 5070 xml_writer_file writer(file); | |
| 5071 doc.save(writer, indent, flags, encoding); | |
| 5072 | |
| 5073 return fflush(file) == 0 && ferror(file) == 0; | |
| 5074 } | |
| 5075 | |
| 5076 struct name_null_sentry | |
| 5077 { | |
| 5078 xml_node_struct* node; | |
| 5079 char_t* name; | |
| 5080 | |
| 5081 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) | |
| 5082 { | |
| 5083 node->name = 0; | |
| 5084 } | |
| 5085 | |
| 5086 ~name_null_sentry() | |
| 5087 { | |
| 5088 node->name = name; | |
| 5089 } | |
| 5090 }; | |
| 5091 PUGI__NS_END | |
| 5092 | |
| 5093 namespace pugi | |
| 5094 { | |
| 5095 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) | |
| 5096 { | |
| 5097 } | |
| 5098 | |
| 5099 PUGI__FN void xml_writer_file::write(const void* data, size_t size) | |
| 5100 { | |
| 5101 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file)); | |
| 5102 (void)!result; // unfortunately we can't do proper error handling here | |
| 5103 } | |
| 5104 | |
| 5105 #ifndef PUGIXML_NO_STL | |
| 5106 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0) | |
| 5107 { | |
| 5108 } | |
| 5109 | |
| 5110 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream) | |
| 5111 { | |
| 5112 } | |
| 5113 | |
| 5114 PUGI__FN void xml_writer_stream::write(const void* data, size_t size) | |
| 5115 { | |
| 5116 if (narrow_stream) | |
| 5117 { | |
| 5118 assert(!wide_stream); | |
| 5119 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size)); | |
| 5120 } | |
| 5121 else | |
| 5122 { | |
| 5123 assert(wide_stream); | |
| 5124 assert(size % sizeof(wchar_t) == 0); | |
| 5125 | |
| 5126 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t))); | |
| 5127 } | |
| 5128 } | |
| 5129 #endif | |
| 5130 | |
// Creates a walker whose depth counter starts at zero.
PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
{
}

// Nothing to release.
PUGI__FN xml_tree_walker::~xml_tree_walker()
{
}

// Returns the stored depth value.
PUGI__FN int xml_tree_walker::depth() const
{
	return _depth;
}

// Default implementation: ignores the node and returns true.
PUGI__FN bool xml_tree_walker::begin(xml_node&)
{
	return true;
}

// Default implementation: ignores the node and returns true.
PUGI__FN bool xml_tree_walker::end(xml_node&)
{
	return true;
}
| 5153 | |
// Creates an empty handle (no underlying attribute).
PUGI__FN xml_attribute::xml_attribute(): _attr(0)
{
}

// Creates a handle that refers to attr.
PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
{
}

// Only the address of this function is used: it is the non-null value returned by the
// unspecified_bool_type conversion below.
PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
{
}

// Converts to a non-null function pointer when the handle refers to an attribute, null otherwise.
PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
{
	return _attr ? unspecified_bool_xml_attribute : 0;
}

// True when the handle is empty.
PUGI__FN bool xml_attribute::operator!() const
{
	return !_attr;
}
| 5175 | |
// All comparison operators below compare the underlying attribute pointers,
// not the attribute names or values.
PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
{
	return (_attr == r._attr);
}

// Pointer inequality of the underlying attributes.
PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
{
	return (_attr != r._attr);
}

// Pointer ordering of the underlying attributes.
PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
{
	return (_attr < r._attr);
}

// Pointer ordering of the underlying attributes.
PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
{
	return (_attr > r._attr);
}

// Pointer ordering of the underlying attributes.
PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
{
	return (_attr <= r._attr);
}

// Pointer ordering of the underlying attributes.
PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
{
	return (_attr >= r._attr);
}
| 5205 | |
| 5206 PUGI__FN xml_attribute xml_attribute::next_attribute() const | |
| 5207 { | |
| 5208 if (!_attr) return xml_attribute(); | |
| 5209 return xml_attribute(_attr->next_attribute); | |
| 5210 } | |
| 5211 | |
| 5212 PUGI__FN xml_attribute xml_attribute::previous_attribute() const | |
| 5213 { | |
| 5214 if (!_attr) return xml_attribute(); | |
| 5215 xml_attribute_struct* prev = _attr->prev_attribute_c; | |
| 5216 return prev->next_attribute ? xml_attribute(prev) : xml_attribute(); | |
| 5217 } | |
| 5218 | |
| 5219 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const | |
| 5220 { | |
| 5221 if (!_attr) return def; | |
| 5222 const char_t* value = _attr->value; | |
| 5223 return value ? value : def; | |
| 5224 } | |
| 5225 | |
| 5226 PUGI__FN int xml_attribute::as_int(int def) const | |
| 5227 { | |
| 5228 if (!_attr) return def; | |
| 5229 const char_t* value = _attr->value; | |
| 5230 return value ? impl::get_value_int(value) : def; | |
| 5231 } | |
| 5232 | |
| 5233 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const | |
| 5234 { | |
| 5235 if (!_attr) return def; | |
| 5236 const char_t* value = _attr->value; | |
| 5237 return value ? impl::get_value_uint(value) : def; | |
| 5238 } | |
| 5239 | |
| 5240 PUGI__FN double xml_attribute::as_double(double def) const | |
| 5241 { | |
| 5242 if (!_attr) return def; | |
| 5243 const char_t* value = _attr->value; | |
| 5244 return value ? impl::get_value_double(value) : def; | |
| 5245 } | |
| 5246 | |
| 5247 PUGI__FN float xml_attribute::as_float(float def) const | |
| 5248 { | |
| 5249 if (!_attr) return def; | |
| 5250 const char_t* value = _attr->value; | |
| 5251 return value ? impl::get_value_float(value) : def; | |
| 5252 } | |
| 5253 | |
| 5254 PUGI__FN bool xml_attribute::as_bool(bool def) const | |
| 5255 { | |
| 5256 if (!_attr) return def; | |
| 5257 const char_t* value = _attr->value; | |
| 5258 return value ? impl::get_value_bool(value) : def; | |
| 5259 } | |
| 5260 | |
| 5261 #ifdef PUGIXML_HAS_LONG_LONG | |
| 5262 PUGI__FN long long xml_attribute::as_llong(long long def) const | |
| 5263 { | |
| 5264 if (!_attr) return def; | |
| 5265 const char_t* value = _attr->value; | |
| 5266 return value ? impl::get_value_llong(value) : def; | |
| 5267 } | |
| 5268 | |
| 5269 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const | |
| 5270 { | |
| 5271 if (!_attr) return def; | |
| 5272 const char_t* value = _attr->value; | |
| 5273 return value ? impl::get_value_ullong(value) : def; | |
| 5274 } | |
| 5275 #endif | |
| 5276 | |
| 5277 PUGI__FN bool xml_attribute::empty() const | |
| 5278 { | |
| 5279 return !_attr; | |
| 5280 } | |
| 5281 | |
| 5282 PUGI__FN const char_t* xml_attribute::name() const | |
| 5283 { | |
| 5284 if (!_attr) return PUGIXML_TEXT(""); | |
| 5285 const char_t* name = _attr->name; | |
| 5286 return name ? name : PUGIXML_TEXT(""); | |
| 5287 } | |
| 5288 | |
| 5289 PUGI__FN const char_t* xml_attribute::value() const | |
| 5290 { | |
| 5291 if (!_attr) return PUGIXML_TEXT(""); | |
| 5292 const char_t* value = _attr->value; | |
| 5293 return value ? value : PUGIXML_TEXT(""); | |
| 5294 } | |
| 5295 | |
| 5296 PUGI__FN size_t xml_attribute::hash_value() const | |
| 5297 { | |
| 5298 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct)); | |
| 5299 } | |
| 5300 | |
| 5301 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const | |
| 5302 { | |
| 5303 return _attr; | |
| 5304 } | |
| 5305 | |
// Assignment operators: each one forwards rhs to the matching set_value overload, discards
// its success flag and returns *this.
PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
{
	set_value(rhs);
	return *this;
}

// Assigns an int via set_value.
PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
{
	set_value(rhs);
	return *this;
}

// Assigns an unsigned int via set_value.
PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
{
	set_value(rhs);
	return *this;
}

// Assigns a long via set_value.
PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
{
	set_value(rhs);
	return *this;
}

// Assigns an unsigned long via set_value.
PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
{
	set_value(rhs);
	return *this;
}

// Assigns a double via set_value.
PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
{
	set_value(rhs);
	return *this;
}

// Assigns a float via set_value.
PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
{
	set_value(rhs);
	return *this;
}

// Assigns a bool via set_value.
PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
{
	set_value(rhs);
	return *this;
}

#ifdef PUGIXML_HAS_LONG_LONG
// Assigns a long long via set_value.
PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
{
	set_value(rhs);
	return *this;
}

// Assigns an unsigned long long via set_value.
PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
{
	set_value(rhs);
	return *this;
}
#endif
| 5367 | |
| 5368 PUGI__FN bool xml_attribute::set_name(const char_t* rhs) | |
| 5369 { | |
| 5370 if (!_attr) return false; | |
| 5371 | |
| 5372 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); | |
| 5373 } | |
| 5374 | |
| 5375 PUGI__FN bool xml_attribute::set_value(const char_t* rhs, size_t sz) | |
| 5376 { | |
| 5377 if (!_attr) return false; | |
| 5378 | |
| 5379 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, sz); | |
| 5380 } | |
| 5381 | |
| 5382 PUGI__FN bool xml_attribute::set_value(const char_t* rhs) | |
| 5383 { | |
| 5384 if (!_attr) return false; | |
| 5385 | |
| 5386 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); | |
| 5387 } | |
| 5388 | |
| 5389 PUGI__FN bool xml_attribute::set_value(int rhs) | |
| 5390 { | |
| 5391 if (!_attr) return false; | |
| 5392 | |
| 5393 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); | |
| 5394 } | |
| 5395 | |
| 5396 PUGI__FN bool xml_attribute::set_value(unsigned int rhs) | |
| 5397 { | |
| 5398 if (!_attr) return false; | |
| 5399 | |
| 5400 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); | |
| 5401 } | |
| 5402 | |
| 5403 PUGI__FN bool xml_attribute::set_value(long rhs) | |
| 5404 { | |
| 5405 if (!_attr) return false; | |
| 5406 | |
| 5407 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); | |
| 5408 } | |
| 5409 | |
| 5410 PUGI__FN bool xml_attribute::set_value(unsigned long rhs) | |
| 5411 { | |
| 5412 if (!_attr) return false; | |
| 5413 | |
| 5414 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); | |
| 5415 } | |
| 5416 | |
| 5417 PUGI__FN bool xml_attribute::set_value(double rhs) | |
| 5418 { | |
| 5419 if (!_attr) return false; | |
| 5420 | |
| 5421 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision); | |
| 5422 } | |
| 5423 | |
| 5424 PUGI__FN bool xml_attribute::set_value(double rhs, int precision) | |
| 5425 { | |
| 5426 if (!_attr) return false; | |
| 5427 | |
| 5428 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); | |
| 5429 } | |
| 5430 | |
| 5431 PUGI__FN bool xml_attribute::set_value(float rhs) | |
| 5432 { | |
| 5433 if (!_attr) return false; | |
| 5434 | |
| 5435 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision); | |
| 5436 } | |
| 5437 | |
| 5438 PUGI__FN bool xml_attribute::set_value(float rhs, int precision) | |
| 5439 { | |
| 5440 if (!_attr) return false; | |
| 5441 | |
| 5442 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); | |
| 5443 } | |
| 5444 | |
| 5445 PUGI__FN bool xml_attribute::set_value(bool rhs) | |
| 5446 { | |
| 5447 if (!_attr) return false; | |
| 5448 | |
| 5449 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); | |
| 5450 } | |
| 5451 | |
#ifdef PUGIXML_HAS_LONG_LONG
// Sets the value from a long long: the number is passed on as its unsigned bit pattern plus a
// negative flag. Returns false for an empty handle.
PUGI__FN bool xml_attribute::set_value(long long rhs)
{
	if (_attr == 0) return false;

	const bool negative = rhs < 0;
	const unsigned long long bits = static_cast<unsigned long long>(rhs);

	return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, bits, negative);
}

// Sets the value from an unsigned long long. Returns false for an empty handle.
PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
{
	if (_attr == 0) return false;

	return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
}
#endif
| 5467 | |
#ifdef __BORLANDC__
// Borland-only overloads: combine the boolean state of an attribute handle with a plain bool.
// Logical AND of the handle's boolean conversion and rhs.
PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
{
	return (bool)lhs && rhs;
}

// Logical OR of the handle's boolean conversion and rhs.
PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
{
	return (bool)lhs || rhs;
}
#endif
| 5479 | |
// Creates an empty handle (no underlying node).
PUGI__FN xml_node::xml_node(): _root(0)
{
}

// Creates a handle that refers to p.
PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
{
}

// Only the address of this function is used: it is the non-null value returned by the
// unspecified_bool_type conversion below.
PUGI__FN static void unspecified_bool_xml_node(xml_node***)
{
}

// Converts to a non-null function pointer when the handle refers to a node, null otherwise.
PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
{
	return _root ? unspecified_bool_xml_node : 0;
}

// True when the handle is empty.
PUGI__FN bool xml_node::operator!() const
{
	return !_root;
}
| 5501 | |
| 5502 PUGI__FN xml_node::iterator xml_node::begin() const | |
| 5503 { | |
| 5504 return iterator(_root ? _root->first_child + 0 : 0, _root); | |
| 5505 } | |
| 5506 | |
| 5507 PUGI__FN xml_node::iterator xml_node::end() const | |
| 5508 { | |
| 5509 return iterator(0, _root); | |
| 5510 } | |
| 5511 | |
| 5512 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const | |
| 5513 { | |
| 5514 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); | |
| 5515 } | |
| 5516 | |
| 5517 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const | |
| 5518 { | |
| 5519 return attribute_iterator(0, _root); | |
| 5520 } | |
| 5521 | |
| 5522 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const | |
| 5523 { | |
| 5524 return xml_object_range<xml_node_iterator>(begin(), end()); | |
| 5525 } | |
| 5526 | |
| 5527 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const | |
| 5528 { | |
| 5529 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); | |
| 5530 } | |
| 5531 | |
| 5532 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const | |
| 5533 { | |
| 5534 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end()); | |
| 5535 } | |
| 5536 | |
| 5537 PUGI__FN bool xml_node::operator==(const xml_node& r) const | |
| 5538 { | |
| 5539 return (_root == r._root); | |
| 5540 } | |
| 5541 | |
| 5542 PUGI__FN bool xml_node::operator!=(const xml_node& r) const | |
| 5543 { | |
| 5544 return (_root != r._root); | |
| 5545 } | |
| 5546 | |
| 5547 PUGI__FN bool xml_node::operator<(const xml_node& r) const | |
| 5548 { | |
| 5549 return (_root < r._root); | |
| 5550 } | |
| 5551 | |
| 5552 PUGI__FN bool xml_node::operator>(const xml_node& r) const | |
| 5553 { | |
| 5554 return (_root > r._root); | |
| 5555 } | |
| 5556 | |
| 5557 PUGI__FN bool xml_node::operator<=(const xml_node& r) const | |
| 5558 { | |
| 5559 return (_root <= r._root); | |
| 5560 } | |
| 5561 | |
| 5562 PUGI__FN bool xml_node::operator>=(const xml_node& r) const | |
| 5563 { | |
| 5564 return (_root >= r._root); | |
| 5565 } | |
| 5566 | |
| 5567 PUGI__FN bool xml_node::empty() const | |
| 5568 { | |
| 5569 return !_root; | |
| 5570 } | |
| 5571 | |
| 5572 PUGI__FN const char_t* xml_node::name() const | |
| 5573 { | |
| 5574 if (!_root) return PUGIXML_TEXT(""); | |
| 5575 const char_t* name = _root->name; | |
| 5576 return name ? name : PUGIXML_TEXT(""); | |
| 5577 } | |
| 5578 | |
| 5579 PUGI__FN xml_node_type xml_node::type() const | |
| 5580 { | |
| 5581 return _root ? PUGI__NODETYPE(_root) : node_null; | |
| 5582 } | |
| 5583 | |
| 5584 PUGI__FN const char_t* xml_node::value() const | |
| 5585 { | |
| 5586 if (!_root) return PUGIXML_TEXT(""); | |
| 5587 const char_t* value = _root->value; | |
| 5588 return value ? value : PUGIXML_TEXT(""); | |
| 5589 } | |
| 5590 | |
| 5591 PUGI__FN xml_node xml_node::child(const char_t* name_) const | |
| 5592 { | |
| 5593 if (!_root) return xml_node(); | |
| 5594 | |
| 5595 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
| 5596 { | |
| 5597 const char_t* iname = i->name; | |
| 5598 if (iname && impl::strequal(name_, iname)) | |
| 5599 return xml_node(i); | |
| 5600 } | |
| 5601 | |
| 5602 return xml_node(); | |
| 5603 } | |
| 5604 | |
| 5605 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const | |
| 5606 { | |
| 5607 if (!_root) return xml_attribute(); | |
| 5608 | |
| 5609 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) | |
| 5610 { | |
| 5611 const char_t* iname = i->name; | |
| 5612 if (iname && impl::strequal(name_, iname)) | |
| 5613 return xml_attribute(i); | |
| 5614 } | |
| 5615 | |
| 5616 return xml_attribute(); | |
| 5617 } | |
| 5618 | |
| 5619 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const | |
| 5620 { | |
| 5621 if (!_root) return xml_node(); | |
| 5622 | |
| 5623 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) | |
| 5624 { | |
| 5625 const char_t* iname = i->name; | |
| 5626 if (iname && impl::strequal(name_, iname)) | |
| 5627 return xml_node(i); | |
| 5628 } | |
| 5629 | |
| 5630 return xml_node(); | |
| 5631 } | |
| 5632 | |
| 5633 PUGI__FN xml_node xml_node::next_sibling() const | |
| 5634 { | |
| 5635 return _root ? xml_node(_root->next_sibling) : xml_node(); | |
| 5636 } | |
| 5637 | |
| 5638 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const | |
| 5639 { | |
| 5640 if (!_root) return xml_node(); | |
| 5641 | |
| 5642 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) | |
| 5643 { | |
| 5644 const char_t* iname = i->name; | |
| 5645 if (iname && impl::strequal(name_, iname)) | |
| 5646 return xml_node(i); | |
| 5647 } | |
| 5648 | |
| 5649 return xml_node(); | |
| 5650 } | |
| 5651 | |
| 5652 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const | |
| 5653 { | |
| 5654 xml_attribute_struct* hint = hint_._attr; | |
| 5655 | |
| 5656 // if hint is not an attribute of node, behavior is not defined | |
| 5657 assert(!hint || (_root && impl::is_attribute_of(hint, _root))); | |
| 5658 | |
| 5659 if (!_root) return xml_attribute(); | |
| 5660 | |
| 5661 // optimistically search from hint up until the end | |
| 5662 for (xml_attribute_struct* i = hint; i; i = i->next_attribute) | |
| 5663 { | |
| 5664 const char_t* iname = i->name; | |
| 5665 if (iname && impl::strequal(name_, iname)) | |
| 5666 { | |
| 5667 // update hint to maximize efficiency of searching for consecutive attributes | |
| 5668 hint_._attr = i->next_attribute; | |
| 5669 | |
| 5670 return xml_attribute(i); | |
| 5671 } | |
| 5672 } | |
| 5673 | |
| 5674 // wrap around and search from the first attribute until the hint | |
| 5675 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails | |
| 5676 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) | |
| 5677 { | |
| 5678 const char_t* jname = j->name; | |
| 5679 if (jname && impl::strequal(name_, jname)) | |
| 5680 { | |
| 5681 // update hint to maximize efficiency of searching for consecutive attributes | |
| 5682 hint_._attr = j->next_attribute; | |
| 5683 | |
| 5684 return xml_attribute(j); | |
| 5685 } | |
| 5686 } | |
| 5687 | |
| 5688 return xml_attribute(); | |
| 5689 } | |
| 5690 | |
| 5691 PUGI__FN xml_node xml_node::previous_sibling() const | |
| 5692 { | |
| 5693 if (!_root) return xml_node(); | |
| 5694 xml_node_struct* prev = _root->prev_sibling_c; | |
| 5695 return prev->next_sibling ? xml_node(prev) : xml_node(); | |
| 5696 } | |
| 5697 | |
| 5698 PUGI__FN xml_node xml_node::parent() const | |
| 5699 { | |
| 5700 return _root ? xml_node(_root->parent) : xml_node(); | |
| 5701 } | |
| 5702 | |
| 5703 PUGI__FN xml_node xml_node::root() const | |
| 5704 { | |
| 5705 return _root ? xml_node(&impl::get_document(_root)) : xml_node(); | |
| 5706 } | |
| 5707 | |
| 5708 PUGI__FN xml_text xml_node::text() const | |
| 5709 { | |
| 5710 return xml_text(_root); | |
| 5711 } | |
| 5712 | |
| 5713 PUGI__FN const char_t* xml_node::child_value() const | |
| 5714 { | |
| 5715 if (!_root) return PUGIXML_TEXT(""); | |
| 5716 | |
| 5717 // element nodes can have value if parse_embed_pcdata was used | |
| 5718 if (PUGI__NODETYPE(_root) == node_element && _root->value) | |
| 5719 return _root->value; | |
| 5720 | |
| 5721 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
| 5722 { | |
| 5723 const char_t* ivalue = i->value; | |
| 5724 if (impl::is_text_node(i) && ivalue) | |
| 5725 return ivalue; | |
| 5726 } | |
| 5727 | |
| 5728 return PUGIXML_TEXT(""); | |
| 5729 } | |
| 5730 | |
| 5731 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const | |
| 5732 { | |
| 5733 return child(name_).child_value(); | |
| 5734 } | |
| 5735 | |
| 5736 PUGI__FN xml_attribute xml_node::first_attribute() const | |
| 5737 { | |
| 5738 if (!_root) return xml_attribute(); | |
| 5739 return xml_attribute(_root->first_attribute); | |
| 5740 } | |
| 5741 | |
| 5742 PUGI__FN xml_attribute xml_node::last_attribute() const | |
| 5743 { | |
| 5744 if (!_root) return xml_attribute(); | |
| 5745 xml_attribute_struct* first = _root->first_attribute; | |
| 5746 return first ? xml_attribute(first->prev_attribute_c) : xml_attribute(); | |
| 5747 } | |
| 5748 | |
| 5749 PUGI__FN xml_node xml_node::first_child() const | |
| 5750 { | |
| 5751 if (!_root) return xml_node(); | |
| 5752 return xml_node(_root->first_child); | |
| 5753 } | |
| 5754 | |
| 5755 PUGI__FN xml_node xml_node::last_child() const | |
| 5756 { | |
| 5757 if (!_root) return xml_node(); | |
| 5758 xml_node_struct* first = _root->first_child; | |
| 5759 return first ? xml_node(first->prev_sibling_c) : xml_node(); | |
| 5760 } | |
| 5761 | |
| 5762 PUGI__FN bool xml_node::set_name(const char_t* rhs) | |
| 5763 { | |
| 5764 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; | |
| 5765 | |
| 5766 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) | |
| 5767 return false; | |
| 5768 | |
| 5769 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); | |
| 5770 } | |
| 5771 | |
| 5772 PUGI__FN bool xml_node::set_value(const char_t* rhs, size_t sz) | |
| 5773 { | |
| 5774 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; | |
| 5775 | |
| 5776 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) | |
| 5777 return false; | |
| 5778 | |
| 5779 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, sz); | |
| 5780 } | |
| 5781 | |
| 5782 PUGI__FN bool xml_node::set_value(const char_t* rhs) | |
| 5783 { | |
| 5784 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; | |
| 5785 | |
| 5786 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) | |
| 5787 return false; | |
| 5788 | |
| 5789 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); | |
| 5790 } | |
| 5791 | |
| 5792 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) | |
| 5793 { | |
| 5794 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
| 5795 | |
| 5796 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5797 if (!alloc.reserve()) return xml_attribute(); | |
| 5798 | |
| 5799 xml_attribute a(impl::allocate_attribute(alloc)); | |
| 5800 if (!a) return xml_attribute(); | |
| 5801 | |
| 5802 impl::append_attribute(a._attr, _root); | |
| 5803 | |
| 5804 a.set_name(name_); | |
| 5805 | |
| 5806 return a; | |
| 5807 } | |
| 5808 | |
| 5809 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) | |
| 5810 { | |
| 5811 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
| 5812 | |
| 5813 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5814 if (!alloc.reserve()) return xml_attribute(); | |
| 5815 | |
| 5816 xml_attribute a(impl::allocate_attribute(alloc)); | |
| 5817 if (!a) return xml_attribute(); | |
| 5818 | |
| 5819 impl::prepend_attribute(a._attr, _root); | |
| 5820 | |
| 5821 a.set_name(name_); | |
| 5822 | |
| 5823 return a; | |
| 5824 } | |
| 5825 | |
| 5826 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) | |
| 5827 { | |
| 5828 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
| 5829 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
| 5830 | |
| 5831 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5832 if (!alloc.reserve()) return xml_attribute(); | |
| 5833 | |
| 5834 xml_attribute a(impl::allocate_attribute(alloc)); | |
| 5835 if (!a) return xml_attribute(); | |
| 5836 | |
| 5837 impl::insert_attribute_after(a._attr, attr._attr, _root); | |
| 5838 | |
| 5839 a.set_name(name_); | |
| 5840 | |
| 5841 return a; | |
| 5842 } | |
| 5843 | |
| 5844 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) | |
| 5845 { | |
| 5846 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
| 5847 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
| 5848 | |
| 5849 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5850 if (!alloc.reserve()) return xml_attribute(); | |
| 5851 | |
| 5852 xml_attribute a(impl::allocate_attribute(alloc)); | |
| 5853 if (!a) return xml_attribute(); | |
| 5854 | |
| 5855 impl::insert_attribute_before(a._attr, attr._attr, _root); | |
| 5856 | |
| 5857 a.set_name(name_); | |
| 5858 | |
| 5859 return a; | |
| 5860 } | |
| 5861 | |
| 5862 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) | |
| 5863 { | |
| 5864 if (!proto) return xml_attribute(); | |
| 5865 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
| 5866 | |
| 5867 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5868 if (!alloc.reserve()) return xml_attribute(); | |
| 5869 | |
| 5870 xml_attribute a(impl::allocate_attribute(alloc)); | |
| 5871 if (!a) return xml_attribute(); | |
| 5872 | |
| 5873 impl::append_attribute(a._attr, _root); | |
| 5874 impl::node_copy_attribute(a._attr, proto._attr); | |
| 5875 | |
| 5876 return a; | |
| 5877 } | |
| 5878 | |
| 5879 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) | |
| 5880 { | |
| 5881 if (!proto) return xml_attribute(); | |
| 5882 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
| 5883 | |
| 5884 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5885 if (!alloc.reserve()) return xml_attribute(); | |
| 5886 | |
| 5887 xml_attribute a(impl::allocate_attribute(alloc)); | |
| 5888 if (!a) return xml_attribute(); | |
| 5889 | |
| 5890 impl::prepend_attribute(a._attr, _root); | |
| 5891 impl::node_copy_attribute(a._attr, proto._attr); | |
| 5892 | |
| 5893 return a; | |
| 5894 } | |
| 5895 | |
| 5896 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) | |
| 5897 { | |
| 5898 if (!proto) return xml_attribute(); | |
| 5899 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
| 5900 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
| 5901 | |
| 5902 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5903 if (!alloc.reserve()) return xml_attribute(); | |
| 5904 | |
| 5905 xml_attribute a(impl::allocate_attribute(alloc)); | |
| 5906 if (!a) return xml_attribute(); | |
| 5907 | |
| 5908 impl::insert_attribute_after(a._attr, attr._attr, _root); | |
| 5909 impl::node_copy_attribute(a._attr, proto._attr); | |
| 5910 | |
| 5911 return a; | |
| 5912 } | |
| 5913 | |
| 5914 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) | |
| 5915 { | |
| 5916 if (!proto) return xml_attribute(); | |
| 5917 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
| 5918 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
| 5919 | |
| 5920 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5921 if (!alloc.reserve()) return xml_attribute(); | |
| 5922 | |
| 5923 xml_attribute a(impl::allocate_attribute(alloc)); | |
| 5924 if (!a) return xml_attribute(); | |
| 5925 | |
| 5926 impl::insert_attribute_before(a._attr, attr._attr, _root); | |
| 5927 impl::node_copy_attribute(a._attr, proto._attr); | |
| 5928 | |
| 5929 return a; | |
| 5930 } | |
| 5931 | |
| 5932 PUGI__FN xml_node xml_node::append_child(xml_node_type type_) | |
| 5933 { | |
| 5934 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
| 5935 | |
| 5936 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5937 if (!alloc.reserve()) return xml_node(); | |
| 5938 | |
| 5939 xml_node n(impl::allocate_node(alloc, type_)); | |
| 5940 if (!n) return xml_node(); | |
| 5941 | |
| 5942 impl::append_node(n._root, _root); | |
| 5943 | |
| 5944 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
| 5945 | |
| 5946 return n; | |
| 5947 } | |
| 5948 | |
| 5949 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) | |
| 5950 { | |
| 5951 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
| 5952 | |
| 5953 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5954 if (!alloc.reserve()) return xml_node(); | |
| 5955 | |
| 5956 xml_node n(impl::allocate_node(alloc, type_)); | |
| 5957 if (!n) return xml_node(); | |
| 5958 | |
| 5959 impl::prepend_node(n._root, _root); | |
| 5960 | |
| 5961 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
| 5962 | |
| 5963 return n; | |
| 5964 } | |
| 5965 | |
| 5966 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) | |
| 5967 { | |
| 5968 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
| 5969 if (!node._root || node._root->parent != _root) return xml_node(); | |
| 5970 | |
| 5971 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5972 if (!alloc.reserve()) return xml_node(); | |
| 5973 | |
| 5974 xml_node n(impl::allocate_node(alloc, type_)); | |
| 5975 if (!n) return xml_node(); | |
| 5976 | |
| 5977 impl::insert_node_before(n._root, node._root); | |
| 5978 | |
| 5979 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
| 5980 | |
| 5981 return n; | |
| 5982 } | |
| 5983 | |
| 5984 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) | |
| 5985 { | |
| 5986 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
| 5987 if (!node._root || node._root->parent != _root) return xml_node(); | |
| 5988 | |
| 5989 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 5990 if (!alloc.reserve()) return xml_node(); | |
| 5991 | |
| 5992 xml_node n(impl::allocate_node(alloc, type_)); | |
| 5993 if (!n) return xml_node(); | |
| 5994 | |
| 5995 impl::insert_node_after(n._root, node._root); | |
| 5996 | |
| 5997 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
| 5998 | |
| 5999 return n; | |
| 6000 } | |
| 6001 | |
| 6002 PUGI__FN xml_node xml_node::append_child(const char_t* name_) | |
| 6003 { | |
| 6004 xml_node result = append_child(node_element); | |
| 6005 | |
| 6006 result.set_name(name_); | |
| 6007 | |
| 6008 return result; | |
| 6009 } | |
| 6010 | |
| 6011 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) | |
| 6012 { | |
| 6013 xml_node result = prepend_child(node_element); | |
| 6014 | |
| 6015 result.set_name(name_); | |
| 6016 | |
| 6017 return result; | |
| 6018 } | |
| 6019 | |
| 6020 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) | |
| 6021 { | |
| 6022 xml_node result = insert_child_after(node_element, node); | |
| 6023 | |
| 6024 result.set_name(name_); | |
| 6025 | |
| 6026 return result; | |
| 6027 } | |
| 6028 | |
| 6029 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) | |
| 6030 { | |
| 6031 xml_node result = insert_child_before(node_element, node); | |
| 6032 | |
| 6033 result.set_name(name_); | |
| 6034 | |
| 6035 return result; | |
| 6036 } | |
| 6037 | |
| 6038 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) | |
| 6039 { | |
| 6040 xml_node_type type_ = proto.type(); | |
| 6041 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
| 6042 | |
| 6043 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6044 if (!alloc.reserve()) return xml_node(); | |
| 6045 | |
| 6046 xml_node n(impl::allocate_node(alloc, type_)); | |
| 6047 if (!n) return xml_node(); | |
| 6048 | |
| 6049 impl::append_node(n._root, _root); | |
| 6050 impl::node_copy_tree(n._root, proto._root); | |
| 6051 | |
| 6052 return n; | |
| 6053 } | |
| 6054 | |
| 6055 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto) | |
| 6056 { | |
| 6057 xml_node_type type_ = proto.type(); | |
| 6058 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
| 6059 | |
| 6060 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6061 if (!alloc.reserve()) return xml_node(); | |
| 6062 | |
| 6063 xml_node n(impl::allocate_node(alloc, type_)); | |
| 6064 if (!n) return xml_node(); | |
| 6065 | |
| 6066 impl::prepend_node(n._root, _root); | |
| 6067 impl::node_copy_tree(n._root, proto._root); | |
| 6068 | |
| 6069 return n; | |
| 6070 } | |
| 6071 | |
| 6072 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) | |
| 6073 { | |
| 6074 xml_node_type type_ = proto.type(); | |
| 6075 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
| 6076 if (!node._root || node._root->parent != _root) return xml_node(); | |
| 6077 | |
| 6078 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6079 if (!alloc.reserve()) return xml_node(); | |
| 6080 | |
| 6081 xml_node n(impl::allocate_node(alloc, type_)); | |
| 6082 if (!n) return xml_node(); | |
| 6083 | |
| 6084 impl::insert_node_after(n._root, node._root); | |
| 6085 impl::node_copy_tree(n._root, proto._root); | |
| 6086 | |
| 6087 return n; | |
| 6088 } | |
| 6089 | |
| 6090 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) | |
| 6091 { | |
| 6092 xml_node_type type_ = proto.type(); | |
| 6093 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
| 6094 if (!node._root || node._root->parent != _root) return xml_node(); | |
| 6095 | |
| 6096 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6097 if (!alloc.reserve()) return xml_node(); | |
| 6098 | |
| 6099 xml_node n(impl::allocate_node(alloc, type_)); | |
| 6100 if (!n) return xml_node(); | |
| 6101 | |
| 6102 impl::insert_node_before(n._root, node._root); | |
| 6103 impl::node_copy_tree(n._root, proto._root); | |
| 6104 | |
| 6105 return n; | |
| 6106 } | |
| 6107 | |
| 6108 PUGI__FN xml_node xml_node::append_move(const xml_node& moved) | |
| 6109 { | |
| 6110 if (!impl::allow_move(*this, moved)) return xml_node(); | |
| 6111 | |
| 6112 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6113 if (!alloc.reserve()) return xml_node(); | |
| 6114 | |
| 6115 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
| 6116 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
| 6117 | |
| 6118 impl::remove_node(moved._root); | |
| 6119 impl::append_node(moved._root, _root); | |
| 6120 | |
| 6121 return moved; | |
| 6122 } | |
| 6123 | |
| 6124 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) | |
| 6125 { | |
| 6126 if (!impl::allow_move(*this, moved)) return xml_node(); | |
| 6127 | |
| 6128 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6129 if (!alloc.reserve()) return xml_node(); | |
| 6130 | |
| 6131 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
| 6132 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
| 6133 | |
| 6134 impl::remove_node(moved._root); | |
| 6135 impl::prepend_node(moved._root, _root); | |
| 6136 | |
| 6137 return moved; | |
| 6138 } | |
| 6139 | |
| 6140 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) | |
| 6141 { | |
| 6142 if (!impl::allow_move(*this, moved)) return xml_node(); | |
| 6143 if (!node._root || node._root->parent != _root) return xml_node(); | |
| 6144 if (moved._root == node._root) return xml_node(); | |
| 6145 | |
| 6146 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6147 if (!alloc.reserve()) return xml_node(); | |
| 6148 | |
| 6149 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
| 6150 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
| 6151 | |
| 6152 impl::remove_node(moved._root); | |
| 6153 impl::insert_node_after(moved._root, node._root); | |
| 6154 | |
| 6155 return moved; | |
| 6156 } | |
| 6157 | |
| 6158 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) | |
| 6159 { | |
| 6160 if (!impl::allow_move(*this, moved)) return xml_node(); | |
| 6161 if (!node._root || node._root->parent != _root) return xml_node(); | |
| 6162 if (moved._root == node._root) return xml_node(); | |
| 6163 | |
| 6164 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6165 if (!alloc.reserve()) return xml_node(); | |
| 6166 | |
| 6167 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
| 6168 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
| 6169 | |
| 6170 impl::remove_node(moved._root); | |
| 6171 impl::insert_node_before(moved._root, node._root); | |
| 6172 | |
| 6173 return moved; | |
| 6174 } | |
| 6175 | |
| 6176 PUGI__FN bool xml_node::remove_attribute(const char_t* name_) | |
| 6177 { | |
| 6178 return remove_attribute(attribute(name_)); | |
| 6179 } | |
| 6180 | |
| 6181 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) | |
| 6182 { | |
| 6183 if (!_root || !a._attr) return false; | |
| 6184 if (!impl::is_attribute_of(a._attr, _root)) return false; | |
| 6185 | |
| 6186 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6187 if (!alloc.reserve()) return false; | |
| 6188 | |
| 6189 impl::remove_attribute(a._attr, _root); | |
| 6190 impl::destroy_attribute(a._attr, alloc); | |
| 6191 | |
| 6192 return true; | |
| 6193 } | |
| 6194 | |
| 6195 PUGI__FN bool xml_node::remove_attributes() | |
| 6196 { | |
| 6197 if (!_root) return false; | |
| 6198 | |
| 6199 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6200 if (!alloc.reserve()) return false; | |
| 6201 | |
| 6202 for (xml_attribute_struct* attr = _root->first_attribute; attr; ) | |
| 6203 { | |
| 6204 xml_attribute_struct* next = attr->next_attribute; | |
| 6205 | |
| 6206 impl::destroy_attribute(attr, alloc); | |
| 6207 | |
| 6208 attr = next; | |
| 6209 } | |
| 6210 | |
| 6211 _root->first_attribute = 0; | |
| 6212 | |
| 6213 return true; | |
| 6214 } | |
| 6215 | |
| 6216 PUGI__FN bool xml_node::remove_child(const char_t* name_) | |
| 6217 { | |
| 6218 return remove_child(child(name_)); | |
| 6219 } | |
| 6220 | |
| 6221 PUGI__FN bool xml_node::remove_child(const xml_node& n) | |
| 6222 { | |
| 6223 if (!_root || !n._root || n._root->parent != _root) return false; | |
| 6224 | |
| 6225 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6226 if (!alloc.reserve()) return false; | |
| 6227 | |
| 6228 impl::remove_node(n._root); | |
| 6229 impl::destroy_node(n._root, alloc); | |
| 6230 | |
| 6231 return true; | |
| 6232 } | |
| 6233 | |
| 6234 PUGI__FN bool xml_node::remove_children() | |
| 6235 { | |
| 6236 if (!_root) return false; | |
| 6237 | |
| 6238 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
| 6239 if (!alloc.reserve()) return false; | |
| 6240 | |
| 6241 for (xml_node_struct* cur = _root->first_child; cur; ) | |
| 6242 { | |
| 6243 xml_node_struct* next = cur->next_sibling; | |
| 6244 | |
| 6245 impl::destroy_node(cur, alloc); | |
| 6246 | |
| 6247 cur = next; | |
| 6248 } | |
| 6249 | |
| 6250 _root->first_child = 0; | |
| 6251 | |
| 6252 return true; | |
| 6253 } | |
| 6254 | |
| 6255 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
| 6256 { | |
| 6257 // append_buffer is only valid for elements/documents | |
| 6258 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); | |
| 6259 | |
| 6260 // get document node | |
| 6261 impl::xml_document_struct* doc = &impl::get_document(_root); | |
| 6262 | |
| 6263 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense | |
| 6264 doc->header |= impl::xml_memory_page_contents_shared_mask; | |
| 6265 | |
| 6266 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) | |
| 6267 impl::xml_memory_page* page = 0; | |
| 6268 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page)); | |
| 6269 (void)page; | |
| 6270 | |
| 6271 if (!extra) return impl::make_parse_result(status_out_of_memory); | |
| 6272 | |
| 6273 #ifdef PUGIXML_COMPACT | |
| 6274 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned | |
| 6275 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account | |
| 6276 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1)); | |
| 6277 #endif | |
| 6278 | |
| 6279 // add extra buffer to the list | |
| 6280 extra->buffer = 0; | |
| 6281 extra->next = doc->extra_buffers; | |
| 6282 doc->extra_buffers = extra; | |
| 6283 | |
| 6284 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level | |
| 6285 impl::name_null_sentry sentry(_root); | |
| 6286 | |
| 6287 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer); | |
| 6288 } | |
| 6289 | |
| 6290 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const | |
| 6291 { | |
| 6292 if (!_root) return xml_node(); | |
| 6293 | |
| 6294 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
| 6295 { | |
| 6296 const char_t* iname = i->name; | |
| 6297 if (iname && impl::strequal(name_, iname)) | |
| 6298 { | |
| 6299 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) | |
| 6300 { | |
| 6301 const char_t* aname = a->name; | |
| 6302 if (aname && impl::strequal(attr_name, aname)) | |
| 6303 { | |
| 6304 const char_t* avalue = a->value; | |
| 6305 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT(""))) | |
| 6306 return xml_node(i); | |
| 6307 } | |
| 6308 } | |
| 6309 } | |
| 6310 } | |
| 6311 | |
| 6312 return xml_node(); | |
| 6313 } | |
| 6314 | |
| 6315 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const | |
| 6316 { | |
| 6317 if (!_root) return xml_node(); | |
| 6318 | |
| 6319 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
| 6320 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) | |
| 6321 { | |
| 6322 const char_t* aname = a->name; | |
| 6323 if (aname && impl::strequal(attr_name, aname)) | |
| 6324 { | |
| 6325 const char_t* avalue = a->value; | |
| 6326 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT(""))) | |
| 6327 return xml_node(i); | |
| 6328 } | |
| 6329 } | |
| 6330 | |
| 6331 return xml_node(); | |
| 6332 } | |
| 6333 | |
| 6334 #ifndef PUGIXML_NO_STL | |
| 6335 PUGI__FN string_t xml_node::path(char_t delimiter) const | |
| 6336 { | |
| 6337 if (!_root) return string_t(); | |
| 6338 | |
| 6339 size_t offset = 0; | |
| 6340 | |
| 6341 for (xml_node_struct* i = _root; i; i = i->parent) | |
| 6342 { | |
| 6343 const char_t* iname = i->name; | |
| 6344 offset += (i != _root); | |
| 6345 offset += iname ? impl::strlength(iname) : 0; | |
| 6346 } | |
| 6347 | |
| 6348 string_t result; | |
| 6349 result.resize(offset); | |
| 6350 | |
| 6351 for (xml_node_struct* j = _root; j; j = j->parent) | |
| 6352 { | |
| 6353 if (j != _root) | |
| 6354 result[--offset] = delimiter; | |
| 6355 | |
| 6356 const char_t* jname = j->name; | |
| 6357 if (jname) | |
| 6358 { | |
| 6359 size_t length = impl::strlength(jname); | |
| 6360 | |
| 6361 offset -= length; | |
| 6362 memcpy(&result[offset], jname, length * sizeof(char_t)); | |
| 6363 } | |
| 6364 } | |
| 6365 | |
| 6366 assert(offset == 0); | |
| 6367 | |
| 6368 return result; | |
| 6369 } | |
| 6370 #endif | |
| 6371 | |
| 6372 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const | |
| 6373 { | |
| 6374 xml_node context = path_[0] == delimiter ? root() : *this; | |
| 6375 | |
| 6376 if (!context._root) return xml_node(); | |
| 6377 | |
| 6378 const char_t* path_segment = path_; | |
| 6379 | |
| 6380 while (*path_segment == delimiter) ++path_segment; | |
| 6381 | |
| 6382 const char_t* path_segment_end = path_segment; | |
| 6383 | |
| 6384 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; | |
| 6385 | |
| 6386 if (path_segment == path_segment_end) return context; | |
| 6387 | |
| 6388 const char_t* next_segment = path_segment_end; | |
| 6389 | |
| 6390 while (*next_segment == delimiter) ++next_segment; | |
| 6391 | |
| 6392 if (*path_segment == '.' && path_segment + 1 == path_segment_end) | |
| 6393 return context.first_element_by_path(next_segment, delimiter); | |
| 6394 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) | |
| 6395 return context.parent().first_element_by_path(next_segment, delimiter); | |
| 6396 else | |
| 6397 { | |
| 6398 for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling) | |
| 6399 { | |
| 6400 const char_t* jname = j->name; | |
| 6401 if (jname && impl::strequalrange(jname, path_segment, static_cast<size_t>(path_segment_end - path_segment))) | |
| 6402 { | |
| 6403 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); | |
| 6404 | |
| 6405 if (subsearch) return subsearch; | |
| 6406 } | |
| 6407 } | |
| 6408 | |
| 6409 return xml_node(); | |
| 6410 } | |
| 6411 } | |
| 6412 | |
| 6413 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) | |
| 6414 { | |
| 6415 walker._depth = -1; | |
| 6416 | |
| 6417 xml_node arg_begin(_root); | |
| 6418 if (!walker.begin(arg_begin)) return false; | |
| 6419 | |
| 6420 xml_node_struct* cur = _root ? _root->first_child + 0 : 0; | |
| 6421 | |
| 6422 if (cur) | |
| 6423 { | |
| 6424 ++walker._depth; | |
| 6425 | |
| 6426 do | |
| 6427 { | |
| 6428 xml_node arg_for_each(cur); | |
| 6429 if (!walker.for_each(arg_for_each)) | |
| 6430 return false; | |
| 6431 | |
| 6432 if (cur->first_child) | |
| 6433 { | |
| 6434 ++walker._depth; | |
| 6435 cur = cur->first_child; | |
| 6436 } | |
| 6437 else if (cur->next_sibling) | |
| 6438 cur = cur->next_sibling; | |
| 6439 else | |
| 6440 { | |
| 6441 while (!cur->next_sibling && cur != _root && cur->parent) | |
| 6442 { | |
| 6443 --walker._depth; | |
| 6444 cur = cur->parent; | |
| 6445 } | |
| 6446 | |
| 6447 if (cur != _root) | |
| 6448 cur = cur->next_sibling; | |
| 6449 } | |
| 6450 } | |
| 6451 while (cur && cur != _root); | |
| 6452 } | |
| 6453 | |
| 6454 assert(walker._depth == -1); | |
| 6455 | |
| 6456 xml_node arg_end(_root); | |
| 6457 return walker.end(arg_end); | |
| 6458 } | |
| 6459 | |
| 6460 PUGI__FN size_t xml_node::hash_value() const | |
| 6461 { | |
| 6462 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct)); | |
| 6463 } | |
| 6464 | |
| 6465 PUGI__FN xml_node_struct* xml_node::internal_object() const | |
| 6466 { | |
| 6467 return _root; | |
| 6468 } | |
| 6469 | |
| 6470 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const | |
| 6471 { | |
| 6472 if (!_root) return; | |
| 6473 | |
| 6474 impl::xml_buffered_writer buffered_writer(writer, encoding); | |
| 6475 | |
| 6476 impl::node_output(buffered_writer, _root, indent, flags, depth); | |
| 6477 | |
| 6478 buffered_writer.flush(); | |
| 6479 } | |
| 6480 | |
| 6481 #ifndef PUGIXML_NO_STL | |
| 6482 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const | |
| 6483 { | |
| 6484 xml_writer_stream writer(stream); | |
| 6485 | |
| 6486 print(writer, indent, flags, encoding, depth); | |
| 6487 } | |
| 6488 | |
| 6489 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const | |
| 6490 { | |
| 6491 xml_writer_stream writer(stream); | |
| 6492 | |
| 6493 print(writer, indent, flags, encoding_wchar, depth); | |
| 6494 } | |
| 6495 #endif | |
| 6496 | |
| 6497 PUGI__FN ptrdiff_t xml_node::offset_debug() const | |
| 6498 { | |
| 6499 if (!_root) return -1; | |
| 6500 | |
| 6501 impl::xml_document_struct& doc = impl::get_document(_root); | |
| 6502 | |
| 6503 // we can determine the offset reliably only if there is exactly once parse buffer | |
| 6504 if (!doc.buffer || doc.extra_buffers) return -1; | |
| 6505 | |
| 6506 switch (type()) | |
| 6507 { | |
| 6508 case node_document: | |
| 6509 return 0; | |
| 6510 | |
| 6511 case node_element: | |
| 6512 case node_declaration: | |
| 6513 case node_pi: | |
| 6514 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; | |
| 6515 | |
| 6516 case node_pcdata: | |
| 6517 case node_cdata: | |
| 6518 case node_comment: | |
| 6519 case node_doctype: | |
| 6520 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; | |
| 6521 | |
| 6522 default: | |
| 6523 assert(false && "Invalid node type"); // unreachable | |
| 6524 return -1; | |
| 6525 } | |
| 6526 } | |
| 6527 | |
| 6528 #ifdef __BORLANDC__ | |
| 6529 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) | |
| 6530 { | |
| 6531 return (bool)lhs && rhs; | |
| 6532 } | |
| 6533 | |
| 6534 PUGI__FN bool operator||(const xml_node& lhs, bool rhs) | |
| 6535 { | |
| 6536 return (bool)lhs || rhs; | |
| 6537 } | |
| 6538 #endif | |
| 6539 | |
| 6540 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) | |
| 6541 { | |
| 6542 } | |
| 6543 | |
| 6544 PUGI__FN xml_node_struct* xml_text::_data() const | |
| 6545 { | |
| 6546 if (!_root || impl::is_text_node(_root)) return _root; | |
| 6547 | |
| 6548 // element nodes can have value if parse_embed_pcdata was used | |
| 6549 if (PUGI__NODETYPE(_root) == node_element && _root->value) | |
| 6550 return _root; | |
| 6551 | |
| 6552 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) | |
| 6553 if (impl::is_text_node(node)) | |
| 6554 return node; | |
| 6555 | |
| 6556 return 0; | |
| 6557 } | |
| 6558 | |
| 6559 PUGI__FN xml_node_struct* xml_text::_data_new() | |
| 6560 { | |
| 6561 xml_node_struct* d = _data(); | |
| 6562 if (d) return d; | |
| 6563 | |
| 6564 return xml_node(_root).append_child(node_pcdata).internal_object(); | |
| 6565 } | |
| 6566 | |
| 6567 PUGI__FN xml_text::xml_text(): _root(0) | |
| 6568 { | |
| 6569 } | |
| 6570 | |
| 6571 PUGI__FN static void unspecified_bool_xml_text(xml_text***) | |
| 6572 { | |
| 6573 } | |
| 6574 | |
| 6575 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const | |
| 6576 { | |
| 6577 return _data() ? unspecified_bool_xml_text : 0; | |
| 6578 } | |
| 6579 | |
| 6580 PUGI__FN bool xml_text::operator!() const | |
| 6581 { | |
| 6582 return !_data(); | |
| 6583 } | |
| 6584 | |
| 6585 PUGI__FN bool xml_text::empty() const | |
| 6586 { | |
| 6587 return _data() == 0; | |
| 6588 } | |
| 6589 | |
| 6590 PUGI__FN const char_t* xml_text::get() const | |
| 6591 { | |
| 6592 xml_node_struct* d = _data(); | |
| 6593 if (!d) return PUGIXML_TEXT(""); | |
| 6594 const char_t* value = d->value; | |
| 6595 return value ? value : PUGIXML_TEXT(""); | |
| 6596 } | |
| 6597 | |
| 6598 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const | |
| 6599 { | |
| 6600 xml_node_struct* d = _data(); | |
| 6601 if (!d) return def; | |
| 6602 const char_t* value = d->value; | |
| 6603 return value ? value : def; | |
| 6604 } | |
| 6605 | |
| 6606 PUGI__FN int xml_text::as_int(int def) const | |
| 6607 { | |
| 6608 xml_node_struct* d = _data(); | |
| 6609 if (!d) return def; | |
| 6610 const char_t* value = d->value; | |
| 6611 return value ? impl::get_value_int(value) : def; | |
| 6612 } | |
| 6613 | |
| 6614 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const | |
| 6615 { | |
| 6616 xml_node_struct* d = _data(); | |
| 6617 if (!d) return def; | |
| 6618 const char_t* value = d->value; | |
| 6619 return value ? impl::get_value_uint(value) : def; | |
| 6620 } | |
| 6621 | |
| 6622 PUGI__FN double xml_text::as_double(double def) const | |
| 6623 { | |
| 6624 xml_node_struct* d = _data(); | |
| 6625 if (!d) return def; | |
| 6626 const char_t* value = d->value; | |
| 6627 return value ? impl::get_value_double(value) : def; | |
| 6628 } | |
| 6629 | |
| 6630 PUGI__FN float xml_text::as_float(float def) const | |
| 6631 { | |
| 6632 xml_node_struct* d = _data(); | |
| 6633 if (!d) return def; | |
| 6634 const char_t* value = d->value; | |
| 6635 return value ? impl::get_value_float(value) : def; | |
| 6636 } | |
| 6637 | |
| 6638 PUGI__FN bool xml_text::as_bool(bool def) const | |
| 6639 { | |
| 6640 xml_node_struct* d = _data(); | |
| 6641 if (!d) return def; | |
| 6642 const char_t* value = d->value; | |
| 6643 return value ? impl::get_value_bool(value) : def; | |
| 6644 } | |
| 6645 | |
| 6646 #ifdef PUGIXML_HAS_LONG_LONG | |
| 6647 PUGI__FN long long xml_text::as_llong(long long def) const | |
| 6648 { | |
| 6649 xml_node_struct* d = _data(); | |
| 6650 if (!d) return def; | |
| 6651 const char_t* value = d->value; | |
| 6652 return value ? impl::get_value_llong(value) : def; | |
| 6653 } | |
| 6654 | |
| 6655 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const | |
| 6656 { | |
| 6657 xml_node_struct* d = _data(); | |
| 6658 if (!d) return def; | |
| 6659 const char_t* value = d->value; | |
| 6660 return value ? impl::get_value_ullong(value) : def; | |
| 6661 } | |
| 6662 #endif | |
| 6663 | |
| 6664 PUGI__FN bool xml_text::set(const char_t* rhs, size_t sz) | |
| 6665 { | |
| 6666 xml_node_struct* dn = _data_new(); | |
| 6667 | |
| 6668 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, sz) : false; | |
| 6669 } | |
| 6670 | |
| 6671 PUGI__FN bool xml_text::set(const char_t* rhs) | |
| 6672 { | |
| 6673 xml_node_struct* dn = _data_new(); | |
| 6674 | |
| 6675 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; | |
| 6676 } | |
| 6677 | |
| 6678 PUGI__FN bool xml_text::set(int rhs) | |
| 6679 { | |
| 6680 xml_node_struct* dn = _data_new(); | |
| 6681 | |
| 6682 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; | |
| 6683 } | |
| 6684 | |
| 6685 PUGI__FN bool xml_text::set(unsigned int rhs) | |
| 6686 { | |
| 6687 xml_node_struct* dn = _data_new(); | |
| 6688 | |
| 6689 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; | |
| 6690 } | |
| 6691 | |
| 6692 PUGI__FN bool xml_text::set(long rhs) | |
| 6693 { | |
| 6694 xml_node_struct* dn = _data_new(); | |
| 6695 | |
| 6696 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; | |
| 6697 } | |
| 6698 | |
| 6699 PUGI__FN bool xml_text::set(unsigned long rhs) | |
| 6700 { | |
| 6701 xml_node_struct* dn = _data_new(); | |
| 6702 | |
| 6703 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; | |
| 6704 } | |
| 6705 | |
| 6706 PUGI__FN bool xml_text::set(float rhs) | |
| 6707 { | |
| 6708 xml_node_struct* dn = _data_new(); | |
| 6709 | |
| 6710 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false; | |
| 6711 } | |
| 6712 | |
| 6713 PUGI__FN bool xml_text::set(float rhs, int precision) | |
| 6714 { | |
| 6715 xml_node_struct* dn = _data_new(); | |
| 6716 | |
| 6717 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; | |
| 6718 } | |
| 6719 | |
| 6720 PUGI__FN bool xml_text::set(double rhs) | |
| 6721 { | |
| 6722 xml_node_struct* dn = _data_new(); | |
| 6723 | |
| 6724 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false; | |
| 6725 } | |
| 6726 | |
| 6727 PUGI__FN bool xml_text::set(double rhs, int precision) | |
| 6728 { | |
| 6729 xml_node_struct* dn = _data_new(); | |
| 6730 | |
| 6731 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; | |
| 6732 } | |
| 6733 | |
| 6734 PUGI__FN bool xml_text::set(bool rhs) | |
| 6735 { | |
| 6736 xml_node_struct* dn = _data_new(); | |
| 6737 | |
| 6738 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; | |
| 6739 } | |
| 6740 | |
| 6741 #ifdef PUGIXML_HAS_LONG_LONG | |
| 6742 PUGI__FN bool xml_text::set(long long rhs) | |
| 6743 { | |
| 6744 xml_node_struct* dn = _data_new(); | |
| 6745 | |
| 6746 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; | |
| 6747 } | |
| 6748 | |
| 6749 PUGI__FN bool xml_text::set(unsigned long long rhs) | |
| 6750 { | |
| 6751 xml_node_struct* dn = _data_new(); | |
| 6752 | |
| 6753 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; | |
| 6754 } | |
| 6755 #endif | |
| 6756 | |
| 6757 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) | |
| 6758 { | |
| 6759 set(rhs); | |
| 6760 return *this; | |
| 6761 } | |
| 6762 | |
| 6763 PUGI__FN xml_text& xml_text::operator=(int rhs) | |
| 6764 { | |
| 6765 set(rhs); | |
| 6766 return *this; | |
| 6767 } | |
| 6768 | |
| 6769 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) | |
| 6770 { | |
| 6771 set(rhs); | |
| 6772 return *this; | |
| 6773 } | |
| 6774 | |
| 6775 PUGI__FN xml_text& xml_text::operator=(long rhs) | |
| 6776 { | |
| 6777 set(rhs); | |
| 6778 return *this; | |
| 6779 } | |
| 6780 | |
| 6781 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs) | |
| 6782 { | |
| 6783 set(rhs); | |
| 6784 return *this; | |
| 6785 } | |
| 6786 | |
| 6787 PUGI__FN xml_text& xml_text::operator=(double rhs) | |
| 6788 { | |
| 6789 set(rhs); | |
| 6790 return *this; | |
| 6791 } | |
| 6792 | |
| 6793 PUGI__FN xml_text& xml_text::operator=(float rhs) | |
| 6794 { | |
| 6795 set(rhs); | |
| 6796 return *this; | |
| 6797 } | |
| 6798 | |
| 6799 PUGI__FN xml_text& xml_text::operator=(bool rhs) | |
| 6800 { | |
| 6801 set(rhs); | |
| 6802 return *this; | |
| 6803 } | |
| 6804 | |
| 6805 #ifdef PUGIXML_HAS_LONG_LONG | |
| 6806 PUGI__FN xml_text& xml_text::operator=(long long rhs) | |
| 6807 { | |
| 6808 set(rhs); | |
| 6809 return *this; | |
| 6810 } | |
| 6811 | |
| 6812 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) | |
| 6813 { | |
| 6814 set(rhs); | |
| 6815 return *this; | |
| 6816 } | |
| 6817 #endif | |
| 6818 | |
| 6819 PUGI__FN xml_node xml_text::data() const | |
| 6820 { | |
| 6821 return xml_node(_data()); | |
| 6822 } | |
| 6823 | |
| 6824 #ifdef __BORLANDC__ | |
| 6825 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) | |
| 6826 { | |
| 6827 return (bool)lhs && rhs; | |
| 6828 } | |
| 6829 | |
| 6830 PUGI__FN bool operator||(const xml_text& lhs, bool rhs) | |
| 6831 { | |
| 6832 return (bool)lhs || rhs; | |
| 6833 } | |
| 6834 #endif | |
| 6835 | |
| 6836 PUGI__FN xml_node_iterator::xml_node_iterator() | |
| 6837 { | |
| 6838 } | |
| 6839 | |
| 6840 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) | |
| 6841 { | |
| 6842 } | |
| 6843 | |
| 6844 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) | |
| 6845 { | |
| 6846 } | |
| 6847 | |
| 6848 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const | |
| 6849 { | |
| 6850 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; | |
| 6851 } | |
| 6852 | |
| 6853 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const | |
| 6854 { | |
| 6855 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; | |
| 6856 } | |
| 6857 | |
| 6858 PUGI__FN xml_node& xml_node_iterator::operator*() const | |
| 6859 { | |
| 6860 assert(_wrap._root); | |
| 6861 return _wrap; | |
| 6862 } | |
| 6863 | |
| 6864 PUGI__FN xml_node* xml_node_iterator::operator->() const | |
| 6865 { | |
| 6866 assert(_wrap._root); | |
| 6867 return const_cast<xml_node*>(&_wrap); // BCC5 workaround | |
| 6868 } | |
| 6869 | |
| 6870 PUGI__FN xml_node_iterator& xml_node_iterator::operator++() | |
| 6871 { | |
| 6872 assert(_wrap._root); | |
| 6873 _wrap._root = _wrap._root->next_sibling; | |
| 6874 return *this; | |
| 6875 } | |
| 6876 | |
| 6877 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) | |
| 6878 { | |
| 6879 xml_node_iterator temp = *this; | |
| 6880 ++*this; | |
| 6881 return temp; | |
| 6882 } | |
| 6883 | |
| 6884 PUGI__FN xml_node_iterator& xml_node_iterator::operator--() | |
| 6885 { | |
| 6886 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child(); | |
| 6887 return *this; | |
| 6888 } | |
| 6889 | |
| 6890 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) | |
| 6891 { | |
| 6892 xml_node_iterator temp = *this; | |
| 6893 --*this; | |
| 6894 return temp; | |
| 6895 } | |
| 6896 | |
| 6897 PUGI__FN xml_attribute_iterator::xml_attribute_iterator() | |
| 6898 { | |
| 6899 } | |
| 6900 | |
| 6901 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) | |
| 6902 { | |
| 6903 } | |
| 6904 | |
| 6905 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) | |
| 6906 { | |
| 6907 } | |
| 6908 | |
| 6909 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const | |
| 6910 { | |
| 6911 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; | |
| 6912 } | |
| 6913 | |
| 6914 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const | |
| 6915 { | |
| 6916 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; | |
| 6917 } | |
| 6918 | |
| 6919 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const | |
| 6920 { | |
| 6921 assert(_wrap._attr); | |
| 6922 return _wrap; | |
| 6923 } | |
| 6924 | |
| 6925 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const | |
| 6926 { | |
| 6927 assert(_wrap._attr); | |
| 6928 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround | |
| 6929 } | |
| 6930 | |
| 6931 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator++() | |
| 6932 { | |
| 6933 assert(_wrap._attr); | |
| 6934 _wrap._attr = _wrap._attr->next_attribute; | |
| 6935 return *this; | |
| 6936 } | |
| 6937 | |
| 6938 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int) | |
| 6939 { | |
| 6940 xml_attribute_iterator temp = *this; | |
| 6941 ++*this; | |
| 6942 return temp; | |
| 6943 } | |
| 6944 | |
| 6945 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator--() | |
| 6946 { | |
| 6947 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); | |
| 6948 return *this; | |
| 6949 } | |
| 6950 | |
| 6951 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) | |
| 6952 { | |
| 6953 xml_attribute_iterator temp = *this; | |
| 6954 --*this; | |
| 6955 return temp; | |
| 6956 } | |
| 6957 | |
| 6958 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) | |
| 6959 { | |
| 6960 } | |
| 6961 | |
| 6962 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) | |
| 6963 { | |
| 6964 } | |
| 6965 | |
| 6966 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) | |
| 6967 { | |
| 6968 } | |
| 6969 | |
| 6970 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const | |
| 6971 { | |
| 6972 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; | |
| 6973 } | |
| 6974 | |
| 6975 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const | |
| 6976 { | |
| 6977 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; | |
| 6978 } | |
| 6979 | |
| 6980 PUGI__FN xml_node& xml_named_node_iterator::operator*() const | |
| 6981 { | |
| 6982 assert(_wrap._root); | |
| 6983 return _wrap; | |
| 6984 } | |
| 6985 | |
| 6986 PUGI__FN xml_node* xml_named_node_iterator::operator->() const | |
| 6987 { | |
| 6988 assert(_wrap._root); | |
| 6989 return const_cast<xml_node*>(&_wrap); // BCC5 workaround | |
| 6990 } | |
| 6991 | |
| 6992 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator++() | |
| 6993 { | |
| 6994 assert(_wrap._root); | |
| 6995 _wrap = _wrap.next_sibling(_name); | |
| 6996 return *this; | |
| 6997 } | |
| 6998 | |
| 6999 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) | |
| 7000 { | |
| 7001 xml_named_node_iterator temp = *this; | |
| 7002 ++*this; | |
| 7003 return temp; | |
| 7004 } | |
| 7005 | |
| 7006 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator--() | |
| 7007 { | |
| 7008 if (_wrap._root) | |
| 7009 _wrap = _wrap.previous_sibling(_name); | |
| 7010 else | |
| 7011 { | |
| 7012 _wrap = _parent.last_child(); | |
| 7013 | |
| 7014 if (!impl::strequal(_wrap.name(), _name)) | |
| 7015 _wrap = _wrap.previous_sibling(_name); | |
| 7016 } | |
| 7017 | |
| 7018 return *this; | |
| 7019 } | |
| 7020 | |
| 7021 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) | |
| 7022 { | |
| 7023 xml_named_node_iterator temp = *this; | |
| 7024 --*this; | |
| 7025 return temp; | |
| 7026 } | |
| 7027 | |
| 7028 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) | |
| 7029 { | |
| 7030 } | |
| 7031 | |
| 7032 PUGI__FN xml_parse_result::operator bool() const | |
| 7033 { | |
| 7034 return status == status_ok; | |
| 7035 } | |
| 7036 | |
| 7037 PUGI__FN const char* xml_parse_result::description() const | |
| 7038 { | |
| 7039 switch (status) | |
| 7040 { | |
| 7041 case status_ok: return "No error"; | |
| 7042 | |
| 7043 case status_file_not_found: return "File was not found"; | |
| 7044 case status_io_error: return "Error reading from file/stream"; | |
| 7045 case status_out_of_memory: return "Could not allocate memory"; | |
| 7046 case status_internal_error: return "Internal error occurred"; | |
| 7047 | |
| 7048 case status_unrecognized_tag: return "Could not determine tag type"; | |
| 7049 | |
| 7050 case status_bad_pi: return "Error parsing document declaration/processing instruction"; | |
| 7051 case status_bad_comment: return "Error parsing comment"; | |
| 7052 case status_bad_cdata: return "Error parsing CDATA section"; | |
| 7053 case status_bad_doctype: return "Error parsing document type declaration"; | |
| 7054 case status_bad_pcdata: return "Error parsing PCDATA section"; | |
| 7055 case status_bad_start_element: return "Error parsing start element tag"; | |
| 7056 case status_bad_attribute: return "Error parsing element attribute"; | |
| 7057 case status_bad_end_element: return "Error parsing end element tag"; | |
| 7058 case status_end_element_mismatch: return "Start-end tags mismatch"; | |
| 7059 | |
| 7060 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; | |
| 7061 | |
| 7062 case status_no_document_element: return "No document element found"; | |
| 7063 | |
| 7064 default: return "Unknown error"; | |
| 7065 } | |
| 7066 } | |
| 7067 | |
| 7068 PUGI__FN xml_document::xml_document(): _buffer(0) | |
| 7069 { | |
| 7070 _create(); | |
| 7071 } | |
| 7072 | |
| 7073 PUGI__FN xml_document::~xml_document() | |
| 7074 { | |
| 7075 _destroy(); | |
| 7076 } | |
| 7077 | |
| 7078 #ifdef PUGIXML_HAS_MOVE | |
| 7079 PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0) | |
| 7080 { | |
| 7081 _create(); | |
| 7082 _move(rhs); | |
| 7083 } | |
| 7084 | |
| 7085 PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT | |
| 7086 { | |
| 7087 if (this == &rhs) return *this; | |
| 7088 | |
| 7089 _destroy(); | |
| 7090 _create(); | |
| 7091 _move(rhs); | |
| 7092 | |
| 7093 return *this; | |
| 7094 } | |
| 7095 #endif | |
| 7096 | |
| 7097 PUGI__FN void xml_document::reset() | |
| 7098 { | |
| 7099 _destroy(); | |
| 7100 _create(); | |
| 7101 } | |
| 7102 | |
| 7103 PUGI__FN void xml_document::reset(const xml_document& proto) | |
| 7104 { | |
| 7105 reset(); | |
| 7106 | |
| 7107 impl::node_copy_tree(_root, proto._root); | |
| 7108 } | |
| 7109 | |
| 7110 PUGI__FN void xml_document::_create() | |
| 7111 { | |
| 7112 assert(!_root); | |
| 7113 | |
| 7114 #ifdef PUGIXML_COMPACT | |
| 7115 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit | |
| 7116 const size_t page_offset = sizeof(void*); | |
| 7117 #else | |
| 7118 const size_t page_offset = 0; | |
| 7119 #endif | |
| 7120 | |
| 7121 // initialize sentinel page | |
| 7122 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory)); | |
| 7123 | |
| 7124 // prepare page structure | |
| 7125 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory); | |
| 7126 assert(page); | |
| 7127 | |
| 7128 page->busy_size = impl::xml_memory_page_size; | |
| 7129 | |
| 7130 // setup first page marker | |
| 7131 #ifdef PUGIXML_COMPACT | |
| 7132 // round-trip through void* to avoid 'cast increases required alignment of target type' warning | |
| 7133 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page))); | |
| 7134 *page->compact_page_marker = sizeof(impl::xml_memory_page); | |
| 7135 #endif | |
| 7136 | |
| 7137 // allocate new root | |
| 7138 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); | |
| 7139 _root->prev_sibling_c = _root; | |
| 7140 | |
| 7141 // setup sentinel page | |
| 7142 page->allocator = static_cast<impl::xml_document_struct*>(_root); | |
| 7143 | |
| 7144 // setup hash table pointer in allocator | |
| 7145 #ifdef PUGIXML_COMPACT | |
| 7146 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash; | |
| 7147 #endif | |
| 7148 | |
| 7149 // verify the document allocation | |
| 7150 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); | |
| 7151 } | |
| 7152 | |
| 7153 PUGI__FN void xml_document::_destroy() | |
| 7154 { | |
| 7155 assert(_root); | |
| 7156 | |
| 7157 // destroy static storage | |
| 7158 if (_buffer) | |
| 7159 { | |
| 7160 impl::xml_memory::deallocate(_buffer); | |
| 7161 _buffer = 0; | |
| 7162 } | |
| 7163 | |
| 7164 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) | |
| 7165 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) | |
| 7166 { | |
| 7167 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); | |
| 7168 } | |
| 7169 | |
| 7170 // destroy dynamic storage, leave sentinel page (it's in static memory) | |
| 7171 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root); | |
| 7172 assert(root_page && !root_page->prev); | |
| 7173 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); | |
| 7174 | |
| 7175 for (impl::xml_memory_page* page = root_page->next; page; ) | |
| 7176 { | |
| 7177 impl::xml_memory_page* next = page->next; | |
| 7178 | |
| 7179 impl::xml_allocator::deallocate_page(page); | |
| 7180 | |
| 7181 page = next; | |
| 7182 } | |
| 7183 | |
| 7184 #ifdef PUGIXML_COMPACT | |
| 7185 // destroy hash table | |
| 7186 static_cast<impl::xml_document_struct*>(_root)->hash.clear(); | |
| 7187 #endif | |
| 7188 | |
| 7189 _root = 0; | |
| 7190 } | |
| 7191 | |
| 7192 #ifdef PUGIXML_HAS_MOVE | |
| 7193 PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT | |
| 7194 { | |
| 7195 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root); | |
| 7196 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root); | |
| 7197 | |
| 7198 // save first child pointer for later; this needs hash access | |
| 7199 xml_node_struct* other_first_child = other->first_child; | |
| 7200 | |
| 7201 #ifdef PUGIXML_COMPACT | |
| 7202 // reserve space for the hash table up front; this is the only operation that can fail | |
| 7203 // if it does, we have no choice but to throw (if we have exceptions) | |
| 7204 if (other_first_child) | |
| 7205 { | |
| 7206 size_t other_children = 0; | |
| 7207 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) | |
| 7208 other_children++; | |
| 7209 | |
| 7210 // in compact mode, each pointer assignment could result in a hash table request | |
| 7211 // during move, we have to relocate document first_child and parents of all children | |
| 7212 // normally there's just one child and its parent has a pointerless encoding but | |
| 7213 // we assume the worst here | |
| 7214 if (!other->_hash->reserve(other_children + 1)) | |
| 7215 { | |
| 7216 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 7217 return; | |
| 7218 #else | |
| 7219 throw std::bad_alloc(); | |
| 7220 #endif | |
| 7221 } | |
| 7222 } | |
| 7223 #endif | |
| 7224 | |
| 7225 // move allocation state | |
| 7226 // note that other->_root may point to the embedded document page, in which case we should keep original (empty) state | |
| 7227 if (other->_root != PUGI__GETPAGE(other)) | |
| 7228 { | |
| 7229 doc->_root = other->_root; | |
| 7230 doc->_busy_size = other->_busy_size; | |
| 7231 } | |
| 7232 | |
| 7233 // move buffer state | |
| 7234 doc->buffer = other->buffer; | |
| 7235 doc->extra_buffers = other->extra_buffers; | |
| 7236 _buffer = rhs._buffer; | |
| 7237 | |
| 7238 #ifdef PUGIXML_COMPACT | |
| 7239 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child | |
| 7240 doc->hash = other->hash; | |
| 7241 doc->_hash = &doc->hash; | |
| 7242 | |
| 7243 // make sure we don't access other hash up until the end when we reinitialize other document | |
| 7244 other->_hash = 0; | |
| 7245 #endif | |
| 7246 | |
| 7247 // move page structure | |
| 7248 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc); | |
| 7249 assert(doc_page && !doc_page->prev && !doc_page->next); | |
| 7250 | |
| 7251 impl::xml_memory_page* other_page = PUGI__GETPAGE(other); | |
| 7252 assert(other_page && !other_page->prev); | |
| 7253 | |
| 7254 // relink pages since root page is embedded into xml_document | |
| 7255 if (impl::xml_memory_page* page = other_page->next) | |
| 7256 { | |
| 7257 assert(page->prev == other_page); | |
| 7258 | |
| 7259 page->prev = doc_page; | |
| 7260 | |
| 7261 doc_page->next = page; | |
| 7262 other_page->next = 0; | |
| 7263 } | |
| 7264 | |
| 7265 // make sure pages point to the correct document state | |
| 7266 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next) | |
| 7267 { | |
| 7268 assert(page->allocator == other); | |
| 7269 | |
| 7270 page->allocator = doc; | |
| 7271 | |
| 7272 #ifdef PUGIXML_COMPACT | |
| 7273 // this automatically migrates most children between documents and prevents ->parent assignment from allocating | |
| 7274 if (page->compact_shared_parent == other) | |
| 7275 page->compact_shared_parent = doc; | |
| 7276 #endif | |
| 7277 } | |
| 7278 | |
| 7279 // move tree structure | |
| 7280 assert(!doc->first_child); | |
| 7281 | |
| 7282 doc->first_child = other_first_child; | |
| 7283 | |
| 7284 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) | |
| 7285 { | |
| 7286 #ifdef PUGIXML_COMPACT | |
| 7287 // most children will have migrated when we reassigned compact_shared_parent | |
| 7288 assert(node->parent == other || node->parent == doc); | |
| 7289 | |
| 7290 node->parent = doc; | |
| 7291 #else | |
| 7292 assert(node->parent == other); | |
| 7293 node->parent = doc; | |
| 7294 #endif | |
| 7295 } | |
| 7296 | |
| 7297 // reset other document | |
| 7298 new (other) impl::xml_document_struct(PUGI__GETPAGE(other)); | |
| 7299 rhs._buffer = 0; | |
| 7300 } | |
| 7301 #endif | |
| 7302 | |
| 7303 #ifndef PUGIXML_NO_STL | |
| 7304 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) | |
| 7305 { | |
| 7306 reset(); | |
| 7307 | |
| 7308 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer); | |
| 7309 } | |
| 7310 | |
| 7311 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options) | |
| 7312 { | |
| 7313 reset(); | |
| 7314 | |
| 7315 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer); | |
| 7316 } | |
| 7317 #endif | |
| 7318 | |
| 7319 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) | |
| 7320 { | |
| 7321 // Force native encoding (skip autodetection) | |
| 7322 #ifdef PUGIXML_WCHAR_MODE | |
| 7323 xml_encoding encoding = encoding_wchar; | |
| 7324 #else | |
| 7325 xml_encoding encoding = encoding_utf8; | |
| 7326 #endif | |
| 7327 | |
| 7328 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); | |
| 7329 } | |
| 7330 | |
| 7331 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) | |
| 7332 { | |
| 7333 return load_string(contents, options); | |
| 7334 } | |
| 7335 | |
| 7336 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) | |
| 7337 { | |
| 7338 reset(); | |
| 7339 | |
| 7340 using impl::auto_deleter; // MSVC7 workaround | |
| 7341 auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file); | |
| 7342 | |
| 7343 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); | |
| 7344 } | |
| 7345 | |
| 7346 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) | |
| 7347 { | |
| 7348 reset(); | |
| 7349 | |
| 7350 using impl::auto_deleter; // MSVC7 workaround | |
| 7351 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file); | |
| 7352 | |
| 7353 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); | |
| 7354 } | |
| 7355 | |
| 7356 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
| 7357 { | |
| 7358 reset(); | |
| 7359 | |
| 7360 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); | |
| 7361 } | |
| 7362 | |
| 7363 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
| 7364 { | |
| 7365 reset(); | |
| 7366 | |
| 7367 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); | |
| 7368 } | |
| 7369 | |
| 7370 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
| 7371 { | |
| 7372 reset(); | |
| 7373 | |
| 7374 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); | |
| 7375 } | |
| 7376 | |
| 7377 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
| 7378 { | |
| 7379 impl::xml_buffered_writer buffered_writer(writer, encoding); | |
| 7380 | |
| 7381 if ((flags & format_write_bom) && encoding != encoding_latin1) | |
| 7382 { | |
| 7383 // BOM always represents the codepoint U+FEFF, so just write it in native encoding | |
| 7384 #ifdef PUGIXML_WCHAR_MODE | |
| 7385 unsigned int bom = 0xfeff; | |
| 7386 buffered_writer.write(static_cast<wchar_t>(bom)); | |
| 7387 #else | |
| 7388 buffered_writer.write('\xef', '\xbb', '\xbf'); | |
| 7389 #endif | |
| 7390 } | |
| 7391 | |
| 7392 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) | |
| 7393 { | |
| 7394 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"")); | |
| 7395 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\"")); | |
| 7396 buffered_writer.write('?', '>'); | |
| 7397 if (!(flags & format_raw)) buffered_writer.write('\n'); | |
| 7398 } | |
| 7399 | |
| 7400 impl::node_output(buffered_writer, _root, indent, flags, 0); | |
| 7401 | |
| 7402 buffered_writer.flush(); | |
| 7403 } | |
| 7404 | |
| 7405 #ifndef PUGIXML_NO_STL | |
| 7406 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
| 7407 { | |
| 7408 xml_writer_stream writer(stream); | |
| 7409 | |
| 7410 save(writer, indent, flags, encoding); | |
| 7411 } | |
| 7412 | |
| 7413 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const | |
| 7414 { | |
| 7415 xml_writer_stream writer(stream); | |
| 7416 | |
| 7417 save(writer, indent, flags, encoding_wchar); | |
| 7418 } | |
| 7419 #endif | |
| 7420 | |
| 7421 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
| 7422 { | |
| 7423 using impl::auto_deleter; // MSVC7 workaround | |
| 7424 auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); | |
| 7425 | |
| 7426 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0; | |
| 7427 } | |
| 7428 | |
| 7429 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
| 7430 { | |
| 7431 using impl::auto_deleter; // MSVC7 workaround | |
| 7432 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file); | |
| 7433 | |
| 7434 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0; | |
| 7435 } | |
| 7436 | |
| 7437 PUGI__FN xml_node xml_document::document_element() const | |
| 7438 { | |
| 7439 assert(_root); | |
| 7440 | |
| 7441 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
| 7442 if (PUGI__NODETYPE(i) == node_element) | |
| 7443 return xml_node(i); | |
| 7444 | |
| 7445 return xml_node(); | |
| 7446 } | |
| 7447 | |
| 7448 #ifndef PUGIXML_NO_STL | |
| 7449 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) | |
| 7450 { | |
| 7451 assert(str); | |
| 7452 | |
| 7453 return impl::as_utf8_impl(str, impl::strlength_wide(str)); | |
| 7454 } | |
| 7455 | |
| 7456 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str) | |
| 7457 { | |
| 7458 return impl::as_utf8_impl(str.c_str(), str.size()); | |
| 7459 } | |
| 7460 | |
| 7461 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) | |
| 7462 { | |
| 7463 assert(str); | |
| 7464 | |
| 7465 return impl::as_wide_impl(str, strlen(str)); | |
| 7466 } | |
| 7467 | |
| 7468 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str) | |
| 7469 { | |
| 7470 return impl::as_wide_impl(str.c_str(), str.size()); | |
| 7471 } | |
| 7472 #endif | |
| 7473 | |
| 7474 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) | |
| 7475 { | |
| 7476 impl::xml_memory::allocate = allocate; | |
| 7477 impl::xml_memory::deallocate = deallocate; | |
| 7478 } | |
| 7479 | |
| 7480 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() | |
| 7481 { | |
| 7482 return impl::xml_memory::allocate; | |
| 7483 } | |
| 7484 | |
| 7485 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() | |
| 7486 { | |
| 7487 return impl::xml_memory::deallocate; | |
| 7488 } | |
| 7489 } | |
| 7490 | |
| 7491 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) | |
| 7492 namespace std | |
| 7493 { | |
| 7494 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) | |
| 7495 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) | |
| 7496 { | |
| 7497 return std::bidirectional_iterator_tag(); | |
| 7498 } | |
| 7499 | |
| 7500 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) | |
| 7501 { | |
| 7502 return std::bidirectional_iterator_tag(); | |
| 7503 } | |
| 7504 | |
| 7505 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) | |
| 7506 { | |
| 7507 return std::bidirectional_iterator_tag(); | |
| 7508 } | |
| 7509 } | |
| 7510 #endif | |
| 7511 | |
| 7512 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) | |
| 7513 namespace std | |
| 7514 { | |
| 7515 // Workarounds for (non-standard) iterator category detection | |
| 7516 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) | |
| 7517 { | |
| 7518 return std::bidirectional_iterator_tag(); | |
| 7519 } | |
| 7520 | |
| 7521 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) | |
| 7522 { | |
| 7523 return std::bidirectional_iterator_tag(); | |
| 7524 } | |
| 7525 | |
| 7526 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&) | |
| 7527 { | |
| 7528 return std::bidirectional_iterator_tag(); | |
| 7529 } | |
| 7530 } | |
| 7531 #endif | |
| 7532 | |
| 7533 #ifndef PUGIXML_NO_XPATH | |
| 7534 // STL replacements | |
| 7535 PUGI__NS_BEGIN | |
// Function object that compares two values of the same type with operator==.
struct equal_to
{
	template <typename T>
	bool operator()(const T& a, const T& b) const
	{
		return a == b;
	}
};
| 7543 | |
// Function object that compares two values of the same type with operator!=.
struct not_equal_to
{
	template <typename T>
	bool operator()(const T& a, const T& b) const
	{
		return a != b;
	}
};
| 7551 | |
// Function object that compares two values of the same type with operator<.
struct less
{
	template <typename T>
	bool operator()(const T& a, const T& b) const
	{
		return a < b;
	}
};
| 7559 | |
// Function object that compares two values of the same type with operator<=.
struct less_equal
{
	template <typename T>
	bool operator()(const T& a, const T& b) const
	{
		return a <= b;
	}
};
| 7567 | |
// Exchanges the values of lhs and rhs using one copy-initialized temporary and two assignments.
template <typename T>
inline void swap(T& lhs, T& rhs)
{
	T saved = lhs;

	lhs = rhs;
	rhs = saved;
}
| 7574 | |
| 7575 template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred) | |
| 7576 { | |
| 7577 I result = begin; | |
| 7578 | |
| 7579 for (I it = begin + 1; it != end; ++it) | |
| 7580 if (pred(*it, *result)) | |
| 7581 result = it; | |
| 7582 | |
| 7583 return result; | |
| 7584 } | |
| 7585 | |
| 7586 template <typename I> PUGI__FN void reverse(I begin, I end) | |
| 7587 { | |
| 7588 while (end - begin > 1) | |
| 7589 swap(*begin++, *--end); | |
| 7590 } | |
| 7591 | |
| 7592 template <typename I> PUGI__FN I unique(I begin, I end) | |
| 7593 { | |
| 7594 // fast skip head | |
| 7595 while (end - begin > 1 && *begin != *(begin + 1)) | |
| 7596 begin++; | |
| 7597 | |
| 7598 if (begin == end) | |
| 7599 return begin; | |
| 7600 | |
| 7601 // last written element | |
| 7602 I write = begin++; | |
| 7603 | |
| 7604 // merge unique elements | |
| 7605 while (begin != end) | |
| 7606 { | |
| 7607 if (*begin != *write) | |
| 7608 *++write = *begin++; | |
| 7609 else | |
| 7610 begin++; | |
| 7611 } | |
| 7612 | |
| 7613 // past-the-end (write points to live element) | |
| 7614 return write + 1; | |
| 7615 } | |
| 7616 | |
| 7617 template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred) | |
| 7618 { | |
| 7619 if (begin == end) | |
| 7620 return; | |
| 7621 | |
| 7622 for (T* it = begin + 1; it != end; ++it) | |
| 7623 { | |
| 7624 T val = *it; | |
| 7625 T* hole = it; | |
| 7626 | |
| 7627 // move hole backwards | |
| 7628 while (hole > begin && pred(val, *(hole - 1))) | |
| 7629 { | |
| 7630 *hole = *(hole - 1); | |
| 7631 hole--; | |
| 7632 } | |
| 7633 | |
| 7634 // fill hole with element | |
| 7635 *hole = val; | |
| 7636 } | |
| 7637 } | |
| 7638 | |
// Returns the iterator (one of first/middle/last) that refers to the median of the three
// referenced values according to pred. Only the local iterator copies are exchanged;
// the referenced elements themselves are not modified.
template <typename I, typename Pred>
inline I median3(I first, I middle, I last, const Pred& pred)
{
	// order the first pair
	if (pred(*middle, *first))
	{
		swap(middle, first);
	}

	// move the largest of the three into last
	if (pred(*last, *middle))
	{
		swap(last, middle);
	}

	// the previous step may have broken the order of the first pair; restore it
	if (pred(*middle, *first))
	{
		swap(middle, first);
	}

	return middle;
}
| 7650 | |
| 7651 template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend) | |
| 7652 { | |
| 7653 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups) | |
| 7654 T* eq = begin; | |
| 7655 T* lt = begin; | |
| 7656 T* gt = end; | |
| 7657 | |
| 7658 while (lt < gt) | |
| 7659 { | |
| 7660 if (pred(*lt, pivot)) | |
| 7661 lt++; | |
| 7662 else if (*lt == pivot) | |
| 7663 swap(*eq++, *lt++); | |
| 7664 else | |
| 7665 swap(*lt, *--gt); | |
| 7666 } | |
| 7667 | |
| 7668 // we now have just 4 groups: = < >; move equal elements to the middle | |
| 7669 T* eqbeg = gt; | |
| 7670 | |
| 7671 for (T* it = begin; it != eq; ++it) | |
| 7672 swap(*it, *--eqbeg); | |
| 7673 | |
| 7674 *out_eqbeg = eqbeg; | |
| 7675 *out_eqend = gt; | |
| 7676 } | |
| 7677 | |
| 7678 template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred) | |
| 7679 { | |
| 7680 // sort large chunks | |
| 7681 while (end - begin > 16) | |
| 7682 { | |
| 7683 // find median element | |
| 7684 I middle = begin + (end - begin) / 2; | |
| 7685 I median = median3(begin, middle, end - 1, pred); | |
| 7686 | |
| 7687 // partition in three chunks (< = >) | |
| 7688 I eqbeg, eqend; | |
| 7689 partition3(begin, end, *median, pred, &eqbeg, &eqend); | |
| 7690 | |
| 7691 // loop on larger half | |
| 7692 if (eqbeg - begin > end - eqend) | |
| 7693 { | |
| 7694 sort(eqend, end, pred); | |
| 7695 end = eqbeg; | |
| 7696 } | |
| 7697 else | |
| 7698 { | |
| 7699 sort(begin, eqbeg, pred); | |
| 7700 begin = eqend; | |
| 7701 } | |
| 7702 } | |
| 7703 | |
| 7704 // insertion sort small chunk | |
| 7705 insertion_sort(begin, end, pred); | |
| 7706 } | |
| 7707 | |
| 7708 PUGI__FN bool hash_insert(const void** table, size_t size, const void* key) | |
| 7709 { | |
| 7710 assert(key); | |
| 7711 | |
| 7712 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); | |
| 7713 | |
| 7714 // MurmurHash3 32-bit finalizer | |
| 7715 h ^= h >> 16; | |
| 7716 h *= 0x85ebca6bu; | |
| 7717 h ^= h >> 13; | |
| 7718 h *= 0xc2b2ae35u; | |
| 7719 h ^= h >> 16; | |
| 7720 | |
| 7721 size_t hashmod = size - 1; | |
| 7722 size_t bucket = h & hashmod; | |
| 7723 | |
| 7724 for (size_t probe = 0; probe <= hashmod; ++probe) | |
| 7725 { | |
| 7726 if (table[bucket] == 0) | |
| 7727 { | |
| 7728 table[bucket] = key; | |
| 7729 return true; | |
| 7730 } | |
| 7731 | |
| 7732 if (table[bucket] == key) | |
| 7733 return false; | |
| 7734 | |
| 7735 // hash collision, quadratic probing | |
| 7736 bucket = (bucket + probe + 1) & hashmod; | |
| 7737 } | |
| 7738 | |
| 7739 assert(false && "Hash table is full"); // unreachable | |
| 7740 return false; | |
| 7741 } | |
| 7742 PUGI__NS_END | |
| 7743 | |
| 7744 // Allocator used for AST and evaluation stacks | |
| 7745 PUGI__NS_BEGIN | |
// Size in bytes of the data area of an XPath memory block;
// PUGIXML_MEMORY_XPATH_PAGE_SIZE overrides the default of 4096.
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
#else
static const size_t xpath_memory_page_size = 4096;
#endif

// Allocation granularity: the larger of sizeof(double) and sizeof(void*)
static const uintptr_t xpath_memory_block_alignment = sizeof(void*) < sizeof(double) ? sizeof(double) : sizeof(void*);
| 7755 | |
| 7756 struct xpath_memory_block | |
| 7757 { | |
| 7758 xpath_memory_block* next; | |
| 7759 size_t capacity; | |
| 7760 | |
| 7761 union | |
| 7762 { | |
| 7763 char data[xpath_memory_page_size]; | |
| 7764 double alignment; | |
| 7765 }; | |
| 7766 }; | |
| 7767 | |
| 7768 struct xpath_allocator | |
| 7769 { | |
| 7770 xpath_memory_block* _root; | |
| 7771 size_t _root_size; | |
| 7772 bool* _error; | |
| 7773 | |
| 7774 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) | |
| 7775 { | |
| 7776 } | |
| 7777 | |
| 7778 void* allocate(size_t size) | |
| 7779 { | |
| 7780 // round size up to block alignment boundary | |
| 7781 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); | |
| 7782 | |
| 7783 if (_root_size + size <= _root->capacity) | |
| 7784 { | |
| 7785 void* buf = &_root->data[0] + _root_size; | |
| 7786 _root_size += size; | |
| 7787 return buf; | |
| 7788 } | |
| 7789 else | |
| 7790 { | |
| 7791 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests | |
| 7792 size_t block_capacity_base = sizeof(_root->data); | |
| 7793 size_t block_capacity_req = size + block_capacity_base / 4; | |
| 7794 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req; | |
| 7795 | |
| 7796 size_t block_size = block_capacity + offsetof(xpath_memory_block, data); | |
| 7797 | |
| 7798 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size)); | |
| 7799 if (!block) | |
| 7800 { | |
| 7801 if (_error) *_error = true; | |
| 7802 return 0; | |
| 7803 } | |
| 7804 | |
| 7805 block->next = _root; | |
| 7806 block->capacity = block_capacity; | |
| 7807 | |
| 7808 _root = block; | |
| 7809 _root_size = size; | |
| 7810 | |
| 7811 return block->data; | |
| 7812 } | |
| 7813 } | |
| 7814 | |
| 7815 void* reallocate(void* ptr, size_t old_size, size_t new_size) | |
| 7816 { | |
| 7817 // round size up to block alignment boundary | |
| 7818 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); | |
| 7819 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); | |
| 7820 | |
| 7821 // we can only reallocate the last object | |
| 7822 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size); | |
| 7823 | |
| 7824 // try to reallocate the object inplace | |
| 7825 if (ptr && _root_size - old_size + new_size <= _root->capacity) | |
| 7826 { | |
| 7827 _root_size = _root_size - old_size + new_size; | |
| 7828 return ptr; | |
| 7829 } | |
| 7830 | |
| 7831 // allocate a new block | |
| 7832 void* result = allocate(new_size); | |
| 7833 if (!result) return 0; | |
| 7834 | |
| 7835 // we have a new block | |
| 7836 if (ptr) | |
| 7837 { | |
| 7838 // copy old data (we only support growing) | |
| 7839 assert(new_size >= old_size); | |
| 7840 memcpy(result, ptr, old_size); | |
| 7841 | |
| 7842 // free the previous page if it had no other objects | |
| 7843 assert(_root->data == result); | |
| 7844 assert(_root->next); | |
| 7845 | |
| 7846 if (_root->next->data == ptr) | |
| 7847 { | |
| 7848 // deallocate the whole page, unless it was the first one | |
| 7849 xpath_memory_block* next = _root->next->next; | |
| 7850 | |
| 7851 if (next) | |
| 7852 { | |
| 7853 xml_memory::deallocate(_root->next); | |
| 7854 _root->next = next; | |
| 7855 } | |
| 7856 } | |
| 7857 } | |
| 7858 | |
| 7859 return result; | |
| 7860 } | |
| 7861 | |
| 7862 void revert(const xpath_allocator& state) | |
| 7863 { | |
| 7864 // free all new pages | |
| 7865 xpath_memory_block* cur = _root; | |
| 7866 | |
| 7867 while (cur != state._root) | |
| 7868 { | |
| 7869 xpath_memory_block* next = cur->next; | |
| 7870 | |
| 7871 xml_memory::deallocate(cur); | |
| 7872 | |
| 7873 cur = next; | |
| 7874 } | |
| 7875 | |
| 7876 // restore state | |
| 7877 _root = state._root; | |
| 7878 _root_size = state._root_size; | |
| 7879 } | |
| 7880 | |
| 7881 void release() | |
| 7882 { | |
| 7883 xpath_memory_block* cur = _root; | |
| 7884 assert(cur); | |
| 7885 | |
| 7886 while (cur->next) | |
| 7887 { | |
| 7888 xpath_memory_block* next = cur->next; | |
| 7889 | |
| 7890 xml_memory::deallocate(cur); | |
| 7891 | |
| 7892 cur = next; | |
| 7893 } | |
| 7894 } | |
| 7895 }; | |
| 7896 | |
| 7897 struct xpath_allocator_capture | |
| 7898 { | |
| 7899 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) | |
| 7900 { | |
| 7901 } | |
| 7902 | |
| 7903 ~xpath_allocator_capture() | |
| 7904 { | |
| 7905 _target->revert(_state); | |
| 7906 } | |
| 7907 | |
| 7908 xpath_allocator* _target; | |
| 7909 xpath_allocator _state; | |
| 7910 }; | |
| 7911 | |
| 7912 struct xpath_stack | |
| 7913 { | |
| 7914 xpath_allocator* result; | |
| 7915 xpath_allocator* temp; | |
| 7916 }; | |
| 7917 | |
| 7918 struct xpath_stack_data | |
| 7919 { | |
| 7920 xpath_memory_block blocks[2]; | |
| 7921 xpath_allocator result; | |
| 7922 xpath_allocator temp; | |
| 7923 xpath_stack stack; | |
| 7924 bool oom; | |
| 7925 | |
| 7926 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) | |
| 7927 { | |
| 7928 blocks[0].next = blocks[1].next = 0; | |
| 7929 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); | |
| 7930 | |
| 7931 stack.result = &result; | |
| 7932 stack.temp = &temp; | |
| 7933 } | |
| 7934 | |
| 7935 ~xpath_stack_data() | |
| 7936 { | |
| 7937 result.release(); | |
| 7938 temp.release(); | |
| 7939 } | |
| 7940 }; | |
| 7941 PUGI__NS_END | |
| 7942 | |
| 7943 // String class | |
| 7944 PUGI__NS_BEGIN | |
| 7945 class xpath_string | |
| 7946 { | |
| 7947 const char_t* _buffer; | |
| 7948 bool _uses_heap; | |
| 7949 size_t _length_heap; | |
| 7950 | |
| 7951 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) | |
| 7952 { | |
| 7953 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); | |
| 7954 if (!result) return 0; | |
| 7955 | |
| 7956 memcpy(result, string, length * sizeof(char_t)); | |
| 7957 result[length] = 0; | |
| 7958 | |
| 7959 return result; | |
| 7960 } | |
| 7961 | |
| 7962 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) | |
| 7963 { | |
| 7964 } | |
| 7965 | |
| 7966 public: | |
| 7967 static xpath_string from_const(const char_t* str) | |
| 7968 { | |
| 7969 return xpath_string(str, false, 0); | |
| 7970 } | |
| 7971 | |
| 7972 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) | |
| 7973 { | |
| 7974 assert(begin <= end && *end == 0); | |
| 7975 | |
| 7976 return xpath_string(begin, true, static_cast<size_t>(end - begin)); | |
| 7977 } | |
| 7978 | |
| 7979 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) | |
| 7980 { | |
| 7981 assert(begin <= end); | |
| 7982 | |
| 7983 if (begin == end) | |
| 7984 return xpath_string(); | |
| 7985 | |
| 7986 size_t length = static_cast<size_t>(end - begin); | |
| 7987 const char_t* data = duplicate_string(begin, length, alloc); | |
| 7988 | |
| 7989 return data ? xpath_string(data, true, length) : xpath_string(); | |
| 7990 } | |
| 7991 | |
| 7992 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) | |
| 7993 { | |
| 7994 } | |
| 7995 | |
| 7996 void append(const xpath_string& o, xpath_allocator* alloc) | |
| 7997 { | |
| 7998 // skip empty sources | |
| 7999 if (!*o._buffer) return; | |
| 8000 | |
| 8001 // fast append for constant empty target and constant source | |
| 8002 if (!*_buffer && !_uses_heap && !o._uses_heap) | |
| 8003 { | |
| 8004 _buffer = o._buffer; | |
| 8005 } | |
| 8006 else | |
| 8007 { | |
| 8008 // need to make heap copy | |
| 8009 size_t target_length = length(); | |
| 8010 size_t source_length = o.length(); | |
| 8011 size_t result_length = target_length + source_length; | |
| 8012 | |
| 8013 // allocate new buffer | |
| 8014 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); | |
| 8015 if (!result) return; | |
| 8016 | |
| 8017 // append first string to the new buffer in case there was no reallocation | |
| 8018 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); | |
| 8019 | |
| 8020 // append second string to the new buffer | |
| 8021 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); | |
| 8022 result[result_length] = 0; | |
| 8023 | |
| 8024 // finalize | |
| 8025 _buffer = result; | |
| 8026 _uses_heap = true; | |
| 8027 _length_heap = result_length; | |
| 8028 } | |
| 8029 } | |
| 8030 | |
| 8031 const char_t* c_str() const | |
| 8032 { | |
| 8033 return _buffer; | |
| 8034 } | |
| 8035 | |
| 8036 size_t length() const | |
| 8037 { | |
| 8038 return _uses_heap ? _length_heap : strlength(_buffer); | |
| 8039 } | |
| 8040 | |
| 8041 char_t* data(xpath_allocator* alloc) | |
| 8042 { | |
| 8043 // make private heap copy | |
| 8044 if (!_uses_heap) | |
| 8045 { | |
| 8046 size_t length_ = strlength(_buffer); | |
| 8047 const char_t* data_ = duplicate_string(_buffer, length_, alloc); | |
| 8048 | |
| 8049 if (!data_) return 0; | |
| 8050 | |
| 8051 _buffer = data_; | |
| 8052 _uses_heap = true; | |
| 8053 _length_heap = length_; | |
| 8054 } | |
| 8055 | |
| 8056 return const_cast<char_t*>(_buffer); | |
| 8057 } | |
| 8058 | |
| 8059 bool empty() const | |
| 8060 { | |
| 8061 return *_buffer == 0; | |
| 8062 } | |
| 8063 | |
| 8064 bool operator==(const xpath_string& o) const | |
| 8065 { | |
| 8066 return strequal(_buffer, o._buffer); | |
| 8067 } | |
| 8068 | |
| 8069 bool operator!=(const xpath_string& o) const | |
| 8070 { | |
| 8071 return !strequal(_buffer, o._buffer); | |
| 8072 } | |
| 8073 | |
| 8074 bool uses_heap() const | |
| 8075 { | |
| 8076 return _uses_heap; | |
| 8077 } | |
| 8078 }; | |
| 8079 PUGI__NS_END | |
| 8080 | |
| 8081 PUGI__NS_BEGIN | |
| 8082 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) | |
| 8083 { | |
| 8084 while (*pattern && *string == *pattern) | |
| 8085 { | |
| 8086 string++; | |
| 8087 pattern++; | |
| 8088 } | |
| 8089 | |
| 8090 return *pattern == 0; | |
| 8091 } | |
| 8092 | |
| 8093 PUGI__FN const char_t* find_char(const char_t* s, char_t c) | |
| 8094 { | |
| 8095 #ifdef PUGIXML_WCHAR_MODE | |
| 8096 return wcschr(s, c); | |
| 8097 #else | |
| 8098 return strchr(s, c); | |
| 8099 #endif | |
| 8100 } | |
| 8101 | |
| 8102 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) | |
| 8103 { | |
| 8104 #ifdef PUGIXML_WCHAR_MODE | |
| 8105 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) | |
| 8106 return (*p == 0) ? s : wcsstr(s, p); | |
| 8107 #else | |
| 8108 return strstr(s, p); | |
| 8109 #endif | |
| 8110 } | |
| 8111 | |
| 8112 // Converts symbol to lower case, if it is an ASCII one | |
| 8113 PUGI__FN char_t tolower_ascii(char_t ch) | |
| 8114 { | |
| 8115 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch; | |
| 8116 } | |
| 8117 | |
| 8118 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) | |
| 8119 { | |
| 8120 if (na.attribute()) | |
| 8121 return xpath_string::from_const(na.attribute().value()); | |
| 8122 else | |
| 8123 { | |
| 8124 xml_node n = na.node(); | |
| 8125 | |
| 8126 switch (n.type()) | |
| 8127 { | |
| 8128 case node_pcdata: | |
| 8129 case node_cdata: | |
| 8130 case node_comment: | |
| 8131 case node_pi: | |
| 8132 return xpath_string::from_const(n.value()); | |
| 8133 | |
| 8134 case node_document: | |
| 8135 case node_element: | |
| 8136 { | |
| 8137 xpath_string result; | |
| 8138 | |
| 8139 // element nodes can have value if parse_embed_pcdata was used | |
| 8140 if (n.value()[0]) | |
| 8141 result.append(xpath_string::from_const(n.value()), alloc); | |
| 8142 | |
| 8143 xml_node cur = n.first_child(); | |
| 8144 | |
| 8145 while (cur && cur != n) | |
| 8146 { | |
| 8147 if (cur.type() == node_pcdata || cur.type() == node_cdata) | |
| 8148 result.append(xpath_string::from_const(cur.value()), alloc); | |
| 8149 | |
| 8150 if (cur.first_child()) | |
| 8151 cur = cur.first_child(); | |
| 8152 else if (cur.next_sibling()) | |
| 8153 cur = cur.next_sibling(); | |
| 8154 else | |
| 8155 { | |
| 8156 while (!cur.next_sibling() && cur != n) | |
| 8157 cur = cur.parent(); | |
| 8158 | |
| 8159 if (cur != n) cur = cur.next_sibling(); | |
| 8160 } | |
| 8161 } | |
| 8162 | |
| 8163 return result; | |
| 8164 } | |
| 8165 | |
| 8166 default: | |
| 8167 return xpath_string(); | |
| 8168 } | |
| 8169 } | |
| 8170 } | |
| 8171 | |
| 8172 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) | |
| 8173 { | |
| 8174 assert(ln->parent == rn->parent); | |
| 8175 | |
| 8176 // there is no common ancestor (the shared parent is null), nodes are from different documents | |
| 8177 if (!ln->parent) return ln < rn; | |
| 8178 | |
| 8179 // determine sibling order | |
| 8180 xml_node_struct* ls = ln; | |
| 8181 xml_node_struct* rs = rn; | |
| 8182 | |
| 8183 while (ls && rs) | |
| 8184 { | |
| 8185 if (ls == rn) return true; | |
| 8186 if (rs == ln) return false; | |
| 8187 | |
| 8188 ls = ls->next_sibling; | |
| 8189 rs = rs->next_sibling; | |
| 8190 } | |
| 8191 | |
| 8192 // if rn sibling chain ended ln must be before rn | |
| 8193 return !rs; | |
| 8194 } | |
| 8195 | |
| 8196 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) | |
| 8197 { | |
| 8198 // find common ancestor at the same depth, if any | |
| 8199 xml_node_struct* lp = ln; | |
| 8200 xml_node_struct* rp = rn; | |
| 8201 | |
| 8202 while (lp && rp && lp->parent != rp->parent) | |
| 8203 { | |
| 8204 lp = lp->parent; | |
| 8205 rp = rp->parent; | |
| 8206 } | |
| 8207 | |
| 8208 // parents are the same! | |
| 8209 if (lp && rp) return node_is_before_sibling(lp, rp); | |
| 8210 | |
| 8211 // nodes are at different depths, need to normalize heights | |
| 8212 bool left_higher = !lp; | |
| 8213 | |
| 8214 while (lp) | |
| 8215 { | |
| 8216 lp = lp->parent; | |
| 8217 ln = ln->parent; | |
| 8218 } | |
| 8219 | |
| 8220 while (rp) | |
| 8221 { | |
| 8222 rp = rp->parent; | |
| 8223 rn = rn->parent; | |
| 8224 } | |
| 8225 | |
| 8226 // one node is the ancestor of the other | |
| 8227 if (ln == rn) return left_higher; | |
| 8228 | |
| 8229 // find common ancestor... again | |
| 8230 while (ln->parent != rn->parent) | |
| 8231 { | |
| 8232 ln = ln->parent; | |
| 8233 rn = rn->parent; | |
| 8234 } | |
| 8235 | |
| 8236 return node_is_before_sibling(ln, rn); | |
| 8237 } | |
| 8238 | |
| 8239 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) | |
| 8240 { | |
| 8241 while (node && node != parent) node = node->parent; | |
| 8242 | |
| 8243 return parent && node == parent; | |
| 8244 } | |
| 8245 | |
| 8246 PUGI__FN const void* document_buffer_order(const xpath_node& xnode) | |
| 8247 { | |
| 8248 xml_node_struct* node = xnode.node().internal_object(); | |
| 8249 | |
| 8250 if (node) | |
| 8251 { | |
| 8252 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) | |
| 8253 { | |
| 8254 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; | |
| 8255 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; | |
| 8256 } | |
| 8257 | |
| 8258 return 0; | |
| 8259 } | |
| 8260 | |
| 8261 xml_attribute_struct* attr = xnode.attribute().internal_object(); | |
| 8262 | |
| 8263 if (attr) | |
| 8264 { | |
| 8265 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) | |
| 8266 { | |
| 8267 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; | |
| 8268 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; | |
| 8269 } | |
| 8270 | |
| 8271 return 0; | |
| 8272 } | |
| 8273 | |
| 8274 return 0; | |
| 8275 } | |
| 8276 | |
| 8277 struct document_order_comparator | |
| 8278 { | |
| 8279 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const | |
| 8280 { | |
| 8281 // optimized document order based check | |
| 8282 const void* lo = document_buffer_order(lhs); | |
| 8283 const void* ro = document_buffer_order(rhs); | |
| 8284 | |
| 8285 if (lo && ro) return lo < ro; | |
| 8286 | |
| 8287 // slow comparison | |
| 8288 xml_node ln = lhs.node(), rn = rhs.node(); | |
| 8289 | |
| 8290 // compare attributes | |
| 8291 if (lhs.attribute() && rhs.attribute()) | |
| 8292 { | |
| 8293 // shared parent | |
| 8294 if (lhs.parent() == rhs.parent()) | |
| 8295 { | |
| 8296 // determine sibling order | |
| 8297 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) | |
| 8298 if (a == rhs.attribute()) | |
| 8299 return true; | |
| 8300 | |
| 8301 return false; | |
| 8302 } | |
| 8303 | |
| 8304 // compare attribute parents | |
| 8305 ln = lhs.parent(); | |
| 8306 rn = rhs.parent(); | |
| 8307 } | |
| 8308 else if (lhs.attribute()) | |
| 8309 { | |
| 8310 // attributes go after the parent element | |
| 8311 if (lhs.parent() == rhs.node()) return false; | |
| 8312 | |
| 8313 ln = lhs.parent(); | |
| 8314 } | |
| 8315 else if (rhs.attribute()) | |
| 8316 { | |
| 8317 // attributes go after the parent element | |
| 8318 if (rhs.parent() == lhs.node()) return true; | |
| 8319 | |
| 8320 rn = rhs.parent(); | |
| 8321 } | |
| 8322 | |
| 8323 if (ln == rn) return false; | |
| 8324 | |
| 8325 if (!ln || !rn) return ln < rn; | |
| 8326 | |
| 8327 return node_is_before(ln.internal_object(), rn.internal_object()); | |
| 8328 } | |
| 8329 }; | |
| 8330 | |
| 8331 PUGI__FN double gen_nan() | |
| 8332 { | |
| 8333 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) | |
| 8334 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); | |
| 8335 typedef uint32_t UI; // BCC5 workaround | |
| 8336 union { float f; UI i; } u; | |
| 8337 u.i = 0x7fc00000; | |
| 8338 return double(u.f); | |
| 8339 #else | |
| 8340 // fallback | |
| 8341 const volatile double zero = 0.0; | |
| 8342 return zero / zero; | |
| 8343 #endif | |
| 8344 } | |
| 8345 | |
| 8346 PUGI__FN bool is_nan(double value) | |
| 8347 { | |
| 8348 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) | |
| 8349 return !!_isnan(value); | |
| 8350 #elif defined(fpclassify) && defined(FP_NAN) | |
| 8351 return fpclassify(value) == FP_NAN; | |
| 8352 #else | |
| 8353 // fallback | |
| 8354 const volatile double v = value; | |
| 8355 return v != v; | |
| 8356 #endif | |
| 8357 } | |
| 8358 | |
| 8359 PUGI__FN const char_t* convert_number_to_string_special(double value) | |
| 8360 { | |
| 8361 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) | |
| 8362 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; | |
| 8363 if (_isnan(value)) return PUGIXML_TEXT("NaN"); | |
| 8364 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); | |
| 8365 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) | |
| 8366 switch (fpclassify(value)) | |
| 8367 { | |
| 8368 case FP_NAN: | |
| 8369 return PUGIXML_TEXT("NaN"); | |
| 8370 | |
| 8371 case FP_INFINITE: | |
| 8372 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); | |
| 8373 | |
| 8374 case FP_ZERO: | |
| 8375 return PUGIXML_TEXT("0"); | |
| 8376 | |
| 8377 default: | |
| 8378 return 0; | |
| 8379 } | |
| 8380 #else | |
| 8381 // fallback | |
| 8382 const volatile double v = value; | |
| 8383 | |
| 8384 if (v == 0) return PUGIXML_TEXT("0"); | |
| 8385 if (v != v) return PUGIXML_TEXT("NaN"); | |
| 8386 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); | |
| 8387 return 0; | |
| 8388 #endif | |
| 8389 } | |
| 8390 | |
| 8391 PUGI__FN bool convert_number_to_boolean(double value) | |
| 8392 { | |
| 8393 return (value != 0 && !is_nan(value)); | |
| 8394 } | |
| 8395 | |
| 8396 PUGI__FN void truncate_zeros(char* begin, char* end) | |
| 8397 { | |
| 8398 while (begin != end && end[-1] == '0') end--; | |
| 8399 | |
| 8400 *end = 0; | |
| 8401 } | |
| 8402 | |
| 8403 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent | |
| 8404 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 | |
| 8405 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) | |
| 8406 { | |
| 8407 // get base values | |
| 8408 int sign, exponent; | |
| 8409 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign); | |
| 8410 | |
| 8411 // truncate redundant zeros | |
| 8412 truncate_zeros(buffer, buffer + strlen(buffer)); | |
| 8413 | |
| 8414 // fill results | |
| 8415 *out_mantissa = buffer; | |
| 8416 *out_exponent = exponent; | |
| 8417 } | |
| 8418 #else | |
| 8419 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) | |
| 8420 { | |
| 8421 // get a scientific notation value with IEEE DBL_DIG decimals | |
| 8422 PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value); | |
| 8423 | |
| 8424 // get the exponent (possibly negative) | |
| 8425 char* exponent_string = strchr(buffer, 'e'); | |
| 8426 assert(exponent_string); | |
| 8427 | |
| 8428 int exponent = atoi(exponent_string + 1); | |
| 8429 | |
| 8430 // extract mantissa string: skip sign | |
| 8431 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer; | |
| 8432 assert(mantissa[0] != '0' && mantissa[1] == '.'); | |
| 8433 | |
| 8434 // divide mantissa by 10 to eliminate integer part | |
| 8435 mantissa[1] = mantissa[0]; | |
| 8436 mantissa++; | |
| 8437 exponent++; | |
| 8438 | |
| 8439 // remove extra mantissa digits and zero-terminate mantissa | |
| 8440 truncate_zeros(mantissa, exponent_string); | |
| 8441 | |
| 8442 // fill results | |
| 8443 *out_mantissa = mantissa; | |
| 8444 *out_exponent = exponent; | |
| 8445 } | |
| 8446 #endif | |
| 8447 | |
| 8448 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) | |
| 8449 { | |
| 8450 // try special number conversion | |
| 8451 const char_t* special = convert_number_to_string_special(value); | |
| 8452 if (special) return xpath_string::from_const(special); | |
| 8453 | |
| 8454 // get mantissa + exponent form | |
| 8455 char mantissa_buffer[32]; | |
| 8456 | |
| 8457 char* mantissa; | |
| 8458 int exponent; | |
| 8459 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); | |
| 8460 | |
| 8461 // allocate a buffer of suitable length for the number | |
| 8462 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; | |
| 8463 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); | |
| 8464 if (!result) return xpath_string(); | |
| 8465 | |
| 8466 // make the number! | |
| 8467 char_t* s = result; | |
| 8468 | |
| 8469 // sign | |
| 8470 if (value < 0) *s++ = '-'; | |
| 8471 | |
| 8472 // integer part | |
| 8473 if (exponent <= 0) | |
| 8474 { | |
| 8475 *s++ = '0'; | |
| 8476 } | |
| 8477 else | |
| 8478 { | |
| 8479 while (exponent > 0) | |
| 8480 { | |
| 8481 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9); | |
| 8482 *s++ = *mantissa ? *mantissa++ : '0'; | |
| 8483 exponent--; | |
| 8484 } | |
| 8485 } | |
| 8486 | |
| 8487 // fractional part | |
| 8488 if (*mantissa) | |
| 8489 { | |
| 8490 // decimal point | |
| 8491 *s++ = '.'; | |
| 8492 | |
| 8493 // extra zeroes from negative exponent | |
| 8494 while (exponent < 0) | |
| 8495 { | |
| 8496 *s++ = '0'; | |
| 8497 exponent++; | |
| 8498 } | |
| 8499 | |
| 8500 // extra mantissa digits | |
| 8501 while (*mantissa) | |
| 8502 { | |
| 8503 assert(static_cast<unsigned int>(*mantissa - '0') <= 9); | |
| 8504 *s++ = *mantissa++; | |
| 8505 } | |
| 8506 } | |
| 8507 | |
| 8508 // zero-terminate | |
| 8509 assert(s < result + result_size); | |
| 8510 *s = 0; | |
| 8511 | |
| 8512 return xpath_string::from_heap_preallocated(result, s); | |
| 8513 } | |
| 8514 | |
| 8515 PUGI__FN bool check_string_to_number_format(const char_t* string) | |
| 8516 { | |
| 8517 // parse leading whitespace | |
| 8518 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; | |
| 8519 | |
| 8520 // parse sign | |
| 8521 if (*string == '-') ++string; | |
| 8522 | |
| 8523 if (!*string) return false; | |
| 8524 | |
| 8525 // if there is no integer part, there should be a decimal part with at least one digit | |
| 8526 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; | |
| 8527 | |
| 8528 // parse integer part | |
| 8529 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; | |
| 8530 | |
| 8531 // parse decimal part | |
| 8532 if (*string == '.') | |
| 8533 { | |
| 8534 ++string; | |
| 8535 | |
| 8536 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; | |
| 8537 } | |
| 8538 | |
| 8539 // parse trailing whitespace | |
| 8540 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; | |
| 8541 | |
| 8542 return *string == 0; | |
| 8543 } | |
| 8544 | |
| 8545 PUGI__FN double convert_string_to_number(const char_t* string) | |
| 8546 { | |
| 8547 // check string format | |
| 8548 if (!check_string_to_number_format(string)) return gen_nan(); | |
| 8549 | |
| 8550 // parse string | |
| 8551 #ifdef PUGIXML_WCHAR_MODE | |
| 8552 return wcstod(string, 0); | |
| 8553 #else | |
| 8554 return strtod(string, 0); | |
| 8555 #endif | |
| 8556 } | |
| 8557 | |
| 8558 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) | |
| 8559 { | |
| 8560 size_t length = static_cast<size_t>(end - begin); | |
| 8561 char_t* scratch = buffer; | |
| 8562 | |
| 8563 if (length >= sizeof(buffer) / sizeof(buffer[0])) | |
| 8564 { | |
| 8565 // need to make dummy on-heap copy | |
| 8566 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
| 8567 if (!scratch) return false; | |
| 8568 } | |
| 8569 | |
| 8570 // copy string to zero-terminated buffer and perform conversion | |
| 8571 memcpy(scratch, begin, length * sizeof(char_t)); | |
| 8572 scratch[length] = 0; | |
| 8573 | |
| 8574 *out_result = convert_string_to_number(scratch); | |
| 8575 | |
| 8576 // free dummy buffer | |
| 8577 if (scratch != buffer) xml_memory::deallocate(scratch); | |
| 8578 | |
| 8579 return true; | |
| 8580 } | |
| 8581 | |
| 8582 PUGI__FN double round_nearest(double value) | |
| 8583 { | |
| 8584 return floor(value + 0.5); | |
| 8585 } | |
| 8586 | |
| 8587 PUGI__FN double round_nearest_nzero(double value) | |
| 8588 { | |
| 8589 // same as round_nearest, but returns -0 for [-0.5, -0] | |
| 8590 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) | |
| 8591 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); | |
| 8592 } | |
| 8593 | |
| 8594 PUGI__FN const char_t* qualified_name(const xpath_node& node) | |
| 8595 { | |
| 8596 return node.attribute() ? node.attribute().name() : node.node().name(); | |
| 8597 } | |
| 8598 | |
| 8599 PUGI__FN const char_t* local_name(const xpath_node& node) | |
| 8600 { | |
| 8601 const char_t* name = qualified_name(node); | |
| 8602 const char_t* p = find_char(name, ':'); | |
| 8603 | |
| 8604 return p ? p + 1 : name; | |
| 8605 } | |
| 8606 | |
| 8607 struct namespace_uri_predicate | |
| 8608 { | |
| 8609 const char_t* prefix; | |
| 8610 size_t prefix_length; | |
| 8611 | |
| 8612 namespace_uri_predicate(const char_t* name) | |
| 8613 { | |
| 8614 const char_t* pos = find_char(name, ':'); | |
| 8615 | |
| 8616 prefix = pos ? name : 0; | |
| 8617 prefix_length = pos ? static_cast<size_t>(pos - name) : 0; | |
| 8618 } | |
| 8619 | |
| 8620 bool operator()(xml_attribute a) const | |
| 8621 { | |
| 8622 const char_t* name = a.name(); | |
| 8623 | |
| 8624 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; | |
| 8625 | |
| 8626 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; | |
| 8627 } | |
| 8628 }; | |
| 8629 | |
| 8630 PUGI__FN const char_t* namespace_uri(xml_node node) | |
| 8631 { | |
| 8632 namespace_uri_predicate pred = node.name(); | |
| 8633 | |
| 8634 xml_node p = node; | |
| 8635 | |
| 8636 while (p) | |
| 8637 { | |
| 8638 xml_attribute a = p.find_attribute(pred); | |
| 8639 | |
| 8640 if (a) return a.value(); | |
| 8641 | |
| 8642 p = p.parent(); | |
| 8643 } | |
| 8644 | |
| 8645 return PUGIXML_TEXT(""); | |
| 8646 } | |
| 8647 | |
| 8648 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) | |
| 8649 { | |
| 8650 namespace_uri_predicate pred = attr.name(); | |
| 8651 | |
| 8652 // Default namespace does not apply to attributes | |
| 8653 if (!pred.prefix) return PUGIXML_TEXT(""); | |
| 8654 | |
| 8655 xml_node p = parent; | |
| 8656 | |
| 8657 while (p) | |
| 8658 { | |
| 8659 xml_attribute a = p.find_attribute(pred); | |
| 8660 | |
| 8661 if (a) return a.value(); | |
| 8662 | |
| 8663 p = p.parent(); | |
| 8664 } | |
| 8665 | |
| 8666 return PUGIXML_TEXT(""); | |
| 8667 } | |
| 8668 | |
| 8669 PUGI__FN const char_t* namespace_uri(const xpath_node& node) | |
| 8670 { | |
| 8671 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); | |
| 8672 } | |
| 8673 | |
| 8674 PUGI__FN char_t* normalize_space(char_t* buffer) | |
| 8675 { | |
| 8676 char_t* write = buffer; | |
| 8677 | |
| 8678 for (char_t* it = buffer; *it; ) | |
| 8679 { | |
| 8680 char_t ch = *it++; | |
| 8681 | |
| 8682 if (PUGI__IS_CHARTYPE(ch, ct_space)) | |
| 8683 { | |
| 8684 // replace whitespace sequence with single space | |
| 8685 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++; | |
| 8686 | |
| 8687 // avoid leading spaces | |
| 8688 if (write != buffer) *write++ = ' '; | |
| 8689 } | |
| 8690 else *write++ = ch; | |
| 8691 } | |
| 8692 | |
| 8693 // remove trailing space | |
| 8694 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--; | |
| 8695 | |
| 8696 // zero-terminate | |
| 8697 *write = 0; | |
| 8698 | |
| 8699 return write; | |
| 8700 } | |
| 8701 | |
| 8702 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) | |
| 8703 { | |
| 8704 char_t* write = buffer; | |
| 8705 | |
| 8706 while (*buffer) | |
| 8707 { | |
| 8708 PUGI__DMC_VOLATILE char_t ch = *buffer++; | |
| 8709 | |
| 8710 const char_t* pos = find_char(from, ch); | |
| 8711 | |
| 8712 if (!pos) | |
| 8713 *write++ = ch; // do not process | |
| 8714 else if (static_cast<size_t>(pos - from) < to_length) | |
| 8715 *write++ = to[pos - from]; // replace | |
| 8716 } | |
| 8717 | |
| 8718 // zero-terminate | |
| 8719 *write = 0; | |
| 8720 | |
| 8721 return write; | |
| 8722 } | |
| 8723 | |
| 8724 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) | |
| 8725 { | |
| 8726 unsigned char table[128] = {0}; | |
| 8727 | |
| 8728 while (*from) | |
| 8729 { | |
| 8730 unsigned int fc = static_cast<unsigned int>(*from); | |
| 8731 unsigned int tc = static_cast<unsigned int>(*to); | |
| 8732 | |
| 8733 if (fc >= 128 || tc >= 128) | |
| 8734 return 0; | |
| 8735 | |
| 8736 // code=128 means "skip character" | |
| 8737 if (!table[fc]) | |
| 8738 table[fc] = static_cast<unsigned char>(tc ? tc : 128); | |
| 8739 | |
| 8740 from++; | |
| 8741 if (tc) to++; | |
| 8742 } | |
| 8743 | |
| 8744 for (int i = 0; i < 128; ++i) | |
| 8745 if (!table[i]) | |
| 8746 table[i] = static_cast<unsigned char>(i); | |
| 8747 | |
| 8748 void* result = alloc->allocate(sizeof(table)); | |
| 8749 if (!result) return 0; | |
| 8750 | |
| 8751 memcpy(result, table, sizeof(table)); | |
| 8752 | |
| 8753 return static_cast<unsigned char*>(result); | |
| 8754 } | |
| 8755 | |
| 8756 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) | |
| 8757 { | |
| 8758 char_t* write = buffer; | |
| 8759 | |
| 8760 while (*buffer) | |
| 8761 { | |
| 8762 char_t ch = *buffer++; | |
| 8763 unsigned int index = static_cast<unsigned int>(ch); | |
| 8764 | |
| 8765 if (index < 128) | |
| 8766 { | |
| 8767 unsigned char code = table[index]; | |
| 8768 | |
| 8769 // code=128 means "skip character" (table size is 128 so 128 can be a special value) | |
| 8770 // this code skips these characters without extra branches | |
| 8771 *write = static_cast<char_t>(code); | |
| 8772 write += 1 - (code >> 7); | |
| 8773 } | |
| 8774 else | |
| 8775 { | |
| 8776 *write++ = ch; | |
| 8777 } | |
| 8778 } | |
| 8779 | |
| 8780 // zero-terminate | |
| 8781 *write = 0; | |
| 8782 | |
| 8783 return write; | |
| 8784 } | |
| 8785 | |
| 8786 inline bool is_xpath_attribute(const char_t* name) | |
| 8787 { | |
| 8788 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); | |
| 8789 } | |
| 8790 | |
| 8791 struct xpath_variable_boolean: xpath_variable | |
| 8792 { | |
| 8793 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) | |
| 8794 { | |
| 8795 } | |
| 8796 | |
| 8797 bool value; | |
| 8798 char_t name[1]; | |
| 8799 }; | |
| 8800 | |
| 8801 struct xpath_variable_number: xpath_variable | |
| 8802 { | |
| 8803 xpath_variable_number(): xpath_variable(xpath_type_number), value(0) | |
| 8804 { | |
| 8805 } | |
| 8806 | |
| 8807 double value; | |
| 8808 char_t name[1]; | |
| 8809 }; | |
| 8810 | |
| 8811 struct xpath_variable_string: xpath_variable | |
| 8812 { | |
| 8813 xpath_variable_string(): xpath_variable(xpath_type_string), value(0) | |
| 8814 { | |
| 8815 } | |
| 8816 | |
| 8817 ~xpath_variable_string() | |
| 8818 { | |
| 8819 if (value) xml_memory::deallocate(value); | |
| 8820 } | |
| 8821 | |
| 8822 char_t* value; | |
| 8823 char_t name[1]; | |
| 8824 }; | |
| 8825 | |
| 8826 struct xpath_variable_node_set: xpath_variable | |
| 8827 { | |
| 8828 xpath_variable_node_set(): xpath_variable(xpath_type_node_set) | |
| 8829 { | |
| 8830 } | |
| 8831 | |
| 8832 xpath_node_set value; | |
| 8833 char_t name[1]; | |
| 8834 }; | |
| 8835 | |
| 8836 static const xpath_node_set dummy_node_set; | |
| 8837 | |
| 8838 PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str) | |
| 8839 { | |
| 8840 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) | |
| 8841 unsigned int result = 0; | |
| 8842 | |
| 8843 while (*str) | |
| 8844 { | |
| 8845 result += static_cast<unsigned int>(*str++); | |
| 8846 result += result << 10; | |
| 8847 result ^= result >> 6; | |
| 8848 } | |
| 8849 | |
| 8850 result += result << 3; | |
| 8851 result ^= result >> 11; | |
| 8852 result += result << 15; | |
| 8853 | |
| 8854 return result; | |
| 8855 } | |
| 8856 | |
| 8857 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name) | |
| 8858 { | |
| 8859 size_t length = strlength(name); | |
| 8860 if (length == 0) return 0; // empty variable names are invalid | |
| 8861 | |
| 8862 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters | |
| 8863 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); | |
| 8864 if (!memory) return 0; | |
| 8865 | |
| 8866 T* result = new (memory) T(); | |
| 8867 | |
| 8868 memcpy(result->name, name, (length + 1) * sizeof(char_t)); | |
| 8869 | |
| 8870 return result; | |
| 8871 } | |
| 8872 | |
| 8873 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) | |
| 8874 { | |
| 8875 switch (type) | |
| 8876 { | |
| 8877 case xpath_type_node_set: | |
| 8878 return new_xpath_variable<xpath_variable_node_set>(name); | |
| 8879 | |
| 8880 case xpath_type_number: | |
| 8881 return new_xpath_variable<xpath_variable_number>(name); | |
| 8882 | |
| 8883 case xpath_type_string: | |
| 8884 return new_xpath_variable<xpath_variable_string>(name); | |
| 8885 | |
| 8886 case xpath_type_boolean: | |
| 8887 return new_xpath_variable<xpath_variable_boolean>(name); | |
| 8888 | |
| 8889 default: | |
| 8890 return 0; | |
| 8891 } | |
| 8892 } | |
| 8893 | |
| 8894 template <typename T> PUGI__FN void delete_xpath_variable(T* var) | |
| 8895 { | |
| 8896 var->~T(); | |
| 8897 xml_memory::deallocate(var); | |
| 8898 } | |
| 8899 | |
| 8900 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) | |
| 8901 { | |
| 8902 switch (type) | |
| 8903 { | |
| 8904 case xpath_type_node_set: | |
| 8905 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var)); | |
| 8906 break; | |
| 8907 | |
| 8908 case xpath_type_number: | |
| 8909 delete_xpath_variable(static_cast<xpath_variable_number*>(var)); | |
| 8910 break; | |
| 8911 | |
| 8912 case xpath_type_string: | |
| 8913 delete_xpath_variable(static_cast<xpath_variable_string*>(var)); | |
| 8914 break; | |
| 8915 | |
| 8916 case xpath_type_boolean: | |
| 8917 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var)); | |
| 8918 break; | |
| 8919 | |
| 8920 default: | |
| 8921 assert(false && "Invalid variable type"); // unreachable | |
| 8922 } | |
| 8923 } | |
| 8924 | |
| 8925 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) | |
| 8926 { | |
| 8927 switch (rhs->type()) | |
| 8928 { | |
| 8929 case xpath_type_node_set: | |
| 8930 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value); | |
| 8931 | |
| 8932 case xpath_type_number: | |
| 8933 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value); | |
| 8934 | |
| 8935 case xpath_type_string: | |
| 8936 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value); | |
| 8937 | |
| 8938 case xpath_type_boolean: | |
| 8939 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value); | |
| 8940 | |
| 8941 default: | |
| 8942 assert(false && "Invalid variable type"); // unreachable | |
| 8943 return false; | |
| 8944 } | |
| 8945 } | |
| 8946 | |
| 8947 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) | |
| 8948 { | |
| 8949 size_t length = static_cast<size_t>(end - begin); | |
| 8950 char_t* scratch = buffer; | |
| 8951 | |
| 8952 if (length >= sizeof(buffer) / sizeof(buffer[0])) | |
| 8953 { | |
| 8954 // need to make dummy on-heap copy | |
| 8955 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
| 8956 if (!scratch) return false; | |
| 8957 } | |
| 8958 | |
| 8959 // copy string to zero-terminated buffer and perform lookup | |
| 8960 memcpy(scratch, begin, length * sizeof(char_t)); | |
| 8961 scratch[length] = 0; | |
| 8962 | |
| 8963 *out_result = set->get(scratch); | |
| 8964 | |
| 8965 // free dummy buffer | |
| 8966 if (scratch != buffer) xml_memory::deallocate(scratch); | |
| 8967 | |
| 8968 return true; | |
| 8969 } | |
| 8970 PUGI__NS_END | |
| 8971 | |
| 8972 // Internal node set class | |
| 8973 PUGI__NS_BEGIN | |
| 8974 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) | |
| 8975 { | |
| 8976 if (end - begin < 2) | |
| 8977 return xpath_node_set::type_sorted; | |
| 8978 | |
| 8979 document_order_comparator cmp; | |
| 8980 | |
| 8981 bool first = cmp(begin[0], begin[1]); | |
| 8982 | |
| 8983 for (const xpath_node* it = begin + 1; it + 1 < end; ++it) | |
| 8984 if (cmp(it[0], it[1]) != first) | |
| 8985 return xpath_node_set::type_unsorted; | |
| 8986 | |
| 8987 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; | |
| 8988 } | |
| 8989 | |
| 8990 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) | |
| 8991 { | |
| 8992 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; | |
| 8993 | |
| 8994 if (type == xpath_node_set::type_unsorted) | |
| 8995 { | |
| 8996 xpath_node_set::type_t sorted = xpath_get_order(begin, end); | |
| 8997 | |
| 8998 if (sorted == xpath_node_set::type_unsorted) | |
| 8999 { | |
| 9000 sort(begin, end, document_order_comparator()); | |
| 9001 | |
| 9002 type = xpath_node_set::type_sorted; | |
| 9003 } | |
| 9004 else | |
| 9005 type = sorted; | |
| 9006 } | |
| 9007 | |
| 9008 if (type != order) reverse(begin, end); | |
| 9009 | |
| 9010 return order; | |
| 9011 } | |
| 9012 | |
| 9013 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) | |
| 9014 { | |
| 9015 if (begin == end) return xpath_node(); | |
| 9016 | |
| 9017 switch (type) | |
| 9018 { | |
| 9019 case xpath_node_set::type_sorted: | |
| 9020 return *begin; | |
| 9021 | |
| 9022 case xpath_node_set::type_sorted_reverse: | |
| 9023 return *(end - 1); | |
| 9024 | |
| 9025 case xpath_node_set::type_unsorted: | |
| 9026 return *min_element(begin, end, document_order_comparator()); | |
| 9027 | |
| 9028 default: | |
| 9029 assert(false && "Invalid node set type"); // unreachable | |
| 9030 return xpath_node(); | |
| 9031 } | |
| 9032 } | |
| 9033 | |
| 9034 class xpath_node_set_raw | |
| 9035 { | |
| 9036 xpath_node_set::type_t _type; | |
| 9037 | |
| 9038 xpath_node* _begin; | |
| 9039 xpath_node* _end; | |
| 9040 xpath_node* _eos; | |
| 9041 | |
| 9042 public: | |
| 9043 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) | |
| 9044 { | |
| 9045 } | |
| 9046 | |
| 9047 xpath_node* begin() const | |
| 9048 { | |
| 9049 return _begin; | |
| 9050 } | |
| 9051 | |
| 9052 xpath_node* end() const | |
| 9053 { | |
| 9054 return _end; | |
| 9055 } | |
| 9056 | |
| 9057 bool empty() const | |
| 9058 { | |
| 9059 return _begin == _end; | |
| 9060 } | |
| 9061 | |
| 9062 size_t size() const | |
| 9063 { | |
| 9064 return static_cast<size_t>(_end - _begin); | |
| 9065 } | |
| 9066 | |
| 9067 xpath_node first() const | |
| 9068 { | |
| 9069 return xpath_first(_begin, _end, _type); | |
| 9070 } | |
| 9071 | |
| 9072 void push_back_grow(const xpath_node& node, xpath_allocator* alloc); | |
| 9073 | |
| 9074 void push_back(const xpath_node& node, xpath_allocator* alloc) | |
| 9075 { | |
| 9076 if (_end != _eos) | |
| 9077 *_end++ = node; | |
| 9078 else | |
| 9079 push_back_grow(node, alloc); | |
| 9080 } | |
| 9081 | |
| 9082 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) | |
| 9083 { | |
| 9084 if (begin_ == end_) return; | |
| 9085 | |
| 9086 size_t size_ = static_cast<size_t>(_end - _begin); | |
| 9087 size_t capacity = static_cast<size_t>(_eos - _begin); | |
| 9088 size_t count = static_cast<size_t>(end_ - begin_); | |
| 9089 | |
| 9090 if (size_ + count > capacity) | |
| 9091 { | |
| 9092 // reallocate the old array or allocate a new one | |
| 9093 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); | |
| 9094 if (!data) return; | |
| 9095 | |
| 9096 // finalize | |
| 9097 _begin = data; | |
| 9098 _end = data + size_; | |
| 9099 _eos = data + size_ + count; | |
| 9100 } | |
| 9101 | |
| 9102 memcpy(_end, begin_, count * sizeof(xpath_node)); | |
| 9103 _end += count; | |
| 9104 } | |
| 9105 | |
| 9106 void sort_do() | |
| 9107 { | |
| 9108 _type = xpath_sort(_begin, _end, _type, false); | |
| 9109 } | |
| 9110 | |
| 9111 void truncate(xpath_node* pos) | |
| 9112 { | |
| 9113 assert(_begin <= pos && pos <= _end); | |
| 9114 | |
| 9115 _end = pos; | |
| 9116 } | |
| 9117 | |
| 9118 void remove_duplicates(xpath_allocator* alloc) | |
| 9119 { | |
| 9120 if (_type == xpath_node_set::type_unsorted && _end - _begin > 2) | |
| 9121 { | |
| 9122 xpath_allocator_capture cr(alloc); | |
| 9123 | |
| 9124 size_t size_ = static_cast<size_t>(_end - _begin); | |
| 9125 | |
| 9126 size_t hash_size = 1; | |
| 9127 while (hash_size < size_ + size_ / 2) hash_size *= 2; | |
| 9128 | |
| 9129 const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**))); | |
| 9130 if (!hash_data) return; | |
| 9131 | |
| 9132 memset(hash_data, 0, hash_size * sizeof(const void**)); | |
| 9133 | |
| 9134 xpath_node* write = _begin; | |
| 9135 | |
| 9136 for (xpath_node* it = _begin; it != _end; ++it) | |
| 9137 { | |
| 9138 const void* attr = it->attribute().internal_object(); | |
| 9139 const void* node = it->node().internal_object(); | |
| 9140 const void* key = attr ? attr : node; | |
| 9141 | |
| 9142 if (key && hash_insert(hash_data, hash_size, key)) | |
| 9143 { | |
| 9144 *write++ = *it; | |
| 9145 } | |
| 9146 } | |
| 9147 | |
| 9148 _end = write; | |
| 9149 } | |
| 9150 else | |
| 9151 { | |
| 9152 _end = unique(_begin, _end); | |
| 9153 } | |
| 9154 } | |
| 9155 | |
| 9156 xpath_node_set::type_t type() const | |
| 9157 { | |
| 9158 return _type; | |
| 9159 } | |
| 9160 | |
| 9161 void set_type(xpath_node_set::type_t value) | |
| 9162 { | |
| 9163 _type = value; | |
| 9164 } | |
| 9165 }; | |
| 9166 | |
| 9167 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) | |
| 9168 { | |
| 9169 size_t capacity = static_cast<size_t>(_eos - _begin); | |
| 9170 | |
| 9171 // get new capacity (1.5x rule) | |
| 9172 size_t new_capacity = capacity + capacity / 2 + 1; | |
| 9173 | |
| 9174 // reallocate the old array or allocate a new one | |
| 9175 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); | |
| 9176 if (!data) return; | |
| 9177 | |
| 9178 // finalize | |
| 9179 _begin = data; | |
| 9180 _end = data + capacity; | |
| 9181 _eos = data + new_capacity; | |
| 9182 | |
| 9183 // push | |
| 9184 *_end++ = node; | |
| 9185 } | |
| 9186 PUGI__NS_END | |
| 9187 | |
| 9188 PUGI__NS_BEGIN | |
| 9189 struct xpath_context | |
| 9190 { | |
| 9191 xpath_node n; | |
| 9192 size_t position, size; | |
| 9193 | |
| 9194 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) | |
| 9195 { | |
| 9196 } | |
| 9197 }; | |
| 9198 | |
// Token kinds produced by xpath_lexer.
enum lexeme_t
{
	lex_none = 0,            // unrecognized or malformed input
	lex_equal,               // =
	lex_not_equal,           // !=
	lex_less,                // <
	lex_greater,             // >
	lex_less_or_equal,       // <=
	lex_greater_or_equal,    // >=
	lex_plus,                // +
	lex_minus,               // -
	lex_multiply,            // *
	lex_union,               // |
	lex_var_ref,             // $name or $prefix:name (contents exclude the '$')
	lex_open_brace,          // (
	lex_close_brace,         // )
	lex_quoted_string,       // "..." or '...' (contents exclude the quotes)
	lex_number,              // digits with an optional fractional part, or .digits
	lex_slash,               // /
	lex_double_slash,        // //
	lex_open_square_brace,   // [
	lex_close_square_brace,  // ]
	lex_string,              // name, optionally followed by :name or :*
	lex_comma,               // ,
	lex_axis_attribute,      // @
	lex_dot,                 // .
	lex_double_dot,          // ..
	lex_double_colon,        // ::
	lex_eof                  // end of the query string
};
| 9229 | |
| 9230 struct xpath_lexer_string | |
| 9231 { | |
| 9232 const char_t* begin; | |
| 9233 const char_t* end; | |
| 9234 | |
| 9235 xpath_lexer_string(): begin(0), end(0) | |
| 9236 { | |
| 9237 } | |
| 9238 | |
| 9239 bool operator==(const char_t* other) const | |
| 9240 { | |
| 9241 size_t length = static_cast<size_t>(end - begin); | |
| 9242 | |
| 9243 return strequalrange(other, begin, length); | |
| 9244 } | |
| 9245 }; | |
| 9246 | |
| 9247 class xpath_lexer | |
| 9248 { | |
| 9249 const char_t* _cur; | |
| 9250 const char_t* _cur_lexeme_pos; | |
| 9251 xpath_lexer_string _cur_lexeme_contents; | |
| 9252 | |
| 9253 lexeme_t _cur_lexeme; | |
| 9254 | |
| 9255 public: | |
| 9256 explicit xpath_lexer(const char_t* query): _cur(query) | |
| 9257 { | |
| 9258 next(); | |
| 9259 } | |
| 9260 | |
| 9261 const char_t* state() const | |
| 9262 { | |
| 9263 return _cur; | |
| 9264 } | |
| 9265 | |
| 9266 void next() | |
| 9267 { | |
| 9268 const char_t* cur = _cur; | |
| 9269 | |
| 9270 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; | |
| 9271 | |
| 9272 // save lexeme position for error reporting | |
| 9273 _cur_lexeme_pos = cur; | |
| 9274 | |
| 9275 switch (*cur) | |
| 9276 { | |
| 9277 case 0: | |
| 9278 _cur_lexeme = lex_eof; | |
| 9279 break; | |
| 9280 | |
| 9281 case '>': | |
| 9282 if (*(cur+1) == '=') | |
| 9283 { | |
| 9284 cur += 2; | |
| 9285 _cur_lexeme = lex_greater_or_equal; | |
| 9286 } | |
| 9287 else | |
| 9288 { | |
| 9289 cur += 1; | |
| 9290 _cur_lexeme = lex_greater; | |
| 9291 } | |
| 9292 break; | |
| 9293 | |
| 9294 case '<': | |
| 9295 if (*(cur+1) == '=') | |
| 9296 { | |
| 9297 cur += 2; | |
| 9298 _cur_lexeme = lex_less_or_equal; | |
| 9299 } | |
| 9300 else | |
| 9301 { | |
| 9302 cur += 1; | |
| 9303 _cur_lexeme = lex_less; | |
| 9304 } | |
| 9305 break; | |
| 9306 | |
| 9307 case '!': | |
| 9308 if (*(cur+1) == '=') | |
| 9309 { | |
| 9310 cur += 2; | |
| 9311 _cur_lexeme = lex_not_equal; | |
| 9312 } | |
| 9313 else | |
| 9314 { | |
| 9315 _cur_lexeme = lex_none; | |
| 9316 } | |
| 9317 break; | |
| 9318 | |
| 9319 case '=': | |
| 9320 cur += 1; | |
| 9321 _cur_lexeme = lex_equal; | |
| 9322 | |
| 9323 break; | |
| 9324 | |
| 9325 case '+': | |
| 9326 cur += 1; | |
| 9327 _cur_lexeme = lex_plus; | |
| 9328 | |
| 9329 break; | |
| 9330 | |
| 9331 case '-': | |
| 9332 cur += 1; | |
| 9333 _cur_lexeme = lex_minus; | |
| 9334 | |
| 9335 break; | |
| 9336 | |
| 9337 case '*': | |
| 9338 cur += 1; | |
| 9339 _cur_lexeme = lex_multiply; | |
| 9340 | |
| 9341 break; | |
| 9342 | |
| 9343 case '|': | |
| 9344 cur += 1; | |
| 9345 _cur_lexeme = lex_union; | |
| 9346 | |
| 9347 break; | |
| 9348 | |
| 9349 case '$': | |
| 9350 cur += 1; | |
| 9351 | |
| 9352 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) | |
| 9353 { | |
| 9354 _cur_lexeme_contents.begin = cur; | |
| 9355 | |
| 9356 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
| 9357 | |
| 9358 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname | |
| 9359 { | |
| 9360 cur++; // : | |
| 9361 | |
| 9362 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
| 9363 } | |
| 9364 | |
| 9365 _cur_lexeme_contents.end = cur; | |
| 9366 | |
| 9367 _cur_lexeme = lex_var_ref; | |
| 9368 } | |
| 9369 else | |
| 9370 { | |
| 9371 _cur_lexeme = lex_none; | |
| 9372 } | |
| 9373 | |
| 9374 break; | |
| 9375 | |
| 9376 case '(': | |
| 9377 cur += 1; | |
| 9378 _cur_lexeme = lex_open_brace; | |
| 9379 | |
| 9380 break; | |
| 9381 | |
| 9382 case ')': | |
| 9383 cur += 1; | |
| 9384 _cur_lexeme = lex_close_brace; | |
| 9385 | |
| 9386 break; | |
| 9387 | |
| 9388 case '[': | |
| 9389 cur += 1; | |
| 9390 _cur_lexeme = lex_open_square_brace; | |
| 9391 | |
| 9392 break; | |
| 9393 | |
| 9394 case ']': | |
| 9395 cur += 1; | |
| 9396 _cur_lexeme = lex_close_square_brace; | |
| 9397 | |
| 9398 break; | |
| 9399 | |
| 9400 case ',': | |
| 9401 cur += 1; | |
| 9402 _cur_lexeme = lex_comma; | |
| 9403 | |
| 9404 break; | |
| 9405 | |
| 9406 case '/': | |
| 9407 if (*(cur+1) == '/') | |
| 9408 { | |
| 9409 cur += 2; | |
| 9410 _cur_lexeme = lex_double_slash; | |
| 9411 } | |
| 9412 else | |
| 9413 { | |
| 9414 cur += 1; | |
| 9415 _cur_lexeme = lex_slash; | |
| 9416 } | |
| 9417 break; | |
| 9418 | |
| 9419 case '.': | |
| 9420 if (*(cur+1) == '.') | |
| 9421 { | |
| 9422 cur += 2; | |
| 9423 _cur_lexeme = lex_double_dot; | |
| 9424 } | |
| 9425 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) | |
| 9426 { | |
| 9427 _cur_lexeme_contents.begin = cur; // . | |
| 9428 | |
| 9429 ++cur; | |
| 9430 | |
| 9431 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; | |
| 9432 | |
| 9433 _cur_lexeme_contents.end = cur; | |
| 9434 | |
| 9435 _cur_lexeme = lex_number; | |
| 9436 } | |
| 9437 else | |
| 9438 { | |
| 9439 cur += 1; | |
| 9440 _cur_lexeme = lex_dot; | |
| 9441 } | |
| 9442 break; | |
| 9443 | |
| 9444 case '@': | |
| 9445 cur += 1; | |
| 9446 _cur_lexeme = lex_axis_attribute; | |
| 9447 | |
| 9448 break; | |
| 9449 | |
| 9450 case '"': | |
| 9451 case '\'': | |
| 9452 { | |
| 9453 char_t terminator = *cur; | |
| 9454 | |
| 9455 ++cur; | |
| 9456 | |
| 9457 _cur_lexeme_contents.begin = cur; | |
| 9458 while (*cur && *cur != terminator) cur++; | |
| 9459 _cur_lexeme_contents.end = cur; | |
| 9460 | |
| 9461 if (!*cur) | |
| 9462 _cur_lexeme = lex_none; | |
| 9463 else | |
| 9464 { | |
| 9465 cur += 1; | |
| 9466 _cur_lexeme = lex_quoted_string; | |
| 9467 } | |
| 9468 | |
| 9469 break; | |
| 9470 } | |
| 9471 | |
| 9472 case ':': | |
| 9473 if (*(cur+1) == ':') | |
| 9474 { | |
| 9475 cur += 2; | |
| 9476 _cur_lexeme = lex_double_colon; | |
| 9477 } | |
| 9478 else | |
| 9479 { | |
| 9480 _cur_lexeme = lex_none; | |
| 9481 } | |
| 9482 break; | |
| 9483 | |
| 9484 default: | |
| 9485 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) | |
| 9486 { | |
| 9487 _cur_lexeme_contents.begin = cur; | |
| 9488 | |
| 9489 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; | |
| 9490 | |
| 9491 if (*cur == '.') | |
| 9492 { | |
| 9493 cur++; | |
| 9494 | |
| 9495 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; | |
| 9496 } | |
| 9497 | |
| 9498 _cur_lexeme_contents.end = cur; | |
| 9499 | |
| 9500 _cur_lexeme = lex_number; | |
| 9501 } | |
| 9502 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) | |
| 9503 { | |
| 9504 _cur_lexeme_contents.begin = cur; | |
| 9505 | |
| 9506 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
| 9507 | |
| 9508 if (cur[0] == ':') | |
| 9509 { | |
| 9510 if (cur[1] == '*') // namespace test ncname:* | |
| 9511 { | |
| 9512 cur += 2; // :* | |
| 9513 } | |
| 9514 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname | |
| 9515 { | |
| 9516 cur++; // : | |
| 9517 | |
| 9518 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
| 9519 } | |
| 9520 } | |
| 9521 | |
| 9522 _cur_lexeme_contents.end = cur; | |
| 9523 | |
| 9524 _cur_lexeme = lex_string; | |
| 9525 } | |
| 9526 else | |
| 9527 { | |
| 9528 _cur_lexeme = lex_none; | |
| 9529 } | |
| 9530 } | |
| 9531 | |
| 9532 _cur = cur; | |
| 9533 } | |
| 9534 | |
| 9535 lexeme_t current() const | |
| 9536 { | |
| 9537 return _cur_lexeme; | |
| 9538 } | |
| 9539 | |
| 9540 const char_t* current_pos() const | |
| 9541 { | |
| 9542 return _cur_lexeme_pos; | |
| 9543 } | |
| 9544 | |
| 9545 const xpath_lexer_string& contents() const | |
| 9546 { | |
| 9547 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); | |
| 9548 | |
| 9549 return _cur_lexeme_contents; | |
| 9550 } | |
| 9551 }; | |
| 9552 | |
// Kinds of XPath AST nodes. "left", "right" and "third" in the comments refer
// to the operand sub-expressions of the node.
enum ast_type_t
{
	ast_unknown,

	// operators
	ast_op_or,                  // left or right
	ast_op_and,                 // left and right
	ast_op_equal,               // left = right
	ast_op_not_equal,           // left != right
	ast_op_less,                // left < right
	ast_op_greater,             // left > right
	ast_op_less_or_equal,       // left <= right
	ast_op_greater_or_equal,    // left >= right
	ast_op_add,                 // left + right
	ast_op_subtract,            // left - right
	ast_op_multiply,            // left * right
	ast_op_divide,              // left / right
	ast_op_mod,                 // left % right
	ast_op_negate,              // -left (unary minus)
	ast_op_union,               // left | right

	// predicates and filters
	ast_predicate,              // apply predicate to set; next points to next predicate
	ast_filter,                 // select * from left where right

	// primary expressions
	ast_string_constant,        // string constant
	ast_number_constant,        // number constant
	ast_variable,               // variable

	// function calls
	ast_func_last,              // last()
	ast_func_position,          // position()
	ast_func_count,             // count(left)
	ast_func_id,                // id(left)
	ast_func_local_name_0,      // local-name()
	ast_func_local_name_1,      // local-name(left)
	ast_func_namespace_uri_0,   // namespace-uri()
	ast_func_namespace_uri_1,   // namespace-uri(left)
	ast_func_name_0,            // name()
	ast_func_name_1,            // name(left)
	ast_func_string_0,          // string()
	ast_func_string_1,          // string(left)
	ast_func_concat,            // concat(left, right, siblings)
	ast_func_starts_with,       // starts-with(left, right)
	ast_func_contains,          // contains(left, right)
	ast_func_substring_before,  // substring-before(left, right)
	ast_func_substring_after,   // substring-after(left, right)
	ast_func_substring_2,       // substring(left, right)
	ast_func_substring_3,       // substring(left, right, third)
	ast_func_string_length_0,   // string-length()
	ast_func_string_length_1,   // string-length(left)
	ast_func_normalize_space_0, // normalize-space()
	ast_func_normalize_space_1, // normalize-space(left)
	ast_func_translate,         // translate(left, right, third)
	ast_func_boolean,           // boolean(left)
	ast_func_not,               // not(left)
	ast_func_true,              // true()
	ast_func_false,             // false()
	ast_func_lang,              // lang(left)
	ast_func_number_0,          // number()
	ast_func_number_1,          // number(left)
	ast_func_sum,               // sum(left)
	ast_func_floor,             // floor(left)
	ast_func_ceiling,           // ceiling(left)
	ast_func_round,             // round(left)

	// location steps
	ast_step,                   // process set left with step
	ast_step_root,              // select root node

	// optimized forms
	ast_opt_translate_table,    // translate(left, right, third) where right/third are constants
	ast_opt_compare_attribute   // @name = 'string'
};
| 9617 | |
// Axes of a location step; the enumerator order is alphabetical by axis name.
enum axis_t
{
	axis_ancestor,            // ancestor
	axis_ancestor_or_self,    // ancestor-or-self
	axis_attribute,           // attribute
	axis_child,               // child
	axis_descendant,          // descendant
	axis_descendant_or_self,  // descendant-or-self
	axis_following,           // following
	axis_following_sibling,   // following-sibling
	axis_namespace,           // namespace
	axis_parent,              // parent
	axis_preceding,           // preceding
	axis_preceding_sibling,   // preceding-sibling
	axis_self                 // self
};
| 9634 | |
// Node tests that a location step can apply to candidate nodes
// (the descriptions follow what the step_push overloads check).
enum nodetest_t
{
	nodetest_none,
	nodetest_name,             // name equals the stored node test string
	nodetest_type_node,        // any node
	nodetest_type_comment,     // comment nodes
	nodetest_type_pi,          // processing instruction nodes
	nodetest_type_text,        // pcdata and cdata nodes
	nodetest_pi,               // processing instruction whose name equals the stored string
	nodetest_all,              // every element (or every attribute, for attribute candidates)
	nodetest_all_in_namespace  // name starts with the stored node test string
};
| 9647 | |
// Classification of a predicate expression, used to choose how it is applied.
enum predicate_t
{
	predicate_default,      // no special handling; dispatched on the expression's return type
	predicate_posinv,       // NOTE(review): presumably "position invariant" - not used in the code visible here
	predicate_constant,     // applied through the constant-number path
	predicate_constant_one  // applied through the constant-number path
};
| 9655 | |
// How much of a node set the caller of an evaluation needs.
// eval_once() uses this to decide whether collection may stop after the first match:
// never for nodeset_eval_all, always for nodeset_eval_any, and for
// nodeset_eval_first only when the set is sorted.
enum nodeset_eval_t
{
	nodeset_eval_all,
	nodeset_eval_any,
	nodeset_eval_first
};
| 9662 | |
| 9663 template <axis_t N> struct axis_to_type | |
| 9664 { | |
| 9665 static const axis_t axis; | |
| 9666 }; | |
| 9667 | |
| 9668 template <axis_t N> const axis_t axis_to_type<N>::axis = N; | |
| 9669 | |
| 9670 class xpath_ast_node | |
| 9671 { | |
| 9672 private: | |
| 9673 // node type | |
| 9674 char _type; | |
| 9675 char _rettype; | |
| 9676 | |
| 9677 // for ast_step | |
| 9678 char _axis; | |
| 9679 | |
| 9680 // for ast_step/ast_predicate/ast_filter | |
| 9681 char _test; | |
| 9682 | |
| 9683 // tree node structure | |
| 9684 xpath_ast_node* _left; | |
| 9685 xpath_ast_node* _right; | |
| 9686 xpath_ast_node* _next; | |
| 9687 | |
| 9688 union | |
| 9689 { | |
| 9690 // value for ast_string_constant | |
| 9691 const char_t* string; | |
| 9692 // value for ast_number_constant | |
| 9693 double number; | |
| 9694 // variable for ast_variable | |
| 9695 xpath_variable* variable; | |
| 9696 // node test for ast_step (node name/namespace/node type/pi target) | |
| 9697 const char_t* nodetest; | |
| 9698 // table for ast_opt_translate_table | |
| 9699 const unsigned char* table; | |
| 9700 } _data; | |
| 9701 | |
| 9702 xpath_ast_node(const xpath_ast_node&); | |
| 9703 xpath_ast_node& operator=(const xpath_ast_node&); | |
| 9704 | |
| 9705 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) | |
| 9706 { | |
| 9707 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); | |
| 9708 | |
| 9709 if (lt != xpath_type_node_set && rt != xpath_type_node_set) | |
| 9710 { | |
| 9711 if (lt == xpath_type_boolean || rt == xpath_type_boolean) | |
| 9712 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); | |
| 9713 else if (lt == xpath_type_number || rt == xpath_type_number) | |
| 9714 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); | |
| 9715 else if (lt == xpath_type_string || rt == xpath_type_string) | |
| 9716 { | |
| 9717 xpath_allocator_capture cr(stack.result); | |
| 9718 | |
| 9719 xpath_string ls = lhs->eval_string(c, stack); | |
| 9720 xpath_string rs = rhs->eval_string(c, stack); | |
| 9721 | |
| 9722 return comp(ls, rs); | |
| 9723 } | |
| 9724 } | |
| 9725 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) | |
| 9726 { | |
| 9727 xpath_allocator_capture cr(stack.result); | |
| 9728 | |
| 9729 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); | |
| 9730 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
| 9731 | |
| 9732 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | |
| 9733 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
| 9734 { | |
| 9735 xpath_allocator_capture cri(stack.result); | |
| 9736 | |
| 9737 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) | |
| 9738 return true; | |
| 9739 } | |
| 9740 | |
| 9741 return false; | |
| 9742 } | |
| 9743 else | |
| 9744 { | |
| 9745 if (lt == xpath_type_node_set) | |
| 9746 { | |
| 9747 swap(lhs, rhs); | |
| 9748 swap(lt, rt); | |
| 9749 } | |
| 9750 | |
| 9751 if (lt == xpath_type_boolean) | |
| 9752 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); | |
| 9753 else if (lt == xpath_type_number) | |
| 9754 { | |
| 9755 xpath_allocator_capture cr(stack.result); | |
| 9756 | |
| 9757 double l = lhs->eval_number(c, stack); | |
| 9758 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
| 9759 | |
| 9760 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
| 9761 { | |
| 9762 xpath_allocator_capture cri(stack.result); | |
| 9763 | |
| 9764 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | |
| 9765 return true; | |
| 9766 } | |
| 9767 | |
| 9768 return false; | |
| 9769 } | |
| 9770 else if (lt == xpath_type_string) | |
| 9771 { | |
| 9772 xpath_allocator_capture cr(stack.result); | |
| 9773 | |
| 9774 xpath_string l = lhs->eval_string(c, stack); | |
| 9775 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
| 9776 | |
| 9777 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
| 9778 { | |
| 9779 xpath_allocator_capture cri(stack.result); | |
| 9780 | |
| 9781 if (comp(l, string_value(*ri, stack.result))) | |
| 9782 return true; | |
| 9783 } | |
| 9784 | |
| 9785 return false; | |
| 9786 } | |
| 9787 } | |
| 9788 | |
| 9789 assert(false && "Wrong types"); // unreachable | |
| 9790 return false; | |
| 9791 } | |
| 9792 | |
| 9793 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) | |
| 9794 { | |
| 9795 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any; | |
| 9796 } | |
| 9797 | |
| 9798 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) | |
| 9799 { | |
| 9800 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); | |
| 9801 | |
| 9802 if (lt != xpath_type_node_set && rt != xpath_type_node_set) | |
| 9803 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); | |
| 9804 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) | |
| 9805 { | |
| 9806 xpath_allocator_capture cr(stack.result); | |
| 9807 | |
| 9808 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); | |
| 9809 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
| 9810 | |
| 9811 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | |
| 9812 { | |
| 9813 xpath_allocator_capture cri(stack.result); | |
| 9814 | |
| 9815 double l = convert_string_to_number(string_value(*li, stack.result).c_str()); | |
| 9816 | |
| 9817 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
| 9818 { | |
| 9819 xpath_allocator_capture crii(stack.result); | |
| 9820 | |
| 9821 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | |
| 9822 return true; | |
| 9823 } | |
| 9824 } | |
| 9825 | |
| 9826 return false; | |
| 9827 } | |
| 9828 else if (lt != xpath_type_node_set && rt == xpath_type_node_set) | |
| 9829 { | |
| 9830 xpath_allocator_capture cr(stack.result); | |
| 9831 | |
| 9832 double l = lhs->eval_number(c, stack); | |
| 9833 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
| 9834 | |
| 9835 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
| 9836 { | |
| 9837 xpath_allocator_capture cri(stack.result); | |
| 9838 | |
| 9839 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | |
| 9840 return true; | |
| 9841 } | |
| 9842 | |
| 9843 return false; | |
| 9844 } | |
| 9845 else if (lt == xpath_type_node_set && rt != xpath_type_node_set) | |
| 9846 { | |
| 9847 xpath_allocator_capture cr(stack.result); | |
| 9848 | |
| 9849 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); | |
| 9850 double r = rhs->eval_number(c, stack); | |
| 9851 | |
| 9852 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | |
| 9853 { | |
| 9854 xpath_allocator_capture cri(stack.result); | |
| 9855 | |
| 9856 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) | |
| 9857 return true; | |
| 9858 } | |
| 9859 | |
| 9860 return false; | |
| 9861 } | |
| 9862 else | |
| 9863 { | |
| 9864 assert(false && "Wrong types"); // unreachable | |
| 9865 return false; | |
| 9866 } | |
| 9867 } | |
| 9868 | |
| 9869 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) | |
| 9870 { | |
| 9871 assert(ns.size() >= first); | |
| 9872 assert(expr->rettype() != xpath_type_number); | |
| 9873 | |
| 9874 size_t i = 1; | |
| 9875 size_t size = ns.size() - first; | |
| 9876 | |
| 9877 xpath_node* last = ns.begin() + first; | |
| 9878 | |
| 9879 // remove_if... or well, sort of | |
| 9880 for (xpath_node* it = last; it != ns.end(); ++it, ++i) | |
| 9881 { | |
| 9882 xpath_context c(*it, i, size); | |
| 9883 | |
| 9884 if (expr->eval_boolean(c, stack)) | |
| 9885 { | |
| 9886 *last++ = *it; | |
| 9887 | |
| 9888 if (once) break; | |
| 9889 } | |
| 9890 } | |
| 9891 | |
| 9892 ns.truncate(last); | |
| 9893 } | |
| 9894 | |
| 9895 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) | |
| 9896 { | |
| 9897 assert(ns.size() >= first); | |
| 9898 assert(expr->rettype() == xpath_type_number); | |
| 9899 | |
| 9900 size_t i = 1; | |
| 9901 size_t size = ns.size() - first; | |
| 9902 | |
| 9903 xpath_node* last = ns.begin() + first; | |
| 9904 | |
| 9905 // remove_if... or well, sort of | |
| 9906 for (xpath_node* it = last; it != ns.end(); ++it, ++i) | |
| 9907 { | |
| 9908 xpath_context c(*it, i, size); | |
| 9909 | |
| 9910 if (expr->eval_number(c, stack) == static_cast<double>(i)) | |
| 9911 { | |
| 9912 *last++ = *it; | |
| 9913 | |
| 9914 if (once) break; | |
| 9915 } | |
| 9916 } | |
| 9917 | |
| 9918 ns.truncate(last); | |
| 9919 } | |
| 9920 | |
| 9921 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) | |
| 9922 { | |
| 9923 assert(ns.size() >= first); | |
| 9924 assert(expr->rettype() == xpath_type_number); | |
| 9925 | |
| 9926 size_t size = ns.size() - first; | |
| 9927 | |
| 9928 xpath_node* last = ns.begin() + first; | |
| 9929 | |
| 9930 xpath_context c(xpath_node(), 1, size); | |
| 9931 | |
| 9932 double er = expr->eval_number(c, stack); | |
| 9933 | |
| 9934 if (er >= 1.0 && er <= static_cast<double>(size)) | |
| 9935 { | |
| 9936 size_t eri = static_cast<size_t>(er); | |
| 9937 | |
| 9938 if (er == static_cast<double>(eri)) | |
| 9939 { | |
| 9940 xpath_node r = last[eri - 1]; | |
| 9941 | |
| 9942 *last++ = r; | |
| 9943 } | |
| 9944 } | |
| 9945 | |
| 9946 ns.truncate(last); | |
| 9947 } | |
| 9948 | |
| 9949 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) | |
| 9950 { | |
| 9951 if (ns.size() == first) return; | |
| 9952 | |
| 9953 assert(_type == ast_filter || _type == ast_predicate); | |
| 9954 | |
| 9955 if (_test == predicate_constant || _test == predicate_constant_one) | |
| 9956 apply_predicate_number_const(ns, first, _right, stack); | |
| 9957 else if (_right->rettype() == xpath_type_number) | |
| 9958 apply_predicate_number(ns, first, _right, stack, once); | |
| 9959 else | |
| 9960 apply_predicate_boolean(ns, first, _right, stack, once); | |
| 9961 } | |
| 9962 | |
| 9963 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) | |
| 9964 { | |
| 9965 if (ns.size() == first) return; | |
| 9966 | |
| 9967 bool last_once = eval_once(ns.type(), eval); | |
| 9968 | |
| 9969 for (xpath_ast_node* pred = _right; pred; pred = pred->_next) | |
| 9970 pred->apply_predicate(ns, first, stack, !pred->_next && last_once); | |
| 9971 } | |
| 9972 | |
| 9973 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) | |
| 9974 { | |
| 9975 assert(a); | |
| 9976 | |
| 9977 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT(""); | |
| 9978 | |
| 9979 switch (_test) | |
| 9980 { | |
| 9981 case nodetest_name: | |
| 9982 if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) | |
| 9983 { | |
| 9984 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); | |
| 9985 return true; | |
| 9986 } | |
| 9987 break; | |
| 9988 | |
| 9989 case nodetest_type_node: | |
| 9990 case nodetest_all: | |
| 9991 if (is_xpath_attribute(name)) | |
| 9992 { | |
| 9993 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); | |
| 9994 return true; | |
| 9995 } | |
| 9996 break; | |
| 9997 | |
| 9998 case nodetest_all_in_namespace: | |
| 9999 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) | |
| 10000 { | |
| 10001 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); | |
| 10002 return true; | |
| 10003 } | |
| 10004 break; | |
| 10005 | |
| 10006 default: | |
| 10007 ; | |
| 10008 } | |
| 10009 | |
| 10010 return false; | |
| 10011 } | |
| 10012 | |
| 10013 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) | |
| 10014 { | |
| 10015 assert(n); | |
| 10016 | |
| 10017 xml_node_type type = PUGI__NODETYPE(n); | |
| 10018 | |
| 10019 switch (_test) | |
| 10020 { | |
| 10021 case nodetest_name: | |
| 10022 if (type == node_element && n->name && strequal(n->name, _data.nodetest)) | |
| 10023 { | |
| 10024 ns.push_back(xml_node(n), alloc); | |
| 10025 return true; | |
| 10026 } | |
| 10027 break; | |
| 10028 | |
| 10029 case nodetest_type_node: | |
| 10030 ns.push_back(xml_node(n), alloc); | |
| 10031 return true; | |
| 10032 | |
| 10033 case nodetest_type_comment: | |
| 10034 if (type == node_comment) | |
| 10035 { | |
| 10036 ns.push_back(xml_node(n), alloc); | |
| 10037 return true; | |
| 10038 } | |
| 10039 break; | |
| 10040 | |
| 10041 case nodetest_type_text: | |
| 10042 if (type == node_pcdata || type == node_cdata) | |
| 10043 { | |
| 10044 ns.push_back(xml_node(n), alloc); | |
| 10045 return true; | |
| 10046 } | |
| 10047 break; | |
| 10048 | |
| 10049 case nodetest_type_pi: | |
| 10050 if (type == node_pi) | |
| 10051 { | |
| 10052 ns.push_back(xml_node(n), alloc); | |
| 10053 return true; | |
| 10054 } | |
| 10055 break; | |
| 10056 | |
| 10057 case nodetest_pi: | |
| 10058 if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) | |
| 10059 { | |
| 10060 ns.push_back(xml_node(n), alloc); | |
| 10061 return true; | |
| 10062 } | |
| 10063 break; | |
| 10064 | |
| 10065 case nodetest_all: | |
| 10066 if (type == node_element) | |
| 10067 { | |
| 10068 ns.push_back(xml_node(n), alloc); | |
| 10069 return true; | |
| 10070 } | |
| 10071 break; | |
| 10072 | |
| 10073 case nodetest_all_in_namespace: | |
| 10074 if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) | |
| 10075 { | |
| 10076 ns.push_back(xml_node(n), alloc); | |
| 10077 return true; | |
| 10078 } | |
| 10079 break; | |
| 10080 | |
| 10081 default: | |
| 10082 assert(false && "Unknown axis"); // unreachable | |
| 10083 } | |
| 10084 | |
| 10085 return false; | |
| 10086 } | |
| 10087 | |
| 10088 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) | |
| 10089 { | |
| 10090 const axis_t axis = T::axis; | |
| 10091 | |
| 10092 switch (axis) | |
| 10093 { | |
| 10094 case axis_attribute: | |
| 10095 { | |
| 10096 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) | |
| 10097 if (step_push(ns, a, n, alloc) & once) | |
| 10098 return; | |
| 10099 | |
| 10100 break; | |
| 10101 } | |
| 10102 | |
| 10103 case axis_child: | |
| 10104 { | |
| 10105 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) | |
| 10106 if (step_push(ns, c, alloc) & once) | |
| 10107 return; | |
| 10108 | |
| 10109 break; | |
| 10110 } | |
| 10111 | |
| 10112 case axis_descendant: | |
| 10113 case axis_descendant_or_self: | |
| 10114 { | |
| 10115 if (axis == axis_descendant_or_self) | |
| 10116 if (step_push(ns, n, alloc) & once) | |
| 10117 return; | |
| 10118 | |
| 10119 xml_node_struct* cur = n->first_child; | |
| 10120 | |
| 10121 while (cur) | |
| 10122 { | |
| 10123 if (step_push(ns, cur, alloc) & once) | |
| 10124 return; | |
| 10125 | |
| 10126 if (cur->first_child) | |
| 10127 cur = cur->first_child; | |
| 10128 else | |
| 10129 { | |
| 10130 while (!cur->next_sibling) | |
| 10131 { | |
| 10132 cur = cur->parent; | |
| 10133 | |
| 10134 if (cur == n) return; | |
| 10135 } | |
| 10136 | |
| 10137 cur = cur->next_sibling; | |
| 10138 } | |
| 10139 } | |
| 10140 | |
| 10141 break; | |
| 10142 } | |
| 10143 | |
| 10144 case axis_following_sibling: | |
| 10145 { | |
| 10146 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) | |
| 10147 if (step_push(ns, c, alloc) & once) | |
| 10148 return; | |
| 10149 | |
| 10150 break; | |
| 10151 } | |
| 10152 | |
| 10153 case axis_preceding_sibling: | |
| 10154 { | |
| 10155 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) | |
| 10156 if (step_push(ns, c, alloc) & once) | |
| 10157 return; | |
| 10158 | |
| 10159 break; | |
| 10160 } | |
| 10161 | |
| 10162 case axis_following: | |
| 10163 { | |
| 10164 xml_node_struct* cur = n; | |
| 10165 | |
| 10166 // exit from this node so that we don't include descendants | |
| 10167 while (!cur->next_sibling) | |
| 10168 { | |
| 10169 cur = cur->parent; | |
| 10170 | |
| 10171 if (!cur) return; | |
| 10172 } | |
| 10173 | |
| 10174 cur = cur->next_sibling; | |
| 10175 | |
| 10176 while (cur) | |
| 10177 { | |
| 10178 if (step_push(ns, cur, alloc) & once) | |
| 10179 return; | |
| 10180 | |
| 10181 if (cur->first_child) | |
| 10182 cur = cur->first_child; | |
| 10183 else | |
| 10184 { | |
| 10185 while (!cur->next_sibling) | |
| 10186 { | |
| 10187 cur = cur->parent; | |
| 10188 | |
| 10189 if (!cur) return; | |
| 10190 } | |
| 10191 | |
| 10192 cur = cur->next_sibling; | |
| 10193 } | |
| 10194 } | |
| 10195 | |
| 10196 break; | |
| 10197 } | |
| 10198 | |
| 10199 case axis_preceding: | |
| 10200 { | |
| 10201 xml_node_struct* cur = n; | |
| 10202 | |
| 10203 // exit from this node so that we don't include descendants | |
| 10204 while (!cur->prev_sibling_c->next_sibling) | |
| 10205 { | |
| 10206 cur = cur->parent; | |
| 10207 | |
| 10208 if (!cur) return; | |
| 10209 } | |
| 10210 | |
| 10211 cur = cur->prev_sibling_c; | |
| 10212 | |
| 10213 while (cur) | |
| 10214 { | |
| 10215 if (cur->first_child) | |
| 10216 cur = cur->first_child->prev_sibling_c; | |
| 10217 else | |
| 10218 { | |
| 10219 // leaf node, can't be ancestor | |
| 10220 if (step_push(ns, cur, alloc) & once) | |
| 10221 return; | |
| 10222 | |
| 10223 while (!cur->prev_sibling_c->next_sibling) | |
| 10224 { | |
| 10225 cur = cur->parent; | |
| 10226 | |
| 10227 if (!cur) return; | |
| 10228 | |
| 10229 if (!node_is_ancestor(cur, n)) | |
| 10230 if (step_push(ns, cur, alloc) & once) | |
| 10231 return; | |
| 10232 } | |
| 10233 | |
| 10234 cur = cur->prev_sibling_c; | |
| 10235 } | |
| 10236 } | |
| 10237 | |
| 10238 break; | |
| 10239 } | |
| 10240 | |
| 10241 case axis_ancestor: | |
| 10242 case axis_ancestor_or_self: | |
| 10243 { | |
| 10244 if (axis == axis_ancestor_or_self) | |
| 10245 if (step_push(ns, n, alloc) & once) | |
| 10246 return; | |
| 10247 | |
| 10248 xml_node_struct* cur = n->parent; | |
| 10249 | |
| 10250 while (cur) | |
| 10251 { | |
| 10252 if (step_push(ns, cur, alloc) & once) | |
| 10253 return; | |
| 10254 | |
| 10255 cur = cur->parent; | |
| 10256 } | |
| 10257 | |
| 10258 break; | |
| 10259 } | |
| 10260 | |
| 10261 case axis_self: | |
| 10262 { | |
| 10263 step_push(ns, n, alloc); | |
| 10264 | |
| 10265 break; | |
| 10266 } | |
| 10267 | |
| 10268 case axis_parent: | |
| 10269 { | |
| 10270 if (n->parent) | |
| 10271 step_push(ns, n->parent, alloc); | |
| 10272 | |
| 10273 break; | |
| 10274 } | |
| 10275 | |
| 10276 default: | |
| 10277 assert(false && "Unimplemented axis"); // unreachable | |
| 10278 } | |
| 10279 } | |
| 10280 | |
| 10281 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) | |
| 10282 { | |
| 10283 const axis_t axis = T::axis; | |
| 10284 | |
| 10285 switch (axis) | |
| 10286 { | |
| 10287 case axis_ancestor: | |
| 10288 case axis_ancestor_or_self: | |
| 10289 { | |
| 10290 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test | |
| 10291 if (step_push(ns, a, p, alloc) & once) | |
| 10292 return; | |
| 10293 | |
| 10294 xml_node_struct* cur = p; | |
| 10295 | |
| 10296 while (cur) | |
| 10297 { | |
| 10298 if (step_push(ns, cur, alloc) & once) | |
| 10299 return; | |
| 10300 | |
| 10301 cur = cur->parent; | |
| 10302 } | |
| 10303 | |
| 10304 break; | |
| 10305 } | |
| 10306 | |
| 10307 case axis_descendant_or_self: | |
| 10308 case axis_self: | |
| 10309 { | |
| 10310 if (_test == nodetest_type_node) // reject attributes based on principal node type test | |
| 10311 step_push(ns, a, p, alloc); | |
| 10312 | |
| 10313 break; | |
| 10314 } | |
| 10315 | |
| 10316 case axis_following: | |
| 10317 { | |
| 10318 xml_node_struct* cur = p; | |
| 10319 | |
| 10320 while (cur) | |
| 10321 { | |
| 10322 if (cur->first_child) | |
| 10323 cur = cur->first_child; | |
| 10324 else | |
| 10325 { | |
| 10326 while (!cur->next_sibling) | |
| 10327 { | |
| 10328 cur = cur->parent; | |
| 10329 | |
| 10330 if (!cur) return; | |
| 10331 } | |
| 10332 | |
| 10333 cur = cur->next_sibling; | |
| 10334 } | |
| 10335 | |
| 10336 if (step_push(ns, cur, alloc) & once) | |
| 10337 return; | |
| 10338 } | |
| 10339 | |
| 10340 break; | |
| 10341 } | |
| 10342 | |
| 10343 case axis_parent: | |
| 10344 { | |
| 10345 step_push(ns, p, alloc); | |
| 10346 | |
| 10347 break; | |
| 10348 } | |
| 10349 | |
| 10350 case axis_preceding: | |
| 10351 { | |
| 10352 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding | |
| 10353 step_fill(ns, p, alloc, once, v); | |
| 10354 break; | |
| 10355 } | |
| 10356 | |
| 10357 default: | |
| 10358 assert(false && "Unimplemented axis"); // unreachable | |
| 10359 } | |
| 10360 } | |
| 10361 | |
| 10362 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) | |
| 10363 { | |
| 10364 const axis_t axis = T::axis; | |
| 10365 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); | |
| 10366 | |
| 10367 if (xn.node()) | |
| 10368 step_fill(ns, xn.node().internal_object(), alloc, once, v); | |
| 10369 else if (axis_has_attributes && xn.attribute() && xn.parent()) | |
| 10370 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); | |
| 10371 } | |
| 10372 | |
| 10373 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) | |
| 10374 { | |
| 10375 const axis_t axis = T::axis; | |
| 10376 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); | |
| 10377 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; | |
| 10378 | |
| 10379 bool once = | |
| 10380 (axis == axis_attribute && _test == nodetest_name) || | |
| 10381 (!_right && eval_once(axis_type, eval)) || | |
| 10382 // coverity[mixed_enums] | |
| 10383 (_right && !_right->_next && _right->_test == predicate_constant_one); | |
| 10384 | |
| 10385 xpath_node_set_raw ns; | |
| 10386 ns.set_type(axis_type); | |
| 10387 | |
| 10388 if (_left) | |
| 10389 { | |
| 10390 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); | |
| 10391 | |
| 10392 // self axis preserves the original order | |
| 10393 if (axis == axis_self) ns.set_type(s.type()); | |
| 10394 | |
| 10395 for (const xpath_node* it = s.begin(); it != s.end(); ++it) | |
| 10396 { | |
| 10397 size_t size = ns.size(); | |
| 10398 | |
| 10399 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes | |
| 10400 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); | |
| 10401 | |
| 10402 step_fill(ns, *it, stack.result, once, v); | |
| 10403 if (_right) apply_predicates(ns, size, stack, eval); | |
| 10404 } | |
| 10405 } | |
| 10406 else | |
| 10407 { | |
| 10408 step_fill(ns, c.n, stack.result, once, v); | |
| 10409 if (_right) apply_predicates(ns, 0, stack, eval); | |
| 10410 } | |
| 10411 | |
| 10412 // child, attribute and self axes always generate unique set of nodes | |
| 10413 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice | |
| 10414 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) | |
| 10415 ns.remove_duplicates(stack.temp); | |
| 10416 | |
| 10417 return ns; | |
| 10418 } | |
| 10419 | |
| 10420 public: | |
| 10421 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): | |
| 10422 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) | |
| 10423 { | |
| 10424 assert(type == ast_string_constant); | |
| 10425 _data.string = value; | |
| 10426 } | |
| 10427 | |
| 10428 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): | |
| 10429 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) | |
| 10430 { | |
| 10431 assert(type == ast_number_constant); | |
| 10432 _data.number = value; | |
| 10433 } | |
| 10434 | |
| 10435 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): | |
| 10436 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) | |
| 10437 { | |
| 10438 assert(type == ast_variable); | |
| 10439 _data.variable = value; | |
| 10440 } | |
| 10441 | |
| 10442 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): | |
| 10443 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) | |
| 10444 { | |
| 10445 } | |
| 10446 | |
| 10447 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): | |
| 10448 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) | |
| 10449 { | |
| 10450 assert(type == ast_step); | |
| 10451 _data.nodetest = contents; | |
| 10452 } | |
| 10453 | |
| 10454 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): | |
| 10455 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0) | |
| 10456 { | |
| 10457 assert(type == ast_filter || type == ast_predicate); | |
| 10458 } | |
| 10459 | |
| 10460 void set_next(xpath_ast_node* value) | |
| 10461 { | |
| 10462 _next = value; | |
| 10463 } | |
| 10464 | |
| 10465 void set_right(xpath_ast_node* value) | |
| 10466 { | |
| 10467 _right = value; | |
| 10468 } | |
| 10469 | |
| 10470 bool eval_boolean(const xpath_context& c, const xpath_stack& stack) | |
| 10471 { | |
| 10472 switch (_type) | |
| 10473 { | |
| 10474 case ast_op_or: | |
| 10475 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); | |
| 10476 | |
| 10477 case ast_op_and: | |
| 10478 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); | |
| 10479 | |
| 10480 case ast_op_equal: | |
| 10481 return compare_eq(_left, _right, c, stack, equal_to()); | |
| 10482 | |
| 10483 case ast_op_not_equal: | |
| 10484 return compare_eq(_left, _right, c, stack, not_equal_to()); | |
| 10485 | |
| 10486 case ast_op_less: | |
| 10487 return compare_rel(_left, _right, c, stack, less()); | |
| 10488 | |
| 10489 case ast_op_greater: | |
| 10490 return compare_rel(_right, _left, c, stack, less()); | |
| 10491 | |
| 10492 case ast_op_less_or_equal: | |
| 10493 return compare_rel(_left, _right, c, stack, less_equal()); | |
| 10494 | |
| 10495 case ast_op_greater_or_equal: | |
| 10496 return compare_rel(_right, _left, c, stack, less_equal()); | |
| 10497 | |
| 10498 case ast_func_starts_with: | |
| 10499 { | |
| 10500 xpath_allocator_capture cr(stack.result); | |
| 10501 | |
| 10502 xpath_string lr = _left->eval_string(c, stack); | |
| 10503 xpath_string rr = _right->eval_string(c, stack); | |
| 10504 | |
| 10505 return starts_with(lr.c_str(), rr.c_str()); | |
| 10506 } | |
| 10507 | |
| 10508 case ast_func_contains: | |
| 10509 { | |
| 10510 xpath_allocator_capture cr(stack.result); | |
| 10511 | |
| 10512 xpath_string lr = _left->eval_string(c, stack); | |
| 10513 xpath_string rr = _right->eval_string(c, stack); | |
| 10514 | |
| 10515 return find_substring(lr.c_str(), rr.c_str()) != 0; | |
| 10516 } | |
| 10517 | |
| 10518 case ast_func_boolean: | |
| 10519 return _left->eval_boolean(c, stack); | |
| 10520 | |
| 10521 case ast_func_not: | |
| 10522 return !_left->eval_boolean(c, stack); | |
| 10523 | |
| 10524 case ast_func_true: | |
| 10525 return true; | |
| 10526 | |
| 10527 case ast_func_false: | |
| 10528 return false; | |
| 10529 | |
| 10530 case ast_func_lang: | |
| 10531 { | |
| 10532 if (c.n.attribute()) return false; | |
| 10533 | |
| 10534 xpath_allocator_capture cr(stack.result); | |
| 10535 | |
| 10536 xpath_string lang = _left->eval_string(c, stack); | |
| 10537 | |
| 10538 for (xml_node n = c.n.node(); n; n = n.parent()) | |
| 10539 { | |
| 10540 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); | |
| 10541 | |
| 10542 if (a) | |
| 10543 { | |
| 10544 const char_t* value = a.value(); | |
| 10545 | |
| 10546 // strnicmp / strncasecmp is not portable | |
| 10547 for (const char_t* lit = lang.c_str(); *lit; ++lit) | |
| 10548 { | |
| 10549 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; | |
| 10550 ++value; | |
| 10551 } | |
| 10552 | |
| 10553 return *value == 0 || *value == '-'; | |
| 10554 } | |
| 10555 } | |
| 10556 | |
| 10557 return false; | |
| 10558 } | |
| 10559 | |
| 10560 case ast_opt_compare_attribute: | |
| 10561 { | |
| 10562 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); | |
| 10563 | |
| 10564 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); | |
| 10565 | |
| 10566 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); | |
| 10567 } | |
| 10568 | |
| 10569 case ast_variable: | |
| 10570 { | |
| 10571 assert(_rettype == _data.variable->type()); | |
| 10572 | |
| 10573 if (_rettype == xpath_type_boolean) | |
| 10574 return _data.variable->get_boolean(); | |
| 10575 | |
| 10576 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
| 10577 break; | |
| 10578 } | |
| 10579 | |
| 10580 default: | |
| 10581 ; | |
| 10582 } | |
| 10583 | |
| 10584 // none of the ast types that return the value directly matched, we need to perform type conversion | |
| 10585 switch (_rettype) | |
| 10586 { | |
| 10587 case xpath_type_number: | |
| 10588 return convert_number_to_boolean(eval_number(c, stack)); | |
| 10589 | |
| 10590 case xpath_type_string: | |
| 10591 { | |
| 10592 xpath_allocator_capture cr(stack.result); | |
| 10593 | |
| 10594 return !eval_string(c, stack).empty(); | |
| 10595 } | |
| 10596 | |
| 10597 case xpath_type_node_set: | |
| 10598 { | |
| 10599 xpath_allocator_capture cr(stack.result); | |
| 10600 | |
| 10601 return !eval_node_set(c, stack, nodeset_eval_any).empty(); | |
| 10602 } | |
| 10603 | |
| 10604 default: | |
| 10605 assert(false && "Wrong expression for return type boolean"); // unreachable | |
| 10606 return false; | |
| 10607 } | |
| 10608 } | |
| 10609 | |
| 10610 double eval_number(const xpath_context& c, const xpath_stack& stack) | |
| 10611 { | |
| 10612 switch (_type) | |
| 10613 { | |
| 10614 case ast_op_add: | |
| 10615 return _left->eval_number(c, stack) + _right->eval_number(c, stack); | |
| 10616 | |
| 10617 case ast_op_subtract: | |
| 10618 return _left->eval_number(c, stack) - _right->eval_number(c, stack); | |
| 10619 | |
| 10620 case ast_op_multiply: | |
| 10621 return _left->eval_number(c, stack) * _right->eval_number(c, stack); | |
| 10622 | |
| 10623 case ast_op_divide: | |
| 10624 return _left->eval_number(c, stack) / _right->eval_number(c, stack); | |
| 10625 | |
| 10626 case ast_op_mod: | |
| 10627 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); | |
| 10628 | |
| 10629 case ast_op_negate: | |
| 10630 return -_left->eval_number(c, stack); | |
| 10631 | |
| 10632 case ast_number_constant: | |
| 10633 return _data.number; | |
| 10634 | |
| 10635 case ast_func_last: | |
| 10636 return static_cast<double>(c.size); | |
| 10637 | |
| 10638 case ast_func_position: | |
| 10639 return static_cast<double>(c.position); | |
| 10640 | |
| 10641 case ast_func_count: | |
| 10642 { | |
| 10643 xpath_allocator_capture cr(stack.result); | |
| 10644 | |
| 10645 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size()); | |
| 10646 } | |
| 10647 | |
| 10648 case ast_func_string_length_0: | |
| 10649 { | |
| 10650 xpath_allocator_capture cr(stack.result); | |
| 10651 | |
| 10652 return static_cast<double>(string_value(c.n, stack.result).length()); | |
| 10653 } | |
| 10654 | |
| 10655 case ast_func_string_length_1: | |
| 10656 { | |
| 10657 xpath_allocator_capture cr(stack.result); | |
| 10658 | |
| 10659 return static_cast<double>(_left->eval_string(c, stack).length()); | |
| 10660 } | |
| 10661 | |
| 10662 case ast_func_number_0: | |
| 10663 { | |
| 10664 xpath_allocator_capture cr(stack.result); | |
| 10665 | |
| 10666 return convert_string_to_number(string_value(c.n, stack.result).c_str()); | |
| 10667 } | |
| 10668 | |
| 10669 case ast_func_number_1: | |
| 10670 return _left->eval_number(c, stack); | |
| 10671 | |
| 10672 case ast_func_sum: | |
| 10673 { | |
| 10674 xpath_allocator_capture cr(stack.result); | |
| 10675 | |
| 10676 double r = 0; | |
| 10677 | |
| 10678 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); | |
| 10679 | |
| 10680 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) | |
| 10681 { | |
| 10682 xpath_allocator_capture cri(stack.result); | |
| 10683 | |
| 10684 r += convert_string_to_number(string_value(*it, stack.result).c_str()); | |
| 10685 } | |
| 10686 | |
| 10687 return r; | |
| 10688 } | |
| 10689 | |
| 10690 case ast_func_floor: | |
| 10691 { | |
| 10692 double r = _left->eval_number(c, stack); | |
| 10693 | |
| 10694 return r == r ? floor(r) : r; | |
| 10695 } | |
| 10696 | |
| 10697 case ast_func_ceiling: | |
| 10698 { | |
| 10699 double r = _left->eval_number(c, stack); | |
| 10700 | |
| 10701 return r == r ? ceil(r) : r; | |
| 10702 } | |
| 10703 | |
| 10704 case ast_func_round: | |
| 10705 return round_nearest_nzero(_left->eval_number(c, stack)); | |
| 10706 | |
| 10707 case ast_variable: | |
| 10708 { | |
| 10709 assert(_rettype == _data.variable->type()); | |
| 10710 | |
| 10711 if (_rettype == xpath_type_number) | |
| 10712 return _data.variable->get_number(); | |
| 10713 | |
| 10714 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
| 10715 break; | |
| 10716 } | |
| 10717 | |
| 10718 default: | |
| 10719 ; | |
| 10720 } | |
| 10721 | |
| 10722 // none of the ast types that return the value directly matched, we need to perform type conversion | |
| 10723 switch (_rettype) | |
| 10724 { | |
| 10725 case xpath_type_boolean: | |
| 10726 return eval_boolean(c, stack) ? 1 : 0; | |
| 10727 | |
| 10728 case xpath_type_string: | |
| 10729 { | |
| 10730 xpath_allocator_capture cr(stack.result); | |
| 10731 | |
| 10732 return convert_string_to_number(eval_string(c, stack).c_str()); | |
| 10733 } | |
| 10734 | |
| 10735 case xpath_type_node_set: | |
| 10736 { | |
| 10737 xpath_allocator_capture cr(stack.result); | |
| 10738 | |
| 10739 return convert_string_to_number(eval_string(c, stack).c_str()); | |
| 10740 } | |
| 10741 | |
| 10742 default: | |
| 10743 assert(false && "Wrong expression for return type number"); // unreachable | |
| 10744 return 0; | |
| 10745 } | |
| 10746 } | |
| 10747 | |
| 10748 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) | |
| 10749 { | |
| 10750 assert(_type == ast_func_concat); | |
| 10751 | |
| 10752 xpath_allocator_capture ct(stack.temp); | |
| 10753 | |
| 10754 // count the string number | |
| 10755 size_t count = 1; | |
| 10756 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; | |
| 10757 | |
| 10758 // allocate a buffer for temporary string objects | |
| 10759 xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string))); | |
| 10760 if (!buffer) return xpath_string(); | |
| 10761 | |
| 10762 // evaluate all strings to temporary stack | |
| 10763 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
| 10764 | |
| 10765 buffer[0] = _left->eval_string(c, swapped_stack); | |
| 10766 | |
| 10767 size_t pos = 1; | |
| 10768 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); | |
| 10769 assert(pos == count); | |
| 10770 | |
| 10771 // get total length | |
| 10772 size_t length = 0; | |
| 10773 for (size_t i = 0; i < count; ++i) length += buffer[i].length(); | |
| 10774 | |
| 10775 // create final string | |
| 10776 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); | |
| 10777 if (!result) return xpath_string(); | |
| 10778 | |
| 10779 char_t* ri = result; | |
| 10780 | |
| 10781 for (size_t j = 0; j < count; ++j) | |
| 10782 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) | |
| 10783 *ri++ = *bi; | |
| 10784 | |
| 10785 *ri = 0; | |
| 10786 | |
| 10787 return xpath_string::from_heap_preallocated(result, ri); | |
| 10788 } | |
| 10789 | |
| 10790 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) | |
| 10791 { | |
| 10792 switch (_type) | |
| 10793 { | |
| 10794 case ast_string_constant: | |
| 10795 return xpath_string::from_const(_data.string); | |
| 10796 | |
| 10797 case ast_func_local_name_0: | |
| 10798 { | |
| 10799 xpath_node na = c.n; | |
| 10800 | |
| 10801 return xpath_string::from_const(local_name(na)); | |
| 10802 } | |
| 10803 | |
| 10804 case ast_func_local_name_1: | |
| 10805 { | |
| 10806 xpath_allocator_capture cr(stack.result); | |
| 10807 | |
| 10808 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); | |
| 10809 xpath_node na = ns.first(); | |
| 10810 | |
| 10811 return xpath_string::from_const(local_name(na)); | |
| 10812 } | |
| 10813 | |
| 10814 case ast_func_name_0: | |
| 10815 { | |
| 10816 xpath_node na = c.n; | |
| 10817 | |
| 10818 return xpath_string::from_const(qualified_name(na)); | |
| 10819 } | |
| 10820 | |
| 10821 case ast_func_name_1: | |
| 10822 { | |
| 10823 xpath_allocator_capture cr(stack.result); | |
| 10824 | |
| 10825 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); | |
| 10826 xpath_node na = ns.first(); | |
| 10827 | |
| 10828 return xpath_string::from_const(qualified_name(na)); | |
| 10829 } | |
| 10830 | |
| 10831 case ast_func_namespace_uri_0: | |
| 10832 { | |
| 10833 xpath_node na = c.n; | |
| 10834 | |
| 10835 return xpath_string::from_const(namespace_uri(na)); | |
| 10836 } | |
| 10837 | |
| 10838 case ast_func_namespace_uri_1: | |
| 10839 { | |
| 10840 xpath_allocator_capture cr(stack.result); | |
| 10841 | |
| 10842 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); | |
| 10843 xpath_node na = ns.first(); | |
| 10844 | |
| 10845 return xpath_string::from_const(namespace_uri(na)); | |
| 10846 } | |
| 10847 | |
| 10848 case ast_func_string_0: | |
| 10849 return string_value(c.n, stack.result); | |
| 10850 | |
| 10851 case ast_func_string_1: | |
| 10852 return _left->eval_string(c, stack); | |
| 10853 | |
| 10854 case ast_func_concat: | |
| 10855 return eval_string_concat(c, stack); | |
| 10856 | |
| 10857 case ast_func_substring_before: | |
| 10858 { | |
| 10859 xpath_allocator_capture cr(stack.temp); | |
| 10860 | |
| 10861 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
| 10862 | |
| 10863 xpath_string s = _left->eval_string(c, swapped_stack); | |
| 10864 xpath_string p = _right->eval_string(c, swapped_stack); | |
| 10865 | |
| 10866 const char_t* pos = find_substring(s.c_str(), p.c_str()); | |
| 10867 | |
| 10868 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); | |
| 10869 } | |
| 10870 | |
| 10871 case ast_func_substring_after: | |
| 10872 { | |
| 10873 xpath_allocator_capture cr(stack.temp); | |
| 10874 | |
| 10875 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
| 10876 | |
| 10877 xpath_string s = _left->eval_string(c, swapped_stack); | |
| 10878 xpath_string p = _right->eval_string(c, swapped_stack); | |
| 10879 | |
| 10880 const char_t* pos = find_substring(s.c_str(), p.c_str()); | |
| 10881 if (!pos) return xpath_string(); | |
| 10882 | |
| 10883 const char_t* rbegin = pos + p.length(); | |
| 10884 const char_t* rend = s.c_str() + s.length(); | |
| 10885 | |
| 10886 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); | |
| 10887 } | |
| 10888 | |
| 10889 case ast_func_substring_2: | |
| 10890 { | |
| 10891 xpath_allocator_capture cr(stack.temp); | |
| 10892 | |
| 10893 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
| 10894 | |
| 10895 xpath_string s = _left->eval_string(c, swapped_stack); | |
| 10896 size_t s_length = s.length(); | |
| 10897 | |
| 10898 double first = round_nearest(_right->eval_number(c, stack)); | |
| 10899 | |
| 10900 if (is_nan(first)) return xpath_string(); // NaN | |
| 10901 else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); | |
| 10902 | |
| 10903 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); | |
| 10904 assert(1 <= pos && pos <= s_length + 1); | |
| 10905 | |
| 10906 const char_t* rbegin = s.c_str() + (pos - 1); | |
| 10907 const char_t* rend = s.c_str() + s.length(); | |
| 10908 | |
| 10909 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); | |
| 10910 } | |
| 10911 | |
| 10912 case ast_func_substring_3: | |
| 10913 { | |
| 10914 xpath_allocator_capture cr(stack.temp); | |
| 10915 | |
| 10916 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
| 10917 | |
| 10918 xpath_string s = _left->eval_string(c, swapped_stack); | |
| 10919 size_t s_length = s.length(); | |
| 10920 | |
| 10921 double first = round_nearest(_right->eval_number(c, stack)); | |
| 10922 double last = first + round_nearest(_right->_next->eval_number(c, stack)); | |
| 10923 | |
| 10924 if (is_nan(first) || is_nan(last)) return xpath_string(); | |
| 10925 else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); | |
| 10926 else if (first >= last) return xpath_string(); | |
| 10927 else if (last < 1) return xpath_string(); | |
| 10928 | |
| 10929 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); | |
| 10930 size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last); | |
| 10931 | |
| 10932 assert(1 <= pos && pos <= end && end <= s_length + 1); | |
| 10933 const char_t* rbegin = s.c_str() + (pos - 1); | |
| 10934 const char_t* rend = s.c_str() + (end - 1); | |
| 10935 | |
| 10936 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); | |
| 10937 } | |
| 10938 | |
| 10939 case ast_func_normalize_space_0: | |
| 10940 { | |
| 10941 xpath_string s = string_value(c.n, stack.result); | |
| 10942 | |
| 10943 char_t* begin = s.data(stack.result); | |
| 10944 if (!begin) return xpath_string(); | |
| 10945 | |
| 10946 char_t* end = normalize_space(begin); | |
| 10947 | |
| 10948 return xpath_string::from_heap_preallocated(begin, end); | |
| 10949 } | |
| 10950 | |
| 10951 case ast_func_normalize_space_1: | |
| 10952 { | |
| 10953 xpath_string s = _left->eval_string(c, stack); | |
| 10954 | |
| 10955 char_t* begin = s.data(stack.result); | |
| 10956 if (!begin) return xpath_string(); | |
| 10957 | |
| 10958 char_t* end = normalize_space(begin); | |
| 10959 | |
| 10960 return xpath_string::from_heap_preallocated(begin, end); | |
| 10961 } | |
| 10962 | |
| 10963 case ast_func_translate: | |
| 10964 { | |
| 10965 xpath_allocator_capture cr(stack.temp); | |
| 10966 | |
| 10967 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
| 10968 | |
| 10969 xpath_string s = _left->eval_string(c, stack); | |
| 10970 xpath_string from = _right->eval_string(c, swapped_stack); | |
| 10971 xpath_string to = _right->_next->eval_string(c, swapped_stack); | |
| 10972 | |
| 10973 char_t* begin = s.data(stack.result); | |
| 10974 if (!begin) return xpath_string(); | |
| 10975 | |
| 10976 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); | |
| 10977 | |
| 10978 return xpath_string::from_heap_preallocated(begin, end); | |
| 10979 } | |
| 10980 | |
| 10981 case ast_opt_translate_table: | |
| 10982 { | |
| 10983 xpath_string s = _left->eval_string(c, stack); | |
| 10984 | |
| 10985 char_t* begin = s.data(stack.result); | |
| 10986 if (!begin) return xpath_string(); | |
| 10987 | |
| 10988 char_t* end = translate_table(begin, _data.table); | |
| 10989 | |
| 10990 return xpath_string::from_heap_preallocated(begin, end); | |
| 10991 } | |
| 10992 | |
| 10993 case ast_variable: | |
| 10994 { | |
| 10995 assert(_rettype == _data.variable->type()); | |
| 10996 | |
| 10997 if (_rettype == xpath_type_string) | |
| 10998 return xpath_string::from_const(_data.variable->get_string()); | |
| 10999 | |
| 11000 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
| 11001 break; | |
| 11002 } | |
| 11003 | |
| 11004 default: | |
| 11005 ; | |
| 11006 } | |
| 11007 | |
| 11008 // none of the ast types that return the value directly matched, we need to perform type conversion | |
| 11009 switch (_rettype) | |
| 11010 { | |
| 11011 case xpath_type_boolean: | |
| 11012 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); | |
| 11013 | |
| 11014 case xpath_type_number: | |
| 11015 return convert_number_to_string(eval_number(c, stack), stack.result); | |
| 11016 | |
| 11017 case xpath_type_node_set: | |
| 11018 { | |
| 11019 xpath_allocator_capture cr(stack.temp); | |
| 11020 | |
| 11021 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
| 11022 | |
| 11023 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); | |
| 11024 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); | |
| 11025 } | |
| 11026 | |
| 11027 default: | |
| 11028 assert(false && "Wrong expression for return type string"); // unreachable | |
| 11029 return xpath_string(); | |
| 11030 } | |
| 11031 } | |
| 11032 | |
| 11033 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) | |
| 11034 { | |
| 11035 switch (_type) | |
| 11036 { | |
| 11037 case ast_op_union: | |
| 11038 { | |
| 11039 xpath_allocator_capture cr(stack.temp); | |
| 11040 | |
| 11041 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
| 11042 | |
| 11043 xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval); | |
| 11044 xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval); | |
| 11045 | |
| 11046 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother | |
| 11047 ls.set_type(xpath_node_set::type_unsorted); | |
| 11048 | |
| 11049 ls.append(rs.begin(), rs.end(), stack.result); | |
| 11050 ls.remove_duplicates(stack.temp); | |
| 11051 | |
| 11052 return ls; | |
| 11053 } | |
| 11054 | |
| 11055 case ast_filter: | |
| 11056 { | |
| 11057 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all); | |
| 11058 | |
| 11059 // either expression is a number or it contains position() call; sort by document order | |
| 11060 if (_test != predicate_posinv) set.sort_do(); | |
| 11061 | |
| 11062 bool once = eval_once(set.type(), eval); | |
| 11063 | |
| 11064 apply_predicate(set, 0, stack, once); | |
| 11065 | |
| 11066 return set; | |
| 11067 } | |
| 11068 | |
| 11069 case ast_func_id: | |
| 11070 return xpath_node_set_raw(); | |
| 11071 | |
| 11072 case ast_step: | |
| 11073 { | |
| 11074 switch (_axis) | |
| 11075 { | |
| 11076 case axis_ancestor: | |
| 11077 return step_do(c, stack, eval, axis_to_type<axis_ancestor>()); | |
| 11078 | |
| 11079 case axis_ancestor_or_self: | |
| 11080 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>()); | |
| 11081 | |
| 11082 case axis_attribute: | |
| 11083 return step_do(c, stack, eval, axis_to_type<axis_attribute>()); | |
| 11084 | |
| 11085 case axis_child: | |
| 11086 return step_do(c, stack, eval, axis_to_type<axis_child>()); | |
| 11087 | |
| 11088 case axis_descendant: | |
| 11089 return step_do(c, stack, eval, axis_to_type<axis_descendant>()); | |
| 11090 | |
| 11091 case axis_descendant_or_self: | |
| 11092 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>()); | |
| 11093 | |
| 11094 case axis_following: | |
| 11095 return step_do(c, stack, eval, axis_to_type<axis_following>()); | |
| 11096 | |
| 11097 case axis_following_sibling: | |
| 11098 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>()); | |
| 11099 | |
| 11100 case axis_namespace: | |
| 11101 // namespaced axis is not supported | |
| 11102 return xpath_node_set_raw(); | |
| 11103 | |
| 11104 case axis_parent: | |
| 11105 return step_do(c, stack, eval, axis_to_type<axis_parent>()); | |
| 11106 | |
| 11107 case axis_preceding: | |
| 11108 return step_do(c, stack, eval, axis_to_type<axis_preceding>()); | |
| 11109 | |
| 11110 case axis_preceding_sibling: | |
| 11111 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>()); | |
| 11112 | |
| 11113 case axis_self: | |
| 11114 return step_do(c, stack, eval, axis_to_type<axis_self>()); | |
| 11115 | |
| 11116 default: | |
| 11117 assert(false && "Unknown axis"); // unreachable | |
| 11118 return xpath_node_set_raw(); | |
| 11119 } | |
| 11120 } | |
| 11121 | |
| 11122 case ast_step_root: | |
| 11123 { | |
| 11124 assert(!_right); // root step can't have any predicates | |
| 11125 | |
| 11126 xpath_node_set_raw ns; | |
| 11127 | |
| 11128 ns.set_type(xpath_node_set::type_sorted); | |
| 11129 | |
| 11130 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); | |
| 11131 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); | |
| 11132 | |
| 11133 return ns; | |
| 11134 } | |
| 11135 | |
| 11136 case ast_variable: | |
| 11137 { | |
| 11138 assert(_rettype == _data.variable->type()); | |
| 11139 | |
| 11140 if (_rettype == xpath_type_node_set) | |
| 11141 { | |
| 11142 const xpath_node_set& s = _data.variable->get_node_set(); | |
| 11143 | |
| 11144 xpath_node_set_raw ns; | |
| 11145 | |
| 11146 ns.set_type(s.type()); | |
| 11147 ns.append(s.begin(), s.end(), stack.result); | |
| 11148 | |
| 11149 return ns; | |
| 11150 } | |
| 11151 | |
| 11152 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
| 11153 break; | |
| 11154 } | |
| 11155 | |
| 11156 default: | |
| 11157 ; | |
| 11158 } | |
| 11159 | |
| 11160 // none of the ast types that return the value directly matched, but conversions to node set are invalid | |
| 11161 assert(false && "Wrong expression for return type node set"); // unreachable | |
| 11162 return xpath_node_set_raw(); | |
| 11163 } | |
| 11164 | |
| 11165 void optimize(xpath_allocator* alloc) | |
| 11166 { | |
| 11167 if (_left) | |
| 11168 _left->optimize(alloc); | |
| 11169 | |
| 11170 if (_right) | |
| 11171 _right->optimize(alloc); | |
| 11172 | |
| 11173 if (_next) | |
| 11174 _next->optimize(alloc); | |
| 11175 | |
| 11176 // coverity[var_deref_model] | |
| 11177 optimize_self(alloc); | |
| 11178 } | |
| 11179 | |
| 11180 void optimize_self(xpath_allocator* alloc) | |
| 11181 { | |
| 11182 // Rewrite [position()=expr] with [expr] | |
| 11183 // Note that this step has to go before classification to recognize [position()=1] | |
| 11184 if ((_type == ast_filter || _type == ast_predicate) && | |
| 11185 _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate) | |
| 11186 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) | |
| 11187 { | |
| 11188 _right = _right->_right; | |
| 11189 } | |
| 11190 | |
| 11191 // Classify filter/predicate ops to perform various optimizations during evaluation | |
| 11192 if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate) | |
| 11193 { | |
| 11194 assert(_test == predicate_default); | |
| 11195 | |
| 11196 if (_right->_type == ast_number_constant && _right->_data.number == 1.0) | |
| 11197 _test = predicate_constant_one; | |
| 11198 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) | |
| 11199 _test = predicate_constant; | |
| 11200 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) | |
| 11201 _test = predicate_posinv; | |
| 11202 } | |
| 11203 | |
| 11204 // Rewrite descendant-or-self::node()/child::foo with descendant::foo | |
| 11205 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately | |
| 11206 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes | |
| 11207 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) | |
| 11208 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && | |
| 11209 _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && | |
| 11210 is_posinv_step()) | |
| 11211 { | |
| 11212 if (_axis == axis_child || _axis == axis_descendant) | |
| 11213 _axis = axis_descendant; | |
| 11214 else | |
| 11215 _axis = axis_descendant_or_self; | |
| 11216 | |
| 11217 _left = _left->_left; | |
| 11218 } | |
| 11219 | |
| 11220 // Use optimized lookup table implementation for translate() with constant arguments | |
| 11221 if (_type == ast_func_translate && | |
| 11222 _right && // workaround for clang static analyzer (_right is never null for ast_func_translate) | |
| 11223 _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) | |
| 11224 { | |
| 11225 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); | |
| 11226 | |
| 11227 if (table) | |
| 11228 { | |
| 11229 _type = ast_opt_translate_table; | |
| 11230 _data.table = table; | |
| 11231 } | |
| 11232 } | |
| 11233 | |
| 11234 // Use optimized path for @attr = 'value' or @attr = $value | |
| 11235 if (_type == ast_op_equal && | |
| 11236 _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal) | |
| 11237 // coverity[mixed_enums] | |
| 11238 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && | |
| 11239 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) | |
| 11240 { | |
| 11241 _type = ast_opt_compare_attribute; | |
| 11242 } | |
| 11243 } | |
| 11244 | |
| 11245 bool is_posinv_expr() const | |
| 11246 { | |
| 11247 switch (_type) | |
| 11248 { | |
| 11249 case ast_func_position: | |
| 11250 case ast_func_last: | |
| 11251 return false; | |
| 11252 | |
| 11253 case ast_string_constant: | |
| 11254 case ast_number_constant: | |
| 11255 case ast_variable: | |
| 11256 return true; | |
| 11257 | |
| 11258 case ast_step: | |
| 11259 case ast_step_root: | |
| 11260 return true; | |
| 11261 | |
| 11262 case ast_predicate: | |
| 11263 case ast_filter: | |
| 11264 return true; | |
| 11265 | |
| 11266 default: | |
| 11267 if (_left && !_left->is_posinv_expr()) return false; | |
| 11268 | |
| 11269 for (xpath_ast_node* n = _right; n; n = n->_next) | |
| 11270 if (!n->is_posinv_expr()) return false; | |
| 11271 | |
| 11272 return true; | |
| 11273 } | |
| 11274 } | |
| 11275 | |
| 11276 bool is_posinv_step() const | |
| 11277 { | |
| 11278 assert(_type == ast_step); | |
| 11279 | |
| 11280 for (xpath_ast_node* n = _right; n; n = n->_next) | |
| 11281 { | |
| 11282 assert(n->_type == ast_predicate); | |
| 11283 | |
| 11284 if (n->_test != predicate_posinv) | |
| 11285 return false; | |
| 11286 } | |
| 11287 | |
| 11288 return true; | |
| 11289 } | |
| 11290 | |
| 11291 xpath_value_type rettype() const | |
| 11292 { | |
| 11293 return static_cast<xpath_value_type>(_rettype); | |
| 11294 } | |
| 11295 }; | |
| 11296 | |
// Upper bound for the depth counter maintained by the XPath parser.
// Defaults to 1024 unless PUGIXML_XPATH_DEPTH_LIMIT is defined.
#ifdef PUGIXML_XPATH_DEPTH_LIMIT
static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
#else
static const size_t xpath_ast_depth_limit = 1024;
#endif
| 11304 | |
| 11305 struct xpath_parser | |
| 11306 { | |
| 11307 xpath_allocator* _alloc; | |
| 11308 xpath_lexer _lexer; | |
| 11309 | |
| 11310 const char_t* _query; | |
| 11311 xpath_variable_set* _variables; | |
| 11312 | |
| 11313 xpath_parse_result* _result; | |
| 11314 | |
| 11315 char_t _scratch[32]; | |
| 11316 | |
| 11317 size_t _depth; | |
| 11318 | |
| 11319 xpath_ast_node* error(const char* message) | |
| 11320 { | |
| 11321 _result->error = message; | |
| 11322 _result->offset = _lexer.current_pos() - _query; | |
| 11323 | |
| 11324 return 0; | |
| 11325 } | |
| 11326 | |
| 11327 xpath_ast_node* error_oom() | |
| 11328 { | |
| 11329 assert(_alloc->_error); | |
| 11330 *_alloc->_error = true; | |
| 11331 | |
| 11332 return 0; | |
| 11333 } | |
| 11334 | |
| 11335 xpath_ast_node* error_rec() | |
| 11336 { | |
| 11337 return error("Exceeded maximum allowed query depth"); | |
| 11338 } | |
| 11339 | |
| 11340 void* alloc_node() | |
| 11341 { | |
| 11342 return _alloc->allocate(sizeof(xpath_ast_node)); | |
| 11343 } | |
| 11344 | |
| 11345 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) | |
| 11346 { | |
| 11347 void* memory = alloc_node(); | |
| 11348 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; | |
| 11349 } | |
| 11350 | |
| 11351 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value) | |
| 11352 { | |
| 11353 void* memory = alloc_node(); | |
| 11354 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; | |
| 11355 } | |
| 11356 | |
| 11357 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value) | |
| 11358 { | |
| 11359 void* memory = alloc_node(); | |
| 11360 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; | |
| 11361 } | |
| 11362 | |
| 11363 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0) | |
| 11364 { | |
| 11365 void* memory = alloc_node(); | |
| 11366 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0; | |
| 11367 } | |
| 11368 | |
| 11369 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents) | |
| 11370 { | |
| 11371 void* memory = alloc_node(); | |
| 11372 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0; | |
| 11373 } | |
| 11374 | |
| 11375 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test) | |
| 11376 { | |
| 11377 void* memory = alloc_node(); | |
| 11378 return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0; | |
| 11379 } | |
| 11380 | |
| 11381 const char_t* alloc_string(const xpath_lexer_string& value) | |
| 11382 { | |
| 11383 if (!value.begin) | |
| 11384 return PUGIXML_TEXT(""); | |
| 11385 | |
| 11386 size_t length = static_cast<size_t>(value.end - value.begin); | |
| 11387 | |
| 11388 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t))); | |
| 11389 if (!c) return 0; | |
| 11390 | |
| 11391 memcpy(c, value.begin, length * sizeof(char_t)); | |
| 11392 c[length] = 0; | |
| 11393 | |
| 11394 return c; | |
| 11395 } | |
| 11396 | |
| 11397 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) | |
| 11398 { | |
| 11399 switch (name.begin[0]) | |
| 11400 { | |
| 11401 case 'b': | |
| 11402 if (name == PUGIXML_TEXT("boolean") && argc == 1) | |
| 11403 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]); | |
| 11404 | |
| 11405 break; | |
| 11406 | |
| 11407 case 'c': | |
| 11408 if (name == PUGIXML_TEXT("count") && argc == 1) | |
| 11409 { | |
| 11410 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
| 11411 return alloc_node(ast_func_count, xpath_type_number, args[0]); | |
| 11412 } | |
| 11413 else if (name == PUGIXML_TEXT("contains") && argc == 2) | |
| 11414 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); | |
| 11415 else if (name == PUGIXML_TEXT("concat") && argc >= 2) | |
| 11416 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]); | |
| 11417 else if (name == PUGIXML_TEXT("ceiling") && argc == 1) | |
| 11418 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]); | |
| 11419 | |
| 11420 break; | |
| 11421 | |
| 11422 case 'f': | |
| 11423 if (name == PUGIXML_TEXT("false") && argc == 0) | |
| 11424 return alloc_node(ast_func_false, xpath_type_boolean); | |
| 11425 else if (name == PUGIXML_TEXT("floor") && argc == 1) | |
| 11426 return alloc_node(ast_func_floor, xpath_type_number, args[0]); | |
| 11427 | |
| 11428 break; | |
| 11429 | |
| 11430 case 'i': | |
| 11431 if (name == PUGIXML_TEXT("id") && argc == 1) | |
| 11432 return alloc_node(ast_func_id, xpath_type_node_set, args[0]); | |
| 11433 | |
| 11434 break; | |
| 11435 | |
| 11436 case 'l': | |
| 11437 if (name == PUGIXML_TEXT("last") && argc == 0) | |
| 11438 return alloc_node(ast_func_last, xpath_type_number); | |
| 11439 else if (name == PUGIXML_TEXT("lang") && argc == 1) | |
| 11440 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]); | |
| 11441 else if (name == PUGIXML_TEXT("local-name") && argc <= 1) | |
| 11442 { | |
| 11443 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
| 11444 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); | |
| 11445 } | |
| 11446 | |
| 11447 break; | |
| 11448 | |
| 11449 case 'n': | |
| 11450 if (name == PUGIXML_TEXT("name") && argc <= 1) | |
| 11451 { | |
| 11452 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
| 11453 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); | |
| 11454 } | |
| 11455 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) | |
| 11456 { | |
| 11457 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
| 11458 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); | |
| 11459 } | |
| 11460 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) | |
| 11461 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); | |
| 11462 else if (name == PUGIXML_TEXT("not") && argc == 1) | |
| 11463 return alloc_node(ast_func_not, xpath_type_boolean, args[0]); | |
| 11464 else if (name == PUGIXML_TEXT("number") && argc <= 1) | |
| 11465 return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); | |
| 11466 | |
| 11467 break; | |
| 11468 | |
| 11469 case 'p': | |
| 11470 if (name == PUGIXML_TEXT("position") && argc == 0) | |
| 11471 return alloc_node(ast_func_position, xpath_type_number); | |
| 11472 | |
| 11473 break; | |
| 11474 | |
| 11475 case 'r': | |
| 11476 if (name == PUGIXML_TEXT("round") && argc == 1) | |
| 11477 return alloc_node(ast_func_round, xpath_type_number, args[0]); | |
| 11478 | |
| 11479 break; | |
| 11480 | |
| 11481 case 's': | |
| 11482 if (name == PUGIXML_TEXT("string") && argc <= 1) | |
| 11483 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); | |
| 11484 else if (name == PUGIXML_TEXT("string-length") && argc <= 1) | |
| 11485 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); | |
| 11486 else if (name == PUGIXML_TEXT("starts-with") && argc == 2) | |
| 11487 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); | |
| 11488 else if (name == PUGIXML_TEXT("substring-before") && argc == 2) | |
| 11489 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); | |
| 11490 else if (name == PUGIXML_TEXT("substring-after") && argc == 2) | |
| 11491 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); | |
| 11492 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) | |
| 11493 return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); | |
| 11494 else if (name == PUGIXML_TEXT("sum") && argc == 1) | |
| 11495 { | |
| 11496 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
| 11497 return alloc_node(ast_func_sum, xpath_type_number, args[0]); | |
| 11498 } | |
| 11499 | |
| 11500 break; | |
| 11501 | |
| 11502 case 't': | |
| 11503 if (name == PUGIXML_TEXT("translate") && argc == 3) | |
| 11504 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]); | |
| 11505 else if (name == PUGIXML_TEXT("true") && argc == 0) | |
| 11506 return alloc_node(ast_func_true, xpath_type_boolean); | |
| 11507 | |
| 11508 break; | |
| 11509 | |
| 11510 default: | |
| 11511 break; | |
| 11512 } | |
| 11513 | |
| 11514 return error("Unrecognized function or wrong parameter count"); | |
| 11515 } | |
| 11516 | |
| 11517 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) | |
| 11518 { | |
| 11519 specified = true; | |
| 11520 | |
| 11521 switch (name.begin[0]) | |
| 11522 { | |
| 11523 case 'a': | |
| 11524 if (name == PUGIXML_TEXT("ancestor")) | |
| 11525 return axis_ancestor; | |
| 11526 else if (name == PUGIXML_TEXT("ancestor-or-self")) | |
| 11527 return axis_ancestor_or_self; | |
| 11528 else if (name == PUGIXML_TEXT("attribute")) | |
| 11529 return axis_attribute; | |
| 11530 | |
| 11531 break; | |
| 11532 | |
| 11533 case 'c': | |
| 11534 if (name == PUGIXML_TEXT("child")) | |
| 11535 return axis_child; | |
| 11536 | |
| 11537 break; | |
| 11538 | |
| 11539 case 'd': | |
| 11540 if (name == PUGIXML_TEXT("descendant")) | |
| 11541 return axis_descendant; | |
| 11542 else if (name == PUGIXML_TEXT("descendant-or-self")) | |
| 11543 return axis_descendant_or_self; | |
| 11544 | |
| 11545 break; | |
| 11546 | |
| 11547 case 'f': | |
| 11548 if (name == PUGIXML_TEXT("following")) | |
| 11549 return axis_following; | |
| 11550 else if (name == PUGIXML_TEXT("following-sibling")) | |
| 11551 return axis_following_sibling; | |
| 11552 | |
| 11553 break; | |
| 11554 | |
| 11555 case 'n': | |
| 11556 if (name == PUGIXML_TEXT("namespace")) | |
| 11557 return axis_namespace; | |
| 11558 | |
| 11559 break; | |
| 11560 | |
| 11561 case 'p': | |
| 11562 if (name == PUGIXML_TEXT("parent")) | |
| 11563 return axis_parent; | |
| 11564 else if (name == PUGIXML_TEXT("preceding")) | |
| 11565 return axis_preceding; | |
| 11566 else if (name == PUGIXML_TEXT("preceding-sibling")) | |
| 11567 return axis_preceding_sibling; | |
| 11568 | |
| 11569 break; | |
| 11570 | |
| 11571 case 's': | |
| 11572 if (name == PUGIXML_TEXT("self")) | |
| 11573 return axis_self; | |
| 11574 | |
| 11575 break; | |
| 11576 | |
| 11577 default: | |
| 11578 break; | |
| 11579 } | |
| 11580 | |
| 11581 specified = false; | |
| 11582 return axis_child; | |
| 11583 } | |
| 11584 | |
| 11585 nodetest_t parse_node_test_type(const xpath_lexer_string& name) | |
| 11586 { | |
| 11587 switch (name.begin[0]) | |
| 11588 { | |
| 11589 case 'c': | |
| 11590 if (name == PUGIXML_TEXT("comment")) | |
| 11591 return nodetest_type_comment; | |
| 11592 | |
| 11593 break; | |
| 11594 | |
| 11595 case 'n': | |
| 11596 if (name == PUGIXML_TEXT("node")) | |
| 11597 return nodetest_type_node; | |
| 11598 | |
| 11599 break; | |
| 11600 | |
| 11601 case 'p': | |
| 11602 if (name == PUGIXML_TEXT("processing-instruction")) | |
| 11603 return nodetest_type_pi; | |
| 11604 | |
| 11605 break; | |
| 11606 | |
| 11607 case 't': | |
| 11608 if (name == PUGIXML_TEXT("text")) | |
| 11609 return nodetest_type_text; | |
| 11610 | |
| 11611 break; | |
| 11612 | |
| 11613 default: | |
| 11614 break; | |
| 11615 } | |
| 11616 | |
| 11617 return nodetest_none; | |
| 11618 } | |
| 11619 | |
| 11620 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall | |
| 11621 xpath_ast_node* parse_primary_expression() | |
| 11622 { | |
| 11623 switch (_lexer.current()) | |
| 11624 { | |
| 11625 case lex_var_ref: | |
| 11626 { | |
| 11627 xpath_lexer_string name = _lexer.contents(); | |
| 11628 | |
| 11629 if (!_variables) | |
| 11630 return error("Unknown variable: variable set is not provided"); | |
| 11631 | |
| 11632 xpath_variable* var = 0; | |
| 11633 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) | |
| 11634 return error_oom(); | |
| 11635 | |
| 11636 if (!var) | |
| 11637 return error("Unknown variable: variable set does not contain the given name"); | |
| 11638 | |
| 11639 _lexer.next(); | |
| 11640 | |
| 11641 return alloc_node(ast_variable, var->type(), var); | |
| 11642 } | |
| 11643 | |
| 11644 case lex_open_brace: | |
| 11645 { | |
| 11646 _lexer.next(); | |
| 11647 | |
| 11648 xpath_ast_node* n = parse_expression(); | |
| 11649 if (!n) return 0; | |
| 11650 | |
| 11651 if (_lexer.current() != lex_close_brace) | |
| 11652 return error("Expected ')' to match an opening '('"); | |
| 11653 | |
| 11654 _lexer.next(); | |
| 11655 | |
| 11656 return n; | |
| 11657 } | |
| 11658 | |
| 11659 case lex_quoted_string: | |
| 11660 { | |
| 11661 const char_t* value = alloc_string(_lexer.contents()); | |
| 11662 if (!value) return 0; | |
| 11663 | |
| 11664 _lexer.next(); | |
| 11665 | |
| 11666 return alloc_node(ast_string_constant, xpath_type_string, value); | |
| 11667 } | |
| 11668 | |
| 11669 case lex_number: | |
| 11670 { | |
| 11671 double value = 0; | |
| 11672 | |
| 11673 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) | |
| 11674 return error_oom(); | |
| 11675 | |
| 11676 _lexer.next(); | |
| 11677 | |
| 11678 return alloc_node(ast_number_constant, xpath_type_number, value); | |
| 11679 } | |
| 11680 | |
| 11681 case lex_string: | |
| 11682 { | |
| 11683 xpath_ast_node* args[2] = {0}; | |
| 11684 size_t argc = 0; | |
| 11685 | |
| 11686 xpath_lexer_string function = _lexer.contents(); | |
| 11687 _lexer.next(); | |
| 11688 | |
| 11689 xpath_ast_node* last_arg = 0; | |
| 11690 | |
| 11691 if (_lexer.current() != lex_open_brace) | |
| 11692 return error("Unrecognized function call"); | |
| 11693 _lexer.next(); | |
| 11694 | |
| 11695 size_t old_depth = _depth; | |
| 11696 | |
| 11697 while (_lexer.current() != lex_close_brace) | |
| 11698 { | |
| 11699 if (argc > 0) | |
| 11700 { | |
| 11701 if (_lexer.current() != lex_comma) | |
| 11702 return error("No comma between function arguments"); | |
| 11703 _lexer.next(); | |
| 11704 } | |
| 11705 | |
| 11706 if (++_depth > xpath_ast_depth_limit) | |
| 11707 return error_rec(); | |
| 11708 | |
| 11709 xpath_ast_node* n = parse_expression(); | |
| 11710 if (!n) return 0; | |
| 11711 | |
| 11712 if (argc < 2) args[argc] = n; | |
| 11713 else last_arg->set_next(n); | |
| 11714 | |
| 11715 argc++; | |
| 11716 last_arg = n; | |
| 11717 } | |
| 11718 | |
| 11719 _lexer.next(); | |
| 11720 | |
| 11721 _depth = old_depth; | |
| 11722 | |
| 11723 return parse_function(function, argc, args); | |
| 11724 } | |
| 11725 | |
| 11726 default: | |
| 11727 return error("Unrecognizable primary expression"); | |
| 11728 } | |
| 11729 } | |
| 11730 | |
| 11731 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate | |
| 11732 // Predicate ::= '[' PredicateExpr ']' | |
| 11733 // PredicateExpr ::= Expr | |
| 11734 xpath_ast_node* parse_filter_expression() | |
| 11735 { | |
| 11736 xpath_ast_node* n = parse_primary_expression(); | |
| 11737 if (!n) return 0; | |
| 11738 | |
| 11739 size_t old_depth = _depth; | |
| 11740 | |
| 11741 while (_lexer.current() == lex_open_square_brace) | |
| 11742 { | |
| 11743 _lexer.next(); | |
| 11744 | |
| 11745 if (++_depth > xpath_ast_depth_limit) | |
| 11746 return error_rec(); | |
| 11747 | |
| 11748 if (n->rettype() != xpath_type_node_set) | |
| 11749 return error("Predicate has to be applied to node set"); | |
| 11750 | |
| 11751 xpath_ast_node* expr = parse_expression(); | |
| 11752 if (!expr) return 0; | |
| 11753 | |
| 11754 n = alloc_node(ast_filter, n, expr, predicate_default); | |
| 11755 if (!n) return 0; | |
| 11756 | |
| 11757 if (_lexer.current() != lex_close_square_brace) | |
| 11758 return error("Expected ']' to match an opening '['"); | |
| 11759 | |
| 11760 _lexer.next(); | |
| 11761 } | |
| 11762 | |
| 11763 _depth = old_depth; | |
| 11764 | |
| 11765 return n; | |
| 11766 } | |
| 11767 | |
| 11768 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep | |
| 11769 // AxisSpecifier ::= AxisName '::' | '@'? | |
| 11770 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' | |
| 11771 // NameTest ::= '*' | NCName ':' '*' | QName | |
| 11772 // AbbreviatedStep ::= '.' | '..' | |
| 11773 xpath_ast_node* parse_step(xpath_ast_node* set) | |
| 11774 { | |
| 11775 if (set && set->rettype() != xpath_type_node_set) | |
| 11776 return error("Step has to be applied to node set"); | |
| 11777 | |
| 11778 bool axis_specified = false; | |
| 11779 axis_t axis = axis_child; // implied child axis | |
| 11780 | |
| 11781 if (_lexer.current() == lex_axis_attribute) | |
| 11782 { | |
| 11783 axis = axis_attribute; | |
| 11784 axis_specified = true; | |
| 11785 | |
| 11786 _lexer.next(); | |
| 11787 } | |
| 11788 else if (_lexer.current() == lex_dot) | |
| 11789 { | |
| 11790 _lexer.next(); | |
| 11791 | |
| 11792 if (_lexer.current() == lex_open_square_brace) | |
| 11793 return error("Predicates are not allowed after an abbreviated step"); | |
| 11794 | |
| 11795 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0); | |
| 11796 } | |
| 11797 else if (_lexer.current() == lex_double_dot) | |
| 11798 { | |
| 11799 _lexer.next(); | |
| 11800 | |
| 11801 if (_lexer.current() == lex_open_square_brace) | |
| 11802 return error("Predicates are not allowed after an abbreviated step"); | |
| 11803 | |
| 11804 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0); | |
| 11805 } | |
| 11806 | |
| 11807 nodetest_t nt_type = nodetest_none; | |
| 11808 xpath_lexer_string nt_name; | |
| 11809 | |
| 11810 if (_lexer.current() == lex_string) | |
| 11811 { | |
| 11812 // node name test | |
| 11813 nt_name = _lexer.contents(); | |
| 11814 _lexer.next(); | |
| 11815 | |
| 11816 // was it an axis name? | |
| 11817 if (_lexer.current() == lex_double_colon) | |
| 11818 { | |
| 11819 // parse axis name | |
| 11820 if (axis_specified) | |
| 11821 return error("Two axis specifiers in one step"); | |
| 11822 | |
| 11823 axis = parse_axis_name(nt_name, axis_specified); | |
| 11824 | |
| 11825 if (!axis_specified) | |
| 11826 return error("Unknown axis"); | |
| 11827 | |
| 11828 // read actual node test | |
| 11829 _lexer.next(); | |
| 11830 | |
| 11831 if (_lexer.current() == lex_multiply) | |
| 11832 { | |
| 11833 nt_type = nodetest_all; | |
| 11834 nt_name = xpath_lexer_string(); | |
| 11835 _lexer.next(); | |
| 11836 } | |
| 11837 else if (_lexer.current() == lex_string) | |
| 11838 { | |
| 11839 nt_name = _lexer.contents(); | |
| 11840 _lexer.next(); | |
| 11841 } | |
| 11842 else | |
| 11843 { | |
| 11844 return error("Unrecognized node test"); | |
| 11845 } | |
| 11846 } | |
| 11847 | |
| 11848 if (nt_type == nodetest_none) | |
| 11849 { | |
| 11850 // node type test or processing-instruction | |
| 11851 if (_lexer.current() == lex_open_brace) | |
| 11852 { | |
| 11853 _lexer.next(); | |
| 11854 | |
| 11855 if (_lexer.current() == lex_close_brace) | |
| 11856 { | |
| 11857 _lexer.next(); | |
| 11858 | |
| 11859 nt_type = parse_node_test_type(nt_name); | |
| 11860 | |
| 11861 if (nt_type == nodetest_none) | |
| 11862 return error("Unrecognized node type"); | |
| 11863 | |
| 11864 nt_name = xpath_lexer_string(); | |
| 11865 } | |
| 11866 else if (nt_name == PUGIXML_TEXT("processing-instruction")) | |
| 11867 { | |
| 11868 if (_lexer.current() != lex_quoted_string) | |
| 11869 return error("Only literals are allowed as arguments to processing-instruction()"); | |
| 11870 | |
| 11871 nt_type = nodetest_pi; | |
| 11872 nt_name = _lexer.contents(); | |
| 11873 _lexer.next(); | |
| 11874 | |
| 11875 if (_lexer.current() != lex_close_brace) | |
| 11876 return error("Unmatched brace near processing-instruction()"); | |
| 11877 _lexer.next(); | |
| 11878 } | |
| 11879 else | |
| 11880 { | |
| 11881 return error("Unmatched brace near node type test"); | |
| 11882 } | |
| 11883 } | |
| 11884 // QName or NCName:* | |
| 11885 else | |
| 11886 { | |
| 11887 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* | |
| 11888 { | |
| 11889 nt_name.end--; // erase * | |
| 11890 | |
| 11891 nt_type = nodetest_all_in_namespace; | |
| 11892 } | |
| 11893 else | |
| 11894 { | |
| 11895 nt_type = nodetest_name; | |
| 11896 } | |
| 11897 } | |
| 11898 } | |
| 11899 } | |
| 11900 else if (_lexer.current() == lex_multiply) | |
| 11901 { | |
| 11902 nt_type = nodetest_all; | |
| 11903 _lexer.next(); | |
| 11904 } | |
| 11905 else | |
| 11906 { | |
| 11907 return error("Unrecognized node test"); | |
| 11908 } | |
| 11909 | |
| 11910 const char_t* nt_name_copy = alloc_string(nt_name); | |
| 11911 if (!nt_name_copy) return 0; | |
| 11912 | |
| 11913 xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); | |
| 11914 if (!n) return 0; | |
| 11915 | |
| 11916 size_t old_depth = _depth; | |
| 11917 | |
| 11918 xpath_ast_node* last = 0; | |
| 11919 | |
| 11920 while (_lexer.current() == lex_open_square_brace) | |
| 11921 { | |
| 11922 _lexer.next(); | |
| 11923 | |
| 11924 if (++_depth > xpath_ast_depth_limit) | |
| 11925 return error_rec(); | |
| 11926 | |
| 11927 xpath_ast_node* expr = parse_expression(); | |
| 11928 if (!expr) return 0; | |
| 11929 | |
| 11930 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default); | |
| 11931 if (!pred) return 0; | |
| 11932 | |
| 11933 if (_lexer.current() != lex_close_square_brace) | |
| 11934 return error("Expected ']' to match an opening '['"); | |
| 11935 _lexer.next(); | |
| 11936 | |
| 11937 if (last) last->set_next(pred); | |
| 11938 else n->set_right(pred); | |
| 11939 | |
| 11940 last = pred; | |
| 11941 } | |
| 11942 | |
| 11943 _depth = old_depth; | |
| 11944 | |
| 11945 return n; | |
| 11946 } | |
| 11947 | |
| 11948 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step | |
| 11949 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) | |
| 11950 { | |
| 11951 xpath_ast_node* n = parse_step(set); | |
| 11952 if (!n) return 0; | |
| 11953 | |
| 11954 size_t old_depth = _depth; | |
| 11955 | |
| 11956 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) | |
| 11957 { | |
| 11958 lexeme_t l = _lexer.current(); | |
| 11959 _lexer.next(); | |
| 11960 | |
| 11961 if (l == lex_double_slash) | |
| 11962 { | |
| 11963 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | |
| 11964 if (!n) return 0; | |
| 11965 | |
| 11966 ++_depth; | |
| 11967 } | |
| 11968 | |
| 11969 if (++_depth > xpath_ast_depth_limit) | |
| 11970 return error_rec(); | |
| 11971 | |
| 11972 n = parse_step(n); | |
| 11973 if (!n) return 0; | |
| 11974 } | |
| 11975 | |
| 11976 _depth = old_depth; | |
| 11977 | |
| 11978 return n; | |
| 11979 } | |
| 11980 | |
| 11981 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath | |
| 11982 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath | |
| 11983 xpath_ast_node* parse_location_path() | |
| 11984 { | |
| 11985 if (_lexer.current() == lex_slash) | |
| 11986 { | |
| 11987 _lexer.next(); | |
| 11988 | |
| 11989 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); | |
| 11990 if (!n) return 0; | |
| 11991 | |
| 11992 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path | |
| 11993 lexeme_t l = _lexer.current(); | |
| 11994 | |
| 11995 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) | |
| 11996 return parse_relative_location_path(n); | |
| 11997 else | |
| 11998 return n; | |
| 11999 } | |
| 12000 else if (_lexer.current() == lex_double_slash) | |
| 12001 { | |
| 12002 _lexer.next(); | |
| 12003 | |
| 12004 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); | |
| 12005 if (!n) return 0; | |
| 12006 | |
| 12007 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | |
| 12008 if (!n) return 0; | |
| 12009 | |
| 12010 return parse_relative_location_path(n); | |
| 12011 } | |
| 12012 | |
| 12013 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 | |
| 12014 return parse_relative_location_path(0); | |
| 12015 } | |
| 12016 | |
| 12017 // PathExpr ::= LocationPath | |
| 12018 // | FilterExpr | |
| 12019 // | FilterExpr '/' RelativeLocationPath | |
| 12020 // | FilterExpr '//' RelativeLocationPath | |
| 12021 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr | |
| 12022 // UnaryExpr ::= UnionExpr | '-' UnaryExpr | |
| 12023 xpath_ast_node* parse_path_or_unary_expression() | |
| 12024 { | |
| 12025 // Clarification. | |
| 12026 // PathExpr begins with either LocationPath or FilterExpr. | |
| 12027 // FilterExpr begins with PrimaryExpr | |
| 12028 // PrimaryExpr begins with '$' in case of it being a variable reference, | |
| 12029 // '(' in case of it being an expression, string literal, number constant or | |
| 12030 // function call. | |
| 12031 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || | |
| 12032 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || | |
| 12033 _lexer.current() == lex_string) | |
| 12034 { | |
| 12035 if (_lexer.current() == lex_string) | |
| 12036 { | |
| 12037 // This is either a function call, or not - if not, we shall proceed with location path | |
| 12038 const char_t* state = _lexer.state(); | |
| 12039 | |
| 12040 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; | |
| 12041 | |
| 12042 if (*state != '(') | |
| 12043 return parse_location_path(); | |
| 12044 | |
| 12045 // This looks like a function call; however this still can be a node-test. Check it. | |
| 12046 if (parse_node_test_type(_lexer.contents()) != nodetest_none) | |
| 12047 return parse_location_path(); | |
| 12048 } | |
| 12049 | |
| 12050 xpath_ast_node* n = parse_filter_expression(); | |
| 12051 if (!n) return 0; | |
| 12052 | |
| 12053 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) | |
| 12054 { | |
| 12055 lexeme_t l = _lexer.current(); | |
| 12056 _lexer.next(); | |
| 12057 | |
| 12058 if (l == lex_double_slash) | |
| 12059 { | |
| 12060 if (n->rettype() != xpath_type_node_set) | |
| 12061 return error("Step has to be applied to node set"); | |
| 12062 | |
| 12063 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | |
| 12064 if (!n) return 0; | |
| 12065 } | |
| 12066 | |
| 12067 // select from location path | |
| 12068 return parse_relative_location_path(n); | |
| 12069 } | |
| 12070 | |
| 12071 return n; | |
| 12072 } | |
| 12073 else if (_lexer.current() == lex_minus) | |
| 12074 { | |
| 12075 _lexer.next(); | |
| 12076 | |
| 12077 // precedence 7+ - only parses union expressions | |
| 12078 xpath_ast_node* n = parse_expression(7); | |
| 12079 if (!n) return 0; | |
| 12080 | |
| 12081 return alloc_node(ast_op_negate, xpath_type_number, n); | |
| 12082 } | |
| 12083 else | |
| 12084 { | |
| 12085 return parse_location_path(); | |
| 12086 } | |
| 12087 } | |
| 12088 | |
| 12089 struct binary_op_t | |
| 12090 { | |
| 12091 ast_type_t asttype; | |
| 12092 xpath_value_type rettype; | |
| 12093 int precedence; | |
| 12094 | |
| 12095 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) | |
| 12096 { | |
| 12097 } | |
| 12098 | |
| 12099 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) | |
| 12100 { | |
| 12101 } | |
| 12102 | |
| 12103 static binary_op_t parse(xpath_lexer& lexer) | |
| 12104 { | |
| 12105 switch (lexer.current()) | |
| 12106 { | |
| 12107 case lex_string: | |
| 12108 if (lexer.contents() == PUGIXML_TEXT("or")) | |
| 12109 return binary_op_t(ast_op_or, xpath_type_boolean, 1); | |
| 12110 else if (lexer.contents() == PUGIXML_TEXT("and")) | |
| 12111 return binary_op_t(ast_op_and, xpath_type_boolean, 2); | |
| 12112 else if (lexer.contents() == PUGIXML_TEXT("div")) | |
| 12113 return binary_op_t(ast_op_divide, xpath_type_number, 6); | |
| 12114 else if (lexer.contents() == PUGIXML_TEXT("mod")) | |
| 12115 return binary_op_t(ast_op_mod, xpath_type_number, 6); | |
| 12116 else | |
| 12117 return binary_op_t(); | |
| 12118 | |
| 12119 case lex_equal: | |
| 12120 return binary_op_t(ast_op_equal, xpath_type_boolean, 3); | |
| 12121 | |
| 12122 case lex_not_equal: | |
| 12123 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); | |
| 12124 | |
| 12125 case lex_less: | |
| 12126 return binary_op_t(ast_op_less, xpath_type_boolean, 4); | |
| 12127 | |
| 12128 case lex_greater: | |
| 12129 return binary_op_t(ast_op_greater, xpath_type_boolean, 4); | |
| 12130 | |
| 12131 case lex_less_or_equal: | |
| 12132 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); | |
| 12133 | |
| 12134 case lex_greater_or_equal: | |
| 12135 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); | |
| 12136 | |
| 12137 case lex_plus: | |
| 12138 return binary_op_t(ast_op_add, xpath_type_number, 5); | |
| 12139 | |
| 12140 case lex_minus: | |
| 12141 return binary_op_t(ast_op_subtract, xpath_type_number, 5); | |
| 12142 | |
| 12143 case lex_multiply: | |
| 12144 return binary_op_t(ast_op_multiply, xpath_type_number, 6); | |
| 12145 | |
| 12146 case lex_union: | |
| 12147 return binary_op_t(ast_op_union, xpath_type_node_set, 7); | |
| 12148 | |
| 12149 default: | |
| 12150 return binary_op_t(); | |
| 12151 } | |
| 12152 } | |
| 12153 }; | |
| 12154 | |
| 12155 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) | |
| 12156 { | |
| 12157 binary_op_t op = binary_op_t::parse(_lexer); | |
| 12158 | |
| 12159 while (op.asttype != ast_unknown && op.precedence >= limit) | |
| 12160 { | |
| 12161 _lexer.next(); | |
| 12162 | |
| 12163 if (++_depth > xpath_ast_depth_limit) | |
| 12164 return error_rec(); | |
| 12165 | |
| 12166 xpath_ast_node* rhs = parse_path_or_unary_expression(); | |
| 12167 if (!rhs) return 0; | |
| 12168 | |
| 12169 binary_op_t nextop = binary_op_t::parse(_lexer); | |
| 12170 | |
| 12171 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) | |
| 12172 { | |
| 12173 rhs = parse_expression_rec(rhs, nextop.precedence); | |
| 12174 if (!rhs) return 0; | |
| 12175 | |
| 12176 nextop = binary_op_t::parse(_lexer); | |
| 12177 } | |
| 12178 | |
| 12179 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) | |
| 12180 return error("Union operator has to be applied to node sets"); | |
| 12181 | |
| 12182 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); | |
| 12183 if (!lhs) return 0; | |
| 12184 | |
| 12185 op = binary_op_t::parse(_lexer); | |
| 12186 } | |
| 12187 | |
| 12188 return lhs; | |
| 12189 } | |
| 12190 | |
| 12191 // Expr ::= OrExpr | |
| 12192 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr | |
| 12193 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr | |
| 12194 // EqualityExpr ::= RelationalExpr | |
| 12195 // | EqualityExpr '=' RelationalExpr | |
| 12196 // | EqualityExpr '!=' RelationalExpr | |
| 12197 // RelationalExpr ::= AdditiveExpr | |
| 12198 // | RelationalExpr '<' AdditiveExpr | |
| 12199 // | RelationalExpr '>' AdditiveExpr | |
| 12200 // | RelationalExpr '<=' AdditiveExpr | |
| 12201 // | RelationalExpr '>=' AdditiveExpr | |
| 12202 // AdditiveExpr ::= MultiplicativeExpr | |
| 12203 // | AdditiveExpr '+' MultiplicativeExpr | |
| 12204 // | AdditiveExpr '-' MultiplicativeExpr | |
| 12205 // MultiplicativeExpr ::= UnaryExpr | |
| 12206 // | MultiplicativeExpr '*' UnaryExpr | |
| 12207 // | MultiplicativeExpr 'div' UnaryExpr | |
| 12208 // | MultiplicativeExpr 'mod' UnaryExpr | |
| 12209 xpath_ast_node* parse_expression(int limit = 0) | |
| 12210 { | |
| 12211 size_t old_depth = _depth; | |
| 12212 | |
| 12213 if (++_depth > xpath_ast_depth_limit) | |
| 12214 return error_rec(); | |
| 12215 | |
| 12216 xpath_ast_node* n = parse_path_or_unary_expression(); | |
| 12217 if (!n) return 0; | |
| 12218 | |
| 12219 n = parse_expression_rec(n, limit); | |
| 12220 | |
| 12221 _depth = old_depth; | |
| 12222 | |
| 12223 return n; | |
| 12224 } | |
| 12225 | |
| 12226 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0) | |
| 12227 { | |
| 12228 } | |
| 12229 | |
| 12230 xpath_ast_node* parse() | |
| 12231 { | |
| 12232 xpath_ast_node* n = parse_expression(); | |
| 12233 if (!n) return 0; | |
| 12234 | |
| 12235 assert(_depth == 0); | |
| 12236 | |
| 12237 // check if there are unparsed tokens left | |
| 12238 if (_lexer.current() != lex_eof) | |
| 12239 return error("Incorrect query"); | |
| 12240 | |
| 12241 return n; | |
| 12242 } | |
| 12243 | |
| 12244 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) | |
| 12245 { | |
| 12246 xpath_parser parser(query, variables, alloc, result); | |
| 12247 | |
| 12248 return parser.parse(); | |
| 12249 } | |
| 12250 }; | |
| 12251 | |
| 12252 struct xpath_query_impl | |
| 12253 { | |
| 12254 static xpath_query_impl* create() | |
| 12255 { | |
| 12256 void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); | |
| 12257 if (!memory) return 0; | |
| 12258 | |
| 12259 return new (memory) xpath_query_impl(); | |
| 12260 } | |
| 12261 | |
| 12262 static void destroy(xpath_query_impl* impl) | |
| 12263 { | |
| 12264 // free all allocated pages | |
| 12265 impl->alloc.release(); | |
| 12266 | |
| 12267 // free allocator memory (with the first page) | |
| 12268 xml_memory::deallocate(impl); | |
| 12269 } | |
| 12270 | |
| 12271 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) | |
| 12272 { | |
| 12273 block.next = 0; | |
| 12274 block.capacity = sizeof(block.data); | |
| 12275 } | |
| 12276 | |
| 12277 xpath_ast_node* root; | |
| 12278 xpath_allocator alloc; | |
| 12279 xpath_memory_block block; | |
| 12280 bool oom; | |
| 12281 }; | |
| 12282 | |
| 12283 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) | |
| 12284 { | |
| 12285 if (!impl) return 0; | |
| 12286 | |
| 12287 if (impl->root->rettype() != xpath_type_node_set) | |
| 12288 { | |
| 12289 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 12290 return 0; | |
| 12291 #else | |
| 12292 xpath_parse_result res; | |
| 12293 res.error = "Expression does not evaluate to node set"; | |
| 12294 | |
| 12295 throw xpath_exception(res); | |
| 12296 #endif | |
| 12297 } | |
| 12298 | |
| 12299 return impl->root; | |
| 12300 } | |
| 12301 PUGI__NS_END | |
| 12302 | |
| 12303 namespace pugi | |
| 12304 { | |
| 12305 #ifndef PUGIXML_NO_EXCEPTIONS | |
| 12306 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) | |
| 12307 { | |
| 12308 assert(_result.error); | |
| 12309 } | |
| 12310 | |
| 12311 PUGI__FN const char* xpath_exception::what() const throw() | |
| 12312 { | |
| 12313 return _result.error; | |
| 12314 } | |
| 12315 | |
| 12316 PUGI__FN const xpath_parse_result& xpath_exception::result() const | |
| 12317 { | |
| 12318 return _result; | |
| 12319 } | |
| 12320 #endif | |
| 12321 | |
| 12322 PUGI__FN xpath_node::xpath_node() | |
| 12323 { | |
| 12324 } | |
| 12325 | |
| 12326 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) | |
| 12327 { | |
| 12328 } | |
| 12329 | |
| 12330 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) | |
| 12331 { | |
| 12332 } | |
| 12333 | |
| 12334 PUGI__FN xml_node xpath_node::node() const | |
| 12335 { | |
| 12336 return _attribute ? xml_node() : _node; | |
| 12337 } | |
| 12338 | |
| 12339 PUGI__FN xml_attribute xpath_node::attribute() const | |
| 12340 { | |
| 12341 return _attribute; | |
| 12342 } | |
| 12343 | |
| 12344 PUGI__FN xml_node xpath_node::parent() const | |
| 12345 { | |
| 12346 return _attribute ? _node : _node.parent(); | |
| 12347 } | |
| 12348 | |
| 12349 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) | |
| 12350 { | |
| 12351 } | |
| 12352 | |
| 12353 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const | |
| 12354 { | |
| 12355 return (_node || _attribute) ? unspecified_bool_xpath_node : 0; | |
| 12356 } | |
| 12357 | |
| 12358 PUGI__FN bool xpath_node::operator!() const | |
| 12359 { | |
| 12360 return !(_node || _attribute); | |
| 12361 } | |
| 12362 | |
| 12363 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const | |
| 12364 { | |
| 12365 return _node == n._node && _attribute == n._attribute; | |
| 12366 } | |
| 12367 | |
| 12368 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const | |
| 12369 { | |
| 12370 return _node != n._node || _attribute != n._attribute; | |
| 12371 } | |
| 12372 | |
| 12373 #ifdef __BORLANDC__ | |
| 12374 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) | |
| 12375 { | |
| 12376 return (bool)lhs && rhs; | |
| 12377 } | |
| 12378 | |
| 12379 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) | |
| 12380 { | |
| 12381 return (bool)lhs || rhs; | |
| 12382 } | |
| 12383 #endif | |
| 12384 | |
| 12385 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) | |
| 12386 { | |
| 12387 assert(begin_ <= end_); | |
| 12388 | |
| 12389 size_t size_ = static_cast<size_t>(end_ - begin_); | |
| 12390 | |
| 12391 // use internal buffer for 0 or 1 elements, heap buffer otherwise | |
| 12392 xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); | |
| 12393 | |
| 12394 if (!storage) | |
| 12395 { | |
| 12396 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 12397 return; | |
| 12398 #else | |
| 12399 throw std::bad_alloc(); | |
| 12400 #endif | |
| 12401 } | |
| 12402 | |
| 12403 // deallocate old buffer | |
| 12404 if (_begin != _storage) | |
| 12405 impl::xml_memory::deallocate(_begin); | |
| 12406 | |
| 12407 // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB | |
| 12408 if (size_) | |
| 12409 memcpy(storage, begin_, size_ * sizeof(xpath_node)); | |
| 12410 | |
| 12411 _begin = storage; | |
| 12412 _end = storage + size_; | |
| 12413 _type = type_; | |
| 12414 } | |
| 12415 | |
| 12416 #ifdef PUGIXML_HAS_MOVE | |
| 12417 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT | |
| 12418 { | |
| 12419 _type = rhs._type; | |
| 12420 _storage[0] = rhs._storage[0]; | |
| 12421 _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin; | |
| 12422 _end = _begin + (rhs._end - rhs._begin); | |
| 12423 | |
| 12424 rhs._type = type_unsorted; | |
| 12425 rhs._begin = rhs._storage; | |
| 12426 rhs._end = rhs._storage; | |
| 12427 } | |
| 12428 #endif | |
| 12429 | |
| 12430 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage) | |
| 12431 { | |
| 12432 } | |
| 12433 | |
| 12434 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage) | |
| 12435 { | |
| 12436 _assign(begin_, end_, type_); | |
| 12437 } | |
| 12438 | |
| 12439 PUGI__FN xpath_node_set::~xpath_node_set() | |
| 12440 { | |
| 12441 if (_begin != _storage) | |
| 12442 impl::xml_memory::deallocate(_begin); | |
| 12443 } | |
| 12444 | |
| 12445 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage) | |
| 12446 { | |
| 12447 _assign(ns._begin, ns._end, ns._type); | |
| 12448 } | |
| 12449 | |
| 12450 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) | |
| 12451 { | |
| 12452 if (this == &ns) return *this; | |
| 12453 | |
| 12454 _assign(ns._begin, ns._end, ns._type); | |
| 12455 | |
| 12456 return *this; | |
| 12457 } | |
| 12458 | |
| 12459 #ifdef PUGIXML_HAS_MOVE | |
| 12460 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage) | |
| 12461 { | |
| 12462 _move(rhs); | |
| 12463 } | |
| 12464 | |
| 12465 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT | |
| 12466 { | |
| 12467 if (this == &rhs) return *this; | |
| 12468 | |
| 12469 if (_begin != _storage) | |
| 12470 impl::xml_memory::deallocate(_begin); | |
| 12471 | |
| 12472 _move(rhs); | |
| 12473 | |
| 12474 return *this; | |
| 12475 } | |
| 12476 #endif | |
| 12477 | |
| 12478 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const | |
| 12479 { | |
| 12480 return _type; | |
| 12481 } | |
| 12482 | |
| 12483 PUGI__FN size_t xpath_node_set::size() const | |
| 12484 { | |
| 12485 return _end - _begin; | |
| 12486 } | |
| 12487 | |
| 12488 PUGI__FN bool xpath_node_set::empty() const | |
| 12489 { | |
| 12490 return _begin == _end; | |
| 12491 } | |
| 12492 | |
| 12493 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const | |
| 12494 { | |
| 12495 assert(index < size()); | |
| 12496 return _begin[index]; | |
| 12497 } | |
| 12498 | |
| 12499 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const | |
| 12500 { | |
| 12501 return _begin; | |
| 12502 } | |
| 12503 | |
| 12504 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const | |
| 12505 { | |
| 12506 return _end; | |
| 12507 } | |
| 12508 | |
| 12509 PUGI__FN void xpath_node_set::sort(bool reverse) | |
| 12510 { | |
| 12511 _type = impl::xpath_sort(_begin, _end, _type, reverse); | |
| 12512 } | |
| 12513 | |
| 12514 PUGI__FN xpath_node xpath_node_set::first() const | |
| 12515 { | |
| 12516 return impl::xpath_first(_begin, _end, _type); | |
| 12517 } | |
| 12518 | |
| 12519 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) | |
| 12520 { | |
| 12521 } | |
| 12522 | |
| 12523 PUGI__FN xpath_parse_result::operator bool() const | |
| 12524 { | |
| 12525 return error == 0; | |
| 12526 } | |
| 12527 | |
| 12528 PUGI__FN const char* xpath_parse_result::description() const | |
| 12529 { | |
| 12530 return error ? error : "No error"; | |
| 12531 } | |
| 12532 | |
| 12533 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) | |
| 12534 { | |
| 12535 } | |
| 12536 | |
| 12537 PUGI__FN const char_t* xpath_variable::name() const | |
| 12538 { | |
| 12539 switch (_type) | |
| 12540 { | |
| 12541 case xpath_type_node_set: | |
| 12542 return static_cast<const impl::xpath_variable_node_set*>(this)->name; | |
| 12543 | |
| 12544 case xpath_type_number: | |
| 12545 return static_cast<const impl::xpath_variable_number*>(this)->name; | |
| 12546 | |
| 12547 case xpath_type_string: | |
| 12548 return static_cast<const impl::xpath_variable_string*>(this)->name; | |
| 12549 | |
| 12550 case xpath_type_boolean: | |
| 12551 return static_cast<const impl::xpath_variable_boolean*>(this)->name; | |
| 12552 | |
| 12553 default: | |
| 12554 assert(false && "Invalid variable type"); // unreachable | |
| 12555 return 0; | |
| 12556 } | |
| 12557 } | |
| 12558 | |
| 12559 PUGI__FN xpath_value_type xpath_variable::type() const | |
| 12560 { | |
| 12561 return _type; | |
| 12562 } | |
| 12563 | |
| 12564 PUGI__FN bool xpath_variable::get_boolean() const | |
| 12565 { | |
| 12566 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; | |
| 12567 } | |
| 12568 | |
| 12569 PUGI__FN double xpath_variable::get_number() const | |
| 12570 { | |
| 12571 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); | |
| 12572 } | |
| 12573 | |
| 12574 PUGI__FN const char_t* xpath_variable::get_string() const | |
| 12575 { | |
| 12576 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0; | |
| 12577 return value ? value : PUGIXML_TEXT(""); | |
| 12578 } | |
| 12579 | |
| 12580 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const | |
| 12581 { | |
| 12582 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; | |
| 12583 } | |
| 12584 | |
| 12585 PUGI__FN bool xpath_variable::set(bool value) | |
| 12586 { | |
| 12587 if (_type != xpath_type_boolean) return false; | |
| 12588 | |
| 12589 static_cast<impl::xpath_variable_boolean*>(this)->value = value; | |
| 12590 return true; | |
| 12591 } | |
| 12592 | |
| 12593 PUGI__FN bool xpath_variable::set(double value) | |
| 12594 { | |
| 12595 if (_type != xpath_type_number) return false; | |
| 12596 | |
| 12597 static_cast<impl::xpath_variable_number*>(this)->value = value; | |
| 12598 return true; | |
| 12599 } | |
| 12600 | |
| 12601 PUGI__FN bool xpath_variable::set(const char_t* value) | |
| 12602 { | |
| 12603 if (_type != xpath_type_string) return false; | |
| 12604 | |
| 12605 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this); | |
| 12606 | |
| 12607 // duplicate string | |
| 12608 size_t size = (impl::strlength(value) + 1) * sizeof(char_t); | |
| 12609 | |
| 12610 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size)); | |
| 12611 if (!copy) return false; | |
| 12612 | |
| 12613 memcpy(copy, value, size); | |
| 12614 | |
| 12615 // replace old string | |
| 12616 if (var->value) impl::xml_memory::deallocate(var->value); | |
| 12617 var->value = copy; | |
| 12618 | |
| 12619 return true; | |
| 12620 } | |
| 12621 | |
| 12622 PUGI__FN bool xpath_variable::set(const xpath_node_set& value) | |
| 12623 { | |
| 12624 if (_type != xpath_type_node_set) return false; | |
| 12625 | |
| 12626 static_cast<impl::xpath_variable_node_set*>(this)->value = value; | |
| 12627 return true; | |
| 12628 } | |
| 12629 | |
| 12630 PUGI__FN xpath_variable_set::xpath_variable_set() | |
| 12631 { | |
| 12632 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
| 12633 _data[i] = 0; | |
| 12634 } | |
| 12635 | |
| 12636 PUGI__FN xpath_variable_set::~xpath_variable_set() | |
| 12637 { | |
| 12638 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
| 12639 _destroy(_data[i]); | |
| 12640 } | |
| 12641 | |
| 12642 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) | |
| 12643 { | |
| 12644 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
| 12645 _data[i] = 0; | |
| 12646 | |
| 12647 _assign(rhs); | |
| 12648 } | |
| 12649 | |
| 12650 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) | |
| 12651 { | |
| 12652 if (this == &rhs) return *this; | |
| 12653 | |
| 12654 _assign(rhs); | |
| 12655 | |
| 12656 return *this; | |
| 12657 } | |
| 12658 | |
| 12659 #ifdef PUGIXML_HAS_MOVE | |
| 12660 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT | |
| 12661 { | |
| 12662 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
| 12663 { | |
| 12664 _data[i] = rhs._data[i]; | |
| 12665 rhs._data[i] = 0; | |
| 12666 } | |
| 12667 } | |
| 12668 | |
| 12669 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT | |
| 12670 { | |
| 12671 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
| 12672 { | |
| 12673 _destroy(_data[i]); | |
| 12674 | |
| 12675 _data[i] = rhs._data[i]; | |
| 12676 rhs._data[i] = 0; | |
| 12677 } | |
| 12678 | |
| 12679 return *this; | |
| 12680 } | |
| 12681 #endif | |
| 12682 | |
| 12683 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) | |
| 12684 { | |
| 12685 xpath_variable_set temp; | |
| 12686 | |
| 12687 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
| 12688 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) | |
| 12689 return; | |
| 12690 | |
| 12691 _swap(temp); | |
| 12692 } | |
| 12693 | |
| 12694 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) | |
| 12695 { | |
| 12696 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
| 12697 { | |
| 12698 xpath_variable* chain = _data[i]; | |
| 12699 | |
| 12700 _data[i] = rhs._data[i]; | |
| 12701 rhs._data[i] = chain; | |
| 12702 } | |
| 12703 } | |
| 12704 | |
| 12705 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const | |
| 12706 { | |
| 12707 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); | |
| 12708 size_t hash = impl::hash_string(name) % hash_size; | |
| 12709 | |
| 12710 // look for existing variable | |
| 12711 for (xpath_variable* var = _data[hash]; var; var = var->_next) | |
| 12712 if (impl::strequal(var->name(), name)) | |
| 12713 return var; | |
| 12714 | |
| 12715 return 0; | |
| 12716 } | |
| 12717 | |
| 12718 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) | |
| 12719 { | |
| 12720 xpath_variable* last = 0; | |
| 12721 | |
| 12722 while (var) | |
| 12723 { | |
| 12724 // allocate storage for new variable | |
| 12725 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); | |
| 12726 if (!nvar) return false; | |
| 12727 | |
| 12728 // link the variable to the result immediately to handle failures gracefully | |
| 12729 if (last) | |
| 12730 last->_next = nvar; | |
| 12731 else | |
| 12732 *out_result = nvar; | |
| 12733 | |
| 12734 last = nvar; | |
| 12735 | |
| 12736 // copy the value; this can fail due to out-of-memory conditions | |
| 12737 if (!impl::copy_xpath_variable(nvar, var)) return false; | |
| 12738 | |
| 12739 var = var->_next; | |
| 12740 } | |
| 12741 | |
| 12742 return true; | |
| 12743 } | |
| 12744 | |
| 12745 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) | |
| 12746 { | |
| 12747 while (var) | |
| 12748 { | |
| 12749 xpath_variable* next = var->_next; | |
| 12750 | |
| 12751 impl::delete_xpath_variable(var->_type, var); | |
| 12752 | |
| 12753 var = next; | |
| 12754 } | |
| 12755 } | |
| 12756 | |
| 12757 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) | |
| 12758 { | |
| 12759 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); | |
| 12760 size_t hash = impl::hash_string(name) % hash_size; | |
| 12761 | |
| 12762 // look for existing variable | |
| 12763 for (xpath_variable* var = _data[hash]; var; var = var->_next) | |
| 12764 if (impl::strequal(var->name(), name)) | |
| 12765 return var->type() == type ? var : 0; | |
| 12766 | |
| 12767 // add new variable | |
| 12768 xpath_variable* result = impl::new_xpath_variable(type, name); | |
| 12769 | |
| 12770 if (result) | |
| 12771 { | |
| 12772 result->_next = _data[hash]; | |
| 12773 | |
| 12774 _data[hash] = result; | |
| 12775 } | |
| 12776 | |
| 12777 return result; | |
| 12778 } | |
| 12779 | |
| 12780 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) | |
| 12781 { | |
| 12782 xpath_variable* var = add(name, xpath_type_boolean); | |
| 12783 return var ? var->set(value) : false; | |
| 12784 } | |
| 12785 | |
| 12786 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) | |
| 12787 { | |
| 12788 xpath_variable* var = add(name, xpath_type_number); | |
| 12789 return var ? var->set(value) : false; | |
| 12790 } | |
| 12791 | |
| 12792 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) | |
| 12793 { | |
| 12794 xpath_variable* var = add(name, xpath_type_string); | |
| 12795 return var ? var->set(value) : false; | |
| 12796 } | |
| 12797 | |
| 12798 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) | |
| 12799 { | |
| 12800 xpath_variable* var = add(name, xpath_type_node_set); | |
| 12801 return var ? var->set(value) : false; | |
| 12802 } | |
| 12803 | |
| 12804 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) | |
| 12805 { | |
| 12806 return _find(name); | |
| 12807 } | |
| 12808 | |
| 12809 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const | |
| 12810 { | |
| 12811 return _find(name); | |
| 12812 } | |
| 12813 | |
| 12814 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) | |
| 12815 { | |
| 12816 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); | |
| 12817 | |
| 12818 if (!qimpl) | |
| 12819 { | |
| 12820 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 12821 _result.error = "Out of memory"; | |
| 12822 #else | |
| 12823 throw std::bad_alloc(); | |
| 12824 #endif | |
| 12825 } | |
| 12826 else | |
| 12827 { | |
| 12828 using impl::auto_deleter; // MSVC7 workaround | |
| 12829 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); | |
| 12830 | |
| 12831 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); | |
| 12832 | |
| 12833 if (qimpl->root) | |
| 12834 { | |
| 12835 qimpl->root->optimize(&qimpl->alloc); | |
| 12836 | |
| 12837 _impl = impl.release(); | |
| 12838 _result.error = 0; | |
| 12839 } | |
| 12840 else | |
| 12841 { | |
| 12842 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 12843 if (qimpl->oom) _result.error = "Out of memory"; | |
| 12844 #else | |
| 12845 if (qimpl->oom) throw std::bad_alloc(); | |
| 12846 throw xpath_exception(_result); | |
| 12847 #endif | |
| 12848 } | |
| 12849 } | |
| 12850 } | |
| 12851 | |
| 12852 PUGI__FN xpath_query::xpath_query(): _impl(0) | |
| 12853 { | |
| 12854 } | |
| 12855 | |
| 12856 PUGI__FN xpath_query::~xpath_query() | |
| 12857 { | |
| 12858 if (_impl) | |
| 12859 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); | |
| 12860 } | |
| 12861 | |
| 12862 #ifdef PUGIXML_HAS_MOVE | |
| 12863 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT | |
| 12864 { | |
| 12865 _impl = rhs._impl; | |
| 12866 _result = rhs._result; | |
| 12867 rhs._impl = 0; | |
| 12868 rhs._result = xpath_parse_result(); | |
| 12869 } | |
| 12870 | |
| 12871 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT | |
| 12872 { | |
| 12873 if (this == &rhs) return *this; | |
| 12874 | |
| 12875 if (_impl) | |
| 12876 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); | |
| 12877 | |
| 12878 _impl = rhs._impl; | |
| 12879 _result = rhs._result; | |
| 12880 rhs._impl = 0; | |
| 12881 rhs._result = xpath_parse_result(); | |
| 12882 | |
| 12883 return *this; | |
| 12884 } | |
| 12885 #endif | |
| 12886 | |
| 12887 PUGI__FN xpath_value_type xpath_query::return_type() const | |
| 12888 { | |
| 12889 if (!_impl) return xpath_type_none; | |
| 12890 | |
| 12891 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype(); | |
| 12892 } | |
| 12893 | |
| 12894 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const | |
| 12895 { | |
| 12896 if (!_impl) return false; | |
| 12897 | |
| 12898 impl::xpath_context c(n, 1, 1); | |
| 12899 impl::xpath_stack_data sd; | |
| 12900 | |
| 12901 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); | |
| 12902 | |
| 12903 if (sd.oom) | |
| 12904 { | |
| 12905 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 12906 return false; | |
| 12907 #else | |
| 12908 throw std::bad_alloc(); | |
| 12909 #endif | |
| 12910 } | |
| 12911 | |
| 12912 return r; | |
| 12913 } | |
| 12914 | |
| 12915 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const | |
| 12916 { | |
| 12917 if (!_impl) return impl::gen_nan(); | |
| 12918 | |
| 12919 impl::xpath_context c(n, 1, 1); | |
| 12920 impl::xpath_stack_data sd; | |
| 12921 | |
| 12922 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); | |
| 12923 | |
| 12924 if (sd.oom) | |
| 12925 { | |
| 12926 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 12927 return impl::gen_nan(); | |
| 12928 #else | |
| 12929 throw std::bad_alloc(); | |
| 12930 #endif | |
| 12931 } | |
| 12932 | |
| 12933 return r; | |
| 12934 } | |
| 12935 | |
| 12936 #ifndef PUGIXML_NO_STL | |
| 12937 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const | |
| 12938 { | |
| 12939 if (!_impl) return string_t(); | |
| 12940 | |
| 12941 impl::xpath_context c(n, 1, 1); | |
| 12942 impl::xpath_stack_data sd; | |
| 12943 | |
| 12944 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack); | |
| 12945 | |
| 12946 if (sd.oom) | |
| 12947 { | |
| 12948 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 12949 return string_t(); | |
| 12950 #else | |
| 12951 throw std::bad_alloc(); | |
| 12952 #endif | |
| 12953 } | |
| 12954 | |
| 12955 return string_t(r.c_str(), r.length()); | |
| 12956 } | |
| 12957 #endif | |
| 12958 | |
| 12959 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const | |
| 12960 { | |
| 12961 impl::xpath_context c(n, 1, 1); | |
| 12962 impl::xpath_stack_data sd; | |
| 12963 | |
| 12964 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string(); | |
| 12965 | |
| 12966 if (sd.oom) | |
| 12967 { | |
| 12968 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 12969 r = impl::xpath_string(); | |
| 12970 #else | |
| 12971 throw std::bad_alloc(); | |
| 12972 #endif | |
| 12973 } | |
| 12974 | |
| 12975 size_t full_size = r.length() + 1; | |
| 12976 | |
| 12977 if (capacity > 0) | |
| 12978 { | |
| 12979 size_t size = (full_size < capacity) ? full_size : capacity; | |
| 12980 assert(size > 0); | |
| 12981 | |
| 12982 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); | |
| 12983 buffer[size - 1] = 0; | |
| 12984 } | |
| 12985 | |
| 12986 return full_size; | |
| 12987 } | |
| 12988 | |
| 12989 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const | |
| 12990 { | |
| 12991 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); | |
| 12992 if (!root) return xpath_node_set(); | |
| 12993 | |
| 12994 impl::xpath_context c(n, 1, 1); | |
| 12995 impl::xpath_stack_data sd; | |
| 12996 | |
| 12997 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); | |
| 12998 | |
| 12999 if (sd.oom) | |
| 13000 { | |
| 13001 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 13002 return xpath_node_set(); | |
| 13003 #else | |
| 13004 throw std::bad_alloc(); | |
| 13005 #endif | |
| 13006 } | |
| 13007 | |
| 13008 return xpath_node_set(r.begin(), r.end(), r.type()); | |
| 13009 } | |
| 13010 | |
| 13011 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const | |
| 13012 { | |
| 13013 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); | |
| 13014 if (!root) return xpath_node(); | |
| 13015 | |
| 13016 impl::xpath_context c(n, 1, 1); | |
| 13017 impl::xpath_stack_data sd; | |
| 13018 | |
| 13019 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); | |
| 13020 | |
| 13021 if (sd.oom) | |
| 13022 { | |
| 13023 #ifdef PUGIXML_NO_EXCEPTIONS | |
| 13024 return xpath_node(); | |
| 13025 #else | |
| 13026 throw std::bad_alloc(); | |
| 13027 #endif | |
| 13028 } | |
| 13029 | |
| 13030 return r.first(); | |
| 13031 } | |
| 13032 | |
| 13033 PUGI__FN const xpath_parse_result& xpath_query::result() const | |
| 13034 { | |
| 13035 return _result; | |
| 13036 } | |
| 13037 | |
| 13038 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) | |
| 13039 { | |
| 13040 } | |
| 13041 | |
| 13042 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const | |
| 13043 { | |
| 13044 return _impl ? unspecified_bool_xpath_query : 0; | |
| 13045 } | |
| 13046 | |
| 13047 PUGI__FN bool xpath_query::operator!() const | |
| 13048 { | |
| 13049 return !_impl; | |
| 13050 } | |
| 13051 | |
| 13052 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const | |
| 13053 { | |
| 13054 xpath_query q(query, variables); | |
| 13055 return q.evaluate_node(*this); | |
| 13056 } | |
| 13057 | |
| 13058 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const | |
| 13059 { | |
| 13060 return query.evaluate_node(*this); | |
| 13061 } | |
| 13062 | |
| 13063 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const | |
| 13064 { | |
| 13065 xpath_query q(query, variables); | |
| 13066 return q.evaluate_node_set(*this); | |
| 13067 } | |
| 13068 | |
| 13069 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const | |
| 13070 { | |
| 13071 return query.evaluate_node_set(*this); | |
| 13072 } | |
| 13073 | |
| 13074 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const | |
| 13075 { | |
| 13076 xpath_query q(query, variables); | |
| 13077 return q.evaluate_node(*this); | |
| 13078 } | |
| 13079 | |
| 13080 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const | |
| 13081 { | |
| 13082 return query.evaluate_node(*this); | |
| 13083 } | |
| 13084 } | |
| 13085 | |
| 13086 #endif | |
| 13087 | |
| 13088 #ifdef __BORLANDC__ | |
| 13089 # pragma option pop | |
| 13090 #endif | |
| 13091 | |
| 13092 // Intel C++ does not properly keep warning state for function templates, | |
| 13093 // so popping warning state at the end of translation unit leads to warnings in the middle. | |
| 13094 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | |
| 13095 # pragma warning(pop) | |
| 13096 #endif | |
| 13097 | |
| 13098 #if defined(_MSC_VER) && defined(__c2__) | |
| 13099 # pragma clang diagnostic pop | |
| 13100 #endif | |
| 13101 | |
| 13102 // Undefine all local macros (makes sure we're not leaking macros in header-only mode) | |
| 13103 #undef PUGI__NO_INLINE | |
| 13104 #undef PUGI__UNLIKELY | |
| 13105 #undef PUGI__STATIC_ASSERT | |
| 13106 #undef PUGI__DMC_VOLATILE | |
| 13107 #undef PUGI__UNSIGNED_OVERFLOW | |
| 13108 #undef PUGI__MSVC_CRT_VERSION | |
| 13109 #undef PUGI__SNPRINTF | |
| 13110 #undef PUGI__NS_BEGIN | |
| 13111 #undef PUGI__NS_END | |
| 13112 #undef PUGI__FN | |
| 13113 #undef PUGI__FN_NO_INLINE | |
| 13114 #undef PUGI__GETHEADER_IMPL | |
| 13115 #undef PUGI__GETPAGE_IMPL | |
| 13116 #undef PUGI__GETPAGE | |
| 13117 #undef PUGI__NODETYPE | |
| 13118 #undef PUGI__IS_CHARTYPE_IMPL | |
| 13119 #undef PUGI__IS_CHARTYPE | |
| 13120 #undef PUGI__IS_CHARTYPEX | |
| 13121 #undef PUGI__ENDSWITH | |
| 13122 #undef PUGI__SKIPWS | |
| 13123 #undef PUGI__OPTSET | |
| 13124 #undef PUGI__PUSHNODE | |
| 13125 #undef PUGI__POPNODE | |
| 13126 #undef PUGI__SCANFOR | |
| 13127 #undef PUGI__SCANWHILE | |
| 13128 #undef PUGI__SCANWHILE_UNROLL | |
| 13129 #undef PUGI__ENDSEG | |
| 13130 #undef PUGI__THROW_ERROR | |
| 13131 #undef PUGI__CHECK_ERROR | |
| 13132 | |
| 13133 #endif | |
| 13134 | |
| 13135 /** | |
| 13136 * Copyright (c) 2006-2022 Arseny Kapoulkine | |
| 13137 * | |
| 13138 * Permission is hereby granted, free of charge, to any person | |
| 13139 * obtaining a copy of this software and associated documentation | |
| 13140 * files (the "Software"), to deal in the Software without | |
| 13141 * restriction, including without limitation the rights to use, | |
| 13142 * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 13143 * copies of the Software, and to permit persons to whom the | |
| 13144 * Software is furnished to do so, subject to the following | |
| 13145 * conditions: | |
| 13146 * | |
| 13147 * The above copyright notice and this permission notice shall be | |
| 13148 * included in all copies or substantial portions of the Software. | |
| 13149 * | |
| 13150 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
| 13151 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
| 13152 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
| 13153 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
| 13154 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
| 13155 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
| 13156 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
| 13157 * OTHER DEALINGS IN THE SOFTWARE. | |
| 13158 */ |
