ICU 78.3 78.3
Loading...
Searching...
No Matches
uset.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2002-2014, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uset.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2002mar07
16* created by: Markus W. Scherer
17*
18* C version of UnicodeSet.
19*/
20
21
28
29#ifndef __USET_H__
30#define __USET_H__
31
32#include "unicode/utypes.h"
33#include "unicode/uchar.h"
34
35#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
36#include <string>
37#include <string_view>
38#include "unicode/char16ptr.h"
40#include "unicode/utf16.h"
41#endif
42
43#ifndef USET_DEFINED
44
45#ifndef U_IN_DOXYGEN
46#define USET_DEFINED
47#endif
54typedef struct USet USet;
55#endif
56
68enum {
74
102
115
129};
130
186typedef enum USetSpanCondition {
235#ifndef U_HIDE_DEPRECATED_API
241#endif // U_HIDE_DEPRECATED_API
243
244enum {
252};
253
281
282/*********************************************************************
283 * USet API
284 *********************************************************************/
285
293U_CAPI USet* U_EXPORT2
295
306U_CAPI USet* U_EXPORT2
308
318U_CAPI USet* U_EXPORT2
319uset_openPattern(const UChar* pattern, int32_t patternLength,
320 UErrorCode* ec);
321
335U_CAPI USet* U_EXPORT2
336uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
337 uint32_t options,
338 UErrorCode* ec);
339
346U_CAPI void U_EXPORT2
348
349#if U_SHOW_CPLUSPLUS_API
350
351U_NAMESPACE_BEGIN
352
363
364U_NAMESPACE_END
365
366#endif
367
377U_CAPI USet * U_EXPORT2
378uset_clone(const USet *set);
379
389U_CAPI UBool U_EXPORT2
390uset_isFrozen(const USet *set);
391
406U_CAPI void U_EXPORT2
408
419U_CAPI USet * U_EXPORT2
421
431U_CAPI void U_EXPORT2
433 UChar32 start, UChar32 end);
434
459U_CAPI int32_t U_EXPORT2
461 const UChar *pattern, int32_t patternLength,
462 uint32_t options,
463 UErrorCode *status);
464
487U_CAPI void U_EXPORT2
489 UProperty prop, int32_t value, UErrorCode* ec);
490
526U_CAPI void U_EXPORT2
528 const UChar *prop, int32_t propLength,
529 const UChar *value, int32_t valueLength,
530 UErrorCode* ec);
531
541U_CAPI UBool U_EXPORT2
542uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
543 int32_t pos);
544
560U_CAPI int32_t U_EXPORT2
562 UChar* result, int32_t resultCapacity,
563 UBool escapeUnprintable,
564 UErrorCode* ec);
565
574U_CAPI void U_EXPORT2
576
589U_CAPI void U_EXPORT2
590uset_addAll(USet* set, const USet *additionalSet);
591
601U_CAPI void U_EXPORT2
603
613U_CAPI void U_EXPORT2
614uset_addString(USet* set, const UChar* str, int32_t strLen);
615
625U_CAPI void U_EXPORT2
626uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
627
636U_CAPI void U_EXPORT2
638
648U_CAPI void U_EXPORT2
650
660U_CAPI void U_EXPORT2
661uset_removeString(USet* set, const UChar* str, int32_t strLen);
662
672U_CAPI void U_EXPORT2
673uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
674
686U_CAPI void U_EXPORT2
687uset_removeAll(USet* set, const USet* removeSet);
688
701U_CAPI void U_EXPORT2
702uset_retain(USet* set, UChar32 start, UChar32 end);
703
715U_CAPI void U_EXPORT2
716uset_retainString(USet *set, const UChar *str, int32_t length);
717
727U_CAPI void U_EXPORT2
728uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
729
742U_CAPI void U_EXPORT2
743uset_retainAll(USet* set, const USet* retain);
744
753U_CAPI void U_EXPORT2
755
769U_CAPI void U_EXPORT2
771
785U_CAPI void U_EXPORT2
787
798U_CAPI void U_EXPORT2
799uset_complementString(USet *set, const UChar *str, int32_t length);
800
810U_CAPI void U_EXPORT2
811uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
812
824U_CAPI void U_EXPORT2
825uset_complementAll(USet* set, const USet* complement);
826
834U_CAPI void U_EXPORT2
836
865U_CAPI void U_EXPORT2
866uset_closeOver(USet* set, int32_t attributes);
867
874U_CAPI void U_EXPORT2
876
884U_CAPI UBool U_EXPORT2
885uset_isEmpty(const USet* set);
886
892U_CAPI UBool U_EXPORT2
894
903U_CAPI UBool U_EXPORT2
905
915U_CAPI UBool U_EXPORT2
916uset_containsRange(const USet* set, UChar32 start, UChar32 end);
917
926U_CAPI UBool U_EXPORT2
927uset_containsString(const USet* set, const UChar* str, int32_t strLen);
928
939U_CAPI int32_t U_EXPORT2
940uset_indexOf(const USet* set, UChar32 c);
941
957U_CAPI UChar32 U_EXPORT2
958uset_charAt(const USet* set, int32_t charIndex);
959
975U_CAPI int32_t U_EXPORT2
976uset_size(const USet* set);
977
987U_CAPI int32_t U_EXPORT2
989
998U_CAPI int32_t U_EXPORT2
1000
1013U_CAPI const UChar* U_EXPORT2
1014uset_getString(const USet *set, int32_t index, int32_t *pLength);
1015
1026U_CAPI int32_t U_EXPORT2
1028
1059U_CAPI int32_t U_EXPORT2
1060uset_getItem(const USet* set, int32_t itemIndex,
1061 UChar32* start, UChar32* end,
1062 UChar* str, int32_t strCapacity,
1063 UErrorCode* ec);
1064
1073U_CAPI UBool U_EXPORT2
1074uset_containsAll(const USet* set1, const USet* set2);
1075
1086U_CAPI UBool U_EXPORT2
1087uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1088
1097U_CAPI UBool U_EXPORT2
1098uset_containsNone(const USet* set1, const USet* set2);
1099
1108U_CAPI UBool U_EXPORT2
1109uset_containsSome(const USet* set1, const USet* set2);
1110
1130U_CAPI int32_t U_EXPORT2
1131uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1132
1151U_CAPI int32_t U_EXPORT2
1152uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1153
1173U_CAPI int32_t U_EXPORT2
1174uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1175
1194U_CAPI int32_t U_EXPORT2
1195uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1196
1205U_CAPI UBool U_EXPORT2
1206uset_equals(const USet* set1, const USet* set2);
1207
1208/*********************************************************************
1209 * Serialized set API
1210 *********************************************************************/
1211
1261U_CAPI int32_t U_EXPORT2
1262uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1263
1272U_CAPI UBool U_EXPORT2
1273uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1274
1282U_CAPI void U_EXPORT2
1284
1293U_CAPI UBool U_EXPORT2
1295
1305U_CAPI int32_t U_EXPORT2
1307
1321U_CAPI UBool U_EXPORT2
1322uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1323 UChar32* pStart, UChar32* pEnd);
1324
1325#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1326
1327namespace U_HEADER_ONLY_NAMESPACE {
1328
1329// Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,
1330// not intended to be used via export from the ICU DLL.
1331
1337public:
1340
1342 bool operator==(const USetCodePointIterator &other) const {
1343 // No need to compare rangeCount & end given private constructor
1344 // and assuming we don't compare iterators across the set being modified.
1345 // And comparing rangeIndex is redundant with comparing c.
1346 // We might even skip comparing uset.
1347 // Unless we want operator==() to be "correct" for more than iteration.
1348 return uset == other.uset && c == other.c;
1349 }
1350
1352 bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
1353
1355 UChar32 operator*() const { return c; }
1356
1362 if (c < end) {
1363 ++c;
1364 } else if (rangeIndex < rangeCount) {
1365 UErrorCode errorCode = U_ZERO_ERROR;
1366 int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);
1367 if (U_SUCCESS(errorCode) && result == 0) {
1368 ++rangeIndex;
1369 } else {
1370 c = end = U_SENTINEL;
1371 }
1372 } else {
1373 c = end = U_SENTINEL;
1374 }
1375 return *this;
1376 }
1377
1383 USetCodePointIterator result(*this);
1384 operator++();
1385 return result;
1386 }
1387
1388private:
1389 friend class USetCodePoints;
1390
1391 USetCodePointIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
1392 : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount),
1393 c(U_SENTINEL), end(U_SENTINEL) {
1394 // Fetch the first range.
1395 operator++();
1396 }
1397
1398 const USet *uset;
1399 int32_t rangeIndex;
1400 int32_t rangeCount;
1401 UChar32 c, end;
1402};
1403
1423public:
1428 USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
1429
1431 USetCodePoints(const USetCodePoints &other) = default;
1432
1435 return USetCodePointIterator(uset, 0, rangeCount);
1436 }
1437
1440 return USetCodePointIterator(uset, rangeCount, rangeCount);
1441 }
1442
1443private:
1444 const USet *uset;
1445 int32_t rangeCount;
1446};
1447
1457 struct iterator {
1459 iterator(UChar32 aC) : c(aC) {}
1460
1462 bool operator==(const iterator &other) const { return c == other.c; }
1464 bool operator!=(const iterator &other) const { return !operator==(other); }
1465
1467 UChar32 operator*() const { return c; }
1468
1474 ++c;
1475 return *this;
1476 }
1477
1483 return c++;
1484 }
1485
1491 };
1492
1496 CodePointRange(const CodePointRange &other) = default;
1498 size_t size() const { return (rangeEnd + 1) - rangeStart; }
1500 iterator begin() const { return rangeStart; }
1502 iterator end() const { return rangeEnd + 1; }
1503
1514};
1515
1521public:
1523 USetRangeIterator(const USetRangeIterator &other) = default;
1524
1526 bool operator==(const USetRangeIterator &other) const {
1527 // No need to compare rangeCount given private constructor
1528 // and assuming we don't compare iterators across the set being modified.
1529 // We might even skip comparing uset.
1530 // Unless we want operator==() to be "correct" for more than iteration.
1531 return uset == other.uset && rangeIndex == other.rangeIndex;
1532 }
1533
1535 bool operator!=(const USetRangeIterator &other) const { return !operator==(other); }
1536
1539 if (rangeIndex < rangeCount) {
1540 UChar32 start, end;
1541 UErrorCode errorCode = U_ZERO_ERROR;
1542 int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode);
1543 if (U_SUCCESS(errorCode) && result == 0) {
1544 return CodePointRange(start, end);
1545 }
1546 }
1548 }
1549
1555 ++rangeIndex;
1556 return *this;
1557 }
1558
1564 USetRangeIterator result(*this);
1565 ++rangeIndex;
1566 return result;
1567 }
1568
1569private:
1570 friend class USetRanges;
1571
1572 USetRangeIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
1573 : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount) {}
1574
1575 const USet *uset;
1576 int32_t rangeIndex;
1577 int32_t rangeCount;
1578};
1579
1604public:
1609 USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
1610
1612 USetRanges(const USetRanges &other) = default;
1613
1616 return USetRangeIterator(uset, 0, rangeCount);
1617 }
1618
1621 return USetRangeIterator(uset, rangeCount, rangeCount);
1622 }
1623
1624private:
1625 const USet *uset;
1626 int32_t rangeCount;
1627};
1628
1634public:
1637
1639 bool operator==(const USetStringIterator &other) const {
1640 // No need to compare count given private constructor
1641 // and assuming we don't compare iterators across the set being modified.
1642 // We might even skip comparing uset.
1643 // Unless we want operator==() to be "correct" for more than iteration.
1644 return uset == other.uset && index == other.index;
1645 }
1646
1648 bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
1649
1651 std::u16string_view operator*() const {
1652 if (index < count) {
1653 int32_t length;
1654 const UChar *uchars = uset_getString(uset, index, &length);
1655 // assert uchars != nullptr;
1656 return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1657 }
1658 return {};
1659 }
1660
1666 ++index;
1667 return *this;
1668 }
1669
1675 USetStringIterator result(*this);
1676 ++index;
1677 return result;
1678 }
1679
1680private:
1681 friend class USetStrings;
1682
1683 USetStringIterator(const USet *pUset, int32_t nIndex, int32_t nCount)
1684 : uset(pUset), index(nIndex), count(nCount) {}
1685
1686 const USet *uset;
1687 int32_t index;
1688 int32_t count;
1689};
1690
1714public:
1719 USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {}
1720
1722 USetStrings(const USetStrings &other) = default;
1723
1726 return USetStringIterator(uset, 0, count);
1727 }
1728
1731 return USetStringIterator(uset, count, count);
1732 }
1733
1734private:
1735 const USet *uset;
1736 int32_t count;
1737};
1738
1739#ifndef U_HIDE_DRAFT_API
1745public:
1748
1750 bool operator==(const USetElementIterator &other) const {
1751 // No need to compare rangeCount & end given private constructor
1752 // and assuming we don't compare iterators across the set being modified.
1753 // We might even skip comparing uset.
1754 // Unless we want operator==() to be "correct" for more than iteration.
1755 return uset == other.uset && c == other.c && index == other.index;
1756 }
1757
1759 bool operator!=(const USetElementIterator &other) const { return !operator==(other); }
1760
1762 std::u16string operator*() const {
1763 if (c >= 0) {
1764 return c <= 0xffff ?
1765 std::u16string({static_cast<char16_t>(c)}) :
1766 std::u16string({U16_LEAD(c), U16_TRAIL(c)});
1767 } else if (index < totalCount) {
1768 int32_t length;
1769 const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
1770 // assert uchars != nullptr;
1771 return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1772 } else {
1773 return {};
1774 }
1775 }
1776
1782 if (c < end) {
1783 ++c;
1784 } else if (index < rangeCount) {
1785 UErrorCode errorCode = U_ZERO_ERROR;
1786 int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);
1787 if (U_SUCCESS(errorCode) && result == 0) {
1788 ++index;
1789 } else {
1790 c = end = U_SENTINEL;
1791 }
1792 } else if (c >= 0) {
1793 // assert index == rangeCount;
1794 // Switch from the last range to the first string.
1795 c = end = U_SENTINEL;
1796 } else {
1797 ++index;
1798 }
1799 return *this;
1800 }
1801
1807 USetElementIterator result(*this);
1808 operator++();
1809 return result;
1810 }
1811
1812private:
1813 friend class USetElements;
1814
1815 USetElementIterator(const USet *pUset, int32_t nIndex, int32_t nRangeCount, int32_t nTotalCount)
1816 : uset(pUset), index(nIndex), rangeCount(nRangeCount), totalCount(nTotalCount),
1817 c(U_SENTINEL), end(U_SENTINEL) {
1818 if (index < rangeCount) {
1819 // Fetch the first range.
1820 operator++();
1821 }
1822 // Otherwise don't move beyond the (index - rangeCount)-th string.
1823 }
1824
1825 const USet *uset;
1826 int32_t index;
1828 int32_t rangeCount;
1838 int32_t totalCount;
1839 UChar32 c, end;
1840};
1841
1870public:
1875 USetElements(const USet *pUset)
1876 : uset(pUset), rangeCount(uset_getRangeCount(pUset)),
1877 stringCount(uset_getStringCount(pUset)) {}
1878
1880 USetElements(const USetElements &other) = default;
1881
1884 return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);
1885 }
1886
1889 return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);
1890 }
1891
1892private:
1893 const USet *uset;
1894 int32_t rangeCount, stringCount;
1895};
1896
1897#endif // U_HIDE_DRAFT_API
1898
1899} // namespace U_HEADER_ONLY_NAMESPACE
1900
1901#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1902
1903#endif // __USET_H__
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
"Smart pointer" class, closes a USet via uset_close().
Iterator returned by USetCodePoints.
Definition uset.h:1336
bool operator==(const USetCodePointIterator &other) const
Definition uset.h:1342
USetCodePointIterator & operator++()
Pre-increment.
Definition uset.h:1361
bool operator!=(const USetCodePointIterator &other) const
Definition uset.h:1352
USetCodePointIterator operator++(int)
Post-increment.
Definition uset.h:1382
USetCodePointIterator(const USetCodePointIterator &other)=default
C++ "range" for iterating over the code points of a USet.
Definition uset.h:1422
USetCodePoints(const USetCodePoints &other)=default
USetCodePointIterator end() const
Definition uset.h:1439
USetCodePoints(const USet *pUset)
Constructs a C++ "range" object over the code points of the USet.
Definition uset.h:1428
USetCodePointIterator begin() const
Definition uset.h:1434
Iterator returned by USetElements.
Definition uset.h:1744
USetElementIterator operator++(int)
Post-increment.
Definition uset.h:1806
bool operator==(const USetElementIterator &other) const
Definition uset.h:1750
bool operator!=(const USetElementIterator &other) const
Definition uset.h:1759
USetElementIterator & operator++()
Pre-increment.
Definition uset.h:1781
USetElementIterator(const USetElementIterator &other)=default
A C++ "range" for iterating over all of the elements of a USet.
Definition uset.h:1869
USetElements(const USetElements &other)=default
USetElementIterator end() const
Definition uset.h:1888
USetElements(const USet *pUset)
Constructs a C++ "range" object over all of the elements of the USet.
Definition uset.h:1875
USetElementIterator begin() const
Definition uset.h:1883
Iterator returned by USetRanges.
Definition uset.h:1520
USetRangeIterator & operator++()
Pre-increment.
Definition uset.h:1554
CodePointRange operator*() const
Definition uset.h:1538
bool operator==(const USetRangeIterator &other) const
Definition uset.h:1526
USetRangeIterator operator++(int)
Post-increment.
Definition uset.h:1563
bool operator!=(const USetRangeIterator &other) const
Definition uset.h:1535
USetRangeIterator(const USetRangeIterator &other)=default
C++ "range" for iterating over the code point ranges of a USet.
Definition uset.h:1603
USetRangeIterator end() const
Definition uset.h:1620
USetRangeIterator begin() const
Definition uset.h:1615
USetRanges(const USet *pUset)
Constructs a C++ "range" object over the code point ranges of the USet.
Definition uset.h:1609
USetRanges(const USetRanges &other)=default
Iterator returned by USetStrings.
Definition uset.h:1633
USetStringIterator & operator++()
Pre-increment.
Definition uset.h:1665
USetStringIterator(const USetStringIterator &other)=default
bool operator!=(const USetStringIterator &other) const
Definition uset.h:1648
std::u16string_view operator*() const
Definition uset.h:1651
USetStringIterator operator++(int)
Post-increment.
Definition uset.h:1674
bool operator==(const USetStringIterator &other) const
Definition uset.h:1639
C++ "range" for iterating over the empty and multi-character strings of a USet.
Definition uset.h:1713
USetStrings(const USetStrings &other)=default
USetStringIterator begin() const
Definition uset.h:1725
USetStringIterator end() const
Definition uset.h:1730
USetStrings(const USet *pUset)
Constructs a C++ "range" object over the strings of the USet.
Definition uset.h:1719
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
A serialized form of a Unicode set.
Definition uset.h:259
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition uset.h:279
int32_t bmpLength
The length of the array that contains BMP characters.
Definition uset.h:269
const uint16_t * array
The serialized Unicode Set.
Definition uset.h:264
int32_t length
The total length of the array.
Definition uset.h:274
bool operator!=(const iterator &other) const
Definition uset.h:1464
UChar32 c
The current code point in the range.
Definition uset.h:1490
iterator & operator++()
Pre-increment.
Definition uset.h:1473
iterator operator++(int)
Post-increment.
Definition uset.h:1482
bool operator==(const iterator &other) const
Definition uset.h:1462
A contiguous range of code points in a USet/UnicodeSet.
Definition uset.h:1455
CodePointRange(UChar32 start, UChar32 end)
Definition uset.h:1494
UChar32 rangeEnd
Inclusive end of a USet/UnicodeSet range of code points.
Definition uset.h:1513
CodePointRange(const CodePointRange &other)=default
UChar32 rangeStart
Start of a USet/UnicodeSet range of code points.
Definition uset.h:1508
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition uchar.h:196
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:400
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:449
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:269
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition umachine.h:110
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32...
Definition umachine.h:469
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_hasStrings(const USet *set)
U_CAPI int32_t uset_getStringCount(const USet *set)
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition uset.h:73
@ USET_ADD_CASE_MAPPINGS
Adds all case mappings for each element in the set.
Definition uset.h:114
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:101
@ USET_SIMPLE_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:128
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition uset.h:251
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition uset.h:186
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition uset.h:199
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition uset.h:214
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition uset.h:240
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition uset.h:234
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI int32_t uset_getRangeCount(const USet *set)
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition uset.h:54
U_CAPI const UChar * uset_getString(const USet *set, int32_t index, int32_t *pLength)
Returns the index-th string (empty or multi-character) in the set.
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
C API: 16-bit Unicode handling macros.
#define U16_TRAIL(supplementary)
Get the trail surrogate (0xdc00..0xdfff) for a supplementary code point (0x10000.....
Definition utf16.h:132
#define U16_LEAD(supplementary)
Get the lead surrogate (0xd800..0xdbff) for a supplementary code point (0x10000..0x10ffff).
Definition utf16.h:123
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:509
@ U_ZERO_ERROR
No error, no warning.
Definition utypes.h:544
#define U_SUCCESS(x)
Does the error code indicate success?
Definition utypes.h:822