ICU 78.3 78.3
Loading...
Searching...
No Matches
regex.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 2002-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* file name: regex.h
9* encoding: UTF-8
10* indentation:4
11*
12* created on: 2002oct22
13* created by: Andy Heninger
14*
15* ICU Regular Expressions, API for C++
16*/
17
18#ifndef REGEX_H
19#define REGEX_H
20
21//#define REGEX_DEBUG
22
44
45#include "unicode/utypes.h"
46
47#if U_SHOW_CPLUSPLUS_API
48
49#if !UCONFIG_NO_REGULAR_EXPRESSIONS
50
51#include "unicode/uobject.h"
52#include "unicode/unistr.h"
53#include "unicode/utext.h"
54#include "unicode/parseerr.h"
55
56#include "unicode/uregex.h"
57
58// Forward Declarations
59
60struct UHashtable;
61
62U_NAMESPACE_BEGIN
63
64struct Regex8BitSet;
65class RegexCImpl;
66class RegexMatcher;
67class RegexPattern;
68struct REStackFrame;
69class BreakIterator;
70class UnicodeSet;
71class UVector;
72class UVector32;
73class UVector64;
74
75
87class U_I18N_API RegexPattern final : public UObject {
88public:
89
98
106
112 virtual ~RegexPattern();
113
122 bool operator==(const RegexPattern& that) const;
123
132 inline bool operator!=(const RegexPattern& that) const {return ! operator ==(that);}
133
139 RegexPattern &operator =(const RegexPattern &source);
140
149
150
175 static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
176 UParseError &pe,
177 UErrorCode &status);
178
205 static RegexPattern * U_EXPORT2 compile( UText *regex,
206 UParseError &pe,
207 UErrorCode &status);
208
233 static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
234 uint32_t flags,
235 UParseError &pe,
236 UErrorCode &status);
237
264 static RegexPattern * U_EXPORT2 compile( UText *regex,
265 uint32_t flags,
266 UParseError &pe,
267 UErrorCode &status);
268
291 static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
292 uint32_t flags,
293 UErrorCode &status);
294
319 static RegexPattern * U_EXPORT2 compile( UText *regex,
320 uint32_t flags,
321 UErrorCode &status);
322
328 uint32_t flags() const;
329
347 RegexMatcher *matcher(const UnicodeString &input,
348 UErrorCode &status) const;
349
350private:
363 RegexMatcher *matcher(const char16_t *input,
364 UErrorCode &status) const = delete;
365public:
366
367
379 RegexMatcher *matcher(UErrorCode &status) const;
380
381
396 static UBool U_EXPORT2 matches(const UnicodeString &regex,
397 const UnicodeString &input,
398 UParseError &pe,
399 UErrorCode &status);
400
415 static UBool U_EXPORT2 matches(UText *regex,
416 UText *input,
417 UParseError &pe,
418 UErrorCode &status);
419
429
430
441 UText *patternText(UErrorCode &status) const;
442
443
457 int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
458
459
476 int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
477
478
517 int32_t split(const UnicodeString &input,
518 UnicodeString dest[],
519 int32_t destCapacity,
520 UErrorCode &status) const;
521
522
561 int32_t split(UText *input,
562 UText *dest[],
563 int32_t destCapacity,
564 UErrorCode &status) const;
565
566
572 virtual UClassID getDynamicClassID() const override;
573
579 static UClassID U_EXPORT2 getStaticClassID();
580
581private:
582 //
583 // Implementation Data
584 //
585 UText *fPattern; // The original pattern string.
586 UnicodeString *fPatternString; // The original pattern UnicodeString if relevant
587 uint32_t fFlags; // The flags used when compiling the pattern.
588 //
589 UVector64 *fCompiledPat; // The compiled pattern p-code.
590 UnicodeString fLiteralText; // Any literal string data from the pattern,
591 // after un-escaping, for use during the match.
592
593 UVector *fSets; // Any UnicodeSets referenced from the pattern.
594 Regex8BitSet *fSets8; // (and fast sets for latin-1 range.)
595
596
597 UErrorCode fDeferredStatus; // status if some prior error has left this
598 // RegexPattern in an unusable state.
599
600 int32_t fMinMatchLen; // Minimum Match Length. All matches will have length
601 // >= this value. For some patterns, this calculated
602 // value may be less than the true shortest
603 // possible match.
604
605 int32_t fFrameSize; // Size of a state stack frame in the
606 // execution engine.
607
608 int32_t fDataSize; // The size of the data needed by the pattern that
609 // does not go on the state stack, but has just
610 // a single copy per matcher.
611
612 UVector32 *fGroupMap; // Map from capture group number to position of
613 // the group's variables in the matcher stack frame.
614
615 int32_t fStartType; // Info on how a match must start.
616 int32_t fInitialStringIdx; //
617 int32_t fInitialStringLen;
618 UnicodeSet *fInitialChars;
619 UChar32 fInitialChar;
620 Regex8BitSet *fInitialChars8;
621 UBool fNeedsAltInput;
622
623 UHashtable *fNamedCaptureMap; // Map from capture group names to numbers.
624
625 friend class RegexCompile;
626 friend class RegexMatcher;
627 friend class RegexCImpl;
628
629 //
630 // Implementation Methods
631 //
632 void init(); // Common initialization, for use by constructors.
633 bool initNamedCaptureMap(); // Lazy init for fNamedCaptureMap.
634 void zap(); // Common cleanup
635
636 void dumpOp(int32_t index) const;
637
638 public:
639#ifndef U_HIDE_INTERNAL_API
644 void dumpPattern() const;
645#endif /* U_HIDE_INTERNAL_API */
646};
647
648
649
659class U_I18N_API RegexMatcher final : public UObject {
660public:
661
675 RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
676
691 RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
692
714 uint32_t flags, UErrorCode &status);
715
737 uint32_t flags, UErrorCode &status);
738
739private:
751 RegexMatcher(const UnicodeString &regexp, const char16_t *input,
752 uint32_t flags, UErrorCode &status) = delete;
753public:
754
755
761 virtual ~RegexMatcher();
762
763
771
772
783 UBool matches(int64_t startIndex, UErrorCode &status);
784
785
800
801
815 UBool lookingAt(int64_t startIndex, UErrorCode &status);
816
817
831
832
848
858 UBool find(int64_t start, UErrorCode &status);
859
860
871
872
890 UnicodeString group(int32_t groupNum, UErrorCode &status) const;
891
897 int32_t groupCount() const;
898
899
914 UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
915
936 UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
937
945 int32_t start(UErrorCode &status) const;
946
954 int64_t start64(UErrorCode &status) const;
955
956
970 int32_t start(int32_t group, UErrorCode &status) const;
971
985 int64_t start64(int32_t group, UErrorCode &status) const;
986
1000 int32_t end(UErrorCode &status) const;
1001
1015 int64_t end64(UErrorCode &status) const;
1016
1017
1035 int32_t end(int32_t group, UErrorCode &status) const;
1036
1054 int64_t end64(int32_t group, UErrorCode &status) const;
1055
1065
1066
1082 RegexMatcher &reset(int64_t index, UErrorCode &status);
1083
1084
1103
1104
1119
1120
1146
1147private:
1160 RegexMatcher &reset(const char16_t *input) = delete;
1161public:
1162
1170 const UnicodeString &input() const;
1171
1181
1192 UText *getInput(UText *dest, UErrorCode &status) const;
1193
1194
1213 RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status);
1214
1226 RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
1227
1236 int32_t regionStart() const;
1237
1246 int64_t regionStart64() const;
1247
1248
1257 int32_t regionEnd() const;
1258
1267 int64_t regionEnd64() const;
1268
1278
1298
1299
1308
1309
1323
1324
1337 UBool hitEnd() const;
1338
1349
1350
1356 const RegexPattern &pattern() const;
1357
1358
1375 UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
1376
1377
1398 UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
1399
1400
1422
1423
1448 UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
1449
1450
1479 const UnicodeString &replacement, UErrorCode &status);
1480
1481
1510 UText *replacement, UErrorCode &status);
1511
1512
1524
1525
1540
1541
1565 int32_t split(const UnicodeString &input,
1566 UnicodeString dest[],
1567 int32_t destCapacity,
1568 UErrorCode &status);
1569
1570
1595 UText *dest[],
1596 int32_t destCapacity,
1597 UErrorCode &status);
1598
1620 void setTimeLimit(int32_t limit, UErrorCode &status);
1621
1628 int32_t getTimeLimit() const;
1629
1651 void setStackLimit(int32_t limit, UErrorCode &status);
1652
1660 int32_t getStackLimit() const;
1661
1662
1677 const void *context,
1678 UErrorCode &status);
1679
1680
1692 const void *&context,
1693 UErrorCode &status);
1694
1695
1710 const void *context,
1711 UErrorCode &status);
1712
1713
1725 const void *&context,
1726 UErrorCode &status);
1727
1728#ifndef U_HIDE_INTERNAL_API
1734 void setTrace(UBool state);
1735#endif /* U_HIDE_INTERNAL_API */
1736
1742 static UClassID U_EXPORT2 getStaticClassID();
1743
1749 virtual UClassID getDynamicClassID() const override;
1750
1751private:
1752 // Constructors and other object boilerplate are private.
1753 // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
1754 RegexMatcher() = delete; // default constructor not implemented
1755 RegexMatcher(const RegexPattern *pat);
1756 RegexMatcher(const RegexMatcher &other) = delete;
1757 RegexMatcher &operator =(const RegexMatcher &rhs) = delete;
1758 void init(UErrorCode &status); // Common initialization
1759 void init2(UText *t, UErrorCode &e); // Common initialization, part 2.
1760
1761 friend class RegexPattern;
1762 friend class RegexCImpl;
1763public:
1764#ifndef U_HIDE_INTERNAL_API
1766 void resetPreserveRegion(); // Reset matcher state, but preserve any region.
1767#endif /* U_HIDE_INTERNAL_API */
1768private:
1769
1770 //
1771 // MatchAt This is the internal interface to the match engine itself.
1772 // Match status comes back in matcher member variables.
1773 //
1774 void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status);
1775 inline void backTrack(int64_t &inputIdx, int32_t &patIdx);
1776 UBool isWordBoundary(int64_t pos); // perform Perl-like \b test
1777 UBool isUWordBoundary(int64_t pos, UErrorCode &status); // perform RBBI based \b test
1778 // Find a grapheme cluster boundary using a break iterator. For handling \X in regexes.
1779 int64_t followingGCBoundary(int64_t pos, UErrorCode &status);
1780 REStackFrame *resetStack();
1781 inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
1782 void IncrementTime(UErrorCode &status);
1783
1784 // Call user find callback function, if set. Return true if operation should be interrupted.
1785 inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
1786
1787 int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
1788
1789 UBool findUsingChunk(UErrorCode &status);
1790 void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
1791 UBool isChunkWordBoundary(int32_t pos);
1792
1793 const RegexPattern *fPattern;
1794 RegexPattern *fPatternOwned; // Non-nullptr if this matcher owns the pattern, and
1795 // should delete it when through.
1796
1797 const UnicodeString *fInput; // The string being matched. Only used for input()
1798 UText *fInputText; // The text being matched. Is never nullptr.
1799 UText *fAltInputText; // A shallow copy of the text being matched.
1800 // Only created if the pattern contains backreferences.
1801 int64_t fInputLength; // Full length of the input text.
1802 int32_t fFrameSize; // The size of a frame in the backtrack stack.
1803
1804 int64_t fRegionStart; // Start of the input region, default = 0.
1805 int64_t fRegionLimit; // End of input region, default to input.length.
1806
1807 int64_t fAnchorStart; // Region bounds for anchoring operations (^ or $).
1808 int64_t fAnchorLimit; // See useAnchoringBounds
1809
1810 int64_t fLookStart; // Region bounds for look-ahead/behind and
1811 int64_t fLookLimit; // other boundary tests. See
1812 // useTransparentBounds
1813
1814 int64_t fActiveStart; // Currently active bounds for matching.
1815 int64_t fActiveLimit; // Usually is the same as region, but
1816 // is changed to fLookStart/Limit when
1817 // entering look around regions.
1818
1819 UBool fTransparentBounds; // True if using transparent bounds.
1820 UBool fAnchoringBounds; // True if using anchoring bounds.
1821
1822 UBool fMatch; // True if the last attempted match was successful.
1823 int64_t fMatchStart; // Position of the start of the most recent match
1824 int64_t fMatchEnd; // First position after the end of the most recent match
1825 // Zero if no previous match, even when a region
1826 // is active.
1827 int64_t fLastMatchEnd; // First position after the end of the previous match,
1828 // or -1 if there was no previous match.
1829 int64_t fAppendPosition; // First position after the end of the previous
1830 // appendReplacement(). As described by the
1831 // JavaDoc for Java Matcher, where it is called
1832 // "append position"
1833 UBool fHitEnd; // True if the last match touched the end of input.
1834 UBool fRequireEnd; // True if the last match required end-of-input
1835 // (matched $ or Z)
1836
1837 UVector64 *fStack;
1838 REStackFrame *fFrame; // After finding a match, the last active stack frame,
1839 // which will contain the capture group results.
1840 // NOT valid while match engine is running.
1841
1842 int64_t *fData; // Data area for use by the compiled pattern.
1843 int64_t fSmallData[8]; // Use this for data if it's enough.
1844
1845 int32_t fTimeLimit; // Max time (in arbitrary steps) to let the
1846 // match engine run. Zero for unlimited.
1847
1848 int32_t fTime; // Match time, accumulates while matching.
1849 int32_t fTickCounter; // Low bits counter for time. Counts down StateSaves.
1850 // Kept separately from fTime to keep as much
1851 // code as possible out of the inline
1852 // StateSave function.
1853
1854 int32_t fStackLimit; // Maximum memory size to use for the backtrack
1855 // stack, in bytes. Zero for unlimited.
1856
1857 URegexMatchCallback *fCallbackFn; // Pointer to match progress callback funct.
1858 // nullptr if there is no callback.
1859 const void *fCallbackContext; // User Context ptr for callback function.
1860
1861 URegexFindProgressCallback *fFindProgressCallbackFn; // Pointer to find progress callback funct.
1862 // nullptr if there is no callback.
1863 const void *fFindProgressCallbackContext; // User Context ptr for callback function.
1864
1865
1866 UBool fInputUniStrMaybeMutable; // Set when fInputText wraps a UnicodeString that may be mutable - compatibility.
1867
1868 UBool fTraceDebug; // Set true for debug tracing of match engine.
1869
1870 UErrorCode fDeferredStatus; // Save error state that cannot be immediately
1871 // reported, or that permanently disables this matcher.
1872
1873 BreakIterator *fWordBreakItr;
1874 BreakIterator *fGCBreakItr;
1875};
1876
1877U_NAMESPACE_END
1878#endif // UCONFIG_NO_REGULAR_EXPRESSIONS
1879
1880#endif /* U_SHOW_CPLUSPLUS_API */
1881
1882#endif
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition brkiter.h:106
UBool hasTransparentBounds() const
Queries the transparency of region bounds for this matcher.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UBool lookingAt(UErrorCode &status)
Attempts to match the input string, starting from the beginning of the region, against the pattern.
UText * group(UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
void resetPreserveRegion()
int64_t regionStart64() const
Reports the start index of this matcher's region.
UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
UText * appendTail(UText *dest, UErrorCode &status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
UText * group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
void getFindProgressCallback(URegexFindProgressCallback *&callback, const void *&context, UErrorCode &status)
Get the find progress callback function for this Matcher.
UBool matches(UErrorCode &status)
Attempts to match the entire input region against the pattern.
UText * getInput(UText *dest, UErrorCode &status) const
Returns the input string being matched, either by copying it into the provided UText parameter or by ...
UBool hasAnchoringBounds() const
Return true if this matcher is using anchoring bounds.
RegexMatcher & appendReplacement(UnicodeString &dest, const UnicodeString &replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
int64_t start64(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
RegexMatcher & useAnchoringBounds(UBool b)
Set whether this matcher is using Anchoring Bounds for its region.
UBool hitEnd() const
Return true if the most recent matching operation attempted to access additional input beyond the ava...
int32_t start(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
UBool requireEnd() const
Return true if the most recent match succeeded and additional input could cause it to fail.
int64_t end64(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
int32_t regionStart() const
Reports the start index of this matcher's region.
int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
UBool find(UErrorCode &status)
Find the next pattern match in the input string.
int32_t groupCount() const
Returns the number of capturing groups in this matcher's pattern.
int32_t end(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
RegexMatcher(UText *regexp, UText *input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
int32_t getTimeLimit() const
Get the time limit, if any, for match operations made with this Matcher.
int64_t end64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
RegexMatcher & reset()
Resets this matcher.
RegexMatcher(const UnicodeString &regexp, const UnicodeString &input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
UText * inputText() const
Returns the input string being matched.
void setMatchCallback(URegexMatchCallback *callback, const void *context, UErrorCode &status)
Set a callback function for use with this Matcher.
UText * replaceAll(UText *replacement, UText *dest, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
void setFindProgressCallback(URegexFindProgressCallback *callback, const void *context, UErrorCode &status)
Set a progress callback function for use with find operations on this Matcher.
void setTimeLimit(int32_t limit, UErrorCode &status)
Set a processing time limit for match operations with this Matcher.
RegexMatcher & reset(UText *input)
Resets this matcher with a new input string.
int32_t end(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
int64_t regionEnd64() const
Reports the end (limit) index (exclusive) of this matcher's region.
UnicodeString & appendTail(UnicodeString &dest)
As the final step in a find-and-replace operation, append the remainder of the input string,...
UBool matches(int64_t startIndex, UErrorCode &status)
Resets the matcher, then attempts to match the input beginning at the specified startIndex,...
UnicodeString group(UErrorCode &status) const
Returns a string containing the text matched by the previous match.
virtual ~RegexMatcher()
Destructor.
RegexMatcher & region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status)
Identical to region(start, limit, status) but also allows a start position without resetting the regi...
RegexMatcher & appendReplacement(UText *dest, UText *replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
int32_t start(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
int32_t getStackLimit() const
Get the size of the heap storage available for use by the back tracking stack.
RegexMatcher & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the regular expression is looking for matches without changing...
int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
void setStackLimit(int32_t limit, UErrorCode &status)
Set the amount of heap storage available for use by the match backtracking stack.
RegexMatcher & reset(const UnicodeString &input)
Resets this matcher with a new input string.
void setTrace(UBool state)
setTrace Debug function, enable/disable tracing of the matching engine.
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
UBool find(int64_t start, UErrorCode &status)
Resets this RegexMatcher and then attempts to find the next substring of the input string that matche...
RegexMatcher & useTransparentBounds(UBool b)
Sets the transparency of region bounds for this matcher.
const UnicodeString & input() const
Returns the input string being matched.
UnicodeString group(int32_t groupNum, UErrorCode &status) const
Returns a string containing the text captured by the given group during the previous match operation.
const RegexPattern & pattern() const
Returns the pattern that is interpreted by this matcher.
RegexMatcher & reset(int64_t index, UErrorCode &status)
Resets this matcher, and set the current input position.
int32_t regionEnd() const
Reports the end (limit) index (exclusive) of this matcher's region.
UBool lookingAt(int64_t startIndex, UErrorCode &status)
Attempts to match the input string, starting from the specified index, against the pattern.
UText * replaceFirst(UText *replacement, UText *dest, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
UBool find()
Find the next pattern match in the input string.
int64_t start64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
void getMatchCallback(URegexMatchCallback *&callback, const void *&context, UErrorCode &status)
Get the callback function for this Matcher.
RegexMatcher & region(int64_t start, int64_t limit, UErrorCode &status)
Sets the limits of this matcher's region.
RegexPattern * clone() const
Create an exact copy of this RegexPattern object.
static RegexPattern * compile(UText *regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
RegexMatcher * matcher(UErrorCode &status) const
Creates a RegexMatcher that will match against this pattern.
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
virtual ~RegexPattern()
Destructor.
void dumpPattern() const
Dump a compiled pattern.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
static UBool matches(UText *regex, UText *input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
static RegexPattern * compile(UText *regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
bool operator!=(const RegexPattern &that) const
Comparison operator.
Definition regex.h:132
int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UText * patternText(UErrorCode &status) const
Returns the regular expression from which this pattern was compiled.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
static UBool matches(const UnicodeString &regex, const UnicodeString &input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
RegexPattern(const RegexPattern &source)
Copy Constructor.
static RegexPattern * compile(UText *regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
bool operator==(const RegexPattern &that) const
Comparison operator.
int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const
Get the group number corresponding to a named capture group.
int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const
Get the group number corresponding to a named capture group.
RegexPattern()
default constructor.
UnicodeString pattern() const
Returns the regular expression from which this pattern was compiled.
static RegexPattern * compile(const UnicodeString &regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
RegexMatcher * matcher(const UnicodeString &input, UErrorCode &status) const
Creates a RegexMatcher that will match the given input against this pattern.
uint32_t flags() const
Get the URegexpFlag match mode flags that were used when compiling this pattern.
UObject is the common ICU "boilerplate" class.
Definition uobject.h:222
A mutable set of Unicode characters and multicharacter strings.
Definition uniset.h:285
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:303
struct UHashtable UHashtable
Definition msgfmt.h:43
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition parseerr.h:58
UText struct.
Definition utext.h:1328
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:449
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:269
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:96
C API: Regular Expressions.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition uregex.h:1576
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition uregex.h:1502
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:509
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition utypes.h:316