    # file: sent.txt         Sentence Boundary Rules.
    #
    

    # Separators are line or paragraph ends that will attach to the end of sentences.
    #   $Sep    - one line- or paragraph-ending character: LF, CR, NEL (U+0085),
    #             LINE SEPARATOR (U+2028) or PARAGRAPH SEPARATOR (U+2029).
    #   $SepSeq - one separator as it occurs in text: either a single $Sep character
    #             or the two-character sequence CR LF (U+000D U+000A).
    #   $Sp     - space separators (general category Zs) with any $Sep members removed.
    #             NOTE(review): none of the $Sep characters are in Zs, so the
    #             subtraction looks like a safeguard rather than a functional
    #             exclusion - confirm.
    $Sep    =[\n \r \u0085 \u2028 \u2029];
    $SepSeq = $Sep | \u000d\u000a;
    $Sp    = [[:Zs:] - $Sep];
    
    # $ATerm contains ambiguous terminators, characters that may or may not terminate
    #        a sentence depending on the context.
    #        Members: U+002E FULL STOP, U+0589 ARMENIAN FULL STOP,
    #                 U+3001 IDEOGRAPHIC COMMA.
    #        NOTE(review): U+3001 is a comma, not a full stop - confirm that it is
    #        really meant to be treated as a (possible) sentence terminator.
    # $Term  contains $ATerm + all characters that unambiguously end sentences.
    #        Additional members: U+0021 EXCLAMATION MARK, U+003F QUESTION MARK,
    #        U+037E GREEK QUESTION MARK, U+061F ARABIC QUESTION MARK,
    #        U+06D4 ARABIC FULL STOP, U+203C DOUBLE EXCLAMATION MARK,
    #        U+203D INTERROBANG, U+3002 IDEOGRAPHIC FULL STOP,
    #        U+2048 QUESTION EXCLAMATION MARK, U+2049 EXCLAMATION QUESTION MARK,
    #        U+0964 DEVANAGARI DANDA.
    #
    $ATerm = [\u002e \u0589 \u3001];   # same as Terminal_Punctuation2 from TR29
    $Term  = [$ATerm \u0021 \u003f \u037e \u061f \u06d4 \u203c \u203d
			     \u3002 \u2048 \u2049
			     \u0964];      # TODO:  these (this line) not yet decided in TR29.
		
    # Character classes used to decide whether an ambiguous terminator ($ATerm)
    # really ends a sentence.

    # Lowercase letters (Ll) plus modifier symbols (Sk).
    $Lower     = [[:Ll:] [:Sk:]];
    # Uppercase (Lu) and titlecase (Lt) letters.
    $Upper     = [[:Lu:] [:Lt:]];
    # Everything that is neither a letter (any L category) nor a member of $Term.
    $NotLetter = [^[:L:] $Term];
    # Opening punctuation (Ps).
    # NOTE(review): $Open is not referenced by any rule in this file as shown.
    $Open      = [:Ps:];
    # Closing punctuation (Pe) plus the ASCII double quote and apostrophe.
    $Close     = [[:Pe:] \" \'];
    
    #
    #  Combining chars.   Copied from UNIDATA/DerivedCoreProperties.txt
    #
    #  $Extend is used by the rules below so that combining marks may follow a
    #  terminator or the letters of a word without interrupting the pattern.
    #
    #  The set is a literal list of code point ranges, i.e. a snapshot: characters
    #  added by a different version of the Unicode data are not picked up
    #  automatically and the list has to be regenerated by hand.
    #  Several adjacent ranges are listed separately (for example \u0901-\u0902
    #  followed by \u0903); inside a set this is equivalent to one merged range.
    #
    $Extend     = 
    	[\u0300-\u034E \u0360-\u036F \u0483-\u0486 \u0488-\u0489 \u0591-\u05A1 \u05A3-\u05B9
    	\u05BB-\u05BD \u05BF   \u05C1-\u05C2 \u05C4   \u064B-\u0655 \u0670   \u06D6-\u06DC
    	\u06DE   \u06DF-\u06E4 \u06E7-\u06E8 \u06EA-\u06ED \u0711   \u0730-\u074A
    	\u07A6-\u07B0 \u0901-\u0902 \u0903   \u093C   \u093E-\u0940 \u0941-\u0948
    	\u0949-\u094C \u0951-\u0954 \u0962-\u0963 \u0981   \u0982-\u0983 \u09BC
    	\u09BE-\u09C0 \u09C1-\u09C4 \u09C7-\u09C8 \u09CB-\u09CC \u09D7   \u09E2-\u09E3
    	\u0A02   \u0A3C   \u0A3E-\u0A40 \u0A41-\u0A42 \u0A47-\u0A48 \u0A4B-\u0A4C
    	\u0A70-\u0A71 \u0A81-\u0A82 \u0A83   \u0ABC   \u0ABE-\u0AC0 \u0AC1-\u0AC5
    	\u0AC7-\u0AC8 \u0AC9   \u0ACB-\u0ACC \u0B01   \u0B02-\u0B03 \u0B3C   \u0B3E
    	\u0B3F   \u0B40   \u0B41-\u0B43 \u0B47-\u0B48 \u0B4B-\u0B4C \u0B56   \u0B57
    	\u0B82   \u0BBE-\u0BBF \u0BC0   \u0BC1-\u0BC2 \u0BC6-\u0BC8 \u0BCA-\u0BCC \u0BD7
    	\u0C01-\u0C03 \u0C3E-\u0C40 \u0C41-\u0C44 \u0C46-\u0C48 \u0C4A-\u0C4C
    	\u0C55-\u0C56 \u0C82-\u0C83 \u0CBE   \u0CBF   \u0CC0-\u0CC4 \u0CC6
    	\u0CC7-\u0CC8 \u0CCA-\u0CCB \u0CCC   \u0CD5-\u0CD6 \u0D02-\u0D03 \u0D3E-\u0D40
    	\u0D41-\u0D43 \u0D46-\u0D48 \u0D4A-\u0D4C \u0D57   \u0D82-\u0D83 \u0DCF-\u0DD1
    	\u0DD2-\u0DD4 \u0DD6   \u0DD8-\u0DDF \u0DF2-\u0DF3 \u0E31   \u0E34-\u0E39
    	\u0E47-\u0E4E \u0EB1   \u0EB4-\u0EB9 \u0EBB-\u0EBC \u0EC8-\u0ECD \u0F18-\u0F19
    	\u0F35   \u0F37   \u0F39   \u0F3E-\u0F3F \u0F71-\u0F7E \u0F7F   \u0F80-\u0F84
    	\u0F86-\u0F87 \u0F90-\u0F97 \u0F99-\u0FBC \u0FC6   \u102C   \u102D-\u1030 \u1031
    	\u1032   \u1036-\u1037 \u1038   \u1056-\u1057 \u1058-\u1059 \u1712-\u1714
    	\u1732-\u1734 \u1752-\u1753 \u1772-\u1773 \u17B4-\u17B6 \u17B7-\u17BD
    	\u17BE-\u17C5 \u17C6   \u17C7-\u17C8 \u17C9-\u17D1 \u17D3   \u180B-\u180D
    	\u18A9   \u20D0-\u20DC \u20DD-\u20E0 \u20E1   \u20E2-\u20E4 \u20E5-\u20EA
    	\u302A-\u302F \u3099-\u309A \uFB1E   \uFE00-\uFE0F \uFE20-\uFE23 \uFF9E-\uFF9F
    	\U0001D165-\U0001D166 \U0001D167-\U0001D169 \U0001D16D-\U0001D172 
    	\U0001D17B-\U0001D182 \U0001D185-\U0001D18B \U0001D1AA-\U0001D1AD];


    # Building blocks for the forward sentence rule.
    #
    # $EndSequence: the tail of a sentence - any run of non-terminators, one
    #   terminator, then any mix of closing punctuation, further terminators and
    #   combining marks, then optional spaces and at most one separator sequence.
    $EndSequence       = [^$Term]* $Term ($Close | $Term | $Extend)* $Sp* $SepSeq?;
    # $LowerWordFollows: an ambiguous terminator (plus trailing closers, spaces and
    #   an optional separator) that is followed, after any non-letters, by a
    #   lowercase character. The main rule consumes this as part of the sentence
    #   instead of ending the sentence there.
    $LowerWordFollows  = [^$Term]* $ATerm $Close* $Sp* $SepSeq? $NotLetter* $Lower;
    # $UpperWordPrecedes: an ambiguous terminator that directly follows a word made
    #   of one uppercase letter and then lowercase/combining characters (e.g. "Mr.").
    #   Also consumed by the main rule as part of the sentence.
    #   NOTE(review): as written this matches any capitalized word directly before
    #   an $ATerm, not only abbreviations - confirm that this is intended.
    $UpperWordPrecedes = [^$Term]* $Upper ($Lower | $Extend)* $ATerm $Close* $Sp* $SepSeq?;

    
    # Main forward rule: one sentence is any number of "ambiguous terminator that
    # does not end the sentence" segments ($LowerWordFollows / $UpperWordPrecedes)
    # followed by a real end sequence.
    ($LowerWordFollows | $UpperWordPrecedes)*  $EndSequence;
    
    #
    # In cases where the input text ends without a normal end-of-sentence sequence,
    #   this rule will match whatever text is there.
    #   (It matches a run of characters that contains no $Term character.)
    #
    [^$Term]*;
     
     
     #
     #  Reverse Rules
     #
     #  Rules prefixed with '!' are the reverse rules; the sub-expressions below are
     #  written in the order the characters are seen when moving backwards through
     #  the text, i.e. they mirror the forward definitions.
     #  NOTE(review): the exact semantics of '!' rules depend on the ICU rule
     #  compiler version in use - confirm against the ICU break-rule documentation.
     #
     #  $RevEndSequence: (seen backwards) non-terminators, then any mix of
     #    terminators, closers and combining marks, then further non-terminators.
     $RevEndSequence           = [^$Term]* ($Term | $Close | $Extend)* [^$Term]*;
     #  $ReverseLowerWordFollows: mirror of $LowerWordFollows - a lowercase
     #    character, then any closers/spaces/separators/combining marks/non-letters,
     #    then the ambiguous terminator and the non-terminators before it.
     #    Unlike the forward pattern it uses single $Sep characters (not $SepSeq)
     #    and also allows $Extend between the lowercase character and the terminator.
     $ReverseLowerWordFollows  = $Lower ($Close | $Sp | $Sep | $Extend | $NotLetter)* $ATerm [^$Term]*;
     #  $ReverseUpperWordPrecedes: mirror of $UpperWordPrecedes - the ambiguous
     #    terminator, the lowercase/combining tail of the word, its uppercase first
     #    letter, then the non-terminators before it.
     $ReverseUpperWordPrecedes = $ATerm ($Lower | $Extend)* $Upper  [^$Term]*;
     
     #  Main reverse rule: optional end sequence, any number of non-terminating
     #  $ATerm contexts, and optionally one more terminator.
     ! $RevEndSequence? ($ReverseLowerWordFollows | $ReverseUpperWordPrecedes)* $Term?;
     #  Fallback reverse rule: matches any single character.
     !.;
 
