From xemacs-m  Fri Feb 28 06:30:24 1997
Received: from pentagana.sonic.jp (root@tokyo-01-044.gol.com [202.243.51.44])
	by xemacs.org (8.8.5/8.8.5) with ESMTP id GAA02684
	for <xemacs-beta@xemacs.org>; Fri, 28 Feb 1997 06:29:49 -0600 (CST)
Received: (from jhod@localhost) by pentagana.sonic.jp (8.7.1+2.6Wbeta4/3.4W3) id VAA07074; Fri, 28 Feb 1997 21:24:34 +0900
Date: Fri, 28 Feb 1997 21:24:34 +0900
Message-Id: <199702281224.VAA07074@pentagana.sonic.jp>
From: P E Jareth Hein <jhod@po.iijnet.or.jp>
To: XEmacs Beta Mailing List <xemacs-beta@xemacs.org>
Subject: Patches to enable categories
Mime-Version: 1.0 (generated by tm-edit 7.105)
Content-Type: multipart/mixed;
 boundary="Multipart_Fri_Feb_28_21:24:32_1997-1"
Content-Transfer-Encoding: 7bit

--Multipart_Fri_Feb_28_21:24:32_1997-1
Content-Type: text/plain; charset=ISO-2022-JP

Well, after re-writing map-char-table and several other functions to
support my idea of how to get categories to work properly, it suddenly
occured to me how to make the old version work. It was much simpler,
and elegant, and would involve MUCH fewer code changes. Sheesh. That
ought to learn me about not always trying to buck the system... 

Anywho, here are some patches to fix category support under Mule, and
add category regexps. These are needed for some of egg's features, as
well as stuff for Japanese line breaking (called kinsoku '$B6XB'(B') for
auto-fill, etc. I'll get the kinsoku stuff done this weekend I hope.

--Jareth


--Multipart_Fri_Feb_28_21:24:32_1997-1
Content-Type: application/octet-stream; type=patch
Content-Disposition: attachment; filename="category.patch"
Content-Transfer-Encoding: 7bit

Index: lisp/mule/mule-category.el
===================================================================
RCS file: /usr/local/CVS/XEmacs/lisp/mule/mule-category.el,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 mule-category.el
--- mule-category.el	1996/12/21 20:50:23	1.1.1.1
+++ mule-category.el	1997/02/28 09:11:42
@@ -29,6 +29,7 @@
 
 ;; Written by Ben Wing <wing@666.com>.  The initialization code
 ;; at the end of this file comes from Mule.
+;; Some bugfixes by Jareth Hein <jhod@po.iijnet.or.jp>
 
 ;;; Code:
 
@@ -93,13 +94,14 @@
       (put-char-table char-range nil table))
   (map-char-table
    #'(lambda (key value)
-       ;; make sure that this range has a bit-vector assigned to it,
-       ;; and set the appropriate bit in that vector.
+       ;; make sure that this range has a bit-vector assigned to it
        (if (not (bit-vector-p value))
-	   (progn
-	     (setq value (make-bit-vector 95 0))
-	     (put-char-table key value table)))
-       (aset value (- designator 32) 1))
+	   (setq value (make-bit-vector 95 0))
+	 (setq value (copy-sequence value)))
+       ;; set the appropriate bit in that vector.
+       (aset value (- designator 32) 1)
+       ;; put the vector back, thus assuring we have a unique setting for this range
+       (put-char-table key value table))
    table char-range))
 
 (defun char-category-list (char &optional table)
@@ -246,7 +248,8 @@
     (setq i (1+ i)))
   (setq l predefined-category-list)
   (while l
-    (if (nth 2 (car l))
+    (if (and (nth 2 (car l))
+	     (not (defined-category-p (nth 2 (car l)))))
 	(define-category (nth 1 (car l)) (nth 2 (car l))))
     (modify-category-entry (car (car l)) (nth 1 (car l)))
     (setq l (cdr l))))
Index: src/chartab.c
===================================================================
RCS file: /usr/local/CVS/XEmacs/src/chartab.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 chartab.c
--- chartab.c	1996/12/18 22:44:08	1.1.1.1
+++ chartab.c	1997/02/28 09:11:43
@@ -29,6 +29,8 @@
 
    Ben Wing: wrote, for 19.13 (Mule).  Some category table stuff
              loosely based on the original Mule.
+   Jareth Hein: fixed a couple of bugs in the implementation, and
+   	     added regex support for categories with check_category_at
  */
 
 #include <config.h>
@@ -786,24 +788,16 @@
 
 #endif /* MULE */
 
-DEFUN ("get-char-table", Fget_char_table, 2, 2, 0, /*
-Find value for char CH in TABLE.
-*/
-       (ch, table))
+static Lisp_Object
+get_char_table (Emchar ch, struct Lisp_Char_Table *ct) 
 {
-  struct Lisp_Char_Table *ct;
-
-  CHECK_CHAR_TABLE (table);
-  ct = XCHAR_TABLE (table);
-  CHECK_CHAR_COERCE_INT (ch);
-
 #ifdef MULE
   {
     Lisp_Object charset;
     int byte1, byte2;
     Lisp_Object val;
     
-    BREAKUP_CHAR (XCHAR (ch), charset, byte1, byte2);
+    BREAKUP_CHAR (ch, charset, byte1, byte2);
     
     if (EQ (charset, Vcharset_ascii))
       val = ct->ascii[byte1];
@@ -830,10 +824,27 @@
     return val;
   }
 #else /* not MULE */
-  return ct->ascii[(unsigned char) XCHAR (ch)];
+  return ct->ascii[(unsigned char)ch];
 #endif /* not MULE */
 }
 
+
+DEFUN ("get-char-table", Fget_char_table, 2, 2, 0, /*
+Find value for char CH in TABLE.
+*/
+       (ch, table))
+{
+  struct Lisp_Char_Table *ct;
+  Emchar chr;
+  
+  CHECK_CHAR_TABLE (table);
+  ct = XCHAR_TABLE (table);
+  CHECK_CHAR_COERCE_INT (ch);
+  chr = XCHAR(ch);
+  
+  return (get_char_table (chr, ct));
+}
+
 DEFUN ("get-range-char-table", Fget_range_char_table, 2, 3, 0, /*
 Find value for a range in TABLE.
 If there is more than one value, return MULTI (defaults to nil).
@@ -1386,7 +1397,7 @@
 
     case CHARTAB_RANGE_ROW:
       {
-	Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset)];
+	Lisp_Object val = ct->level1[XCHARSET_LEADING_BYTE (range->charset) - MIN_LEADING_BYTE];
 	if (!CHAR_TABLE_ENTRYP (val))
 	  {
 	    struct chartab_range rainj;
@@ -1491,6 +1502,7 @@
   return slarg.retval;
 }
 
+
 
 /************************************************************************/
 /*                         Char table read syntax                       */
@@ -1605,7 +1617,7 @@
 character (including space) in the ASCII charset.  Each category
 is designated by one such character, called a \"category designator\".
 They are specified in a regexp using the syntax \"\\cX\", where X is
-a category designator. (This is not yet implemented.)
+a category designator.
 
 A category table specifies, for each character, the categories that
 the character is in.  Note that a character can be in more than one
@@ -1634,6 +1646,44 @@
   return (obj);
 }   
 
+int
+check_category_at(Emchar ch, Lisp_Object table,
+		  unsigned int designator, unsigned int not)
+{
+  register Lisp_Object temp;
+  struct Lisp_Char_Table *ctbl;  
+#if 1 /* ifdef ERROR_CHECK_TYPECHECK */
+  if (NILP (Fcategory_table_p (table)))
+    signal_simple_error("Expected category table", table);
+#endif
+  ctbl = XCHAR_TABLE(table);
+  temp = get_char_table(ch, ctbl);
+  if (EQ (temp, Qnil)) return not;
+  
+  designator -= ' ';
+  return (bit_vector_bit(XBIT_VECTOR (temp), designator) ? !not : not);
+}
+
+DEFUN ("check-category-at", Fcheck_category_at, 2, 4, 0, /*
+Return t if category of a character at POS includes DESIGNATIOR,
+else return nil. Optional third arg specifies which buffer
+(defaulting to current), and fourth specifies the CATEGORY-TABLE,
+(defaulting to the buffer's category table).
+*/
+       (pos, designator, buffer, category_table))
+{
+  Lisp_Object ctbl;
+  Emchar ch;
+  unsigned int des;
+  CHECK_INT (pos);
+  CHECK_CATEGORY_DESIGNATOR (designator);
+  des = XREALINT(designator);
+  ctbl = check_category_table (category_table, Vstandard_category_table);
+  ch = BUF_FETCH_CHAR (decode_buffer(buffer, 0), pos);
+  return (check_category_at(ch, ctbl, des, 0)
+	  ? Qt : Qnil);
+}
+
 DEFUN ("category-table", Fcategory_table, 0, 1, 0, /*
 Return the current category table.
 This is the one specified by the current buffer, or by BUFFER if it
@@ -1737,7 +1787,7 @@
   DEFSUBR (Fstandard_category_table);
   DEFSUBR (Fcopy_category_table);
   DEFSUBR (Fset_category_table);
-
+  DEFSUBR (Fcheck_category_at);
   DEFSUBR (Fcategory_designator_p);
   DEFSUBR (Fcategory_table_value_p);
 #endif /* MULE */
Index: src/chartab.h
===================================================================
RCS file: /usr/local/CVS/XEmacs/src/chartab.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 chartab.h
--- chartab.h	1996/12/18 22:44:08	1.1.1.1
+++ chartab.h	1997/02/28 09:11:43
@@ -192,6 +192,8 @@
 
 
 #ifdef MULE
+int check_category_at(Emchar ch, Lisp_Object ctbl,
+		      unsigned int designator, unsigned int not);
 
 extern Lisp_Object Qcategory_table_p, Qcategory_designator_p;
 extern Lisp_Object Qcategory_table_value_p;
Index: src/frame-x.c
===================================================================
RCS file: /usr/local/CVS/XEmacs/src/frame-x.c,v
retrieving revision 1.2
diff -u -r1.2 frame-x.c
--- frame-x.c	1997/02/28 08:58:56	1.2
+++ frame-x.c	1997/02/28 09:27:55
@@ -2287,7 +2287,7 @@
 			     XtWindow (FRAME_X_SHELL_WIDGET (f)),
 			     &xwa))
     result = 0;
-#ifdef 0
+#if 0
   /* This is wrong. Under XEmacs visible means "Mapped" not really visible.
      Under virtual window managers this causes freezes. JV
 
Index: src/regex.c
===================================================================
RCS file: /usr/local/CVS/XEmacs/src/regex.c,v
retrieving revision 1.2
diff -u -r1.2 regex.c
--- regex.c	1997/02/28 08:59:08	1.2
+++ regex.c	1997/02/28 09:11:43
@@ -534,6 +534,7 @@
 
 	/* Matches any character whose syntax is not that specified.  */
   notsyntaxspec
+
 #endif /* emacs */
 
 #ifdef MULE
@@ -547,6 +548,11 @@
 
   charset_mule_not   /* Same parameters as charset_mule, but match any
 			character that is not one of those specified.  */
+
+  /* 97/2/17 jhod: The following two were merged back in from the Mule
+     2.3 code to enable some language specific processing */
+  ,categoryspec,     /* Matches entries in the character category tables */
+  notcategoryspec    /* The opposite of the above */
 #endif
        
 } re_opcode_t;
@@ -919,6 +925,22 @@
 	  mcnt = *p++;
 	  printf ("/%d", mcnt);
 	  break;
+	  
+#ifdef MULE
+/* 97/2/17 jhod Mule category patch */  
+	case categoryspec:
+	  printf ("/categoryspec");
+	  mcnt = *p++;
+	  printf ("/%d", mcnt);
+	  break;
+
+	case notcategoryspec:
+	  printf ("/notcategoryspec");
+	  mcnt = *p++;
+	  printf ("/%d", mcnt);
+	  break;
+/* end of category patch */  
+#endif /* MULE */
 #endif /* emacs */
 
 	case wordchar:
@@ -972,6 +994,7 @@
   printf ("not_eol: %d\t", bufp->not_eol);
   printf ("syntax: %d\n", bufp->syntax);
   /* Perhaps we should print the translate table?  */
+  /* and maybe the category table? */
 }
 
 
@@ -1065,6 +1088,7 @@
 #endif
 #ifdef MULE
     "Ranges may not span charsets",		/* REG_ERANGESPAN */
+    "Invalid category designator",		/* REG_ECATEGORY */
 #endif
   };
 
@@ -2835,6 +2859,26 @@
 		FREE_STACK_RETURN (REG_ESYNTAX);
               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
               break;
+
+#ifdef MULE
+/* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
+	    case 'c':	
+	      laststart = b;
+	      PATFETCH_RAW (c);
+	      if (c < 32 || c > 127)
+		FREE_STACK_RETURN (REG_ECATEGORY);
+	      BUF_PUSH_2 (categoryspec, c);
+	      break;
+
+	    case 'C':
+	      laststart = b;
+	      PATFETCH_RAW (c);
+	      if (c < 32 || c > 127)
+		FREE_STACK_RETURN (REG_ECATEGORY);
+	      BUF_PUSH_2 (notcategoryspec, c);
+	      break;
+/* end of category patch */
+#endif /* MULE */
 #endif /* emacs */
 
 
@@ -3588,6 +3632,14 @@
 #endif /* ! MULE */
 	  break;
 
+#ifdef MULE
+/* 97/2/17 jhod category patch */
+	case categoryspec:
+	case notcategoryspec:
+	  bufp->can_be_null = 1;
+	  return;
+/* end if category patch */
+#endif /* MULE */
 
       /* All cases after this match the empty string.  These end with
          `continue'.  */
@@ -5466,6 +5518,30 @@
 	  should_succeed = 0;
 	  goto matchornotsyntax;
 
+#ifdef MULE
+/* 97/2/17 jhod Mule category code patch */
+	case categoryspec:
+	  should_succeed = 1;
+        matchornotcategory:
+	  {
+	    Emchar emch;
+
+	    mcnt = *p++;
+	    PREFETCH ();
+	    emch = charptr_emchar ((CONST Bufbyte *) d);
+	    INC_CHARPTR (d);
+	    if (check_category_at(emch, regex_emacs_buffer->category_table,
+				  mcnt, should_succeed))
+	      goto fail;
+	    SET_REGS_MATCHED ();
+	  }
+	  break;
+	  
+	case notcategoryspec:
+	  should_succeed = 0;
+	  goto matchornotcategory;
+/* end of category patch */
+#endif /* MULE */
 #else /* not emacs */
 	case wordchar:
           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
Index: src/regex.h
===================================================================
RCS file: /usr/local/CVS/XEmacs/src/regex.h,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 regex.h
--- regex.h	1996/12/18 22:44:03	1.1.1.1
+++ regex.h	1997/02/28 09:11:43
@@ -279,6 +279,7 @@
 #endif
 #ifdef MULE
   ,REG_ERANGESPAN	/* Ranges may not span charsets. */
+  ,REG_ECATEGORY	/* Invalid category designator */
 #endif
 } reg_errcode_t;
 
Index: src/search.c
===================================================================
RCS file: /usr/local/CVS/XEmacs/src/search.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 search.c
--- search.c	1996/12/18 22:44:03	1.1.1.1
+++ search.c	1997/02/28 09:11:43
@@ -1027,6 +1027,10 @@
 	    case '|': case '(': case ')': case '`': case '\'': case 'b':
 	    case 'B': case '<': case '>': case 'w': case 'W': case 's':
 	    case 'S': case '=':
+#ifdef MULE
+	    /* 97/2/25 jhod Added for category matches */
+	    case 'c': case 'C':
+#endif /* MULE */
 	    case '1': case '2': case '3': case '4': case '5':
 	    case '6': case '7': case '8': case '9':
 	      return 0;

--Multipart_Fri_Feb_28_21:24:32_1997-1--

