From xemacs-m  Sat Jun 28 14:50:05 1997
Received: from jagor.srce.hr (hniksic@jagor.srce.hr [161.53.2.130])
	by xemacs.org (8.8.5/8.8.5) with ESMTP id OAA18864
	for <xemacs-beta@xemacs.org>; Sat, 28 Jun 1997 14:50:04 -0500 (CDT)
Received: (from hniksic@localhost)
          by jagor.srce.hr (8.8.5/8.8.4)
	  id VAA08845; Sat, 28 Jun 1997 21:50:04 +0200 (MET DST)
To: XEmacs Developers <xemacs-beta@xemacs.org>
Subject: Abbreviations containing non-word characters
X-Attribution: Hrv
X-Face: Mie8:rOV<\c/~z{s.X4A{!?vY7{drJ([U]0O=W/<W*SMo/Mv:58:*_y~ki>xDi&N7XG
        KV^$k0m3Oe/)'e%3=$PCR&3ITUXH,cK>]bci&<qQ>Ff%x_>1`T(+M2Gg/fgndU%k*ft
        [(7._6e0n-V%|%'[c|q:;}td$#INd+;?!-V=c8Pqf}3J
From: Hrvoje Niksic <hniksic@srce.hr>
Date: 28 Jun 1997 21:50:04 +0200
Message-ID: <kig205mv76b.fsf@jagor.srce.hr>
Lines: 541
X-Mailer: Gnus v5.4.59/XEmacs 20.3(beta9) - "Sofia"

Finally -- here is the patch that should fully implement this
feature.  It is a drop-in replacement for the old function, and
everything should (hmm, hmm) continue working as before.

After applying this patch, there are two ways of expanding an abbrev:
the old way, looking it up in the obarray, and the new way, mapping
through obarray and comparing the abbrevs with the text before point.
Although the second way is an order of magnitude slower, there is no
noticable slowdown (as tested in an XEmacs compiled without any
debugging, with 10,000 random abbrevs hanging in the obarray.)

By default, XEmacs uses the obarray lookup.  However, the first time
you define an abbreviation with a non-word character, `define-abbrev'
will flag that obarray for the second algorithm (the flag is set by
interning a symbol ` ' -- suggestion by RMS.)  From that moment on,
the new algorithm is used.  Although I could have used the new one all
the time, I felt somehow silly having an obarray and never looking it
up.

Apply the patch, byte-compile abbrev.el (because of the change in
`define-abbrev'), and build XEmacs.  And *do not* let it corrupt your
precious bodily fluids!


1997-06-28  Hrvoje Niksic  <hniksic@srce.hr>

	* abbrev.c (abbrev_match): New function.
	(abbrev_oblookup): New function.
	(obarray_has_blank_p): New function.
	(abbrev_count_case): New function.
	(Fexpand_abbrev): Use them.  Allow abbreviations to contain
	arbitrary characters.


1997-06-28  Hrvoje Niksic  <hniksic@srce.hr>

	* modes/abbrev.el (define-abbrev): If NAME contains a non-word
 	character, intern a ` ' symbol in the obarray.


--- etc/NEWS.orig	Sat Jun 28 21:20:49 1997
+++ etc/NEWS	Sat Jun 28 21:24:57 1997
@@ -138,6 +138,13 @@
 creating a new frame with `C-x 5 2' also raises and selects that
 frame.  The behavior of window system frames is unchanged.
 
+** Abbreviations can now contain non-word characters.
+
+This means that it is finally possible to do such simple things as
+define `#if' to expand to `#include' in C mode, `s-c-b' to
+`save-current-buffer' in Lisp mode, `call/cc' to
+`call-with-current-continuation' in Scheme mode, etc.
+
 ** `C-x n d' now runs the new command `narrow-to-defun',
 which narrows the accessible parts of the buffer to just
 the current defun.
--- lisp/modes/abbrev.el.orig	Sat Jun 28 16:43:07 1997
+++ lisp/modes/abbrev.el	Sat Jun 28 19:09:53 1997
@@ -30,9 +30,11 @@
 ;;; Code:
 
 ;jwz: this is preloaded so don't ;;;###autoload
-(defconst only-global-abbrevs nil "\
-*t means user plans to use global abbrevs only.
-Makes the commands to define mode-specific abbrevs define global ones instead.")
+(defcustom only-global-abbrevs nil "\
+*Non-nil means user plans to use global abbrevs only.
+Makes the commands to define mode-specific abbrevs define global ones instead."
+  :type 'boolean
+  :group 'abbrev)
 
 ;;; XEmacs: the following block of code is not in FSF
 (defvar abbrev-table-name-list '()
@@ -85,11 +87,14 @@
   (let* ((sym (intern name table))
          (oexp (and (boundp sym) (symbol-value sym)))
          (ohook (and (fboundp sym) (symbol-function sym))))
-    (if (not (and (equal ohook hook)
-                  (stringp oexp)
-                  (stringp expansion)
-                  (string-equal oexp expansion)))
-        (setq abbrevs-changed t))
+    (unless (and (equal ohook hook)
+		 (stringp oexp)
+		 (stringp expansion)
+		 (string-equal oexp expansion))
+      (setq abbrevs-changed t)
+      ;; If there is a non-word character in the string, set the flag.
+      (if (string-match "\\W" name)
+	  (set (intern " " table) nil)))
     (set sym expansion)
     (fset sym hook)
     (setplist sym (or count 0))
--- src/abbrev.c.orig	Thu Jun 26 16:03:23 1997
+++ src/abbrev.c	Sat Jun 28 21:03:30 1997
@@ -26,6 +26,7 @@
    FSF: Original version; a long time ago.
    JWZ or Mly: Mostly moved into Lisp; maybe 1992.
    Ben Wing: Some changes for Mule for 19.12.
+   Hrvoje Niksic: Largely rewritten in June 1997.
 */
 
 /* This file has been Mule-ized. */
@@ -70,158 +71,319 @@
 /* Character address of start of last abbrev expanded */
 int last_abbrev_point;
 
+Lisp_Object oblookup (Lisp_Object, CONST Bufbyte *, Bytecount);
+
 /* Hook to run before expanding any abbrev.  */
 Lisp_Object Vpre_abbrev_expand_hook, Qpre_abbrev_expand_hook;
 
 
-/* Expand the word before point, if it is an abbrev.
-   Returns Qt if an expansion is done. */
+/* Match the buffer text against names of symbols in obarray.  Returns
+   the matching symbol, or 0 if not found.  */
+
+static struct Lisp_Symbol *
+abbrev_match (struct buffer *buf, Lisp_Object obarray)
+{
+  Bufpos point = BUF_PT (buf);
+  Bufpos maxlen = point - BUF_BEGV (buf);
+  Charcount idx;
+
+  struct Lisp_Char_Table *chartab = XCHAR_TABLE (buf->mirror_syntax_table);
+  struct Lisp_String *abbrev;
+  struct Lisp_Vector *obvec;
+  struct Lisp_Symbol *sym;
+  Charcount abbrev_length;
+  Lisp_Object tail;
+  int i, found;
+
+  CHECK_VECTOR (obarray);
+  obvec = XVECTOR (obarray);
+
+  /* The obarray-traversing code is copied from `map_obarray'. */
+  found = 0;
+  for (i = vector_length (obvec) - 1; i >= 0; i--)
+    {
+      tail = vector_data (obvec)[i];
+      if (SYMBOLP (tail))
+	while (1)
+	  {
+	    sym = XSYMBOL (tail);
+	    if (UNBOUNDP (symbol_value (sym)) || NILP (symbol_value (sym)))
+	      {
+		/* The symbol value of nil means that abbrev got
+                   undefined. */
+		goto next;
+	      }
+	    abbrev = symbol_name (sym);
+	    abbrev_length = string_char_length (abbrev);
+	    if (abbrev_length > maxlen)
+	      {
+		/* This abbrev is too large -- it wouldn't fit. */
+		goto next;
+	      }
+	    /* If `bar' is an abbrev, and a user presses `fubar<SPC>',
+	       we don't normally want to expand it.  OTOH, if the
+	       abbrev begins with non-word syntax, it is OK to
+	       abbreviate it anywhere.  */
+	    if (abbrev_length < maxlen && abbrev_length > 0
+		&& (WORD_SYNTAX_P (chartab, string_char (abbrev, 0)))
+		&& (WORD_SYNTAX_P (chartab,
+				   BUF_FETCH_CHAR (buf, point
+						   - (abbrev_length + 1)))))
+	      {
+		goto next;
+	      }
+	    /* Match abbreviation string against buffer text.  */
+	    for (idx = abbrev_length - 1; idx >= 0; idx--)
+	      {
+		if (DOWNCASE (buf, BUF_FETCH_CHAR (buf, point -
+						   (abbrev_length - idx)))
+		    != DOWNCASE (buf, string_char (abbrev, idx)))
+		  break;
+	      }
+	    if (idx < 0)
+	      {
+		found = 1;
+		break;
+	      }
+	  next:
+	    sym = symbol_next (XSYMBOL (tail));
+	    if (!sym)
+	      break;
+	    XSETSYMBOL (tail, sym);
+	  } /* while */
+      if (found)
+	break;
+    } /* for */
+
+  return found ? sym : 0;
+}
+
+/* Take the word before point, and look it up in OBARRAY, and return
+   the symbol (or nil).  This used to be the default method of
+   searching, with the obvious limitation that the abbrevs may consist
+   only of word characters.  It is an order of magnitued faster than
+   the proper `abbrev_match', but then again, vi is an order of
+   magnitude faster than Emacs.  */
+static struct Lisp_Symbol *
+abbrev_oblookup (struct buffer *buf, Lisp_Object obarray)
+{
+  Bufpos wordstart, wordend;
+  Bufbyte *word, *p;
+  Bytecount idx;
+  Lisp_Object lookup;
+
+  CHECK_VECTOR (obarray);
+
+  if (!NILP (Vabbrev_start_location))
+    {
+      wordstart = get_buffer_pos_char (buf, Vabbrev_start_location,
+				       GB_COERCE_RANGE);
+      Vabbrev_start_location = Qnil;
+      if (wordstart != BUF_ZV (buf)
+	  && BUF_FETCH_CHAR (buf, wordstart) == '-')
+	{
+	  buffer_delete_range (buf, wordstart, wordstart + 1, 0);
+	}
+      wordend = BUF_PT (buf);
+    }
+  else
+    {
+      Bufpos point = BUF_PT (buf);
+
+      wordstart = scan_words (buf, point, -1);
+      if (!wordstart)
+	return 0;
+
+      wordend = scan_words (buf, wordstart, 1);
+      if (!wordend)
+	return 0;
+      if (wordend > BUF_ZV (buf))
+	wordend = BUF_ZV (buf);
+      if (wordend > point)
+	wordend = point;
+      /* Unlike the original function, we allow expansion only after
+	 the abbrev, not preceded by a number of spaces.  This is
+	 because of consistency with abbrev_match. */
+      if (wordend < point)
+	return 0;
+      if (wordend <= wordstart)
+	return 0;
+    }
+
+  p = word = (Bufbyte *) alloca (MAX_EMCHAR_LEN * (wordend - wordstart));
+  for (idx = wordstart; idx < wordend; idx++)
+    {
+      Emchar c = BUF_FETCH_CHAR (buf, idx);
+      if (UPPERCASEP (buf, c))
+	c = DOWNCASE (buf, c);
+      p += set_charptr_emchar (p, c);
+    }
+  lookup = oblookup (obarray, word, p - word);
+  if (SYMBOLP (lookup) && !NILP (symbol_value (XSYMBOL (lookup))))
+    return XSYMBOL (lookup);
+  else
+    return NULL;
+}
+
+/* Return non-zero if OBARRAY contains an interned symbol ` '. */
+static int
+obarray_has_blank_p (Lisp_Object obarray)
+{
+  Lisp_Object lookup;
+
+  lookup = oblookup (obarray, (Bufbyte *)" ", 1);
+  return SYMBOLP (lookup);
+}
+
+/* Analyze case in the buffer substring, and report it.  */
+static void
+abbrev_count_case (struct buffer *buf, Bufpos pos, Charcount length,
+		   int *lccount, int *uccount)
+{
+  Emchar c;
+
+  *lccount = *uccount = 0;
+  while (length--)
+    {
+      c = BUF_FETCH_CHAR (buf, pos);
+      if (UPPERCASEP (buf, c))
+	++*uccount;
+      else if (LOWERCASEP (buf, c))
+	++*lccount;
+      ++pos;
+    }
+}
 
 DEFUN ("expand-abbrev", Fexpand_abbrev, 0, 0, "", /*
-Expand the abbrev before point, if there is an abbrev there.
+Expand the abbrev before point, if any.
 Effective when explicitly called even when `abbrev-mode' is nil.
 Returns t if expansion took place.
 */
        ())
 {
   /* This function can GC */
-  REGISTER Bufbyte *buffer, *p;
-  REGISTER Bufpos wordstart, wordend, idx;
-  Charcount whitecnt;
-  Charcount uccount = 0, lccount = 0;
-  REGISTER Lisp_Object sym;
-  Lisp_Object expansion, hook, value;
   struct buffer *buf = current_buffer;
-  Lisp_Object lbuf;
   int oldmodiff = BUF_MODIFF (buf);
+  Lisp_Object pre_modiff_p;
+  Bufpos point;			/* position of point */
+  Bufpos abbrev_start;		/* position of abbreviation beginning */
+
+  struct Lisp_Symbol *(*fun) (struct buffer *, Lisp_Object);
+
+  struct Lisp_Symbol *abbrev_symbol;
+  struct Lisp_String *abbrev_string;
+  Lisp_Object expansion, count, hook;
+  Charcount abbrev_length, idx;
+  int lccount, uccount;
 
-  XSETBUFFER (lbuf, buf);
   run_hook (Qpre_abbrev_expand_hook);
   /* If the hook changes the buffer, treat that as having "done an
      expansion".  */
-  value = (BUF_MODIFF (buf) != oldmodiff ? Qt : Qnil);
+  pre_modiff_p = (BUF_MODIFF (buf) != oldmodiff ? Qt : Qnil);
 
-  wordstart = 0;
+  abbrev_symbol = NULL;
   if (!BUFFERP (Vabbrev_start_location_buffer) ||
       XBUFFER (Vabbrev_start_location_buffer) != buf)
     Vabbrev_start_location = Qnil;
-  if (!NILP (Vabbrev_start_location))
+  /* We use the more general `abbrev_match' if the obarray blank flag
+     is not set, and Vabbrev_start_location is nil.  Otherwise, use
+     `abbrev_oblookup'. */
+#define MATCHFUN(tbl) ((obarray_has_blank_p (tbl)		 \
+			&& NILP (Vabbrev_start_location))	 \
+		       ? abbrev_match : abbrev_oblookup)
+  if (!NILP (buf->abbrev_table))
     {
-      wordstart = get_buffer_pos_char (buf, Vabbrev_start_location, GB_COERCE_RANGE);
-      Vabbrev_start_location = Qnil;
-      if (wordstart < BUF_BEGV (buf) || wordstart > BUF_ZV (buf))
-        wordstart = 0;
-      if (wordstart && wordstart != BUF_ZV (buf) &&
-          BUF_FETCH_CHAR (buf, wordstart) == '-')
-	buffer_delete_range (buf, wordstart, wordstart + 1, 0);
-    }
-  if (!wordstart)
-    wordstart = scan_words (buf, BUF_PT (buf), -1);
-
-  if (!wordstart)
-    return value;
-
-  wordend = scan_words (buf, wordstart, 1);
-  if (!wordend)
-    return value;
-
-  if (wordend > BUF_PT (buf))
-    wordend = BUF_PT (buf);
-  whitecnt = BUF_PT (buf) - wordend;
-  if (wordend <= wordstart)
-    return value;
-
-  p = buffer = (Bufbyte *) alloca (MAX_EMCHAR_LEN*(wordend - wordstart));
-
-  for (idx = wordstart; idx < wordend; idx++)
+      fun = MATCHFUN (buf->abbrev_table);
+      abbrev_symbol = fun (buf, buf->abbrev_table);
+    }
+  if (!abbrev_symbol && !NILP (Vglobal_abbrev_table))
     {
-      REGISTER Emchar c = BUF_FETCH_CHAR (buf, idx);
-      if (UPPERCASEP (buf, c))
-	c = DOWNCASE (buf, c), uccount++;
-      else if (! NOCASEP (buf, c))
-	lccount++;
-      p += set_charptr_emchar (p, c);
+      fun = MATCHFUN (Vglobal_abbrev_table);
+      abbrev_symbol = fun (buf, Vglobal_abbrev_table);
     }
+  if (!abbrev_symbol)
+    return pre_modiff_p;
 
-  if (VECTORP (buf->abbrev_table))
-    sym = oblookup (buf->abbrev_table,
-		    buffer,
-		    p - buffer);
+  /* NOTE: we hope that `pre-abbrev-expand-hook' didn't do something
+     nasty, such as changed (or killed) the buffer.  */
+  point = BUF_PT (buf);
+
+  /* OK, we're out of the must-be-fast part.  An abbreviation matched.
+     Now find the parameters, insert the expansion, and make it all
+     look pretty. */
+  abbrev_string = symbol_name (abbrev_symbol);
+  abbrev_length = string_char_length (abbrev_string);
+  abbrev_start = point - abbrev_length;
+
+  expansion = symbol_value (abbrev_symbol);
+  CHECK_STRING (expansion);
+
+  count = symbol_plist (abbrev_symbol); /* Gag */
+  if (NILP (count))
+    count = make_int (0);
   else
-    sym = Qzero;
-  if (INTP (sym) || NILP (XSYMBOL (sym)->value))
-    sym = oblookup (Vglobal_abbrev_table,
-		    buffer,
-		    p - buffer);
-  if (INTP (sym) || NILP (XSYMBOL (sym)->value))
-    return value;
-
-  if (INTERACTIVE && !EQ (minibuf_window, Fselected_window (Qnil)))
-    {
-      /* Add an undo boundary, in case we are doing this for
-         a self-inserting command which has avoided making one so far.  */
-      BUF_SET_PT (buf, wordend);
-      Fundo_boundary ();
-    }
-  BUF_SET_PT (buf, wordstart);
-  Vlast_abbrev_text =
-    make_string_from_buffer (buf, wordstart, wordend - wordstart);
-  buffer_delete_range (buf, wordstart, wordend, 0);
+    CHECK_NATNUM (count);
+  symbol_plist (abbrev_symbol) = make_int (1 + XINT (count));
 
-  /* Now sym is the abbrev symbol. */
-  Vlast_abbrev = sym;
-  last_abbrev_point = wordstart;
-
-  if (INTP (XSYMBOL (sym)->plist))
-    XSETINT (XSYMBOL (sym)->plist,
-	     XINT (XSYMBOL (sym)->plist) + 1);	/* Increment use count */
+  /* Count the case in the original text. */
+  abbrev_count_case (buf, abbrev_start, abbrev_length, &lccount, &uccount);
 
-  expansion = XSYMBOL (sym)->value;
+  /* Remember the last abbrev text, location, etc. */
+  XSETSYMBOL (Vlast_abbrev, abbrev_symbol);
+  Vlast_abbrev_text =
+    make_string_from_buffer (buf, abbrev_start, abbrev_length);
+  last_abbrev_point = abbrev_start;
+
+  /* Add an undo boundary, in case we are doing this for a
+     self-inserting command which has avoided making one so far.  */
+  if (INTERACTIVE)
+    Fundo_boundary ();
+
+  /* Remove the abbrev */
+  buffer_delete_range (buf, abbrev_start, point, 0);
+  /* And insert the expansion. */
   buffer_insert_lisp_string (buf, expansion);
-  BUF_SET_PT (buf, BUF_PT (buf) + whitecnt);
+  point = BUF_PT (buf);
 
+  /* Now fiddle with the case. */
   if (uccount && !lccount)
     {
       /* Abbrev was all caps */
-      /* If expansion is multiple words, normally capitalize each word */
-      /* This used to be if (!... && ... >= ...) Fcapitalize; else Fupcase
-	 but Megatest 68000 compiler can't handle that */
-      if (!abbrev_all_caps)
-	if (scan_words (buf, BUF_PT (buf), -1) >
-	    scan_words (buf, wordstart, 1))
-	  {
-	    Fupcase_initials_region (make_int (wordstart),
-				     make_int (BUF_PT (buf)),
-				     lbuf);
-	    goto caped;
-	  }
-      /* If expansion is one word, or if user says so, upcase it all. */
-      Fupcase_region (make_int (wordstart), make_int (BUF_PT (buf)),
-		      lbuf);
-    caped: ;
+      if (!abbrev_all_caps
+	  && scan_words (buf, point, -1) > scan_words (buf, abbrev_start, 1))
+	{
+	  Fupcase_initials_region (make_int (abbrev_start), make_int (point),
+				   make_buffer (buf));
+	}
+      else
+	{
+	  /* If expansion is one word, or if user says so, upcase it all. */
+	  Fupcase_region (make_int (abbrev_start), make_int (point),
+			  make_buffer (buf));
+	}
     }
   else if (uccount)
     {
       /* Abbrev included some caps.  Cap first initial of expansion */
-      Bufpos pos = wordstart;
-
+      Bufpos pos = abbrev_start;
       /* Find the initial.  */
-      while (pos < BUF_PT (buf)
-             && !WORD_SYNTAX_P (XCHAR_TABLE (buf->mirror_syntax_table),
+      while (pos < point
+	     && !WORD_SYNTAX_P (XCHAR_TABLE (buf->mirror_syntax_table),
 				BUF_FETCH_CHAR (buf, pos)))
-        pos++;
-
+	pos++;
       /* Change just that.  */
-      Fupcase_initials_region (make_int (pos), make_int (pos + 1), lbuf);
+      Fupcase_initials_region (make_int (pos), make_int (pos + 1),
+			       make_buffer (buf));
     }
 
-  hook = XSYMBOL (sym)->function;
+  hook = symbol_function (abbrev_symbol);
   if (!NILP (hook) && !UNBOUNDP (hook))
     call0 (hook);
 
   return Qt;
 }
 
+
 void
 syms_of_abbrev (void)
 {
@@ -272,7 +434,7 @@
   Vabbrev_start_location_buffer = Qnil;
 
   DEFVAR_BOOL ("abbrev-all-caps", &abbrev_all_caps /*
-*Set non-nil means expand multi-word abbrevs all caps if abbrev was so.
+*Non-nil means expand multi-word abbrevs all caps if abbrev was so.
 */ );
   abbrev_all_caps = 0;
 


-- 
Hrvoje Niksic <hniksic@srce.hr> | Student at FER Zagreb, Croatia
--------------------------------+--------------------------------
I'm a Lisp variable -- bind me!

