#! /usr/local/bin/perl
# Change the line above to the location of your perl interpreter
# 		mathsci2bibtex
# This is a beta version! Send your comments to ilya@math.mit.edu
# $Id: convert,v 1.1 1993/04/19 06:01:35 ilya Exp ilya $
# $Log: convert,v $
# Revision 1.1  1993/04/19  06:01:35  ilya
# Initial revision
#

sub configure {
	# Establish the run-time configuration.
	#
	# Every setting lives in a global $opt_NAME variable.  The defaults are
	# assigned first.  If the first command-line argument looks like a
	# switch, newgetopt.pl's NGetOpt is asked to parse the switches listed
	# below (the code that follows relies on it leaving the value of -NAME
	# in $opt_NAME).  The yes/no settings are then normalized, %languages is
	# filled from the three colon-separated language lists, and $Usage is
	# built.  The program dies with $Usage when $opt_h is defined, i.e. when
	# -h was given or when there are no command-line arguments at all.

	#  user modifiable definitions

  $opt_rm='\\operatorname{'; #what to substitute instead of "{\rm" code
  $opt_bf='{\\Bbb ';         #what to substitute instead of "{\bf" code
							 	#separately for any letter to make bold
  $opt_scr='{\\cal ';        #what to substitute instead of "{\scr" code
  $opt_germ='\\frac ';        #what to substitute instead of "\germ" code
				# NOTE(review): '\frac' looks like a typo for '\frak' -- confirm

	$opt_crc32='n';                # use the real CRC-32 (the complicated
															# algorithm) for the record checksum?
  $opt_key_db='mathsci.dbk';        # name of the database of keys
  $opt_r_key_db='y';            # use existing database of keys?
  $opt_w_key_db='y';            # update existing database of keys?
  $opt_del_dup='y';            # search previous keys for duplicate of the given?
  $opt_exp_name='y';        # take the expanded field for the person name?
  $opt_exp_jrnl='y';     # take the expanded field for the journal name?
  $opt_believe_jrnl_year='y';    # the year in the JN field has a precedence

  $opt_believe_jrnl='translation'; # if 2 JN fields, take the translation
  #     $opt_believe_jrnl='original'; # if 2 JN fields, take the original
	$opt_translated_title='n';				# what to do with translation for a generic language
	$opt_transl_lang='Russian';
	$opt_orig_lang='';
	$opt_gener_lang='French:German';
	if (@ARGV && ($ARGV[$[])=~/^-./) {
		require "newgetopt.pl";
    &NGetOpt('h','crc32:s','rm=s','scr=s','germ=s','bf=s','key_db=s','r_key_db:s','w_key_db:s',
						'del_dup:s','exp_name:s','exp_jrnl:s','believe_jrnl=s',
						'believe_jrnl_year:s','translated_title:s',
						'transl_lang:s','orig_lang:s','gener_lang:s');
	}
	else {
		# no arguments at all: show the usage message
		$opt_h='y'	unless @ARGV;
	}

	# Normalize the yes/no settings: any value containing "n" or "-"
	# becomes the empty (false) string, anything else is left alone.
	$opt_crc32=~s/^.*[-n].*$//;
	$opt_r_key_db=~s/^.*[-n].*$//;
	$opt_w_key_db=~s/^.*[-n].*$//;
	$opt_del_dup=~s/^.*[-n].*$//;
	$opt_exp_name=~s/^.*[-n].*$//;
	$opt_exp_jrnl=~s/^.*[-n].*$//;
	$opt_believe_jrnl_year=~s/^.*[-n].*$//;
	$opt_translated_title=~s/^.*[-n].*$//;

	# %languages maps a language name to what to do with the title and
	# series of a record in that language: 'translate', 'original', or
	# 'generic' (the name is only used for recognition of a word).
	# Later lists override earlier ones for a language named twice.
	foreach (split(":",$opt_transl_lang)) { $languages{$_}='translate'; }
	foreach (split(":",$opt_orig_lang))   { $languages{$_}='original'; }
	foreach (split(":",$opt_gener_lang))  { $languages{$_}='generic'; }

	#  user modifiable definitions - end
$Usage = 
"Converter from the MathSci database format to a BibTeX format.
Usage:   $0 [-options] [MathSci_file]
    options:                                               (default)
      -h              give this message                       (n)
      -crc32    [yn]  use the real crc32 for duplicate search ($opt_crc32)
      -key_db FILE    name of the database of existing keys   ($opt_key_db)
      -r_key_db [yn]  read existing database of keys?         ($opt_r_key_db)
      -w_key_db [yn]  write existing database of keys?        ($opt_w_key_db)
      -del_dup  [yn]  delete duplicate records?               ($opt_del_dup)
      -exp_name [yn]  use the expanded name field?            ($opt_exp_name)
      -exp_jrnl [yn]  use the expanded journal name field?    ($opt_exp_jrnl)
      -believe_jrnl_year [yn]  
                      believe the year in the journal field?  ($opt_believe_jrnl_year)
      -believe_jrnl translation | original  
                      which journal field to believe?         ($opt_believe_jrnl)
      -translated_title [yn]  
                      which title to use if there are 2 (for generic language)?
                                                              ($opt_translated_title)
      -transl_lang (:-separated list of Languages)
                      For which languages use the translated titles?
                                                              ($opt_transl_lang)
      -orig_lang   (:-separated list of Languages)
                      For which languages use the original titles?
                                                              ($opt_orig_lang)
      -gener_lang  (:-separated list of Languages)
                      Some useful names of languages for pattern recognition
                                                        ($opt_gener_lang)
      -rm   STRING    the string to change '{\\rm ' to         ($opt_rm)
      -bf   STRING    the string to change '{\\bf ' to         ($opt_bf)
      -scr  STRING    the string to change '{\\scr ' to        ($opt_scr)
      -germ STRING    the string to change '\\germ ' to        ($opt_germ)
To work from STDIN use $0 -     or   $0 --
"   ;
  die "$Usage" if defined $opt_h;
}

# Settle every $opt_* setting before anything else runs.
&configure;

$rn=0;            # record counter

# MathSci field tag  ->  internal field name.
%codes=(
    'AU',   'author',
    'MR',   'mr_number',
    'ED',   'editor',
    'CT',   'contributor',
    'TI',   'title',
    'NT',   'note',
    'TIC',  'collection_title',
    'SE',   'series',
    'PY',   'year',
    'JN',   'journal',
    'JNT',  'journal_translated',
    'JNO',  'journal_original',
    'RF',   'reviewed_from',
    'PUBL', 'publisher',
    'LA',   'language',
    'SL',   'summary_language',
    'PC',   'primary_code',
    'SC',   'secondary_code',
    'RL',   'review_length',
    'RE',   'reviewer',
    'RN',   'reviewer_nonpersonal',
    'RT',   'review_type',
    'AB',   'abstract',
    'DEM',  'primary_descriptor',
    'DER',  'secondary_descriptor',
    'DT',   'document_type',
    'IS',   'issn_isbn',
    'CO',   'coden',
    'XN',   'cross_ref_mr_number',
    'XP',   'cross_paper_number',
    'AN',   'accesion_number',
    'MRI',  'mr_issue',
    'SF',   'subfile',
    'CMP',  'CMP_volume_issue'
);

# Internal field name  ->  name of the subroutine that processes it.
# A field whose name is missing here is handed to &unknown_field by
# &dorecord.
%proc=(
    'author',             'author',
    'journal',            'journal',
    'journal_translated', 'journal',
    'journal_original',   'journal',
    'publisher',          'publisher',
    'series',             'series',
    'editor',             'editor',
    'collection_title',   'collection_title',
    'title',              'title',
    'year',               'general',
    'note',               'general',
    'language',           'general',
    'document_type',      'general'
);

  # begins job
# Build the CRC table only when the real CRC-32 checksum was requested.
&init_crc32 if $opt_crc32;

# Read the database of keys.  Each line has the form "KEY, CHECKSUM";
# the pairs are collected in %cs (key -> checksum), which &cs consults
# to detect repeated references and clashing keys.
$r_key_db="<$opt_key_db";
$w_key_db=">>$opt_key_db";
if ($opt_r_key_db && -f $opt_key_db) {
  open(r_key_db,$r_key_db) || die "cannot open $opt_key_db for read: $!";
  while (<r_key_db>) {
    # A malformed line is reported and skipped, so that it cannot
    # leave an entry under an empty key in %cs.
    unless (($key,$cs)=/^(\w+),\s+(\d+)$/) {
      warn "wrong entry $_ in the checksum database";
      next;
    }
    $cs{$key}=$cs;
  }
  close(r_key_db) || die "cannot close $opt_key_db for read: $!";
}
elsif ($opt_r_key_db) {
  print STDERR "there is no checksum database $opt_key_db\n";
}
# The database is opened for append; &cs and &dorecord write to it.
if ($opt_w_key_db) {
  open(w_key_db,$w_key_db) || die "cannot open $opt_key_db for write: $!";
}
#select(STDOUT);

$*=1;             # multi-line matching: ^ and $ also match next to
                  # embedded newlines (a Perl 4 / early Perl 5 variable)
$/="|\n\n";       # an input chunk ends with "|" followed by an empty line

while (<>) {
  #s/\r$//g;   # DOS files in UNIX - anyway $/  won't work
  $searchhistory=0;

  # The search history deletion: a form feed, a line starting with "No."
  # and everything up to the next form feed shrinks to two form feeds.
  if (s/(\f\nNo\.[^\f]*\f)/\f\f/g) {
    $searchhistory=1;
    print STDERR "There was a search history in the record $rn of $ARGV.\n";
  }

  # The SilverPlatter mark deletion: the whole marked line becomes two
  # form feeds as well.
  if (s/^SilverPlatter.*$/\f\f/gi) {
    $searchhistory=1;
    print STDERR "There was a SilverPlatter mark in the record $rn of $ARGV.\n";
  }

  # Form feeds separate the records inside a chunk; pieces without any
  # word character are ignored.  (&dorecord may leave through "next",
  # which continues with the next piece of this loop.)
  foreach (split(/\f+/)) {
    &dorecord if /\w/;
  }
}

if ($opt_w_key_db) {
  close(w_key_db) || die "cannot write to $opt_key_db; disk full?!";
}


sub dorecord {
  # Convert the single MathSci record held in $_ into one BibTeX entry
  # printed on STDOUT.
  #
  # The record is split into fields at every "|" that ends a line.  A field
  # whose tag is known to %codes and whose name has a processor in %proc is
  # handed to that subroutine (called with the field name as argument and
  # the field text in $_); the processors fill %item_list (what is
  # printed) and %add_list (auxiliary data, unrecognized pieces under the
  # key '???').  Afterwards the year, title, volume, organization and
  # language are reconciled, the citation key and the entry type are
  # chosen, and the entry is written.
  #
  # Globals read: %codes, %proc, %languages, $opt_*, $qualifier (set by
  # &journal).  Globals changed: $rn, $tmp, $translate_now, %cs (via &cs).
  local($code,$lname,%add_list,%item_list,@authors,$key,$incollection,$reference_type,$lang);
	++$rn;
	$incollection=0;
  s/^\f+$//g;  # kill Page breaks
  s/\|\s+$/\|/g;  # just in case...
  foreach (split(/\|$/)) {
    s/^(\n+|\s*MathSci\s+Disc[\-\s\d\/]+)\s*\d+\s+of\s+\d+(\s|\n)*\n+//i; # kill this "7 of 67" stuff
    if ( ( ($code,$lname)=/^([^-:\s]+)\s*([^-:]*)[-:]/ ) && defined $codes{$code}
              && defined $proc{$codes{$code}}) {
        # e.g. for the tag TI this evaluates "&title(title);"
        eval "&$proc{$codes{$code}}($codes{$code});";
      }
    else  {&unknown_field;} 
    #print join(':',%item_list), "\n";
  }
  #print join(':',%item_list), ":\n";
  # set year
	if (!$item_list{'year'}) {
		$item_list{'year'}=$add_list{'publisher_year'} if $add_list{'publisher_year'};
		$item_list{'year'}=$add_list{'journal_year'} if $add_list{'journal_year'};
	}
	# years that disagree with the chosen one are reported as unrecognized
	if ($add_list{'publisher_year'} 
				&& $add_list{'publisher_year'} != $item_list{'year'})  {
		$add_list{'???'} .= "; " . $add_list{'publisher_year'};
		#delete $add_list{'publisher_year'};
	}
	if ($add_list{'journal_year'} 
				&& $add_list{'journal_year'} != $item_list{'year'})  {
		$add_list{'???'} .= "; " . $add_list{'journal_year'};
	}
  if ($opt_believe_jrnl_year && $add_list{'journal_year'} ) {
    $item_list{'year'}=$add_list{'journal_year'};
  }
	#delete $add_list{'journal_year'};
  # as a last resort take a plausible year from the end of the address
  ($tmp)=($item_list{'address'} =~ /(\d+)\s*$/);
  ($item_list{'year'})=($item_list{'address'} =~ /([-\d\/]+)\s*$/) unless ($tmp < 1500) | ($tmp>2100) | $item_list{'year'};
  $item_list{'year'} =~ s/;\s*\d{2}(\d{2})/\/$1/g;   # "1985; 1986" -> "1985/86"
  $item_list{'address'} =~ s/,\s+$item_list{'year'}$// 
      if $item_list{'address'} && $item_list{'year'};
	# choose between the original title and its translation in [brackets]
	$translate_now=$opt_translated_title;
	$translate_now=1 if $languages{"\u\L$item_list{'language'}"} eq 'translate';
	$translate_now=0 if $languages{"\u\L$item_list{'language'}"} eq 'original';
  if ($item_list{'title'} =~ /\.\s+\[(.*)\]\s*$/) {
    $item_list{'title'} = $translate_now ? $1 : $` ;
    $item_list{'language'}='' unless $translate_now;
  }    
  if ($item_list{'series'} =~ /\.\s+\[(.*)\]\s*$/) {
    $item_list{'series'} = $translate_now ? $1 : $` ;
    $item_list{'language'}='' unless $translate_now;
  }    
	# a volume found in the title: either it is the volume of the entry,
	# or (when another volume is already known) it goes back to the title
	if ($add_list{'volume'}) {
		if ($item_list{'volume'} && $add_list{'volume'}!=$item_list{'volume'}) {
			 	$item_list{'title'} .= ", Vol. $add_list{'volume'}";
		}
		else {
	    $item_list{'volume'}=$add_list{'volume'};
		}
	}
	# Drop an "organization" that is in fact a language name, i.e. one that
	# occurs in the language field or is known to %languages.
	# NOTE(review): &collection_title stores the extra candidates in
	# $add_list{'organization?'}, while the code below reads
	# $item_list{'organization?'} -- confirm which one is intended.
	while (1) {
		# quote every non-word character, so that the organization is matched
		# literally (an unquoted trailing "." used to produce an invalid
		# pattern ending in a backslash)
  	($tmp=$item_list{'organization'})=~s/(\W)/\\$1/g;
  	$item_list{'organization'} = '' 
			if $item_list{'language'} =~ /$tmp/i;
  	$item_list{'organization'} = '' if 
				defined $languages{"\u\L$item_list{'organization'}"};
		$item_list{'organization'}=$item_list{'organization?'}, 
				delete $item_list{'organization?'}, next
				if (!$item_list{'organization'} && $item_list{'organization?'});
		last;
	}
	$add_list{'???'} .= "; $item_list{'organization?'}" if $item_list{'organization?'};
  # with two languages "orig; transl" keep the one matching the believed
  # journal field; $` and $' come from the match against /;/
  $item_list{'language'} = $opt_believe_jrnl eq 'original' ? $` : $' 
      if $qualifier && $opt_believe_jrnl && $item_list{'language'} =~ /;/;
  $item_list{'language'} =~ s/\s*;?\s*English\s*;?\s*//;
  $item_list{'volume'}=$1 
		 if $item_list{'volume'} eq '' && 
			$item_list{'booktitle'} =~ s/,?\s*vol\.\s+([\divxlcdm]+)$//i	;
	$item_list{'booktitle'} =~ s/,?\s*vol\.\s+$item_list{'volume'}$//i	;
  if ($item_list{'author'}) {   # set key basing on authors
    &dokey('author');
  }
  elsif ($item_list{'editor'}) {   # set key basing on editors
    &dokey('editor');
  }
  elsif ($item_list{'title'}) {   # set key basing on title
    &dotitlekey('title');
  }
  else {                    #something strange happened, set random key
    $key=int rand(1048576);
    print STDERR "Cannot construct a key, temporary key $key in the record $rn of $ARGV.\n";
  }
  #print join(':',%item_list), "\n";
  if ($item_list{'journal'}) {
    $reference_type='article';
  }
  elsif ($item_list{'document_type'} =~ /Proceedings-Paper/) { #'Proceedings-Paper'
    $reference_type='inproceedings';
    }
  elsif ($item_list{'document_type'} =~ /Proceedings/) { #'Proceedings'
    $reference_type='proceedings';
    }
  elsif ($item_list{'document_type'} =~ /Book/) { #'Book'
    $reference_type='book';
    }
  elsif ($item_list{'series'} ) { #'Book'???? In fact I don't know...
    $reference_type='book';
    }
  elsif ($incollection) {$reference_type='inproceedings';}
  else {
    # the "@" is escaped: it must not be taken for an array in the string
    print STDERR "unknown type of reference, \@misc substituted in the record $rn of $ARGV.\n";
    $reference_type='misc';
  }
  $item_list{'pages'}=$add_list{'book_pages'}
		#, delete $add_list{'book_pages'} 
		unless $item_list{'pages'};
  $item_list{'pages'}=$add_list{'publisher_pages'}
		#, delete $add_list{'publisher_pages'}
		unless $item_list{'pages'}; 
																									#publisher_pages
  #undef $item_list{'document_type'};
  # ship out
	&latexize('title');
	&latexize('booktitle');
	$add_list{'???'} =~ s/^;\s+//;
	print STDERR "In $key in the record $rn of $ARGV: Cannot recognize: ", $add_list{'???'}, "\n"
			if $add_list{'???'};
  # &cs returns 0 for a repeated reference; "next" then leaves this
  # subroutine and continues with the caller's loop.
  # NOTE(review): &cs is only called when $opt_del_dup is set, yet the
  # checksum line written at the end relies on $cs{$key} -- confirm.
  next if $opt_del_dup && &cs==0;
  # The pieces are printed as a list: "@" and "$reference_type{" inside
  # one double-quoted string would be taken for variable subscripts.
  (print "\n\@", $reference_type, "{\t$key\n") 
			|| die "cannot write to STDOUT; disk full?!";
  while (@_=each(%item_list)) {
    # every non-empty field except document_type becomes ",<TAB>name<TAB>={value}"
    (print ",\t$_[ $[ ]\t={$_[ $[ +1 ]}\n" ) 
					|| die "cannot write to STDOUT; disk full?!"
        if $_[ $[ +1 ] && ($_[ $[ ] ne 'document_type');
  }
  (print "}\n") || die "cannot write to STDOUT; disk full?!";
  ((print w_key_db "$cs{$key}\n") 
      || die "cannot write to $opt_key_db; disk full?!") if $opt_w_key_db;
					# divided in 2 parts for possible error in output writing, the second
					# part is written after the work is finished
  #undef %item_list;
}

sub author {
    # Processor of the AU field: the field in $_ is turned by &name into
    # a BibTeX name list, which becomes the author of the record.
    local($people) = &name;
    $item_list{'author'} = $people;
    }

sub editor {
    # Processor of the ED field: the field in $_ is turned by &name into
    # a BibTeX name list, which becomes the editor of the record.
    local($people) = &name;
    $item_list{'editor'} = $people;
    }

sub name {
    # Rewrite the name field held in $_ into a BibTeX name list
    # ("Last, First and Last, First ...") and return it.  $_ itself is
    # changed.  $opt_exp_name selects between the short form of a name and
    # the expanded form given in [brackets] after it.

    s/^[^-:]+[-:]\s+//;            # the field tag
    s/((-\w\.)?)[.;,]?\s*$/$1/;    # the trailing "." etc., unless it ends
                                   # an initial like "-J."
    s/,-/, /g;
    s/,\s*\([^()]*\)//g;           # the address field, as in ", (1-CA)"

    if ($opt_exp_name) {
      # change the name to the expanded name [Name Name1] field
      s/(^|;)[^[;]*\[([^];]*)\]/$1$2/g;
    }
    else {
      # delete the expanded name [Name Name1] field
      s/\[[^][]*\]//g;
    }

    s/;/ and /g;                   # ";" separates the persons
    s/\s+/ /g;
    s/^\s+//;

    # initials: the hyphen on either side becomes a space
    s/-(\w{1,2}\.)/ $1/g;
    s/\s(\w{1,2}\.)-/ $1 /g;

    # prefixes: in a run of short lower-case words (or "De") glued by
    # hyphens in front of a capital, the hyphens become spaces
    while (/(^|\sand\s+)((([a-z]{1,3}|De)-)+)[A-Z]/) {
      substr($_,length($`)+length($1),length($2)) =~ s/-/ /g;
    }

    # "Last, First, Jr" is rearranged into "First Last, Jr", person by person
    if (/,\s*(Jr|III|IV)\.?(\s|$)/) {
      local(@people) = split(" and ");
      foreach $person (@people) {
        $person =~ s/^([^,]+),([^,]+),\s+(Jr|III|IV).?\s*$/$2 $1, $3/;
      }
      $_ = join(" and ", @people);
    }
    $_;
    }

sub journal {
    # Processor of the journal fields (JN, JNT, JNO).  The field in $_ is
    # expected to look like
    #     tag: [translation|original: ] Short-Name [Expanded-Name] rest
    # where "rest" is a comma separated list of volume, year, number and
    # pages in one of the forms recognized below.
    #
    # Sets $item_list{'journal'}, and as far as they are found 'volume',
    # 'number', 'pages', 'year' and $add_list{'journal_year'}; pieces that
    # are not recognized are appended to $add_list{'???'}.  The global
    # $qualifier is set to 'translation', 'original' or ''.  When the
    # record already has a journal, either this field is ignored (its
    # qualifier is not the one named by $opt_believe_jrnl) or it replaces
    # the data of the first one.
    $qualifier='';        # translation and original
    s/^[^-:]+[-:]\s+//;
    s/[.;,]?\s*$//;
    $qualifier="\L$1" if s/^(translation|original):\s+//i;
    return if ($qualifier && $opt_believe_jrnl && $opt_believe_jrnl ne $qualifier
         && $item_list{'journal'});
    $add_list{'journal_year'}=$item_list{'volume'}=$item_list{'number'}=
        $item_list{'pages'} = undef if $item_list{'journal'};  
                            # believe the second instance, hence erase the first
    #$item_list{'journal'}=/^(\S+)(\s*\(\d{1,3}\))?/; # chose the first word before [
    # Take the short name: everything before the "[".  The match is done in
    # list context so that the captured text (not the success flag of the
    # match) is stored; the blank in front of the "[" is dropped.
    ($item_list{'journal'})=/^(.+)\[/;
    $item_list{'journal'} =~ s/\s+$//;
    $item_list{'journal'}= $1 if $opt_exp_jrnl && /\[([^]]*)\]/ ;
              # change the name to the expanded name [Name ] field
    $item_list{'journal'}  =~ s/-/ /g;
    #s/^\s*\S+\s*(\(\d{1,3}\)\s*)?//; # kill the first word - can be j.-math. (2)
    s/^.*\[[^]]*\]//;  # kill the expanded name and name
    s/\.\s*$//;
		# $wasPage is 1 while the piece right after a page range is examined:
		# such a piece, if it looks like pages, continues the page list
		$wasPage = 0;
		$wasYear = 0;
    foreach (split(/,/)) {
			$wasPage++ if  $wasPage>0;
      if (/^\s*([-\d\/]+(\([-\d\/]+\))?)\s+\((\d{4}(\/\d+)*)\)\s*$/) { 
																# as: 40/41(141/142) (1990/91)
        if ($add_list{'journal_year'}) {
					if ($add_list{'journal_year'} ne $3) {
						{$add_list{'???'} .= "; " . $3;}
					}
					if ($wasPage) {
						$item_list{'pages'} .=  ", $1";
					}
					else  {$item_list{'pages'}=$1;}
				}
				else		{
					$add_list{'journal_year'} = $3;
                                            # there is another year field
        	if ($item_list{'volume'}) {
						if ($wasPage) {
							$item_list{'pages'} .=  ", $1";
						}
						elsif  ($item_list{'pages'} eq '') {
          		$_=$1;
          		if (/--/) {$item_list{'pages'} = $_ ;}
							else {
								$add_list{'???'} .= "; " . $_;
							}
						}
						else {$add_list{'???'} .= "; " . $1;}
        	}
        	else {$item_list{'volume'} =  $1;}
				 }
      }
      elsif (/^\s*no\.\s*([-\/\d]+(\s*\([-\/\d]+\))?(\s*exp(\.|\w+)(\s*no\.)?\s*[-\/\d]+)?(\s*[ivxlc]+\.?)?)\s*$/i) { 
																						#as:  no. 3-4(75-76) Exp. No. 708
																						#as:  no. 6 i.
        if ($item_list{'number'}) {$add_list{'???'} .= "; " . $_;}
				else {$item_list{'number'} =  $1;}
      }
      elsif (/^\s*\((\d{4}(\/\d+)*)\)\s*$/) { #as:  (1956/57)
        if ($item_list{'year'}) {
					if ($item_list{'year'} eq $1) {$add_list{'journal_year'} = $1;}
					else {$add_list{'???'} .= "; " . $_;}
				}
        else {$item_list{'year'} = $1;}
      }
      elsif (/^\s*(\d+)\s*$/) { #as:  56
				if ($wasPage) {$item_list{'pages'} .=  ", $1";$wasPage--;}
				elsif ($item_list{'volume'} ) { # is it year?
					if ($1 > 1500 && $1 < 2100) {
        		if ($item_list{'year'} && $item_list{'year'} ne $1) 
								{$add_list{'???'} .= "; " . $_;}
						else {$item_list{'year'} =  $1;}
					} 
					else {
						if ($item_list{'pages'}) {$add_list{'???'} .= "; " . $_;}
						else	{$item_list{'pages'} =  $1;}
					}
				}
				else {$item_list{'volume'} = $1;}
      }
      elsif (/^\s*(pp\.\s*)?(([ivxlcd]+\+)?([-\d]+)|((\w)\d+-+\6\d+))(\s*pp\.?)?\s*$/) { 
																#as:  567--897 or L567--L897 or viii+65 pp.
				if ($wasPage) {$item_list{'pages'} .=  ", $2";$wasPage--;}
        elsif ($item_list{'pages'}) {$add_list{'???'} .= "; " . $_;}
				else {
					$item_list{'pages'} =  $2;
					$wasPage = 1;
				}
      }
      elsif (/^\s*No\.\s+([-\d\/]+(\s*\([-\/\d]+\))?)\s+(pp\.\s*)?([-\d]+)(\s*pp\.?)?\s*$/i) { 
														#as:  No. 62 567--897 pp
        if ($item_list{'number'}) {$add_list{'???'} .= "; " . $_;}
				else {$item_list{'number'} =  $1;}
        if ($item_list{'pages'}) {$add_list{'???'} .= "; " . $_;}
				else {
					$item_list{'pages'} =  $4;
					$wasPage = 1;
				}
      }
      elsif (/^\s*No\.\s+([-\d\/]+(\s*\([-\/\d]+\))?)\s+(pp\.\s*)?([-\d]+)(\s*pp\.?)?(\s*\((\d{4})\))\s*$/i) { 
														#as:  No. 62 567--897 pp. (1989)
        if ($item_list{'number'}) {$add_list{'???'} .= "; " . $_;}
				else {$item_list{'number'} =  $1;}
        if ($item_list{'pages'}) {$add_list{'???'} .= "; " . $_;}
				else {
					$item_list{'pages'} =  $4;
				}
        if ($item_list{'year'}) {
					if ($item_list{'year'} eq $7) {$add_list{'journal_year'} = $7;}
					else {$add_list{'???'} .= "; " . $7;}
				}
        else {$item_list{'year'} = $7;}
      }
      elsif (/^\s*Numero\s+Hors\s+Serie\s*$/i) { 
        $item_list{'number'} = "Hors Serie" unless $item_list{'number'};
      }
      elsif (/^\s*(suppl\.|part)\s*([-\d\/]+)\s*$/i) {  # suppl. 2
        $item_list{'number'} .= " $2" ;
      }
      elsif (/^\s*((\w+\.?\s+)*sci\.)\s*$/i) {  # Phys.  Sci.
        $item_list{'number'} .= ", " . $1 ;
      }
      elsif (length($_)>9 && /^\s*(\D+\s+[\divxlcd]+)\s*$/i) {  
							# Voprosy  Kvant. Teor. Polya i Statist. Fiz. 5
        $item_list{'number'} .= ", " . $1 ;
      }
			else {$add_list{'???'} .= "; " . $_;}
			# the page list can only be continued by the very next piece
			$wasPage=0 if  $wasPage>1;
    }
		$item_list{'number'} =~ s/,\s*//;
    #print join(':',%item_list), "\n";
    }

sub publisher {
        # Processor of the PUBL field held in $_.  Splits the field into
        # $item_list{'publisher'} and $item_list{'address'}, and stores a
        # four-digit year and a page count, when found, in
        # $add_list{'publisher_year'} and $add_list{'publisher_pages'}.
        #
        #  Dont know how to split into publisher and address
        #  Three cases: with "Inc.,", "Ltd."  and "Akad. Nauk" recognized so while
        #
    s/^[^-:]+[-:]\s*//;
    s/[.;,]?\s*$//;
    # the text before ", YEAR" is kept in $_ ($` of the match)
    $_=$`, $add_list{'publisher_year'}=$1 if /,\s+(\d{4})[,.;]/;    # find year
    $item_list{'publisher'} = $_;
    # $' is what followed the most recent successful match
    # NOTE(review): when no year was found this is the $' of the
    # substitution two statements above -- presumably empty; confirm
    $_=$' ? $' : ''; #search rest
    # the page description is cut out of the rest; "pp" marks are removed
    # from the copy that is stored
    ($add_list{'publisher_pages'} = $2) =~ s/\s*pp[.,;]?\s*//g 
       if s/(^|[,.:])\s+((((Vol\.\s+[\divxlcdm]+:)?([^.,:]*pp[.,;]?(;\s+)?|pp\.[^.,;]*))|(Vol\.\s+[\divxlcdm]+:)(\sand\s|[\s\divxlcdm+\-])+)+)//i;
                # possible forms xii+356 pp. or : pp. xii+356
                # want also ", Vol. 1: xvi+717 pp.;  Vol. 2: xii+458 pp.; 
								#						Vol. 3: i--xii and 459--834, "
                # so: allow Vol. OR pp
								# i.e.  (^|[,.:])\s+($2), where $2=$pages+, 
								# 		$pages=$Pages | $volumemark $pagesmark+,
								# 		$Pages=$volumemark? $pagespart
								#			$pagespart=$partpp | $pppart
								#			$partpp=[^.,:]*pp[.,;]?(;\s+)?
								#			$pppart=pp\.[^.,;]*
								#			$volumemark=Vol\.\s+[\divxlcdm]+:  $pagesmark= and | "digits-+"
#    if (/(^|[,.:])\s+([^.,:]*pp[.,;]|pp\.[^.,;]*)/)  {
#        $add_list{'publisher_pages'} = $2;
#        $add_list{'publisher_pages'} =~ s/\s*pp[.,;]?\s*//; 
#    }
    # the first ", " separates the publisher from the address
    if ($item_list{'publisher'}=~/,\s+/)
        {
          $item_list{'publisher'} = $`;
          $item_list{'address'} = $';
          # "Akad. Nauk ..." and "..., Inc.," / "..., Ltd.," keep one more
          # comma separated piece in the publisher name
          if (($item_list{'publisher'} =~ /^Akad\.\s+Nauk/) || 
              $item_list{'address'}=~/^(Inc|Ltd)\.,/)
            {
              if ($item_list{'address'}=~/,\s+/)
                {  
                  $item_list{'publisher'} .= ", ";
                  $item_list{'publisher'} .= $`;
                  $item_list{'address'} = $';
                }
            }    
        } 
}

sub title {
    # Processor of the TI field held in $_: the field tag and the final
    # punctuation are removed and the rest becomes $item_list{'title'}.
    # A "vol. N" at the very end is cut off and remembered in
    # $add_list{'volume'}.
    $_ =~ s/^[^-:]+[-:]\s+//;
    $_ =~ s/[.;,]?\s*$//;
    if ($_ =~ s/vol\.\s+([-\/\divxlcdm]+(,\s*[\divxlcdm]+)*)\s*$//i) {
        $add_list{'volume'} = $1;
        # the punctuation that used to precede "vol."
        $_ =~ s/[.;,]?\s*$//;
    }
    $item_list{'title'} = $_;
    }

  

sub collection_title {
	# Processor of the TIC field held in $_.  Marks the record as part of a
	# collection ($incollection), peels trailing items off the end of the
	# field one at a time -- a parenthesized remark, a volume, a volume
	# with pages, a page list -- and stores what is left as
	# $item_list{'booktitle'}.
	$incollection=1;
    s/^[^-:]+[-:](\s+Collection:)?\s*//;
    s/[.;,]?\s*$//;
												# I don't know what "pp. 307--400, 678" means
						# now the problem: I don't know what to do when where is a volume
						# both in collection_title and series, and if organization
						# and volume can be in an arbitrary order.
						# Hence I delete organization now, ignoring the volume,
						# and leave volume to the end, when I know if there is a series
						# volume.
						#
						#  Moreover, there can be something like 
						#     vol. 47 (Russian), pp. 27 (MIT), vol. 14.
						#
processing:
		# every successful step removes one trailing item and restarts the
		# loop ("next"); the loop ends when nothing more is recognized
		while (1) {
    	s/[,.;]?\s*$//;
			if (s/\(([^()]*)\)\s*$//)   #or language? (MIT) or (French)
    			{	
				# the first remark is taken for the organization, later ones
				# are collected separately
				# NOTE(review): stored in %add_list here, while &dorecord looks
				# for 'organization?' in %item_list -- confirm
				if (!$item_list{'organization'}) {$item_list{'organization'}=$1;}
				else {$add_list{'organization?'}.="; ".$1;}
				next;
			}
			# trailing "vol. N"
			if (s/vol\.\s+([-\/\divxlcdm]+(,\s*[\divxlcdm]+)*)\s*$//i)
    			{	
				if (!$item_list{'volume'}) {$item_list{'volume'}=$1;}
				else {$add_list{'???'}.="; ".$1;}
				next;
			} 
			# trailing "vol. N, pp. PAGES"
			if (s/vol\.\s+([-\/\divxlcdm]+(,\s*[\divxlcdm]+)*),\s*(pp\.\s*)?\s([\divxlcdm]+(-+[\divxlcdm]+)?(,\s+[\divxlcdm]+(-+[\divxlcdm]+)?)*)\s*$//i)
    			{	
				if (!$item_list{'volume'}) {$item_list{'volume'}=$1;}
				else {$add_list{'???'}.="; ".$1;}
				if ($add_list{'book_pages'}) {
					$add_list{'???'}.="; ".$4;
				}
				else	{$add_list{'book_pages'} = $4;}
				next;
			} 
			# trailing page list after "," or "pp.", taken only once; the
			# "next" inside the do-block restarts the enclosing while loop
    	do {
				$add_list{'book_pages'} = $2;
				next;
				}   if !$add_list{'book_pages'} && s/(,|pp\.)\s+([\divxlcdm]+(-+[\divxlcdm]+)?(,\s+[\divxlcdm]+(-+[\divxlcdm]+)?)*)\s*$//;
			$add_list{'book_pages'} =~ s/,\s*$//;
			last;
		}
    $item_list{'booktitle'} = $_;
}
  
#sub document_type {
#    s/^[^-:]+[-:]\s+//;
#    s/\.\s*$//;
#    $item_list{'document_type'} = $_;
#    #print join(':',%item_list), "\n";
#    }

sub series {
    # Processor of the SE field held in $_.  A volume and/or number at the
    # end of the field is cut off and stored in $item_list{'volume'} and
    # $item_list{'number'}; what remains becomes $item_list{'series'}.
    # The three forms are tried in this order:
    #     "..., VOLUME"     "..., No. NUMBER"     "..., VOLUME, part NUMBER"
    s/^[^-:]+[-:]\s+//;
    s/[.;,]?\s*$//;
    if (s/\s*,\s*([\divxlcdm\-\/]+)\s*$//i) {
        $item_list{'volume'} = $1;
    }
    if (s/\s*,\s*No\.\s+(\d+)\s*$//i) {
        $item_list{'number'} = $1;
    }
    if (s/\s*,\s*([\divxlcdm\-\/]+),?\s*part\s*([\divxlcdm]+)\s*$//i) {
        $item_list{'volume'} = $1;
        $item_list{'number'} = $2;
    }
    $item_list{'series'} = $_;
    }

sub general {
    # Processor of the fields that need no special treatment.  The
    # argument is the name of the field; the text in $_, without the
    # field tag and the final punctuation, is stored under that name in
    # %item_list.
    local($field) = @_;
    s/^[^-:]+[-:]\s+//;
    s/[.;,]?\s*$//;
    $item_list{$field} = $_;
    }

sub unknown_field {
    # Called by &dorecord for a field that has no known tag or no
    # processor in %proc.  Deliberately does nothing: such fields are
    # silently dropped (the diagnostics below are disabled).
    #print join(':',%item_list), "\n";
    #print STDERR "unknown field $lname encountered\n";
}

#sub reset_list {
#  local(@tmp);
#  while (@tmp=each(%item_list)) {$item_list{$tmp[ $[ ]}='';}
#  #undef %item_list;        #memory is tight!
#}

sub dokey {
    # Build the citation key $key from the persons of the record.
    # The argument names the field of %item_list that holds the name list
    # ('author' or 'editor').  The key consists of the shortened
    # (&trunk_sound) surnames of at most three persons, taken in sorted
    # order, followed by the two-digit year and by three letters of the
    # title.
    @authors=split(/ and /,$item_list{ $_[ $[ ] });
    @authors=sort @authors;
    $key='';
    $i=$[;
    do {
      $authors[$i] =~ s/,.*//;                          # only the surname
      $authors[$i] =~ s/^([a-z]{1,3}\s+)+([A-Z])/$2/;   # no "von" and the like
      $authors[$i] =~ s/\s//g;                          # no spaces
      $key .= &trunk_sound($authors[$i]);
    } while ($i++<2+$[ && $i <= $#authors); 

    # year can be 1985; 1986, we choose first
    if ($item_list{'year'} =~ /\d+/) {
      $key .= $&;
    }
    $key =~ s/\d{2}(\d{2})/$1/;   # delete 19 from 1980

    # three letters of the first title word that has at least four
    if ($item_list{'title'} =~ /(\w{3})\w/) {
      $key .= "\u\L$1";
    }
}

sub dotitlekey {
    # Build the citation key $key from a title, for records without
    # persons.  The argument names the field of %item_list to use.  Only
    # the text before the first period is considered; the shortened
    # (&trunk_sound), capitalized forms of at most three of its words of
    # four or more letters are joined and the two-digit year is appended.
    $key=$item_list{ $_[ $[ ] };
    $key =~ s/\..*//;
    @authors=($key =~ /(\w{4,})/g);    # the words take the part of authors
    $key='';
    $i=$[;
    do {
      $tkey = &trunk_sound($authors[$i]);
      $key .= "\u\L$tkey";
    } while ($i++<2+$[ && $i <= $#authors); 

    # year can be 1985; 1986, we choose first
    if ($item_list{'year'} =~ /(\d+)/) {
      $key .= $1;
    }
    $key =~ s/\d{2}(\d{2})/$1/;   # delete 19 from 1980
}

sub latexize {
	# Prepare a field of %item_list for BibTeX; the argument is the name of
	# the field (nothing is done when the field is empty).
	#   - Outside of math every run of capitals that follows some character
	#     is enclosed in braces.
	#   - Every math part is enclosed in braces: $...$ becomes {$...$}.
	#     A "$" preceded by a backslash does not switch math on or off.
	#   - The codes \germ, {\scr, {\rm are replaced by $opt_germ, $opt_scr,
	#     $opt_rm; in {\bf ...} every character gets its own $opt_bf group.
	return unless $item_list{$_[ $[ ] };
				# Capitalize outside of math
	local(@pieces)=split(/\$/," " . $item_list{$_[ $[ ] } . " "); 
						#add space to avoid losing the first and last "\$"
	local($i)=0;     # odd while a piece outside of math is processed
	$item_list{$_[ $[ ] }='';
	foreach (@pieces) {
		s/^\s// unless  $i++;      # the space added in front of the first piece
		# "$1\{$2}": the brace is escaped, so that "$1{" is not taken for a
		# hash subscript
		s/(.)([A-Z]+)/$1\{$2}/g if $i % 2 ; # we assume that capital can never
																					 # follow math immediately
		#$i++ unless /(^|[^\\])(\\\\)*\\$/;
		if (/(^|[^\\])(\\\\)*\\$/) {   # this $ is backslashed
			$item_list{$_[ $[ ] } .= "\$$_" ;
			$i++;		# now the evenness of $i isn't changed
		}
		else {
			if ($i%2) { # nonmath!
				$item_list{$_[ $[ ] } .= "\$}$_" ;
			}
			else {
				$item_list{$_[ $[ ] } .= "{\$$_" ;
			}
		}
	}
	#($item_list{$_[ $[ ] }=join("\$",@pieces)) =~ s/(^\s+)|(\s+$)//g;
	# there is an extra '$}' in the beginning
	$item_list{$_[ $[ ] } =~ s/(^\$\}\s*)|(\s+$)//g;
  $item_list{$_[ $[ ] } =~ s/\\germ\s+/$opt_germ/g;
  $item_list{$_[ $[ ] } =~ s/\{\\scr\s+/$opt_scr/g;
  $item_list{$_[ $[ ] } =~ s/\{\\rm\s+/$opt_rm/g;
  # The replacement is evaluated as code (/e); there the captured text
  # must be written $1 -- "\1" would be a reference to the number 1.
  $item_list{$_[ $[ ] } =~ s/\{\\bf\s+([^\}]+)\}/$opt_bf.join("}$opt_bf",split("",$1)).'}'/ge;
}

sub cs {
	# Compute the checksum of the current record and settle its key.
	#
	# The checksum is the sum, over all values of %item_list (with every
	# run of white space counted as one blank), of either the 32-bit byte
	# sum or -- with $opt_crc32 -- the &crc32 value; a zero sum is
	# replaced by 1.
	#
	# %cs maps the keys seen so far to their checksums.  If the key $key,
	# or $key with one of the suffixes A, B, ... , is already known with
	# the same checksum, the record is a repetition: a message is printed
	# and 0 is returned.  Otherwise the first free suffix is appended to
	# $key, the key is registered in %cs, "KEY, " is written to the key
	# database (with $opt_w_key_db), and the checksum is returned.
	local($cs,$keymod,$text)=(0,'');
	#foreach (('title','booktitle','journal','year','volume','number','editor'))
	foreach $field (keys(%item_list)) {
		#now the problem is to reach the system independence (like \13\10=\10)
		($text=$item_list{$field}) =~ s/\s+/ /g;
		if ($opt_crc32) {
			$cs += &crc32($text);
		}
		else {
			$cs += unpack('%32C*',$text);
		}
	}
	if ($cs==0) {
		$cs=1;
	}
	while ($cs{$key.$keymod}) {
		if ($cs{$key.$keymod}==$cs) {
			print STDERR "Repeated reference $key$keymod skipped in the record $rn of $ARGV.\n";
			return 0;
		}
		# same key, another record: try the next suffix
		if ($keymod) {
			$keymod++;
		}
		else {
			$keymod = "A";
		}
	}
	$key .= $keymod;
	if ($keymod) {
		print STDERR "Double reference, now $key in the record $rn of $ARGV.\n";
	}
	$cs{$key}=$cs;
	if ($opt_w_key_db) {
		# divided in 2 parts for possible error in output writing, the second
		# part is written after the work is finished
		(print w_key_db "$key, ")
				|| die "cannot write to $opt_key_db; disk full?!";
	}
	$cs;
}

sub trunk_sound {
  # Shorten a word to its first three "sounds" and return the result; the
  # argument itself is changed as well.  A sound is one of the listed
  # letter groups (sch, th, ee, ...), a doubled letter, or a single
  # letter.  A doubled letter at the very end of the result, other than
  # "ee" or "oo", is reduced to one letter.
  local($stem);
	# "ei" is 2 sounds!
  $_[ $[ ] =~ /^((schtsch)|(shtsh)|(tsch)|(sch)|(wr)|(ck)|(ch)|(ng)|(zh)|(kh)|(sh)|(kn)|(th)|(ph)|(rh)|(wh)|(qu)|(gh)|(ea)|(ee)|(eu)|(au)|(ou)|(oa)|(oo)|(oe)|(ue)|(ae)|(ie)|([a-zA-Z])\31|[a-zA-Z]){0,3}/i;
  $stem = $&;
  $stem =~ s/([^\Weo])\1$/$1/;
  $_[ $[ ] = $stem;
  $_[ $[ ];
}

sub crc32 {
  # Return the 32-bit CRC of the string given as the argument, computed
  # byte by byte, most significant bit first, with the table
  # @crc32_table (filled by &init_crc32).
  #
  # The shift register is cut back to 32 bits after every step and after
  # the final complement: with integers wider than 32 bits the value
  # would otherwise grow beyond the table index range and the result
  # would depend on the word size of the machine.
  local($crc) = 0xffffffff;     #  /* preload shift register, per CRC-32 spec */
  foreach (split("",$_[ $[ ])) {
    $crc = (($crc << 8)
            ^ $crc32_table[(($crc >> 24) ^ unpack('C',$_)) & 0xff])
           & 0xffffffff;
  }
  return ~$crc & 0xffffffff;    #  /* transmit complement, per CRC-32 spec */
}

#define CRC32_POLY 0x04c11db7     /* AUTODIN II, Ethernet, & FDDI */

sub init_crc32 {
	# Fill @crc32_table, the 256-entry table used by &crc32, for the
	# polynomial 0x04c11db7 (most significant bit first).  Entry N is what
	# remains of the byte N, placed in the top byte of a 32-bit register,
	# after eight shift-and-conditionally-xor steps.
	#
	# Every step is cut back to 32 bits, so that the table is the same on
	# machines with wider integers.
	local($i,$c);
	local($CRC32_POLY)=0x04c11db7 ;
	foreach  (0 .. 255) {
		$c=$_<<24;
		foreach (1..8) { #(c = i << 24, j = 8; j > 0; --j)
			$c = $c & 0x80000000 ? ($c << 1) ^ $CRC32_POLY : ($c << 1);
			$c &= 0xffffffff;
		}
		# the inner loop is over: $_ is the table index again
		$crc32_table[$_] = $c;
	}
}
	
