head	1.4;
access;
symbols
	groff-1_20_1_real:1.4
	groff-1_20_1:1.4
	groff-1_20:1.4;
locks; strict;
comment	@# @;


1.4
date	2009.01.05.20.11.03;	author wl;	state Exp;
branches;
next	1.3;
commitid	LOYcfESGVCXMFixt;

1.3
date	2009.01.04.14.51.05;	author wl;	state Exp;
branches;
next	1.2;
commitid	RfDBP1nYqohhW8xt;

1.2
date	2006.09.26.21.24.55;	author wl;	state Exp;
branches;
next	1.1;

1.1
date	2005.12.09.18.12.30;	author wl;	state Exp;
branches;
next	;


desc
@@


1.4
log
@Update copyright year.
@
text
@#! /bin/sh
#
# make-uniuni -- script for creating the file uniuni.cpp
#
# Copyright (C) 2005, 2006, 2009
# Free Software Foundation, Inc.
#      Written by Werner Lemberg <wl@@gnu.org>
#
# This file is part of groff.
#
# groff is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# groff is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

#
# usage:
#
#   make-uniuni <version-string> < UnicodeData.txt > uniuni.cpp
#
# `UnicodeData.txt' is the central database file from the Unicode standard.
# Unfortunately, it doesn't contain a version number, which must therefore be
# provided manually as a parameter to the filter.
#
# This program needs a C preprocessor.
#

# C preprocessor command used for the recursive expansion further down.
CPP=cpp
prog="$0"
# Exactly one argument is required.  The usage message goes to stderr
# because stdout is normally redirected into the generated uniuni.cpp.
if test $# -ne 1; then
  echo "usage: $prog <version-string> < UnicodeData.txt > uniuni.cpp" >&2
  exit 1
fi
version_string="$1"

# Reduce UnicodeData.txt (stdin) to `code;decomposition' lines in one pass:
# drop ranges and control characters (name starts with `<'), keep only the
# decomposition field (field 6), drop entries where it is empty, and drop
# compatibility decompositions (those tagged `<...>').
sed -e '/^[^;]*;</d' \
    -e 's/;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);.*$/;\1/' \
    -e '/^[^;]*;$/d' \
    -e '/^[^;]*;</d' > $$1

# Build the cpp input: one `#define uXXXX uYYYY ...' macro per
# decomposition, followed by one `XXXX uXXXX' line per code point, whose
# `u'-prefixed token cpp then expands.
sed -e 's/^\([^;]*\);/#define \1 /' \
    -e 's/ / u/g' < $$1 > $$2
sed -e 's/^\([^;]*\);.*$/\1 u\1/' < $$1 >> $$2

# Run C preprocessor to recursively decompose; clean up and stop if it fails.
$CPP $$2 $$3 || { echo "$prog: $CPP failed" >&2; rm -f $$1 $$2 $$3; exit 1; }

# Convert cpp's output back to `code;decomposition' lines.  Runs of blanks
# are squeezed with the portable `  *' (`\+' is a GNU sed extension).
sed -e '/#/d' \
    -e '/^$/d' \
    -e 's/  */ /g' \
    -e 's/ *$//' \
    -e 's/u//g' \
    -e 's/^\([^ ]*\) /\1;/' < $$3 > $$4

# Write preamble; the shell expands $version_string and `date` in it.
cat <<END
// -*- C++ -*-
/* Copyright (C) 2002, 2003, 2004, 2005
   Free Software Foundation, Inc.
     Written by Werner Lemberg <wl@@gnu.org>

This file is part of groff.

groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or
(at your option) any later version.

groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */

// This code has been algorithmically derived from the file
// UnicodeData.txt, version $version_string, available from unicode.org,
// on `date '+%Y-%m-%d'`.

#include "lib.h"
#include "stringclass.h"
#include "ptable.h"

#include "unicode.h"

struct unicode_decompose {
  char *value;
};

declare_ptable(unicode_decompose)
implement_ptable(unicode_decompose)

PTABLE(unicode_decompose) unicode_decompose_table;

// the first digit in the composite string gives the number of composites

struct S {
  const char *key;
  const char *value;
} unicode_decompose_list[] = {
END

# Emit Unicode data, one `{ "code", "Ndecomposition" },' entry per line:
# blanks become `_' and N is the number of components (capped at 4).
sed -e 's/ /_/g' \
    -e 's/\(.*\);\(.*_.*_.*_.*\)$/  { "\1", "4\2" },/' \
    -e 's/\(.*\);\(.*_.*_.*\)$/  { "\1", "3\2" },/' \
    -e 's/\(.*\);\(.*_.*\)$/  { "\1", "2\2" },/' \
    -e 's/\(.*\);\(.*\)$/  { "\1", "1\2" },/' < $$4

# Write postamble: close the table and emit the init and lookup code.
cat <<END
};

// global constructor

static struct unicode_decompose_init {
  unicode_decompose_init();
} _unicode_decompose_init;

unicode_decompose_init::unicode_decompose_init()
{
  for (unsigned int i = 0;
       i < sizeof(unicode_decompose_list)/sizeof(unicode_decompose_list[0]);
       i++) {
    unicode_decompose *dec = new unicode_decompose[1];
    dec->value = (char *)unicode_decompose_list[i].value;
    unicode_decompose_table.define(unicode_decompose_list[i].key, dec);
  }
}

const char *decompose_unicode(const char *s)
{
  unicode_decompose *result = unicode_decompose_table.lookup(s);
  return result ? result->value : 0;
}
END


# Remove temporary files ($$N expands to the shell's PID followed by N).
rm $$1 $$2 $$3 $$4

# EOF
@


1.3
log
@

* */*: Update GPL2 to GPL3.
@
text
@d5 1
a5 1
# Copyright (C) 2005, 2006
@


1.2
log
@

formatting
@
text
@d13 2
a14 2
# Software Foundation; either version 2, or (at your option) any later
# version.
d21 2
a22 3
# You should have received a copy of the GNU General Public License along
# with groff; see the file COPYING.  If not, write to the Free Software
# Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
d86 2
a87 2
Software Foundation; either version 2, or (at your option) any later
version.
d94 2
a95 3
You should have received a copy of the GNU General Public License along
with groff; see the file COPYING.  If not, write to the Free Software
Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
@


1.1
log
@

* src/libs/libgroff/make-uniuni: New script to generate uniuni.cpp.

* src/libs/libgroff/uniuni.cpp: Regenerated, using data from Unicode
4.1.0.


* src/utils/afmtodit/make-afmtodit-tables: New script to generate
afmtodit.tables.

* src/utils/afmtodit/afmtodit.tables: New file, representing Unicode
4.1.0 data.

* src/utils/afmtodit/afmtodit.pl (%unicode_decomposed,
%AGL_to_unicode): Removed.  Replace it with a tag.

* src/utils/afmtodit/Makefile.sub (afmtodit): Adjust sed expression
to replace tag with actual file contents.
@
text
@d5 1
a5 1
# Copyright (C) 2005
d144 2
a145 1
unicode_decompose_init::unicode_decompose_init() {
@

