#!/bin/sh
# sorter – Sorter fullformsordlistene.
#
# Copyright © 2008, 2009, 2010, 2018–2020 Karl Ove Hufthammer <karl@huftis.org>.
#
#     This file is part of Ordbanken.
#
#     Ordbanken is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Force byte-wise collation. Locale-aware collation would sort "aa"
# incorrectly, and the look(1) tool needs a byte-ordered list to work.
export LC_ALL=C

# Stop on unhandled command failures and on use of unset variables.
set -eu

# Derive the language code from the first argument by stripping an
# optional ".dat" suffix and an optional "fullform_" prefix.
sluttfil=${1-}
sprak=${sluttfil%.dat}
sprak=${sprak#fullform_}
if [ -z "$sprak" ]; then
  printf 'Usage: %s fullform_LANG.dat\n' "${0##*/}" >&2
  exit 2
fi
sluttfil=fullform_$sprak.txt
lemmafil=lemma_$sprak.txt
fullformfil=fullformsliste_$sprak.txt

# Check the inputs up front: without pipefail, "set -e" only sees the exit
# status of the last command in a pipeline, so a failing cut(1) on a missing
# file would otherwise go unnoticed and yield an empty result.
for innfil in "$lemmafil" "$fullformfil"; do
  if [ ! -r "$innfil" ]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$innfil" >&2
    exit 1
  fi
done

printf '%s\n' "$sluttfil" # The file that is about to be generated.

# Temporary files holding the sorted word lists. They live in a private
# directory that is removed on every exit path, including interruption.
tmpdir=$(mktemp -d)
trap 'rm -rf -- "$tmpdir"' EXIT
trap 'exit 1' HUP INT TERM
lemma_tmp=$tmpdir/lemma
fullform_tmp=$tmpdir/fullform

# A tab character, kept in a variable so that no invisible literal tabs
# have to be embedded in the commands below.
tab=$(printf '\t')

# Merge the lemma and full-form files so that the result has the same
# format as the word-list files used to have "in the old days".
# Each input is reduced to the needed columns, its first line is dropped
# (tail -n +2), and it is sorted on the tab-delimited first field, i.e. on
# exactly the key that "join -t TAB" compares.
cut -f 1,2 -d "$tab" "$lemmafil" | tail -n +2 | sort --stable -t "$tab" -k 1,1 > "$lemma_tmp"
cut -f 1-4 -d "$tab" "$fullformfil" | tail -n +2 | sort --stable -t "$tab" -k 1,1 > "$fullform_tmp"

# Join on the first field, then order the result by the second column and,
# within equal second columns, numerically by the first column.
join -t "$tab" "$lemma_tmp" "$fullform_tmp" | sort --stable -t "$tab" -k 2,2 -k 1,1n > "$sluttfil"

# The temporary directory is removed by the EXIT trap set above.
