# -*- coding: utf-8 -*-
# Copyright (C) 2005, 2008, 2009 Nicolas Burrus
# This file is part of Tidydoc.
#
# Tidydoc is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Tidydoc is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Tidydoc; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import re

def latex_to_plain_text(s):
    """Convert a LaTeX-flavoured BibTeX value to plain text.

    All braces are removed, every run of line breaks becomes a single
    space, and accent commands (acute, grave, circumflex, diaeresis and
    tilde applied to a plain letter) are replaced by the accented letter.
    Accent/letter pairs missing from the table are left untouched.
    """
    # TODO: still incomplete -- only symbol accents on plain letters are
    # handled (no cedilla, no dotless i, no ligatures...).
    accented = {
        "'": {"a": "á", "e": "é", "i": "í", "o": "ó", "u": "ú", "y": "ý",
              "A": "Á", "E": "É", "I": "Í", "O": "Ó", "U": "Ú", "Y": "Ý"},
        "`": {"a": "à", "e": "è", "i": "ì", "o": "ò", "u": "ù",
              "A": "À", "E": "È", "I": "Ì", "O": "Ò", "U": "Ù"},
        "^": {"a": "â", "e": "ê", "i": "î", "o": "ô", "u": "û",
              "A": "Â", "E": "Ê", "I": "Î", "O": "Ô", "U": "Û"},
        '"': {"a": "ä", "e": "ë", "i": "ï", "o": "ö", "u": "ü", "y": "ÿ",
              "A": "Ä", "E": "Ë", "I": "Ï", "O": "Ö", "U": "Ü"},
        "~": {"a": "ã", "n": "ñ", "o": "õ",
              "A": "Ã", "N": "Ñ", "O": "Õ"},
    }

    def replace_accent(match):
        # Fall back to the original text when the pair is not in the table.
        letters = accented[match.group(1)]
        return letters.get(match.group(2), match.group(0))

    # Braces go first so that both {\'e} and \'{e} end up as \'e.
    s = re.sub(r"[{}]", "", s)
    s = re.sub(r"[\n\r]+", " ", s)
    return re.sub(r"""\\(['`^"~])([A-Za-z])""", replace_accent, s)

def parse_pages(s):
    """Return the page count of a BibTeX ``pages`` value, as a string.

    Accepts a range written with one or more hyphens ("295--315",
    "295-315") and a single page number ("295", counted as one page).
    Surrounding whitespace is ignored.

    Raises:
        ValueError: if the value is neither a page number nor a range
            of two page numbers.
    """
    match = re.match(r"\s*(\d+)\s*(?:-+\s*(\d+)\s*)?$", s)
    if match is None:
        raise ValueError("unsupported page range: %r" % (s,))
    first, last = match.groups()
    if last is None:
        # A lone page number: the document covers exactly one page.
        return "1"
    # Both bounds are inclusive, hence the +1.
    return str(int(last) - int(first) + 1)

def parse_authors(s):
    """Reformat a BibTeX author list as one "First Last" name per line.

    Authors are separated by the word "and" surrounded by whitespace, so
    names that merely contain those letters (Alexander, Sandra) are kept
    whole.  A "Last, First" name is reordered to "First Last"; a name
    without a comma is kept as written.  Whitespace inside each name is
    collapsed to single spaces.
    """
    names = []
    for author in re.split(r"\s+and\s+", s.strip()):
        lastname, _, firstname = author.partition(",")
        # split()/join collapses inner whitespace and trims both ends.
        name = " ".join((firstname + " " + lastname).split())
        if name:
            names.append(name)
    return "\n".join(names)

def parse_bibtex_entry(entry):
    """Extract a few fields from the text of a single BibTeX entry.

    Only brace-delimited values followed by a line break are recognised.
    Returns a dictionary that may contain the keys "title", "author",
    "pages", "date" (from the ``year`` field) and "conf" (from
    ``journal`` or ``booktitle``; ``booktitle`` wins when both exist).
    Fields missing from the entry are simply absent from the result.
    """
    # (BibTeX field name, key used in the returned dictionary)
    fields = [
        ("title", "title"),
        ("author", "author"),
        ("pages", "pages"),
        ("year", "date"),
        ("journal", "conf"),
        ("booktitle", "conf"),
    ]
    result = {}
    for field, key in fields:
        # \b keeps "title" from matching the tail of "booktitle".
        m = re.search(r"\b%s\s*?=\s*?\{(.*?)\},?[\n\r]+" % field,
                      entry,
                      re.MULTILINE | re.DOTALL)
        if not m:
            continue
        value = latex_to_plain_text(m.group(1))
        if field == "author":
            value = parse_authors(value)
        elif field == "pages":
            value = parse_pages(value)
        result[key] = value
    return result

def main():
    """Parse a sample BibTeX entry as a smoke test; the result is discarded."""
    # Raw string: otherwise Python turns \' into a bare quote and the
    # LaTeX accent command never reaches the parser.
    parse_bibtex_entry(r"""
@article{muse2006contrario,
  title={{An a contrario decision method 
for shape element recognition}},
  author={Mus{\'e}, P. and Sur, F. and Cao, F. and Gousseau, Y. and Morel, J.M.},
  journal={International Journal of Computer Vision},
  volume={69},
  number={3},
  pages={295--315},
  year={2006},
  publisher={Springer}
}
""")

# Run the sample parse only when this file is executed as a script.
if __name__ == "__main__":
    main()
