/* geoloc -- translates a string into a geographic location.
 *
 * Copyright (C) 2002 John Kodis <kodis@jagunet.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#include <geoloc.h>

#include <ctype.h>
#include <fcntl.h>
#include <math.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>

/* Directory prefix for the .loc data files opened by this file.  A
   definition supplied at compile time takes precedence over the "."
   (current directory) fallback below. */
#ifndef PKGDATADIR
#define PKGDATADIR "."
#endif

/* rad2deg -- converts an angle expressed in radians to degrees. */
static double
rad2deg(double radians)
{
  const double half_turns = radians / M_PI;

  return half_turns * 180.0;
}

/* deg2rad -- converts an angle expressed in degrees to radians. */
static double
deg2rad(double degrees)
{
  const double half_turns = degrees / 180.0;

  return half_turns * M_PI;
}

/*
 * skipbl -- skips leading white space.
 *
 * Returns a pointer to the first character of S for which isspace()
 * is false; this is the terminating NUL when S holds only white space.
 */
static char *
skipbl(char *s)
{
  /* The <ctype.h> classifiers require a value representable as
     unsigned char (or EOF); a plain char holding a byte above 0x7f may
     be negative, so convert before the call. */
  while (isspace((unsigned char)*s))
    s++;
  return s;
}

/*
 * trimtb -- trims trailing blanks from STR in place.
 *
 * Trailing ' ' characters are removed.  If SEP is non-zero and the
 * last remaining character equals SEP, that character and any blanks
 * preceding it are removed as well.
 *
 * Returns STR.
 */
static char *
trimtb(char *str, int sep)
{
  size_t len = strlen(str);

  /* Work with a length rather than a pointer so that an empty or
     all-blank string never makes us step to, or read from, the byte
     before STR. */
  while (len > 0 && str[len - 1] == ' ')
    len--;

  if (sep && len > 0 && str[len - 1] == sep)
    {
      len--;
      while (len > 0 && str[len - 1] == ' ')
        len--;
    }

  str[len] = '\0';
  return str;
}

/*
 * str_to_float -- parses an unsigned decimal number from S0.
 *
 * Leading white space is skipped.  Accepted forms are "digits",
 * "digits.", "digits.digits" and ".digits"; there is no sign and no
 * exponent handling here.
 *
 * On success the value is returned and *TAIL points just past the
 * last character consumed.  When no number is present, 0 is returned
 * and *TAIL is set to S0, so callers detect failure by comparing
 * *TAIL with the pointer they passed in.
 */
static double
str_to_float(char *s0, char **tail)
{
  char *s = skipbl(s0);
  double val = 0, dec = 0.1;

  *tail = s0;

  /* Cast to unsigned char: passing a negative plain char to isdigit()
     is undefined behaviour. */
  if (isdigit((unsigned char)*s))
    val = strtoul(s, &s, 10);
  else if (s[0] != '.' || !isdigit((unsigned char)s[1]))
    return val;

  /* Accumulate the fractional digits one decimal place at a time. */
  if (s[0] == '.')
    for (s++; isdigit((unsigned char)*s); s++, dec /= 10)
      val += dec * (*s - '0');

  *tail = s;
  return val;
}

/*
 * geoloc_new -- allocates and initialises a Geoloc.
 *
 * All coordinates and the error code start at zero, all string
 * pointers start as NULL, and the message log starts as an empty
 * heap-allocated string.
 *
 * Returns the new object, to be released with geoloc_free(), or NULL
 * if memory could not be allocated.
 */
Geoloc *
geoloc_new(void)
{
  Geoloc *geo = malloc(sizeof *geo);

  if (geo == NULL)
    return NULL;

  geo->lat = 0;
  geo->lon = 0;
  geo->err = 0;

  geo->str = NULL;
  geo->end = NULL;
  geo->norm = NULL;

  geo->first = NULL;
  geo->last = NULL;
  geo->key = NULL;

  /* geo_msg() appends to this string, so it must exist from the start. */
  geo->msg = strdup("");
  if (geo->msg == NULL)
    {
      free(geo);
      return NULL;
    }

  return geo;
}

/*
 * geoloc_free -- releases a Geoloc and the strings it owns.
 *
 * Frees the str, norm, msg and key members and then the object
 * itself.  The end member is not freed: it is only ever set to a
 * position inside str.  Passing NULL is a no-op.
 */
void
geoloc_free(Geoloc *geo)
{
  if (geo == NULL)
    return;

  /* free(NULL) is defined to do nothing, so no per-member guards. */
  free(geo->str);
  free(geo->norm);
  free(geo->msg);
  free(geo->key);
  free(geo);
}

/*
 * geo_msg -- appends a printf-style formatted message to geo->msg.
 *
 * The text is measured first and then formatted straight into the
 * grown buffer, so messages of any length are appended in full.  If
 * formatting or allocation fails the existing log is left untouched
 * and the new message is dropped.
 */
static void
geo_msg(Geoloc *geo, char *fmt, ...)
{
  int add;
  size_t have;
  char *grown;
  va_list args;

  /* Pass 1: how many characters will the message need? */
  va_start(args, fmt);
  add = vsnprintf(NULL, 0, fmt, args);
  va_end(args);
  if (add < 0)
    return;

  have = geo->msg ? strlen(geo->msg) : 0;

  /* Keep the old pointer until realloc succeeds so a failure cannot
     lose (and leak) the log gathered so far. */
  grown = realloc(geo->msg, have + (size_t)add + 1);
  if (grown == NULL)
    return;
  geo->msg = grown;

  /* Pass 2: format directly after the existing text. */
  va_start(args, fmt);
  vsnprintf(geo->msg + have, (size_t)add + 1, fmt, args);
  va_end(args);
}

/*
 * geo_get_rad -- parses one coordinate, in degrees, at geo->end.
 *
 * LON selects the axis: non-zero parses a longitude (direction
 * characters + - E e W w), zero a latitude (+ - N n S s).  The
 * direction character may come before or after the number; '-', 'S'
 * and 'W' (either case) negate the value.
 *
 * On success the value, converted to radians, is stored in geo->lon
 * or geo->lat, geo->end is advanced past the consumed text, and
 * geo->err is set to and returned as 0.  On failure a message is
 * logged and geo->err is set to and returned as -1; geo->end may
 * then already have moved past a leading direction character, so the
 * caller is expected to restore it.
 */
static int
geo_get_rad(Geoloc *geo, int lon)
{
  double rad;
  char dir = '\0', *tail;
  static const char *nsew[2] = { "+-NnSs", "+-EeWw" };

  lon = !!lon;
  geo->end = skipbl(geo->end);

  /* strchr() also "finds" the terminating NUL of the set, so the end
     of input must be excluded explicitly; otherwise geo->end would be
     advanced past the end of the string. */
  if (*geo->end && strchr(nsew[lon], *geo->end))
    dir = *geo->end++;

  rad = deg2rad(str_to_float(geo->end, &tail));
  if (tail == geo->end)
    {
      geo_msg(geo, lon ?
        "E, W, +, -, or digit expected\n" :
        "N, S, +, -, or digit expected\n" );
      return geo->err = -1;
    }
  geo->end = tail;

  /* No leading direction: accept one immediately after the number. */
  if (!dir && *geo->end && strchr(nsew[lon], *geo->end))
    dir = *geo->end++;

  if (dir && strchr("-swSW", dir))
    rad = -rad;
  if (lon)
    geo->lon = rad;
  else
    geo->lat = rad;
  return geo->err = 0;
}

/*
 * geo_get_lat_lon -- parses a "latitude[,]longitude" pair at geo->end.
 *
 * The latitude is read first, an optional comma is skipped, and the
 * longitude follows.  Progress is logged through geo_msg().  If either
 * coordinate fails to parse, geo->end is put back where it was on
 * entry.  Returns geo->err as left by geo_get_rad().
 */
static int
geo_get_lat_lon(Geoloc *geo)
{
  char *const start = geo->end;
  int axis;

  /* axis 0 is the latitude, axis 1 the longitude */
  for (axis = 0; axis < 2; axis++)
    {
      geo_msg(geo, axis
        ? "    Looking for longitude... "
        : "    Looking for latitude... ");

      if (geo_get_rad(geo, axis) != 0)
        {
          geo->end = start;
          return geo->err;
        }

      geo_msg(geo, "found %+.3f\n", rad2deg(axis ? geo->lon : geo->lat));

      /* an optional comma may separate the two coordinates */
      if (axis == 0 && *geo->end == ',')
        geo->end++;
    }

  return geo->err;
}

/*
 * str_cmp -- case-insensitive prefix comparison used by the searches.
 *
 * Compares all of VKEY against the same number of leading characters
 * of VSTR.  Returns zero when VSTR begins with VKEY (ignoring case),
 * otherwise a negative or positive value as strncasecmp() does.
 */
static int
str_cmp(const void *vkey, const void *vstr)
{
  const char *wanted = vkey;
  const size_t prefix_len = strlen(wanted);

  return strncasecmp(wanted, (const char *)vstr, prefix_len);
}

/*
 * prev_bol -- finds a beginning of line at or before NOW.
 *
 * The search starts two bytes before NOW and moves backwards to the
 * nearest newline, returning the position just after it.  Called with
 * NOW at the start of a line it therefore returns the start of the
 * preceding line.  The result is never below FIRST, the start of the
 * buffer: FIRST itself is returned when no newline is found, or when
 * NOW is fewer than two bytes into the buffer.
 */
static const char *
prev_bol(const char *now, const char *first)
{
  const char *p;

  /* Too close to the start to step back two bytes: clamp instead of
     forming a pointer in front of the buffer. */
  if (now - first < 2)
    return first;

  p = now - 2;
  while (p > first && *p != '\n')
    p--;

  /* Either p rests on a newline (the line starts right after it), or
     the start of the buffer was reached without finding one. */
  return *p == '\n' ? p + 1 : first;
}

/*
 * next_bol -- finds the beginning of the line after NOW.
 *
 * Scans forward from NOW for a newline, never looking at LAST or
 * beyond, and returns the position one past where the scan stopped.
 * When no newline lies before LAST the result is LAST + 1.
 */
static const char *
next_bol(const char *now, const char *last)
{
  const char *p = now;

  while (p < last)
    {
      if (*p == '\n')
        break;
      p++;
    }

  return p + 1;
}

/*
 * vsearch -- binary search over a buffer of newline-separated text
 * lines of varying length.
 *
 * key     passed unchanged as the first argument of cmp_fn.
 * base    start of the buffer to search.
 * len     size of the buffer in bytes.
 * cmp_fn  called as cmp_fn(key, start_of_line).  A negative result
 *         moves the search to earlier lines, a positive result to
 *         later lines, and zero ends the search.
 *
 * Returns a pointer to the start of a line for which cmp_fn returned
 * zero, or NULL if the range is exhausted first.  When several lines
 * compare equal, the one the probe happens to land on is returned,
 * which need not be the first of them.
 *
 * NOTE(review): this presumably requires the lines to be ordered
 * consistently with cmp_fn -- confirm against how the data files are
 * generated.
 */
static void *
vsearch(const void *key, const void *base, size_t len,
  int (*cmp_fn)(const void *key, const void *str))
{
  const char *first, *last, *lo, *hi;

  /* first/last are the fixed ends of the buffer (last is its final
     byte); lo/hi are the ends of the range still being searched. */
  lo = first = base;
  hi = last = first + len - 1;

  while (lo < hi)
    {
      int cmp;
      ptrdiff_t span = hi - lo;
      const char *bol, *mid = lo + span / 2;

      /* Back up from the midpoint byte to the start of its line. */
      bol = prev_bol(mid + 1, first); 
      cmp = cmp_fn(key, bol);

      if (cmp == 0)
	return (void *)bol;
      else if (cmp < 0)
	/* Continue in the part of the range before this line. */
	hi = bol - 1;
      else if (cmp > 0)
	/* Continue from the start of the following line. */
	lo = next_bol(mid, last);
    }

  return NULL;
}

/*
 * vsearch_map -- maps a data file and runs a callback-driven search.
 *
 * PATH is opened read-only and mapped into memory.  FN is then called
 * repeatedly as fn(geo, pass, rec), where PASS counts up from 0 and REC
 * is the result of the previous search (NULL before the first search
 * and after an unsuccessful one).  While FN returns non-zero, geo->key
 * is looked up with vsearch() and the result is handed to the next
 * call.  FN is responsible for setting geo->key and, on a hit, for
 * filling in GEO.
 *
 * FN is declared without a prototype because the callbacks in this
 * file differ in the constness of their REC parameter.
 *
 * geo->first and geo->last bracket the mapping while FN runs and are
 * reset to NULL before returning, because the mapping is released.
 *
 * Returns -1 if the file cannot be opened or mapped (this includes an
 * empty file), otherwise geo->err.
 */
static int
vsearch_map(char *path, int (*fn)(), Geoloc *geo)
{
  int fd, pass = 0;
  char *map = MAP_FAILED, *rec = NULL;
  struct stat st;

  geo_msg(geo, "    Opening %s... ", path);
  if ((fd = open(path, O_RDONLY)) >= 0)
    {
      /* A zero-length mapping is invalid, so treat an empty file as a
         failure rather than handing mmap() a size of 0. */
      if (fstat(fd, &st) == 0 && st.st_size > 0)
        map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);

      /* The mapping stays valid after the descriptor is closed;
         closing here also covers the failure path. */
      close(fd);
    }

  /* mmap() reports failure with MAP_FAILED, not NULL. */
  geo_msg(geo, map != MAP_FAILED ? "succeded.\n" : "failed.\n");
  if (map == MAP_FAILED)
    return -1;

  geo->first = map;
  geo->last = map + st.st_size - 1;

  while (fn(geo, pass++, rec))
    {
      geo_msg(geo, "    Trying %s... ", geo->key);
      rec = vsearch(geo->key, map, st.st_size, str_cmp);
      geo_msg(geo, "%sfound.\n", rec ? "" : "not ");
    }

  /* The callbacks copy what they need out of the mapping, so release
     it and drop the pointers that would otherwise dangle. */
  geo->first = NULL;
  geo->last = NULL;
  munmap(map, st.st_size);

  return geo->err;
}

/*
 * geo_get_callsign -- searches the callsigns.loc file generated by the
 * station-info-call-loc program for a callsign.
 *
 * callsign_fn is the vsearch_map() callback that drives the search:
 *
 *   pass 0            sets the key to "<input>:" and asks for a search.
 *   pass 1, no match  retries with the key "<input>-".
 *   otherwise         stops; if a record was found it is parsed as
 *                     "name:lat:lon" with both angles in degrees.
 *
 * On a successfully parsed record geo->norm receives the name,
 * geo->lat and geo->lon the position in radians, geo->end moves to the
 * end of the input, and geo->err is cleared.
 *
 * Returns non-zero to request another search, zero to stop.
 */
static int
callsign_fn(Geoloc *geo, int pass, char *rec)
{
  if (pass == 0)
    {
      free(geo->key);
      /* room for the input, one suffix character and the NUL */
      geo->key = malloc(strlen(geo->end) + 2);
      if (geo->key == NULL)
        return 0;
      sprintf(geo->key, "%s:", geo->end);
      return 1;
    }

  if (pass == 1 && rec == NULL)
    {
      /* same length as the pass 0 key, so the buffer is reused */
      sprintf(geo->key, "%s-", geo->end);
      return 1;
    }

  if (rec == NULL)
    return 0;

  /* Replace any name left over from an earlier lookup instead of
     leaking it.  The buffer is sized to the record's first field, so
     the unbounded %[^:] below cannot overrun it. */
  free(geo->norm);
  geo->norm = malloc(strcspn(rec, ":") + 1);
  if (geo->norm == NULL)
    return 0;

  if (sscanf(rec, "%[^:]:%lf:%lf", geo->norm, &geo->lat, &geo->lon) == 3)
    {
      geo->lat = deg2rad(geo->lat);
      geo->lon = deg2rad(geo->lon);
      geo->end += strlen(geo->end);
      geo->err = 0;
    }
  return 0;
}

/* geo_get_callsign -- looks the input up in the callsign data file,
   with callsign_fn steering the search.  Returns the result of
   vsearch_map(). */
static int
geo_get_callsign(Geoloc *geo)
{
  static char callsign_path[] = PKGDATADIR "/callsigns.loc";

  return vsearch_map(callsign_path, callsign_fn, geo);
}

/*
 * geo_get_zipcode -- gets the lat and lon for a zip code.  Uses the
 * data from the US Census zip code tabulation area file obtained from
 * http://www.census.gov/ftp/pub/tiger/tms/gazetteer/zcta5.zip using
 *     gzip -dc zcta5.zip | cut -c 3-7,137-156 |
 *       awk '{printf "%s:%.3f:%.3f\n", $1,$2,$3}' |
 *       sort > zipcodes.loc
 *
 * zipcode_fn is the vsearch_map() callback that drives the search:
 *
 *   pass 0            requires exactly five leading digits and uses
 *                     them as the key; otherwise logs a message and
 *                     stops.
 *   pass 1, no match  retries with the last two digits replaced by "XX".
 *   pass 2, no match  retries with the last two digits replaced by "HH".
 *   otherwise         stops; if a record was found it is parsed as
 *                     "zip:lat:lon" with both angles in degrees.
 *
 * NOTE(review): the "XX" and "HH" keys presumably correspond to
 * three-digit entries present in the census data -- confirm against
 * the generated zipcodes.loc.
 *
 * On a successfully parsed record geo->norm receives the matched
 * code, geo->lat and geo->lon the position in radians, geo->end moves
 * past the five digits (and past a following "-dddd" suffix when
 * present), and geo->err is cleared.
 *
 * Returns non-zero to request another search, zero to stop.
 */
static int
zipcode_fn(Geoloc *geo, int pass, char *rec)
{
  static const char *digits = "0123456789";

  if (pass == 0)
    {
      if (strspn(geo->end, digits) != 5)
        {
          geo_msg(geo, "    Expecting 5 digits.  Not found.\n");
          return 0;
        }

      free(geo->key);
      geo->key = strdup(geo->end);
      if (geo->key == NULL)
        return 0;
      /* keep just the five digits */
      geo->key[5] = '\0';
      return 1;
    }

  if (pass == 1 && rec == NULL)
    {
      strcpy(geo->key + 3, "XX");
      return 1;
    }

  if (pass == 2 && rec == NULL)
    {
      strcpy(geo->key + 3, "HH");
      return 1;
    }

  if (rec == NULL)
    return 0;

  /* Replace any name left over from an earlier lookup instead of
     leaking it.  The buffer is sized to the record's first field, so
     the unbounded %[^:] below cannot overrun it. */
  free(geo->norm);
  geo->norm = malloc(strcspn(rec, ":") + 1);
  if (geo->norm == NULL)
    return 0;

  if (sscanf(rec, "%[^:]:%lf:%lf", geo->norm, &geo->lat, &geo->lon) == 3)
    {
      geo->lat = deg2rad(geo->lat);
      geo->lon = deg2rad(geo->lon);
      geo->end += 5;
      /* also consume a "-dddd" extension */
      if (*geo->end == '-' && strspn(geo->end + 1, digits) == 4)
        geo->end += 5;
      geo->err = 0;
    }

  return 0;
}

/* geo_get_zipcode -- looks the input up in the zip code data file,
   with zipcode_fn steering the search.  Returns the result of
   vsearch_map(). */
static int
geo_get_zipcode(Geoloc *geo)
{
  static char zipcode_path[] = PKGDATADIR "/zipcodes.loc";

  return vsearch_map(zipcode_path, zipcode_fn, geo);
}

/*
 * state_names -- table of state codes and names, stored as a single
 * string of ':'-terminated entries.  Each two-letter code is followed
 * by the full name and then by any alternative abbreviations for that
 * same state.  abbreviate_state() relies on this order: it treats the
 * most recent two-character entry as the code for the entries after
 * it.  place_fn() relies on the full name being the entry directly
 * after the code.
 */
static char *state_names =
"AL:Alabama:AK:Alaska:AZ:Arizona:AR:Arkansas:CA:California:Cal:Calif:"
"CO:Colorado:Colo:CT:Connecticut:Conn:DE:Delaware:Del:FL:Florida:GA:Georgia:"
"HI:Hawaii:ID:Idaho:IL:Illinois:Ill:IN:Indiana:Ind:IA:Iowa:KS:Kansas:Kan:"
"KY:Kentucky:Ken:LA:Louisiana:Lou:ME:Maine:MD:Maryland:MA:Massachusetts:Mass:"
"MI:Michigan:Mich:MN:Minnesota:Minn:MS:Mississippi:Miss:MO:Missouri:"
"MT:Montana:NE:Nebraska:Neb:NV:Nevada:Nev:NH:New Hampshire:NJ:New Jersey:"
"NM:New Mexico:NY:New York:NC:North Carolina:ND:North Dakota:OH:Ohio:"
"OK:Oklahoma:OR:Oregon:Ore:PA:Pennsylvania:Penn:RI:Rhode Island:"
"SC:South Carolina:SD:South Dakota:TN:Tennessee:Tenn:TX:Texas:UT:Utah:"
"VT:Vermont:VA:Virginia:WA:Washington:Wash:WV:West Virginia:WI:Wisconsin:"
"WY:Wyoming:DC:District of Columbia:PR:Puerto Rico:";

/*
 * abbreviate_state -- finds a trailing state name in a search key.
 *
 * KEY is expected to end in "<state>:", where <state> is any code,
 * name or abbreviation listed in state_names, compared without regard
 * to case, and is preceded by a space or a comma.
 *
 * Returns the offset within KEY at which the state text starts, or 0
 * if no state is recognised.  On a match, and if STATE is not NULL,
 * *STATE is set to point at the corresponding two-letter code inside
 * state_names (the code is followed there by ':', not by a NUL).
 */
static int
abbreviate_state(char *key, char **state)
{
  int key_len, name_len, state_off;
  char *name, *code = "";

  key_len = strlen(key);
  name = state_names;

  while (*name)
    {
      name_len = strcspn(name, ":");

      /* A two-character entry is a state code; the entries after it,
         up to the next code, name the same state. */
      if (name_len == 2)
        code = name;

      /* Where the entry, plus the ':' that ends KEY, would have to
         start in order to be KEY's tail. */
      state_off = key_len - name_len - 1;

      /* state_off must be at least 1: the character in front of the
         state is inspected below, and a return value of 0 means "not
         found" to the caller anyway. */
      if (state_off > 0
          && (key[state_off - 1] == ' ' || key[state_off - 1] == ',')
          && strncasecmp(name, key + state_off, name_len + 1) == 0)
        {
          if (state)
            *state = code;
          return state_off;
        }

      name += name_len + 1;
    }

  return 0;
}

/*
 * geo_get_place -- gets the location of a US city based on data from
 * the US Census Bureau's gazetteer of US place names.
 *
 * Records are parsed as "city:ST:population:lat:lon", with both
 * angles in degrees.
 */

/*
 * place_most_populous -- picks the most populous record among those
 * that match geo->key.
 *
 * REC is a matching record found by the search.  The matching records
 * directly before and after it are examined as well, and the one with
 * the largest population field is returned; REC wins ties.  Records
 * whose leading fields cannot be parsed are never chosen over REC.
 */
static const char *
place_most_populous(Geoloc *geo, const char *rec)
{
  int pop, max_pop = 0;
  char city[50], state[4];
  const char *r, *max_rec = rec;

  /* If this fails to parse, max_pop keeps its initial 0. */
  sscanf(rec, "%49[^:]:%2s:%d", city, state, &max_pop);

  /* Walk backwards through the matching records before REC. */
  for (r = rec; r > geo->first; )
    {
      r = prev_bol(r, geo->first);
      if (str_cmp(geo->key, r) != 0)
        break;

      if (sscanf(r, "%49[^:]:%2s:%d", city, state, &pop) == 3
          && max_pop < pop)
        {
          max_pop = pop;
          max_rec = r;
        }
    }

  /* Walk forwards through the matching records after REC. */
  for (r = rec; r < geo->last; )
    {
      r = next_bol(r, geo->last);
      /* next_bol() returns one past the end of the data after the
         final line; there is nothing there to compare. */
      if (r > geo->last || str_cmp(geo->key, r) != 0)
        break;

      if (sscanf(r, "%49[^:]:%2s:%d", city, state, &pop) == 3
          && max_pop < pop)
        {
          max_pop = pop;
          max_rec = r;
        }
    }

  return max_rec;
}

/*
 * place_fn -- vsearch_map() callback that drives the place search:
 *
 *   pass 0            sets the key to "<input>:" and asks for a search.
 *   pass 1, no match  if the input ends in a recognised state, rewrites
 *                     the key to "<city>:<ST>:"; asks for another
 *                     search either way.
 *   pass 1, match     the city was given without a state, so the most
 *                     populous place of that name is chosen.
 *   otherwise         stops; a found record is parsed.
 *
 * On a successfully parsed record geo->lat and geo->lon receive the
 * position in radians, geo->end moves to the end of the input,
 * geo->norm becomes "City, State (population)" (or is left NULL if
 * memory runs out), and geo->err is cleared.  The state is shown by
 * its full name when the record's code is listed in state_names, and
 * as the code from the record otherwise.
 *
 * Returns non-zero to request another search, zero to stop.
 */
static int
place_fn(Geoloc *geo, int pass, const char *rec)
{
  int pop;
  char city[50], state[4];

  if (pass == 0)
    {
      free(geo->key);
      /* The slack leaves room for pass 1 to rewrite the state part of
         the key in place. */
      geo->key = malloc(strlen(geo->end) + 50);
      if (geo->key == NULL)
        return 0;
      sprintf(geo->key, "%s:", geo->end);
      return 1;
    }

  if (pass == 1 && rec == NULL)
    {
      char *code;
      int state_off = abbreviate_state(geo->key, &code);

      if (state_off)
        {
          /* Cut the state text off, drop the separator in front of
             it, and append ":ST:".  CODE points into state_names, so
             its first three characters are the code and its ':'. */
          geo->key[state_off] = '\0';
          trimtb(geo->key, ',');
          sprintf(geo->key + strlen(geo->key), ":%.3s", code);
        }
      return 1;
    }

  if (rec == NULL)
    return 0;

  if (pass == 1)
    rec = place_most_populous(geo, rec);

  /* The field widths keep overlong fields from overrunning city[] and
     state[]; such a record simply fails to parse. */
  if (sscanf(rec, "%49[^:]:%2s:%d:%lf:%lf",
    city, state, &pop, &geo->lat, &geo->lon) == 5)
    {
      char code[4];
      const char *name = state;
      int name_len = (int)strlen(state);
      int need;

      geo->lat = deg2rad(geo->lat);
      geo->lon = deg2rad(geo->lon);
      geo->end += strlen(geo->end);

      /* Find the full state name: it is the entry directly after
         "ST:" in state_names.  Codes not in the table are shown as
         they appear in the record. */
      if (name_len == 2)
        {
          const char *entry;

          snprintf(code, sizeof code, "%s:", state);
          entry = strstr(state_names, code);
          if (entry != NULL)
            {
              name = entry + 3;
              name_len = (int)strcspn(name, ":");
            }
        }

      /* Measure first, so the buffer fits whatever the population's
         width turns out to be. */
      need = snprintf(NULL, 0, "%s, %.*s (%d)", city, name_len, name, pop);
      free(geo->norm);
      geo->norm = need < 0 ? NULL : malloc((size_t)need + 1);
      if (geo->norm != NULL)
        snprintf(geo->norm, (size_t)need + 1,
          "%s, %.*s (%d)", city, name_len, name, pop);

      geo->err = 0;
    }

  return 0;
}

/* geo_get_place -- looks the input up in the place-name data file,
   with place_fn steering the search.  Returns the result of
   vsearch_map(). */
static int
geo_get_place(Geoloc *geo)
{
  static char places_path[] = PKGDATADIR "/places2k.loc";

  return vsearch_map(places_path, place_fn, geo);
}

/*
 * geoloc_from_str -- translates a location string into a position.
 *
 * STR is tried, in order, as a lat,lon pair, a zip code, a station
 * callsign and a US city, stopping at the first interpretation that
 * succeeds.  A private copy of STR is kept in geo->str; geo->end is
 * left pointing at whatever part of the copy was not consumed, and
 * that remainder is noted in the message log.  Progress and failure
 * messages are appended to geo->msg.
 *
 * Returns 1 if a location was found (geo->err is then 0) and 0
 * otherwise (geo->err is then -1).  A missing or blank STR is
 * reported the same way as any other failure.
 */
int
geoloc_from_str(Geoloc *geo, char *str)
{
  static const struct { int (*geo_fn)(Geoloc *); char *desc; } fn_desc[] =
    {
      { geo_get_lat_lon,  "a lat,lon pair" },
      { geo_get_zipcode,  "a zip code" },
      { geo_get_callsign, "a station callsign" },
      { geo_get_place,    "a US city" },
    };
  size_t i;
  char *copy;

  if (str == NULL || *skipbl(str) == '\0')
    {
      geo_msg(geo, "Missing location string\n");
      /* Return 0 like every other failure: -1 would read as success
         to a caller testing the result as a boolean. */
      geo->err = -1;
      return 0;
    }

  /* Copy before releasing the previous string, in case STR points
     into it. */
  copy = strdup(str);
  if (copy == NULL)
    {
      geo_msg(geo, "Out of memory\n");
      geo->err = -1;
      return 0;
    }
  free(geo->str);
  geo->str = copy;
  geo->end = skipbl(geo->str);
  geo->err = -1;

  /* Each parser clears geo->err on success, which ends the loop. */
  for (i = 0; i < sizeof fn_desc / sizeof fn_desc[0] && geo->err; i++)
    {
      geo_msg(geo, "  Looking for %s...\n", fn_desc[i].desc);
      fn_desc[i].geo_fn(geo);
    }

  if (*skipbl(geo->end))
    geo_msg(geo, "Remaining: %s\n", geo->end);
  return geo->err == 0;
}

#ifdef MAIN
/*
 * main -- command-line test driver.
 *
 * Joins the arguments into one space-separated string, geolocates it,
 * and prints the message log followed by either the failure code or
 * the position found.  Exits with EXIT_FAILURE only if memory runs
 * out; a failed lookup is reported on stdout and still exits with
 * EXIT_SUCCESS.
 */
int
main(int argc, char**argv)
{
  int argn;
  char *buf = strdup("");   /* owns the joined argument string */
  char *arg;                /* trimmed view into buf */
  Geoloc *geo = geoloc_new();

  if (buf == NULL || geo == NULL)
    goto no_memory;

  for (argn = 1; argn < argc; argn++)
    {
      size_t used = strlen(buf);
      /* room for a separating space and the NUL */
      char *grown = realloc(buf, used + strlen(argv[argn]) + 2);

      if (grown == NULL)
        goto no_memory;
      buf = grown;
      sprintf(buf + used, " %s", argv[argn]);
    }

  /* Keep buf itself so that it can be freed; arg may point past
     leading blanks. */
  arg = skipbl(trimtb(buf, 0));

  printf("Geolocating %s...\n", arg);
  geoloc_from_str(geo, arg);
  printf("%s", geo->msg);
  if (geo->err)
    printf("FAILURE: %d\n", geo->err);
  else
    {
      printf("Found %s ", arg);
      if (geo->norm)
        printf("(%s) ", geo->norm);
      printf("at %+.3f,%+.3f\n", rad2deg(geo->lat), rad2deg(geo->lon));
    }

  geoloc_free(geo);
  free(buf);
  return EXIT_SUCCESS;

no_memory:
  fprintf(stderr, "geoloc: out of memory\n");
  if (geo != NULL)
    geoloc_free(geo);
  free(buf);
  return EXIT_FAILURE;
}
#endif
/*
 * Local Variables:
 * compile-command: "gcc -Wall -g -DMAIN -I. -o geoloc geoloc.c"
 * End:
 */
