# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Mobius Forensic Toolkit
# Copyright (C) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020 Eduardo Aguiar
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
import pymobius.ant.visited_urls
import mobius
import datetime
import os.path

# Identification of this ant (id, name and version). The values are copied
# into every Ant instance and passed to item.set_ant () when data is saved.
ANT_ID = 'text-searches'
ANT_NAME = 'Text Searches'
ANT_VERSION = '1.0'

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Load URL search data from url_text_search file
# WWW_QUERIES: (Host, path) -> query variables ('*' = any host)
# WWW_PATHS: Host -> [(start1, end1)...(startn,endn)]
# WWW_FRAGMENTS: (Host, path) -> fragment variables
#
# File layout, as parsed below:
#   . a line starting with '[' selects the current group (the text between
#     the first and the last character of the line)
#   . every other non-empty line is a TAB separated record:
#       [query]     host <TAB> path <TAB> variable
#       [path]      host <TAB> start [<TAB> end]
#       [fragment]  host <TAB> path <TAB> variable
#   . records belonging to any other group are ignored
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
WWW_QUERIES = {}
WWW_PATHS = {}
WWW_FRAGMENTS = {}

app = mobius.core.application ()
path = app.get_data_path ('data/url_text_search.txt')

if os.path.exists (path):
  group = None

  # 'with' closes the file even if an error is raised while parsing
  with open (path) as fp:
    for line in fp:

      # rstrip also drops a trailing TAB, so a [path] record with an empty
      # end marker arrives here with only two fields
      line = line.rstrip ()

      if line.startswith ('['):
        group = line[1:-1]
        continue

      if not line:
        continue

      fields = line.split ('\t')

      if group in ('query', 'fragment') and len (fields) == 3:
        # local names deliberately differ from the module-level 'path',
        # which must keep pointing to the data file
        url_host, url_path, var = fields
        table = WWW_QUERIES if group == 'query' else WWW_FRAGMENTS
        table.setdefault ((url_host, url_path), []).append (var)

      elif group == 'path' and len (fields) >= 2:
        url_host = fields[0]
        start = fields[1]
        end = fields[2] if len (fields) > 2 else ''
        WWW_PATHS.setdefault (url_host, []).append ((start, end))

      elif group in ('query', 'path', 'fragment'):
        # a single malformed record must not make the module unimportable
        mobius.core.logf ('DEV ant.text_search (malformed line): ' + line)

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Text Search class
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
class TextSearch (object):

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Initialize object
  #
  # Every attribute starts as None, so code reading a record never hits an
  # AttributeError for a field the producer had no value for (username, for
  # instance, is only known when the visited URL has a profile).
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  def __init__ (self):
    self.timestamp = None       # when the search was made
    self.type = None            # search type, e.g. 'web/' + host
    self.text = None            # searched text
    self.username = None        # user name, when available
    self.metadata = None        # extra key/value data

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Parse URL query
# @param q Query
# @return var/value dict
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
def parse_query (q):
  # Split 'name=value&name=value...' into a dict. Only '+' is translated
  # (to a space); values are otherwise kept exactly as received. When a name
  # appears more than once, the last occurrence wins.
  values = {}

  for pair in q.split ('&'):
    # partition never raises: 'sep' is empty when the pair has no '='
    name, sep, value = pair.partition ('=')

    # pairs without '=' carry no value and are ignored
    if sep:
      values[name] = value.replace ('+', ' ')

  return values

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# @brief Ant: Text Search
# @author Eduardo Aguiar
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
class Ant (object):

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Initialize object
  # @param item Item to be processed
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  def __init__ (self, item):
    self.id = ANT_ID
    self.name = ANT_NAME
    self.version = ANT_VERSION
    self.__item = item

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Run ant
  #
  # Does nothing when the item has no datasource. Otherwise collects the
  # text searches, exposes them as self.entries and saves them into the item.
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  def run (self):
    if not self.__item.datasource:
      return

    self.__entries = []
    self.__retrieve_browser_history ()

    self.entries = self.__entries
    self.__save_data ()

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Retrieve data from browser history
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  def __retrieve_browser_history (self):

    # run ant.visited-urls, if necessary
    if not self.__item.has_ant ('visited-urls'):
      ant = pymobius.ant.visited_urls.Ant (self.__item)
      ant.run ()

    # retrieve data
    for h in self.__item.get_visited_urls ():
      uri = mobius.io.uri (h.url)
      host = uri.get_host ()

      # extractors are tried in order: fragment, then query, then path
      text = (self.__retrieve_browser_history_fragment (h) or
              self.__retrieve_browser_history_query (h) or
              self.__retrieve_browser_history_path (h))

      if text:
        ts = TextSearch ()
        ts.timestamp = h.timestamp
        ts.type = 'web/' + host
        ts.text = text

        # always defined, so __save_data can read it when there is no profile
        ts.username = None

        ts.metadata = mobius.pod.map ()
        ts.metadata.set ('url', h.url)
        ts.metadata.set ('host', host)

        profile = h.profile
        if profile:
          ts.username = profile.username
          ts.metadata.set ('profile-path', profile.path)

          app = profile.application
          if app:
            ts.metadata.set ('application-id', app.id)
            ts.metadata.set ('application-name', app.name)

        self.__entries.append (ts)

      else:
        # log URLs that look like searches but matched no known rule
        lowered = h.url.lower ()

        if 'busca' in lowered or 'search' in lowered:
          mobius.core.logf ('DEV ant.text_search (URL): ' + h.url)

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Extract searched text from an URL path
  # @param path URL path
  # @param rules Sequence of (start, end) pairs, as stored in WWW_PATHS
  # @return Text found by the first rule that yields a non-empty text, with
  #         '+' replaced by spaces, or '' if no rule matches
  #
  # A rule with a non-empty end takes the text between start and end. A rule
  # with an empty end takes the text from start up to the next '/', or up to
  # the end of the path when there is none.
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  @staticmethod
  def __extract_path_text (path, rules):
    for start, end in rules:
      if not path.startswith (start):
        continue

      text = ''

      if end:
        if path.endswith (end):
          text = path[len (start):-len (end)]

      else:
        pos = path.find ('/', len (start))

        if pos == -1:
          text = path[len (start):]
        else:
          text = path[len (start):pos]

      if text:
        return text.replace ('+', ' ')

    return ''

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Look up a search variable in an encoded 'name=value&...' string
  # @param table Either WWW_QUERIES or WWW_FRAGMENTS
  # @param host URL host
  # @param path URL path
  # @param encoded URL query or URL fragment
  # @return First non-empty value among the variables registered for
  #         (host, path), falling back to ('*', path), or '' if none
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  @staticmethod
  def __lookup_variable (table, host, path, encoded):
    if not encoded:
      return ''

    names = table.get ((host, path)) or table.get (('*', path)) or []

    if not names:
      return ''

    values = parse_query (encoded)

    for name in names:
      value = values.get (name)

      if value:
        return value

    return ''

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Retrieve data from browser history using subpath
  # @param h Visited URL entry
  # @return Searched text or '' if none
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  def __retrieve_browser_history_path (self, h):
    try:
      uri = mobius.io.uri (h.url)
      rules = WWW_PATHS.get (uri.get_host (), [])
      return self.__extract_path_text (uri.get_path (), rules)

    except Exception:
      # best effort: a failing URL is logged and skipped, never fatal
      mobius.core.logf ('DEV ant.text_search (path error): ' + h.url)

    return ''

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Retrieve data from browser history using query variables
  # @param h Visited URL entry
  # @return Searched text or '' if none
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  def __retrieve_browser_history_query (self, h):
    try:
      uri = mobius.io.uri (h.url)
      return self.__lookup_variable (WWW_QUERIES, uri.get_host (), uri.get_path (), uri.get_query ())

    except Exception:
      # best effort: a failing URL is logged and skipped, never fatal
      mobius.core.logf ('DEV ant.text_search (query error): ' + h.url)

    return ''

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Retrieve data from browser history using fragment variables
  # @param h Visited URL entry
  # @return Searched text or '' if none
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  def __retrieve_browser_history_fragment (self, h):
    try:
      uri = mobius.io.uri (h.url)
      return self.__lookup_variable (WWW_FRAGMENTS, uri.get_host (), uri.get_path (), uri.get_fragment ())

    except Exception:
      # best effort: a failing URL is logged and skipped, never fatal
      mobius.core.logf ('DEV ant.text_search (fragment error): ' + h.url)

    return ''

  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  # @brief Save data into model
  # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  def __save_data (self):
    case = self.__item.case
    transaction = case.new_transaction ()

    # remove old data
    self.__item.remove_text_searches ()

    # save text searches
    for ts in self.__entries:
      text_search = self.__item.new_text_search (ts.timestamp, ts.type, ts.text)

      # username is only known when the visited URL had a profile
      if ts.username is not None:
        text_search.username = ts.username

      text_search.metadata = ts.metadata

    # set ant run
    self.__item.set_ant (ANT_ID, ANT_NAME, ANT_VERSION)
    transaction.commit ()
