-- local ArtCiteStats = ArtCiteStats
-- local Scorelimit = Scorelimit
-- local MaxXposts = MaxXposts
-- local MaxAge = MaxAge
-- local MaxBytes = MaxBytes
-- local FilterSettings = FilterSettings
-- local KeywordSearches = KeywordSearches
-- local ArchiveMatchers = ArchiveMatchers
-- local ScoredMatches = ScoredMatches
--
-- local fn_headertable_hooks = fn_headertable_hooks
-- local fn_headertxt_hooks = fn_headertxt_hooks
-- local fn_bodytxt_hooks = fn_bodytxt_hooks
-- local fn_headertxt_bodytxt_hooks = fn_headertxt_bodytxt_hooks

-- Lua 5.1 style module declaration: every global assigned below is stored
-- in this module's table, and package.seeall makes already existing
-- globals (user settings, the SCRIPT_*/LNLOG_* constants) readable from it.
module(..., package.seeall)

--[[-------------------------------------------------------------------

lua hooks

testing your filters and settings:

run >>lua<<
enter >>chunk, err = loadfile("scripthooks.lua")<<
enter >>= err<<.  if it says "nil", you go, like, yaknow,
enter >>chunk()<<.  look for error messages, debug prints etc.
note the function-call parenthesis to call the loaded chunk!
(commence testing by calling functions, checking results etc.)
else you probably got the filename of the hooks wrong 8-)

a one line alternative is, from the shell command line:

$ lua -e 'c,e=loadfile("scripthooks.lua"); print(tostring(e))'

if this outputs "nil", you can safely use the file, if any errors are
mentioned, correct them. lua gives very informative error messages, it
should be easy.  to copy it over into the configuration directory,
i use:  >>rsync -avu scripthooks.lua /etc/leafnode/scripthooks.lua<<.

activate debug prints by setting >>s_debug = true<< or
>>s_deb = assert<<.

-------------------------------------------------------------------]]--

--[[--------------------------------------------------------------------

constants you can use in the return table, they are predefined. you
cannot use anything else!

SCRIPT_NOERROR
SCRIPT_REJECT
SCRIPT_NOCHANGE
SCRIPT_NOCHANGE_HEADER
SCRIPT_NOCHANGE_BODY
SCRIPT_UNAVAILABLE
SCRIPT_IGNORE_ARTICLE
SCRIPT_IGNORE_GROUP

SCRIPT_NOCHANGE_BODY, SCRIPT_NOCHANGE_HEADER and SCRIPT_NOCHANGE are
currently treated the same, but this may change in the future.

constants to be used in ln_log(severity, context, message_string):
severities: LNLOG_SCRIT, LNLOG_SERR, LNLOG_SWARNING, LNLOG_SNOTICE,
LNLOG_SINFO, LNLOG_SDEBUG, LNLOG_SMIN contexts: LNLOG_CTOP,
LNLOG_CSERVER, LNLOG_CGROUP, LNLOG_CARTICLE, LNLOG_CALL

if the names are too long for your taste, just define shorter ones:

--------------------------------------------------------------------]]--

--[[--------------------------------------------------------------------
-- defaults in case user doesn't specify them.  most settings use the
-- "x = x or default" idiom: a value that is already set (and is neither
-- nil nor false) is kept, otherwise the default given here is used.
-------------------------------------------------------------------]]--

-- names of the spam group, the reject group and the archive group prefix
spam_ngs = spam_ngs or "local.spam"
reject_ngs = reject_ngs or "local.reject"
archive_ngs_prefix = archive_ngs_prefix or "local.archive"

-- the name of the header where hooks summarize their action
lua_header = lua_header or "X-ln2-luascript"
-- regex with two captures, one for a headers name, one for its value
head_val_regex = head_val_regex or "^([%w_%-]-):%s*(.*)$"
-- regex with one capture describing valid newsgroup names
ng_regex = ng_regex or "%s*([%w_%.%-]+)%s*"
-- filters may change headers.  tested against nil (not with "or") so
-- that an explicit "false" set by the user survives
if type(force_headerchange) == "nil" then
    force_headerchange = true
end
-- each filter receives the preceding filters output as input.
-- set to false if filters get fresh copies of header and body.
-- tested against nil for the same reason as above
if type(chain_filters) == "nil" then
    chain_filters = true
end
spam_tag_subject = spam_tag_subject or false

-- this is the bayes filter.
bogofilter = bogofilter or "/usr/local/bin/bogofilter -v"

-- scoring limits.  the two cutoffs are derived from max_scorelimit, so
-- with the defaults spam_cutoff is -55 and reject_cutoff is -68
max_scorelimit = max_scorelimit or 55
-- LN_BODY_SIZE is not defined in this file; it is used when present
max_size = max_size or LN_BODY_SIZE or 55000
reject_cutoff = reject_cutoff or -(max_scorelimit+13)
spam_cutoff = spam_cutoff or -max_scorelimit
max_xposts = max_xposts or 4
maximum_age = maximum_age or 7

-- when true, Art()/ArtAnd() report the matched text instead of the pattern
see_string_not_pattern = see_string_not_pattern or false

--[[--------------------------------------------------------------------
names of configuration directories.  archive threads are stored in
"ln2_config_dir" named after the current group, with non-alnums in the
name replaced by underscores.  each path is built from the one before
it, so overriding "local_config_prefix" moves the other two as well
(defaults: "/etc", "/etc/leafnode", "/etc/leafnode/arc_threads").
-------------------------------------------------------------------]]--
local_config_prefix = local_config_prefix or "/etc"
ln2_config_dir = ln2_config_dir or (local_config_prefix .. "/leafnode")
arc_thread_store = arc_thread_store or (ln2_config_dir .. "/arc_threads")

--[[--------------------------------------------------------------------
"arc_thread_maxage" is the maximum age a message-ID may have until
expired.  this value should really be group configurable.
the default is written as 3 * 31 * 24 * 60 * 60, presumably three
months of 31 days expressed in seconds.
-------------------------------------------------------------------]]--
arc_thread_maxage = arc_thread_maxage or (3 * 31 * 24 * 60 * 60)

--[[--------------------------------------------------------------------
these are the operators prefixed to an archiving groups name in table
"KeywordSearches".  they _MUST_ be all of the same length:  two ascii
characters, this is hardcoded in hook_archive().
-------------------------------------------------------------------]]--
arc_plus = arc_plus or " +"
arc_is = arc_is or " ="
arc_plus_threaded = arc_plus_threaded or "t+"
arc_is_threaded = arc_is_threaded or "t="

--[[--------------------------------------------------------------------
the message-IDs for identifying threads via articles References: headers
are listed one MID per line in a file initialized on every fetchnews
run.  unlike the settings above, these values are always reset when this
file is loaded.
-------------------------------------------------------------------]]--
archive_mids = {}
archive_mids_inited = false
archive_mids_updated = false
-- arc_name_time_sep = "_%-=%-_"
arc_name_time_sep = " "
-- two captures: everything before the separator, and the trailing digits
arc_name_time_match = "^(.-)" .. arc_name_time_sep .. "(%d+)$"
arc_mid_last_update = "__very_unlikely_messitsch_ei_di__"

-- debugging with print().  note that debug output is switched ON unless
-- s_debug was set (to true or false) before this file is loaded
if type(s_debug) == "nil" then
    -- s_debug = false
    s_debug = true
end
-- which function to use instead of assert(): it wraps the I/O calls in
-- ext_filter().  with assert a failing call raises an error
-- s_deb = s_deb or debug_fun
s_deb = s_deb or assert

--[[--------------------------------------------------------------------
-- end_of_user_configurable_values_comma_you_missed_it
-------------------------------------------------------------------]]--

--[[--------------------------------------------------------------------
abbrev names exported from "string" module
-------------------------------------------------------------------]]--
s_find = string.find
s_format = string.format
s_gmatch = string.gmatch
s_gsub = string.gsub
s_len = string.len
s_lower = string.lower
s_match = string.match
s_repeat = string.rep
s_substr = string.sub

--[[--------------------------------------------------------------------
-- these constants _must not_ match any legal RFC header name!
-- Art() and ArtAnd() accept them in place of a header name to search
-- the whole body or the whole header text.
-------------------------------------------------------------------]]--
search_body = search_body or "search_body::ln2lua"
search_header = search_header or "search_header::ln2lua"

--[[--------------------------------------------------------------------
-- abbreviations for the lengthy, but unambiguous backend status codes
-------------------------------------------------------------------]]--
ok = SCRIPT_NOERROR
rej = SCRIPT_REJECT
noch = SCRIPT_NOCHANGE

--[[--------------------------------------------------------------------
-- make select_match() controls readable: the value is passed as its
-- "return_list" argument
-------------------------------------------------------------------]]--
match_best = false
match_list = true

--[[--------------------------------------------------------------------
-- some counters for the "spamminess" statistic
-------------------------------------------------------------------]]--
grand_total = 0
spam_total = 0
ham_total = 0
n_ucase_total = 0
n_lcase_total = 0
n_cites_total = 0
n_origs_total = 0

--[[--------------------------------------------------------------------
-- make cleaned up strings usable in headers
-------------------------------------------------------------------]]--
-- filterstring(s) returns <s> with every newline written as the two
-- characters backslash-n and every remaining whitespace character turned
-- into a dot, so that the text fits on a single header line.
function filterstring(s)
    -- newlines go first: afterwards the %s class would swallow them
    local escaped = s_gsub(s, "\n", "\\n")
    local dotted = s_gsub(escaped, "%s", ".")
    return dotted
end
-- shortstring(s, maxlen) returns at most <maxlen> (default 33) leading
-- characters of <s>, cleaned by filterstring() and wrapped in >>...<<.
-- a nil <s> is treated as the empty string.
function shortstring(s, maxlen)
    local text = s or ""
    local limit = maxlen or 33
    local text_len = s_len(text)
    if limit > text_len then
        limit = text_len
    end
    local clipped = s_substr(text, 1, limit)
    return ">>" .. filterstring(clipped) .. "<<"
end
--[[--------------------------------------------------------------------
given a prefix naming a directory and any funny name,
mk_result_filename() returns that directory with a slash and that funny
name appended, but any non-alphanumeric characters in it have been
replaced by underscores.
-------------------------------------------------------------------]]--
-- mk_result_filename(directory, any_name) returns
-- <directory> .. "/" .. <cleaned name>, where the cleaned name consists of
-- the alphanumeric runs of <any_name> joined by single underscores.
-- digits are kept: matching letters only (as was done before) mapped
-- names such as "alt.mp3" and "alt.mp4" onto the same file.
function mk_result_filename(directory, any_name)
    local weird_sub = "_"
    local parts = {}
    for part in s_gmatch(any_name, "%w+") do
        parts[#parts + 1] = part
    end
    return directory .. "/" .. table.concat(parts, weird_sub)
    -- return os.tmpname()
end

-- debugging with print()
-- s_debug = true
-- s_debug = false
-- dprint(...) prints its arguments and flushes the default output file,
-- but only while the global "s_debug" is set.  it returns nothing.
function dprint(...)
    if not s_debug then
        return
    end
    print(...)
    io.flush()
end
-- debug_fun(...) hands back all of its arguments unchanged.  it is the
-- non-raising stand-in for assert() that can be assigned to "s_deb".
function debug_fun(...)
    return ...
end
-- s_deb = assert
-- s_deb = debug_fun

--[[--------------------------------------------------------------------
my feeble attempt on displaying tables.  disptab(t, indent) dumps <t>
through dprint(), nested tables are indented by two more columns per
level.  a table that has already been dumped is not descended into
again but listed as "see: <table address>", so circular structures
terminate.  nothing at all is written while "s_debug" is off.
-------------------------------------------------------------------]]--
function disptab(t, indent)
    local iam = "disptab: "
    -- dprint() is silent when debugging is off.  leave early, so that the
    -- direct write to stdout below stays silent as well
    if not s_debug then
        return
    end
    local seen = {}
    indent = indent or 0
    -- remember the root too, a reference back to it must not be re-dumped
    if type(t) == "table" then
        seen[t] = true
    end
    -- returns the indent on success, -1 on error
    local function _dt(tt, ii)
        local prefix = s_repeat(" ", ii)
        if type(tt) ~= "table" then
            dprint(iam .. "not a table")
            return -1
        end
        if ii > 80 then
            dprint(prefix .. iam .. "indent>80, cycles?")
            return -1
        end
        dprint(prefix .. tostring(tt) .. " {")
        local pprefix = s_repeat(" ", ii + 2)
        for k, v in pairs(tt) do
            local label = pprefix .. "(" .. type(k) .. ")[" .. tostring(k) .. "]="
            if type(v) ~= "table" then
                dprint(label .. "(" .. type(v) .. ")" .. tostring(v) .. ",")
            elseif next(v) == nil then
                dprint(pprefix .. "{},")
            elseif seen[v] then
                dprint(pprefix .. iam .. "see: " .. tostring(v) .. ",")
            else
                seen[v] = true
                -- no newline: the nested dump continues on this line
                io.stdout:write(label)
                if _dt(v, ii + 2) < 0 then
                    return -1
                end
            end
        end
        dprint(prefix .. "}")
        return ii
    end
    if _dt(t, indent) < 0 then
        dprint(iam .. "error")
    end
end

-- this is for tables with non-int keys/non-arrays: count no. of elements
-- num_table_entries(tab) counts every key/value pair of <tab>, whatever
-- the type of the keys.
function num_table_entries(tab)
    local total = 0
    for _ in pairs(tab) do
        total = total + 1
    end
    return total
end

--[[--------------------------------------------------------------------
-- central routine for running all functions in a table
-------------------------------------------------------------------]]--
-- run_hooks(hooktab, match_type, result_fun, group, article, ...)
--   hooktab     name of a global table: group name pattern -> list of hooks
--   match_type  match_best: run only the hooks of the best matching key,
--               match_list: run the hooks of every matching key
--   result_fun  called as result_fun(result, article) after each hook
--               that ran without raising an error
--   group, article, ...  handed to every hook
-- returns nil plus a message if <hooktab> is no table or <article> is no
-- table, true otherwise, also when no hook matched.  a hook that raises
-- an error is logged and skipped.
function run_hooks(hooktab, match_type, result_fun, group, article, ...)
    local iam = "run_hooks"
    local groupname = group.name
    local logmess = iam .. ": " .. groupname
    -- _G is a regular table containing all the globals
    local real_hook_tab = _G[hooktab]
    local art_type = type(article)
    if type(real_hook_tab) ~= "table" then
        logmess = logmess .. " no such hook: " .. tostring(hooktab)
        ln_log(LNLOG_SERR, LNLOG_CARTICLE, logmess)
        return nil, logmess
    end
    if art_type ~= "table" then
        logmess = logmess .. " article must be table, got: " .. art_type
        ln_log(LNLOG_SERR, LNLOG_CARTICLE, logmess)
        return nil, logmess
    end
    -- if some hooktab is empty, return a neutral result
    if not next(real_hook_tab) then
        logmess = logmess .. " hook empty: " .. hooktab
        ln_log(LNLOG_SWARNING, LNLOG_CGROUP, logmess)
        return true
    end
    local matched = select_match(real_hook_tab, groupname, match_type)
    local group_hooks = matched
    -- fake a table when match_best returns just one (set of) items; a
    -- missing match must stay nil instead of being wrapped
    if match_type == match_best then
        group_hooks = nil
        if type(matched) == "table" then
            group_hooks = {{ groupname, matched }}
        end
    end
    -- test the type first: "#" raises an error on nil
    if type(group_hooks) ~= "table" or #group_hooks < 1 then
        logmess = logmess .. " no hooks match: " .. groupname
        ln_log(LNLOG_SWARNING, LNLOG_CGROUP, logmess)
        return true
    end
    for _, entry in ipairs(group_hooks) do
        local pattern, hooks = entry[1], entry[2]
        if type(hooks) ~= "table" then
            logmess = iam .. ": >>" .. tostring(pattern)
            logmess = logmess .. "<< is no list of hooks but: " .. type(hooks)
            ln_log(LNLOG_SERR, LNLOG_CARTICLE, logmess)
            hooks = {}
        end
        for i, hook in ipairs(hooks) do
            logmess = iam .. ": testing: >>" .. tostring(pattern) .. "<< hook " .. i
            ln_log(LNLOG_SINFO, LNLOG_CGROUP, logmess)
            local hook_type = type(hook)
            if hook_type == "function" then
                -- on failure pcall() puts the error into "result"
                local success, result, err = pcall(hook, group, article, ...)
                if success then
                    result_fun(result, article)
                else
                    logmess = iam .. ": hook " .. i .. " failed: "
                    logmess = logmess .. tostring(result or "???")
                    if err then logmess = logmess .. ": " .. tostring(err) end
                    ln_log(LNLOG_SERR, LNLOG_CARTICLE, logmess)
                end
            else
                logmess = iam .. ": hook " .. i .. " failed: type of hook is: "
                logmess = logmess .. hook_type
                dprint(logmess)
                ln_log(LNLOG_SERR, LNLOG_CARTICLE, logmess)
            end
        end
    end
    return true
end

--[[--------------------------------------------------------------------
genMatch() performs the equivalent of regular expression matching.
luas builtin matcher is a very pragmatic, very fast and very small
implementation doing most of what posix regcomp(3), regexec(3) et.al.
do, but lacks the alternation operator ("|" in most implementations).
you still can use the "|" operator, but it is a simple hack.
eg., parenthesis don't work as expected: a regex may look like
"^some|^string", but not "^(some|string)". also, if you use "|" in
character classes, the feature is switched off. if this were not done,
character classes with "|" in them would break that particular regex.
-------------------------------------------------------------------]]--
-- genMatch(subject, regex) matches <subject> against the lua pattern
-- <regex>, ignoring case.  "|" separates alternatives which are tried
-- from left to right, unless a "|" occurs inside a character class: then
-- the pattern is used as a whole.
-- returns what string.match() returns for the first alternative that
-- matches (first capture, or the whole match), false if none does.
function genMatch(subject, regex)
    local ignore_case = true
    local match = false
    if ignore_case then
        subject = s_lower(subject)
        -- lower-case the literal characters only.  a "%" escape has to stay
        -- as written: "%S", "%W", "%D" ... mean the opposite of "%s", "%w",
        -- "%d", so lower-casing them would invert the pattern
        regex = s_gsub(regex, "%%?.", function(token)
            if #token == 2 then
                return token
            end
            return s_lower(token)
        end)
    end
    if s_match(regex, "%[[^|%]]-|[^%]]-%]") then
        -- "|" inside a character class: no alternation for this pattern
        match = s_match(subject, regex)
        if match then return match end
    else
        for part in s_gmatch(regex, "[^|]+") do
            match = s_match(subject, part)
            if match then return match end
        end
    end
    return false
end

--[[-------------------------------------------------------------------

all tables are indexed by the group name. the results are usually tables
of functions or simple numbers, depending on the pattern currently run.

actually, select_match() returns lists of tuples (<pattern>, <entry>)
when return_list == match_list, where <pattern> is the group name
matching key and <entry> the configured value, both from the users
table. if you want the actual group name matched instead of the
users key, you would change the line "intermed[#intermed + 1] =
{match_len, name, result}" into "intermed[#intermed + 1] = {match_len,
test, result}".  this means that users might not find the "wrong key"
when they messed up their config, though.

-------------------------------------------------------------------]]--

-- find the most specific, ie. longest match. if `return_list' is set,
-- return a table, longest matches first.
-- select_match(tab, groupname, return_list)
--   tab          table: group name pattern -> configured value
--   groupname    the name tested against every key with genMatch()
--   return_list  match_list: return all matches, match_best: only the best
-- the longer the matched text, the more specific a key is considered.
-- keys matching with the same length are ordered by key text, so that
-- the result does not depend on the traversal order of pairs().
-- a key that matches with length zero (eg. a catch-all "^") counts as a
-- match in both modes.
-- returns nil when no key matches, otherwise either the value of the
-- best key or a list of {key, value} pairs, best match first.
function select_match(tab, groupname, return_list)
    local intermed = {}
    for name, result in pairs(tab) do
        local test = genMatch(groupname, name)
        if test then
            local match_len = s_len(test)
            intermed[#intermed + 1] = {match_len, name, result}
        end
    end
    if #intermed == 0 then
        return nil
    end
    table.sort(intermed, function(e1, e2)
        if e1[1] ~= e2[1] then
            return e1[1] > e2[1]
        end
        return tostring(e1[2]) < tostring(e2[2])
    end)
    if not return_list then
        return intermed[1][3]
    end
    local res_table = {}
    for i, v in ipairs(intermed) do
        res_table[i] = {v[2], v[3]}
    end
    return res_table
end

--[[-------------------------------------------------------------------

all tables are indexed by the group name. the results are usually tables
of functions, strings or numbers, depending on the pattern currently
run.

The article structure.  It is filled in various hooks:
"fetchnews_headertable" is run when the header is completely scanned
by fetchnews, but not yet the body.  It is used here to set up the
"Article.i_header" array with a complete copy of all the header lines,
indexed by integer numbers.  "Article.s_header" is a derivate, an
association list: you can use it for fast access to headers indexed
by lower-cased strings.  The keys are header names, the values their
text.  Since tables cannot contain duplicate keys, you lose all the
eg. "Received:" lines but one, whereas article.i_header has all the
headers, but indexed by integers starting from one.

"Article.score" accumulates the scores assigned by the various hook
functions.  If the score is positive, the article is stored in its
original groups and perhaps some archiving groups, if it is negative or
zero, but above the "reject_cutoff" value, it is only filed into the
group configured as "spam_ngs", but if it is at or below the
"reject_cutoff", it is rejected entirely.  You can opt to put it into
the group named in "reject_ngs" until you are sure about your filtering.

"Article_age" keeps the value returned by leafnodes article age function,
currently in days.  This could change to be seconds as the original
C-code is written, but for scoring, counting days is enough.

"Article.header" and "Article.body" contain the original articles header
and body as simple text blobs.

Note that all headers are complete in that indented continuation lines
are folded into one long line each with newlines removed.

"Article.cur_group" points to the group currently worked on.

-------------------------------------------------------------------]]--

-- emptyArticleData(art) resets every known field of <art> to its empty
-- value and returns it; without an argument a new table is created.
-- fields not listed here are left alone.
function emptyArticleData(art)
    local article = art or {}
    article.score, article.article_age = 0, 0
    article.s_header, article.i_header = {}, {}
    article.header, article.body = "", ""
    article.cur_group = {}
    return article
end

-- the article structure, starting out empty
Article = emptyArticleData()

-- emptyGroupData(group) resets the name, the current article and all
-- counters of <group> and returns it; without an argument a new table is
-- created.  fields not listed here are left alone.
function emptyGroupData(group)
    local data = group or {}
    data.name = ""
    data.cur_art = {}
    data.ham, data.spam = 0, 0
    data.n_cites, data.n_origs = 0, 0
    data.n_ucase, data.n_lcase = 0, 0
    return data
end

-- the group structure, starting out empty
Group = emptyGroupData()

--[[-------------------------------------------------------------------

"addHeader" is the function to use when building or adding an articles
headers as they will appear on disk. set the very last argument to
"true" if you want to replace a header instead.  Only the first matching
header will be replaced, though.

-------------------------------------------------------------------]]--

-- addHeader(article, field, value, replace) stores the header line
-- "<field>: <value>" in article.i_header.
-- with <replace> set, the first existing header of that name is
-- overwritten; header names are compared without regard to case, as they
-- are case-insensitive.  if there is no such header, or <replace> is not
-- set, the line is appended.
-- returns true and the index of the line in article.i_header.
-- article.s_header is not touched.
function addHeader(article, field, value, replace)
    local headers = article.i_header
    local header_line = field .. ": " .. value
    if replace then
        local wanted = s_lower(field)
        for idx = 1, #headers do
            local head = s_match(headers[idx], head_val_regex)
            if head and s_lower(head) == wanted then
                headers[idx] = header_line
                return true, idx
            end
        end
    end
    local field_number = #headers + 1
    headers[field_number] = header_line
    return true, field_number
end

--[[--------------------------------------------------------------------
tag_header(
article structure,
header with its original (not lowercased!) name,
tag (will always be prepended to current value)
)
-------------------------------------------------------------------]]--
-- tag_header(article, header, tag) prepends <tag> to the value
-- article.s_header holds for <header> (looked up by lower-cased name) and
-- writes the result back with addHeader(..., true).  a <tag> that is no
-- string is ignored.  always returns true.
function tag_header(article, header, tag)
    local current = article.s_header[s_lower(header)]
    if not tag or type(tag) ~= "string" then
        return true
    end
    addHeader(article, header, tag .. (current or ""), true)
    return true
end

--[[-------------------------------------------------------------------
stringifyHeader() takes an article structure and joins all its headers
into one long, newline separated string to be used as the munged headers
returned to the application.
-------------------------------------------------------------------]]--
-- stringifyHeader(tab) joins the lines of tab.i_header with newlines and
-- appends a final newline.  if <tab> is no table or has no i_header, an
-- error is logged and nil is returned.
function stringifyHeader(tab)
    local iam = "stringifyHeader"
    local is_table = (type(tab) == "table")
    if is_table and tab.i_header then
        return table.concat(tab.i_header, "\n") .. "\n"
    end
    local logmess = iam .. ": " .. type(tab)
    if is_table then
        logmess = logmess .. " tab.i_header: " .. (tab.i_header or "nil")
    end
    ln_log(LNLOG_SERR, LNLOG_CARTICLE, logmess .. ": illegal arguments")
    return nil
end

--[[--------------------------------------------------------------------
ext_filter() takes a program invocation line (executed by popen(3), ie.
the shell), feeds it both headers and body as long strings and returns
a result in the form specified by <result_fmt>.  it does not interpret
the result in any way, which is up to the caller.  <result_fmt> can be
anything applicable to the lua builtin "read()":

The available formats are

* "*n": reads a number; this is the only format that returns a number
  instead of a string.
* "*a": reads the whole file, starting at the current position. On end
  of file, it returns the empty string.
* "*l": reads the next line (skipping the end of line), returning nil on
  end of file. This is the default format.
* number: reads a string with up to this number of characters, returning
  nil on end of file. If number is zero, it reads nothing and returns an
  empty string, or nil on end of file.

most of the times you will want one line ("*l") for the result of
a classifier such as bogofilter(1), or the entire file ("*a") for the
result of a text filter such as normalizemime(1).
-------------------------------------------------------------------]]--
-- only supply the default: a bogofilter command line that is already
-- configured must not be overwritten here
bogofilter = bogofilter or "/usr/local/bin/bogofilter -v"

-- ext_filter(program, result_fmt, headerstr, bodystr)
--   program     shell command line to run; "bogofilter" if nil or empty
--   result_fmt  read() format used to fetch the programs output
--   headerstr, bodystr  written to the programs standard input,
--               separated by one empty line
-- the output is collected in a temporary file named by os.tmpname(),
-- which is removed again before returning or raising.
-- returns the data read, or nil plus an error message.  every I/O call is
-- wrapped in s_deb(), so with s_deb == assert a failure raises an error
-- instead.
function ext_filter(program, result_fmt, headerstr, bodystr)
    -- settle the command first, everything else depends on it
    if not program or s_len(program) < 1 then
        program = bogofilter
    end
    local tmp_result = os.tmpname()

    -- true if <status> signals a failure; closes <handle> if still open
    local function check_io(handle, status)
        local failed = not status
        if failed and io.type(handle) == "file" then
            s_deb(handle:close())
        end
        return failed
    end

    -- feed the article to the program, then read its output back
    local function run_filter()
        local io_result
        local fh, io_err = s_deb(io.popen(program .. " > '" .. tmp_result .. "'", "w"))
        if check_io(fh, fh) then return nil, io_err end

        io_result, io_err = s_deb(fh:write(headerstr))
        if check_io(fh, io_result) then return nil, io_err end

        io_result, io_err = s_deb(fh:write("\n"))
        if check_io(fh, io_result) then return nil, io_err end

        io_result, io_err = s_deb(fh:write(bodystr))
        if check_io(fh, io_result) then return nil, io_err end

        -- closing waits for the program, its output is complete afterwards
        io_result, io_err = s_deb(fh:close())
        if check_io(fh, io_result) then return nil, io_err end

        fh, io_err = s_deb(io.open(tmp_result, "r"))
        if check_io(fh, fh) then return nil, io_err end

        io_result, io_err = s_deb(fh:read(result_fmt))
        s_deb(fh:close())
        if check_io(fh, io_result) then return nil, io_err end

        return io_result, io_err
    end

    -- pcall() makes sure the temporary file goes away even if s_deb raises
    local success, result, err = pcall(run_filter)
    os.remove(tmp_result)
    if not success then
        error(result, 0)
    end
    return result, err
end

--[[--------------------------------------------------------------------
these routines perform the equivalent of regular expression matching.
luas builtin matcher is a very pragmatic implementation doing most of
what posix regcomp(3), regexec(3) et.al. do, but lacks the alternation
operator ("|" in most implementations).  Art() takes an article,
the name of a field to search on and a list <alternatives> of lua
regex' to match against.  The special field names "search_header" and
"search_body" serve to match all of the headers or the body of an
article.  The optional last argument is checked the same way as the
list of alternatives, but matches in alternatives that also match in
<butnot> are rejected.  The meaning of this is:  "Check for matches
in alternatives, but not if they also match in <butnot>".  You can
use the "|" operator, but it is a simple hack. eg., parenthesis don't
work as expected: a regex may look like "^some|^string", but not
"^(some|string)". also, if you use "|" in character classes, the feature
is switched off. if this were not done, character classes with "|" in
them would break that particular regex.

ArtAnd() takes the same arguments, but returns true only if _all_ of the
alternatives match.  it is used rarely, but is needed at times.  Note
that the "and" semantics do not count for the <butnot> list:  if any
member of that list is matched, the entire search pattern is considered
a fail.
-------------------------------------------------------------------]]--
-- Art(art, field, alternatives, field_n, butnot)
--   art           article structure
--   field         header name, or search_body / search_header
--   alternatives  list of patterns, one of them has to match <field>
--   field_n       optional field the <butnot> patterns are tested against
--   butnot        optional list of patterns that veto a match
-- returns true if an alternative matches and no <butnot> pattern does,
-- false otherwise (also when the article has no <field>).  a <field_n>
-- the article doesn't have cannot veto anything.
-- on success the match is logged and noted in the "lua_header" header.
function Art(art, field, alternatives, field_n, butnot)
    local iam = "Art"
    -- the text to search: a header value, the body or the whole header
    local function mk_searchfield(name)
        local test = art.s_header[s_lower(name)]
        if test then return test end
        if name == search_body then return art.body end
        if name == search_header then return art.header end
        return false
    end
    -- true if any <butnot> pattern matches <field_n>
    local function vetoed()
        if not field_n then return false end
        local testfield_n = mk_searchfield(field_n)
        -- without this guard genMatch() would be handed a non-string
        if not testfield_n then return false end
        for _, test_n in ipairs(butnot or {}) do
            if genMatch(testfield_n, test_n) then
                return true
            end
        end
        return false
    end
    local testfield = mk_searchfield(field)
    if not testfield then return false end
    for _, test_y in ipairs(alternatives) do
        local match = genMatch(testfield, test_y)
        if match then
            if vetoed() then
                return false
            end
            local log_match = test_y
            if see_string_not_pattern then
                log_match = match
            end
            local summary = iam .. ": " .. shortstring(field, 13)
                .. " matches " .. shortstring(log_match, 33)
            ln_log(LNLOG_SINFO, LNLOG_CARTICLE, summary)
            addHeader(art, lua_header, summary)
            return true
        end
    end
    return false
end
-- ArtAnd(art, field, alternatives, field_n, butnot) takes the arguments
-- of Art(), but returns true only if _all_ patterns in <alternatives>
-- match <field> and none of the <butnot> patterns matches <field_n>.
-- a <field_n> the article doesn't have cannot veto anything.
-- on success the matches are logged and noted in the "lua_header" header.
function ArtAnd(art, field, alternatives, field_n, butnot)
    local iam = "ArtAnd"
    -- the text to search: a header value, the body or the whole header
    local function mk_searchfield(name)
        local test = art.s_header[s_lower(name)]
        if test then return test end
        if name == search_body then return art.body end
        if name == search_header then return art.header end
        return false
    end
    local testfield = mk_searchfield(field)
    if not testfield then return false end
    local shown = {}
    for _, test_y in ipairs(alternatives) do
        local match = genMatch(testfield, test_y)
        if not match then
            return false
        end
        local log_match = test_y
        if see_string_not_pattern then
            log_match = match
        end
        shown[#shown + 1] = shortstring(log_match, 23)
    end
    if field_n then
        local testfield_n = mk_searchfield(field_n)
        -- without this guard genMatch() would be handed a non-string
        if testfield_n then
            for _, test_n in ipairs(butnot or {}) do
                if genMatch(testfield_n, test_n) then
                    return false
                end
            end
        end
    end
    local summary = iam .. ": " .. shortstring(field, 13)
        .. " matches " .. table.concat(shown, ", ")
    ln_log(LNLOG_SINFO, LNLOG_CARTICLE, summary)
    addHeader(art, lua_header, summary)
    return true
end

--[[--------------------------------------------------------------------
hook_htable_age() compares an articles age against per-group
configurable values and scores old articles down a little.
-------------------------------------------------------------------]]--
-- hook_htable_age(group, article, htab) returns a score for the age of
-- <article>:
--   "score"                        if older than the groups maximum age
--   reject_cutoff - spam_cutoff    if within three of that maximum
--   0                              otherwise
-- the best matching entry of "MaxAge" is read as {maximum age, score}.
-- without a matching entry, or for a missing value, the defaults
-- "maximum_age" and "spam_cutoff" are used.  <htab> is not used.
-- a negative result is logged and noted in the "lua_header" header.
function hook_htable_age(group, article, htab)
    local iam = "hook_htable_age"
    local testresult = 0
    local logmess = iam .. ": "
    -- look the group up once; no match (nil) must not be indexed
    local entry = MaxAge and select_match(MaxAge, group.name)
    if type(entry) ~= "table" then
        entry = {}
    end
    local group_maxage = entry[1] or maximum_age
    local score = entry[2] or spam_cutoff
    if article.article_age > group_maxage then
        testresult = score
        logmess = logmess .. group.name .. " age > " .. group_maxage
    elseif article.article_age > group_maxage-3 then
        testresult = reject_cutoff - spam_cutoff
        logmess = logmess .. group.name .. " age > " .. group_maxage-3
    else
        logmess = logmess .. group.name .. " ok"
    end
    if testresult < 0 then
        ln_log(LNLOG_SNOTICE, LNLOG_CARTICLE, logmess)
        -- logmess already starts with the function name
        addHeader(article, lua_header, logmess)
    end
    return testresult
end

--[[-------------------------------------------------------------------
This filter relies on the helper program "normalizemime", which is
meant to do base64, QP and HTML decoding for spam filters.  If you
don't have it, don't use this hook, ie. remove it from the table
"fn_headertxt_bodytxt_hooks".  Also, "normalizemime" seems to output
bodies twice!  Other filters may only output the decoded body, so test
for and remove the header.
-------------------------------------------------------------------]]--
-- has_header(s) is true-ish if <s> contains a "From:" line _and_ a
-- "Message-ID:" line, each either at the very start or after a newline.
-- the parentheses around the first pair are needed: "and" binds tighter
-- than "or", without them a "From:" line alone would be enough.
function has_header(s)
    return ((s_find(s, "\nFrom:.-\n") or s_find(s, "^From:.-\n")) and
           (s_find(s, "\nMessage-ID:.-\n") or s_find(s, "^Message-ID:.-\n")))
end
-- Decodes base64 bodies through the external helper "filterprog".
-- Returns -1 when the body was replaced by decoded text, 0 otherwise.
function hook_base64(group, article, header_string, body_string)
    local iam = "hook_base64"
    local testresult = 0
    local logmess = ""
    local filterprog = "/usr/local/bin/normalizemime"
    -- the body_delim is filter-specific, this one is normalizemime's
    local body_delim = "\n%-%-%-%-%-%-\n"
    local body_delim_fallback = "\n%s-\n"
    local header = header_string
    local body = body_string
    if chain_filters then
        header = article.header
        body = article.body
    end
    if Art(article, "Content-Transfer-Encoding", {"base64"}) then
        local filter_result, filterr = ext_filter(filterprog, "*a", header, body)
        if filter_result then
            if has_header(filter_result) then
                -- filter echoed the header: cut everything up to the delimiter
                if not s_find(filter_result, body_delim) then
                    body_delim = body_delim_fallback
                end
                article.body = s_gsub(filter_result, "^.-" .. body_delim, "")
            else
                -- filter emitted only the decoded body: use it as it is,
                -- otherwise the body stays base64 while the header says 8bit
                article.body = filter_result
            end
            addHeader(article, "Content-Transfer-Encoding", "8bit", true)
            logmess = ": base64-body textified: " .. article.s_header.mid
            testresult = -1
        else
            -- filterr may be nil when the helper gives no reason
            logmess = ": could not textify base64-body: " .. tostring(filterr)
            logmess = logmess .. ": " .. article.s_header.mid
        end
        addHeader(article, lua_header, (iam .. logmess))
        -- log the same text as the header, including the hook name
        ln_log(LNLOG_SWARNING, LNLOG_CARTICLE, (iam .. logmess))
    end
    return testresult
end

--[[--------------------------------------------------------------------
hook_maxbytes() compares an articles body size against per-group
configurable values and scores articles down when out of bounds.
-------------------------------------------------------------------]]--
function hook_maxbytes(group, article, header_string, body_string)
    -- Only the body length is measured; the bounds come from the
    -- MaxBytes entry selected for the group: [1] = minimum, [2] = maximum.
    -- Returns 0 inside the bounds, spam_cutoff outside of them.
    local iam = "hook_maxbytes"
    local bounds = select_match(MaxBytes, group.name)
    local nbytes = s_len(body_string)
    if not (nbytes < bounds[1] or nbytes > bounds[2]) then
        return 0
    end
    local range = bounds[1] .. "-" ..  bounds[2]
    local where = iam .. ": " .. group.name
    ln_log(LNLOG_SWARNING, LNLOG_CARTICLE,
           where .. " " .. article.s_header.mid
           .. " size " .. nbytes .. " [bytes]"
           .. " beyond " .. range)
    addHeader(article, lua_header, (where .. " size limits: " .. range))
    return spam_cutoff
end

--[[-------------------------------------------------------------------
"merge_unique_assoc" returns a table of key, value pairs in both "tab"
and "elements", removing duplicates.  set "overwrite" to have the values
in "elements" dominate.
-------------------------------------------------------------------]]--

function merge_unique_assoc(tab, elements, overwrite)
    -- Returns a new table; neither "tab" nor "elements" is modified.
    -- Keys are tested against nil rather than for truthiness, so a stored
    -- "false" counts as present and is only replaced when "overwrite" is set.
    local merged = {}
    for k, v in pairs(tab) do
        merged[k] = v
    end
    for k, v in pairs(elements) do
        if overwrite or merged[k] == nil then
            merged[k] = v
        end
    end
    return merged
end

--[[-------------------------------------------------------------------
"split_unique" returns a table of each capture in "line", described in
"catch_re" and optionally delimited by "delim".
-------------------------------------------------------------------]]--

function split_unique(line, catch_re, delim)
    -- First pass: every capture followed by the delimiter, first
    -- occurrence only.  Second step: the capture anchored at the end of
    -- "line" (which has no trailing delimiter), unless already collected.
    local seen, out = {}, {}
    local pattern = catch_re .. (delim or "")
    for item in s_gmatch(line, pattern) do
        if not seen[item] then
            seen[item] = 1
            table.insert(out, item)
        end
    end
    local tail = s_match(line, (catch_re .. "$"))
    if tail and not seen[tail] then
        table.insert(out, tail)
    end
    return out
end

--[[--------------------------------------------------------------------
we need a workaround for the fact that luas io-module doesn't let us
access "errno", so we test for a specific error message to determine if
we can ignore a file-not-found on read opens.
-------------------------------------------------------------------]]--
function file_exists(groupname, mode, callerid)
    -- Opens the ".mids" file belonging to "groupname" in "mode".
    -- Returns the open file handle on success and false otherwise; a
    -- missing file on a read open is not logged, every other failure is.
    local filename = mk_result_filename(arc_thread_store, groupname) .. ".mids"
    -- this is a workaround: check error message, cause there's no other
    -- way to reliably and portably find out if some file doesn't exist.
    local no_exista = "no such file"
    local hdl, err = io.open(filename, mode)
    if hdl then
        return hdl
    end
    err = err or ""
    if s_find(s_lower(err), no_exista, 1, true) and s_find(mode, "r") then
        return false
    end
    -- kept local so the message does not leak into the global table
    local logmess = callerid .. ": " .. err
    ln_log(LNLOG_SERR, LNLOG_CGROUP, logmess)
    return false
end
-- Formats a timestamp as "YYMMDD-HHMMSS" through os.date.
function seconds_to_string(seconds)
    return os.date("%y%m%d-%H%M%S", seconds)
end
-- Records "mid" in archive_mids with timestamp "time" (default: now)
-- and returns that timestamp.  Unless "just_populate" is set, the
-- table is also stamped as updated.
function add_to_arc_mids(mid, time, just_populate)
    local iam = "add_to_arc_mids"
    local now = os.time()
    local stamp = time or now
    local stamp_text = seconds_to_string(stamp)
    archive_mids[mid] = stamp
    if not just_populate then
        archive_mids[arc_mid_last_update] = now
        archive_mids_updated = true
    end
    dprint(iam .. ": added M-ID: " .. mid .. arc_name_time_sep .. stamp_text)
    return stamp
end
-- Looks for any Message-ID found in the article's "in-reply-to",
-- "references" or "message-id" header within archive_mids.
-- Returns the stored timestamp of the first hit, else false.
function check_arc_mid(article)
    local iam = "check_arc_mid"
    local hdrs_checked = { "in-reply-to", "references", "message-id" }
    local parts = {}
    for _, hdr in ipairs(hdrs_checked) do
        -- collect the header VALUE; the name itself holds no Message-ID
        local value = article.s_header[hdr]
        if value then
            parts[#parts + 1] = value
        end
    end
    local refs = table.concat(parts, " ")
    for mid in s_gmatch(refs, "<.-@.->") do
        local hit = archive_mids[mid]
        if hit then
            local logmess = iam .. ": found M-ID: " .. mid
            logmess = logmess .. arc_name_time_sep .. seconds_to_string(hit)
            logmess = logmess .. " in group: " .. article.cur_group.name
            ln_log(LNLOG_SINFO, LNLOG_CGROUP, logmess)
            addHeader(article, lua_header, logmess)
            return hit
        end
    end
    return false
end
-- Resets archive_mids and reloads it from the group's ".mids" file,
-- keeping only entries not older than arc_thread_maxage.
-- Returns true when a file was read, false when none could be opened.
function populate_arc_mids(group)
    -- NB: beside its name, nothing is known about the group at this time
    local iam = "populate_arc_mids"
    local hdl = file_exists(group.name, "r", iam)
    archive_mids = {}
    archive_mids_inited = false
    archive_mids_updated = false
    if io.type(hdl) ~= "file" then -- "closed file", nil
        archive_mids_inited = true
        return false
    end
    local now = os.time()
    for line in hdl:lines() do
        local mid, time_s = s_match(line, arc_name_time_match)
        if mid and time_s then
            local stamp = tonumber(time_s, 10)
            if (now - stamp) <= arc_thread_maxage then
                add_to_arc_mids(mid, stamp, true)
            end
        end
    end
    hdl:close()
    archive_mids_inited = true
    return true
end
-- Writes the still-young entries of archive_mids to the group's
-- ".mids" file, but only when the table was marked as updated.
-- Always clears the inited/updated flags; returns true after a write.
function save_arc_mids(groupname)
    local iam = "save_arc_mids"
    local saved = false
    if archive_mids_updated then
        local hdl = file_exists(groupname, "w", iam)
        if io.type(hdl) == "file" then -- "closed file", nil
            local now = os.time()
            hdl:setvbuf("line") -- for debugging
            for mid, stamp in pairs(archive_mids) do
                if (now - stamp) <= arc_thread_maxage then
                    local record = mid .. arc_name_time_sep .. s_format("%d", stamp)
                    hdl:write(record .. "\n")
                    hdl:flush() -- for debugging
                    dprint(iam .. ": M-ID " .. mid .. arc_name_time_sep
                           .. seconds_to_string(stamp))
                end
            end
            hdl:close()
            saved = true
        end
    end
    archive_mids_inited = false
    archive_mids_updated = false
    return saved
end

--[[-------------------------------------------------------------------

This hook may change the articles "Newsgroups:" header.  Currently,
the user may specify entries in a table called ArchiveMatchers,
with each entry having a newsgroup pattern to match against and
its value being a list of functions.  Each function is run in
turn, expected to test arbitrary article features and returning a
string on a match.  That string should be "+<newsgroup[s]_to_add>"
or "=<replacing_newsgroup[s]>".  Other operators such as
"-<newsgroup[s]_to_add>" (for subtracting newsgroups) are reserved for
future use.

Note that several functions might return the same newsgroup_to_add,
such as when archiving articles originating from the local site, so
newsgroups are added only when not already present.

Note:  If you want to see changed headers, you need to edit the
article.i_header table.  Article.s_header contains lower-cased header
lines for easy access and analysis, but it is an association list.
The keys are header names, the values their text.  Since tables cannot
contain duplicate keys, you lose all the "Received:" lines but one,
whereas article.i_header has all the headers, but indexed by integers
starting from one.

-------------------------------------------------------------------]]--

-- Walks the KeywordSearches entries selected for the group.  Each entry
-- is { archive_group, probe, probe, ... } with a probe being the
-- argument list handed to Art().  Returns the archive group of the
-- first entry that hits (or whose thread is already archived), else false.
function archive_keywords(group, article, header_s, body_s)
    local iam = "archive_keywords"
    local searches = select_match(KeywordSearches, group.name)
    for item_no, entry in ipairs(searches) do
        local arc_group = entry[1]
        for idx = 2, #entry do
            local probe = entry[idx]
            ln_log(LNLOG_SINFO, LNLOG_CARTICLE,
                   iam .. ": item " .. item_no
                   .. " field " .. shortstring(probe[1], 13)
                   .. " for archive " .. arc_group)
            if check_arc_mid(article)
               or Art(article, probe[1], probe[2], probe[3], probe[4]) then
                return arc_group
            end
        end
    end
    return false
end

--[[--------------------------------------------------------------------

hook_archive() runs all the functions in table "ArchiveMatchers" for
the matching current group name. these functions don't score, but on
match return newsgroup names in the following format: "++newsgroup
[,newsgroup]*" when the user wants to add to the original articles
newsgroups: header, or "==newsgroup[,newsgroup]*" when he wants to
replace that header.  the net-effect of this is redirecting matching
articles to eg. local archiving groups with possibly different expiry
times.

the forms "t+Tnewsgroup[,newsgroup]*" is similar to "++...", but
prepares to have the entire thread archived.

-------------------------------------------------------------------]]--
function hook_archive(group, article, header_s, body_s)
    -- Runs the "ArchiveMatchers" hooks and applies the newsgroup
    -- operations they return.  Always returns 0 (this hook never scores).
    local iam = "hook_archive"
    local result = true
    local logmess = ""
    local ng_modified = false
    -- NOTE(review): the list is read from (and written back to) the global
    -- Article, not the "article" parameter -- confirm they are the same.
    local newsgroups = Article.s_header.newsgroups
    -- Callback handed to run_hooks: "result" is a matcher's return value,
    -- a two character operator followed by the newsgroup list.
    local function newsgroup_fun(result, article)
        if type(result) ~= "string" then
            return false
        end
        local cmd = s_substr(result, 1, 2)
        local arc_group = s_substr(result, 3, -1)
        if s_len(arc_group) == 0 then
            return false
        end
        if cmd == arc_plus then
            newsgroups = newsgroups .. "," .. arc_group
            logmess = iam .. ": added archive ng: " .. arc_group
        elseif cmd == arc_is then
            newsgroups = arc_group
            logmess = iam .. ": replaced by archive ng: " .. arc_group
        elseif cmd == arc_plus_threaded then
            -- remember the thread starter unless the thread is known
            if not check_arc_mid(article) then
                add_to_arc_mids(article.s_header.mid)
            end
            newsgroups = newsgroups .. "," .. arc_group
            logmess = iam .. ": added (thread) archive ng: " .. arc_group
        elseif cmd == arc_is_threaded then
            if not check_arc_mid(article) then
                add_to_arc_mids(article.s_header.mid)
            end
            newsgroups = arc_group
            logmess = iam .. ": replaced (thread) by archive ng: " .. arc_group
        else
            -- reserved or unknown operator: nothing was changed, so the
            -- Newsgroups header must not be flagged as modified
            ln_log(LNLOG_SNOTICE, LNLOG_CARTICLE,
                   iam .. ": ignored unknown archive op: " .. cmd)
            return false
        end
        ng_modified = true
        ln_log(LNLOG_SINFO, LNLOG_CARTICLE, logmess)
        return false
    end
    -- NOTE(review): the original computed chained header/body copies under
    -- "chain_filters" but never used them; the matchers still receive the
    -- unchained header_s/body_s -- confirm that this is intended.
    result = run_hooks("ArchiveMatchers", match_best, newsgroup_fun, group, article, header_s, body_s)
    if result and ng_modified then
        newsgroups = table.concat(split_unique(newsgroups, ng_regex, ","), ",")
        Article.s_header.newsgroups = newsgroups
        -- to make change user visible in article
        if force_headerchange then
            addHeader(article, "Newsgroups", newsgroups, true)
        end
        logmess = (iam .. ": Newsgroups: " .. newsgroups)
        ln_log(LNLOG_SINFO, LNLOG_CARTICLE, logmess)
        addHeader(article, lua_header, logmess)
    end
    return 0
end

-- Counts the commas in the "newsgroups" and "followup-to" headers, adds
-- one, and compares the sum with the MaxXposts entry for the group
-- ([1] = limit, [2] = score).  Returns 0 or the score.
function hook_count_newsgroups(group, article, htable)
    local iam = "hook_count_newsgroups"
    local rule = select_match(MaxXposts, group.name)
    local commas = 0
    for _, hdr in ipairs({ "newsgroups", "followup-to" }) do
        local value = article.s_header[hdr]
        if value then
            commas = commas + select(2, s_gsub(value, "(%,)", ""))
        end
    end
    -- we're counting commas, newsgroups are one more!
    local n_groups = commas + 1
    local limit = rule[1] or max_xposts
    local score = rule[2] or (reject_cutoff - spam_cutoff)
    if not (n_groups > limit) then
        return 0
    end
    local where = iam .. ": " .. group.name
    ln_log(LNLOG_SWARNING, LNLOG_CARTICLE,
           where .. " " .. article.s_header.mid .. " Xpost limit: " .. limit)
    addHeader(article, lua_header, (where .. " Xpost limit: " .. limit))
    return score
end

-- Pipes header and body through the external program named in the
-- group's FilterSettings and matches its one-line answer against the
-- "is_spam" / "is_ham" patterns.  Returns spam_cutoff for spam, else 0.
function hook_bayes_filter(group, article, header_string, body_string)
    local iam = "hook_bayes_filter"
    local testresult = 0
    local logmess = iam .. ": "
    local settings = select_match(FilterSettings, group.name)
    local filter_result, filterr
    --
    if type(settings) ~= "table" then
        logmess = logmess .. group.name .. " has no FilterSettings"
        ln_log(LNLOG_SNOTICE, LNLOG_CARTICLE, logmess)
        return testresult
    end
    local prog = settings.program
    if prog then
        filter_result, filterr = ext_filter(prog, "*l", header_string, body_string)
    else
        filter_result = "no valid settings for group: " .. group.name
    end
    filter_result = filter_result or filterr or "filter prob!"
    logmess = logmess .. "filter status: " .. filter_result
    -- a missing pattern must fall through to the "matches neither" report
    -- instead of raising inside s_match
    local spam_pat, ham_pat = settings["is_spam"], settings["is_ham"]
    if spam_pat and s_match(filter_result, spam_pat) then
        tag_header(article, "Subject", spam_tag_subject)
        addHeader(article, lua_header, (iam .. ": " .. filter_result))
        testresult = spam_cutoff
    elseif ham_pat and s_match(filter_result, ham_pat) then
        addHeader(article, lua_header, (iam .. ": " .. filter_result))
    else
        logmess = logmess .. "result: >" .. filter_result .. "< matches neither: >"
        logmess = logmess .. tostring(spam_pat)
        logmess = logmess .. "< nor >" .. tostring(ham_pat) ..  "<"
    end
    ln_log(LNLOG_SNOTICE, LNLOG_CARTICLE, logmess)
    return testresult
end

-- long strings don't expand backslash escapes
-- should come after bayes, so that bayes sees HTML body
-- Removes a text/html MIME part from the body and scores the article
-- down a little.  Returns 0 when nothing was removed.
function hook_drop_html(group, article, header_string, body_string)
    local iam = "hook_drop_html"
    local testresult = 0
    -- %1 inside the pattern: the part runs up to the next occurrence of
    -- the boundary line captured at its start
    local htmlpattern = "(\n%-%-.-)\n[Cc]ontent%-[Tt]ype%:%s-[Tt][Ee][Xx][Tt]%/[Hh][Tt][Mm][Ll]%;.-%1"
    local body = body_string
    if chain_filters then
        body = article.body
    end
    -- search the same text that gets rewritten; with chained filters this
    -- is article.body and not the original body_string
    if s_find(body, htmlpattern) then
        article.body = s_gsub(body, htmlpattern, "%1")
        ln_log(LNLOG_SINFO, LNLOG_CARTICLE, iam .. ": html-body removed")
        addHeader(article, lua_header, (iam .. ": html-body removed"))
        testresult = reject_cutoff - spam_cutoff
    end
    return testresult
end

-- Compares upper- to lower-case letter counts of the body, adds both
-- counts to the group's running totals, and returns -1 when the ratio
-- exceeds the threshold on an article of more than seven lines.
function hook_shouting(group, article, body_string)
    local iam = "hook_shouting"
    local threshold = 0.23
    local n_lines = tonumber(article.s_header.lines) or 0
    local text = body_string
    if chain_filters then
        text = article.body
    end
    local upper, lower
    _, upper = s_gsub(text, "%u", "")
    _, lower = s_gsub(text, "%l", "")
    -- avoid dividing by zero
    if lower < 1 then lower = 1 end
    group.n_ucase = group.n_ucase + upper
    group.n_lcase = group.n_lcase + lower
    local ratio = upper / lower
    if not ((ratio > threshold) and (n_lines > 7)) then
        return 0
    end
    local logmess = iam .. ": upper/lowercase = " .. s_format("%.3f", ratio)
    addHeader(article, lua_header, logmess)
    ln_log(LNLOG_SINFO, LNLOG_CARTICLE, logmess)
    return -1
end

--[[--
this hook scores articles where non-cited material is very low and the
ratio of cited to non-cited material is very high, which is typically
a sign of mockery, provocation, vandalism or spam.  configure the
"original material" threshold with variable "min_origs", minimum article
length and ratio in global table "ArtCiteStats".
--]]--
function hook_count_cites(group, article, body_string)
    -- ArtCiteStats entry for the group: [1] = ratio threshold (default 7),
    -- [2] = minimum article lines (default 33), [3] = score (default -5).
    -- Cited and original line counts are added to the group's totals.
    local iam = "hook_count_cites"
    local n_lines = tonumber(article.s_header.lines) or 555333
    local cfg = select_match(ArtCiteStats, group.name)
    local text = body_string
    if chain_filters then
        text = article.body
    end
    local max_ratio = cfg[1] or 7
    local min_lines = cfg[2] or 33
    local penalty = cfg[3] or -5
    -- quoted lines start with ">" or its HTML escaped form
    local quoted_html, quoted_plain
    _, quoted_html = s_gsub(text, "\n%&gt;", "")
    _, quoted_plain = s_gsub(text, "\n>", "")
    local cited = quoted_plain + quoted_html
    local own = n_lines - cited
    if own < 1 then own = 1 end
    -- a tenth of the article, but at least five lines, should be original
    local min_own = math.ceil(n_lines/10)
    if min_own < 5 then min_own = 5 end
    group.n_cites = group.n_cites + cited
    group.n_origs = group.n_origs + own
    if not (own < min_own and n_lines > min_lines) then
        return 0
    end
    local ratio = cited / own
    if not (ratio > max_ratio) then
        return 0
    end
    local logmess = iam .. ": cites/origs = " .. s_format("%.3f", ratio)
    addHeader(article, lua_header, logmess)
    ln_log(LNLOG_SINFO, LNLOG_CARTICLE, logmess)
    return penalty
end

-- Returns "score" when the Message-ID matches the googlegroups.com
-- pattern below, else 0.
function google_mid(article, score)
    local from_google = Art(article, "message-id", {"<[%x_-]+@.+%.googlegroups%.com>"})
    if not from_google then
        return 0
    end
    return score
end

--[[--------------------------------------------------------------------

these definitions are only set if the global variable "SCRIPT_NOERROR"
is _not_ defined, which happens when "scripthooks.lua" is not executed
from inside fetchnews(8).  here you can define anything needed for
debugging of the script when not live.

--------------------------------------------------------------------]]--
if not SCRIPT_NOERROR then
    -- stand-in logger: drops the two leading arguments, prints the rest
    function ln_log(_severity, _context, ...)
        print(...)
        return nil
    end
    -- stand-in for ln_age: always reports 1
    function ln_age(...)
        return 1
    end
end

--[[--------------------------------------------------------------------
  lua_getinfo

[-(0|1), +(0|1|2), m]

int lua_getinfo (lua_State *L, const char *what, lua_Debug *ar);

Returns information about a specific function or function invocation.

To get information about a function invocation, the parameter ar must
be a valid activation record that was filled by a previous call to
lua_getstack or given as argument to a hook (see lua_Hook).

To get information about a function you push it onto the stack and start
the what string with the character '>'. (In that case, lua_getinfo pops
the function in the top of the stack.) For instance, to know in which
line a function f was defined, you can write the following code:

     lua_Debug ar;
     lua_getfield(L, LUA_GLOBALSINDEX, "f");  /* get global 'f' */
     lua_getinfo(L, ">S", &ar);
     printf("%d\n", ar.linedefined);

Each character in the string what selects some fields of the structure
ar to be filled or a value to be pushed on the stack:

  * 'n': fills in the field name and namewhat;
  * 'S': fills in the fields source, short_src, linedefined,
         lastlinedefined, and what;
  * 'l': fills in the field currentline;
  * 'u': fills in the field nups;
  * 'f': pushes onto the stack the function that is running at the given level;
  * 'L': pushes onto the stack a table whose indices are the numbers of the
         lines that are valid on the function. (A valid line is a line with
         some associated code, that is, a line where you can put a break point.
         Non-valid lines include empty lines and comments.)

This function returns 0 on error (for instance, an invalid option in
what).

  debug.getinfo ([thread,] function [, what])

Returns a table with information about a function. You can give the
function directly, or you can give a number as the value of function,
which means the function running at level function of the call stack
of the given thread: level 0 is the current function (getinfo itself);
level 1 is the function that called getinfo; and so on. If function is a
number larger than the number of active functions, then getinfo returns
nil.

The returned table may contain all the fields returned by lua_getinfo,
with the string what describing which fields to fill in. The default
for what is to get all information available, except the table of
valid lines. If present, the option 'f' adds a field named func with
the function itself. If present, the option 'L' adds a field named
activelines with the table of valid lines.

For instance, the expression debug.getinfo(1,"n").name returns a table
with a name for the current function, if a reasonable name can be
found, and the expression debug.getinfo(print) returns a table with all
available information about the print function.
--------------------------------------------------------------------]]--

--[[--------------------------------------------------------------------
function declareGlobal(name, initval)
    rawset(_G, name, initval or false)
end
function offender_info(call_level)
    local info = debug.getinfo(call_level, "Sl")
    local logmess = ""
    if info then
        if info.what == "C" then
            logmess = ": call level " .. call_level .. " is a C function"
        else
            logmess = s_format(": line %d: %s", info.currentline, info.short_src)
        end
    else
        logmess = ": could not get problem info"
    end
    return logmess
end
setmetatable(_G, {
    __newindex = function(_, var)
        local logmess = ("lua_global: set undeclared variable " .. var .. offender_info(3))
        -- error("lua_global: attempt to set undeclared variable " .. var, 2)
        -- dprint(logmess)
        ln_log(LNLOG_SERR, LNLOG_CALL, logmess)
        return nil
    end,
    __index = function(_, var)
        logmess = ("lua_global: read undeclared variable " .. var)
        error(logmess, 2)
        return nil
    end,
})
-- from here on you can only access declared variables!

declareGlobal("_")
declareGlobal("fetchnews_init")
declareGlobal("fetchnews_finish")
declareGlobal("fetchnews_init_group")
declareGlobal("fetchnews_finish_group")
declareGlobal("fetchnews_init_article")
declareGlobal("fetchnews_finish_article")
declareGlobal("fetchnews_headertable")
declareGlobal("fetchnews_headertxt")
declareGlobal("fetchnews_headertxt_bodytxt")
--------------------------------------------------------------------]]--

--[[--------------------------------------------------------------------

these are the result fields of the table your script can return. other
fields will be ignored. the "result" field must be present. all fields
except for "result" are optional.

"newsgroups", "body", "header", "result"

a typical result looks like this:
{result=SCRIPT_NOERROR,
 newsgroups="local.spam",
 body=SCRIPT_NOCHANGE,
 header=string}

NB:  you can shoot yourself in the foot with scripting! you can
     completely replace the header of an article and the cached part of
     the body. to be able to recognize your work later on, you should
     just append or prepend legal USENET format strings.

there are a number of hooks you can define, presenting different
interfaces. none of them has to be defined, in that case they are
treated as NOPs. you can add a definition at any time and remove it
again by setting it to the lua special value "nil".

typically, you would analyze headers in "fetchnews_headertxt"
or "fetchnews_headertable" and save away a more suitable one
retaining the inconspicuous ones. return {result=SCRIPT_NOERROR,
header=SCRIPT_NOCHANGE} then. later, in "fetchnews_headertxt_bodytxt"
you get a chance for analyzing deeper and make up your mind. there you
also can send both the complete header plus the cached part of the body
to an external filtering program. once you know what to do, you might
append the filtering result to the header and return, eg.:

{result=SCRIPT_NOERROR,
 newsgroups=("local.spam" .. "," .. original_group),
 body=SCRIPT_NOCHANGE,
 header=(original_header .. "\n" .. spam_marker_string)}

note for this to work, you need to have a local group named "local.spam"
defined in "/etc/leafnode/local.groups", otherwise the article won't
show up there and you might even get an error message.

the >>original_group<< can be retrieved in "fetchnews_init_group", which
exists for this reason and in case you want to setup some group-specific
patterns etc.

note also that only a certain amount of the body can be cached due to
memory-constraints. this limit is defined either at installation time or
at runtime through a fetchnews option. if you download large articles
and want to append a site-specific signature, the article might end up
with that signature somewhere in between.

--------------------------------------------------------------------]]--

-- if you need to pull in patterns from some file and build tables from
-- them, rather do this here instead of on every article init.
-- Logs host name and body cache size once at start-up; returns "ok".
function fetchnews_init()
    -- both values are missing when the script is loaded outside of
    -- fetchnews, so each one gets a printable placeholder
    local hostname = LN_HOSTNAME or "LN_HOSTNAME?"
    local body_size = LN_BODY_SIZE or "LN_BODY_SIZE?"
    local logmess = "fetchnews_init("
    logmess = (logmess .. hostname .. "), body_size = " .. body_size)
    ln_log(LNLOG_SNOTICE, LNLOG_CALL, logmess)
    return ok
end
-- Logs the run totals.  The lower-case and original-line totals are
-- raised to at least 1 so that the two ratios can be computed.
function fetchnews_finish(rc)
    local iam = "fetchnews_finish(" .. rc .. ")"
    if n_lcase_total < 1 then n_lcase_total = 1 end
    if n_origs_total < 1 then n_origs_total = 1 end
    local case_q = n_ucase_total/n_lcase_total
    local cite_q = n_cites_total/n_origs_total
    local summary = s_format(
        "%s: total=%d, spam=%d/ham=%d, ucase/lcase=%.3f, cite/orig=%.3f",
        iam, grand_total, spam_total, ham_total, case_q, cite_q)
    ln_log(LNLOG_SNOTICE, LNLOG_CALL, summary)
end

--[[--------------------------------------------------------------------

use fetchnews_init_group() to stash away the group name and setup
group specific filters. you can return a special status code named
SCRIPT_IGNORE_GROUP to have the group skipped on download.

--------------------------------------------------------------------]]--
function fetchnews_init_group(name_string)
    -- resets the global Group record, names it, logs the call and loads
    -- the archived Message-IDs kept for this group
    local iam = "fetchnews_init_group"
    local announce = iam .. "(" .. name_string .. ")"
    local fresh = emptyGroupData(Group)
    Group = fresh
    Group.name = name_string
    ln_log(LNLOG_SNOTICE, LNLOG_CGROUP, announce)
    -- return SCRIPT_IGNORE_GROUP
    populate_arc_mids(Group)
    return ok
end

-- use this for de-allocating group specific filter patterns etc.
function fetchnews_finish_group(status_code)
    -- adds the group's counters to the run totals, logs the group's
    -- ratios and saves the archived Message-IDs
    local iam = "fetchnews_finish_group(" .. status_code.. ")"
    local grp = Group
    n_ucase_total = n_ucase_total + grp.n_ucase
    n_lcase_total = n_lcase_total + grp.n_lcase
    n_cites_total = n_cites_total + grp.n_cites
    n_origs_total = n_origs_total + grp.n_origs
    -- the divisors are raised to 1 only after the totals were updated
    if grp.n_lcase < 1 then grp.n_lcase = 1 end
    local case_q = 0
    if not (grp.n_ucase < 1) then
        case_q = grp.n_ucase / grp.n_lcase
    end
    if grp.n_origs < 1 then grp.n_origs = 1 end
    local cite_q = 0
    if not (grp.n_cites < 1) then
        cite_q = grp.n_cites / grp.n_origs
    end
    ln_log(LNLOG_SNOTICE, LNLOG_CGROUP,
           s_format("%s: spam=%d/ham=%d, ucase/lcase=%.3f, cite/orig=%.3f",
                    iam, grp.spam, grp.ham, case_q, cite_q))
    save_arc_mids(grp.name)
    return ok
end

-- set up links between current article and current group
function fetchnews_init_article()
    -- log first, then reset the global Article record and cross-link it
    -- with the current Group
    ln_log(LNLOG_SNOTICE, LNLOG_CALL, "fetchnews_init_article()")
    local fresh = emptyArticleData(Article)
    Article = fresh
    Group.cur_art = fresh
    fresh.cur_group = Group
    return ok
end

-- status code tells you if anything went wrong while storing the
-- article, it is defined in "store.c", which see.
-- logs the call, resets the global Article record via emptyArticleData()
-- and counts the article in grand_total.
-- returns: ok
function fetchnews_finish_article(status_code)
    local call_text = "fetchnews_finish_article(" .. status_code .. ")"
    ln_log(LNLOG_SNOTICE, LNLOG_CARTICLE, call_text)
    Article = emptyArticleData(Article)
    grand_total = grand_total + 1
    return ok
end

-- "set score" helper function handed down to run_hooks
-- adds delta to article.score and logs the new score.
-- delta:   score change; anything that is not a number is ignored
-- article: table whose numeric >>score<< field is updated in place
-- returns: true if the score was changed, false otherwise
function score_fun(delta, article)
    if type(delta) ~= "number" then
        return false
    end
    article.score = article.score + delta
    -- the message is kept local; an undeclared >>logmess<< here would
    -- silently create (or overwrite) a global of that name
    local logmess = "score_fun: score now: " .. article.score
    ln_log(LNLOG_SINFO, LNLOG_CARTICLE, logmess)
    return true
end

--[[--------------------------------------------------------------------

the table's keys are integer numbers. this is because there can be
duplicate article headers, like "Original-Received:" etc. thus you need
to iterate over the table entries and split the values to look at header
names and their values. you don't have to handle continuation lines,
though: headers spilling over several lines in the original become one
long table entry.

--------------------------------------------------------------------]]--
-- hook that receives the article headers as an array of strings.
-- every entry matching head_val_regex is handed to addHeader() and also
-- stored in Article.s_header under the lower-cased header name (for
-- duplicate headers the last one wins there). a short From/Subject
-- summary is logged and added as a lua_header, Article.s_header.mid and
-- Article.article_age are filled in, and the "fn_headertable_hooks" are
-- run.
-- header_table: array of header lines, one complete header per entry
-- returns: table {result=ok}
function fetchnews_headertable(header_table)
    local iam = "fetchnews_headertable"
    -- shown in log and lua_header when From: or Subject: is absent;
    -- concatenating a nil there would abort the whole hook
    local missing = "(none)"

    ln_log(LNLOG_SNOTICE, LNLOG_CALL, iam .. " of length " .. #header_table)
    local logmess = Group.name .. ": "

    for _, line in ipairs(header_table) do
        local head, val = s_match(line, head_val_regex)
        if head then
            addHeader(Article, head, val)
            Article.s_header[s_lower(head)] = val
        end
    end

    local from = missing
    local raw_from = Article.s_header.from
    if raw_from then
        -- prefer the part inside <...> or (...); a From: without either
        -- is used as it is instead of passing nil to shortstring()
        local inner = s_match(raw_from, "[<(](.-)[)>]") or raw_from
        from = shortstring(inner, 23) or missing
    end
    local subject = missing
    if Article.s_header.subject then
        subject = shortstring(Article.s_header.subject, 49) or missing
    end

    logmess = logmess .. "F: " .. from .. ", S: " .. subject
    addHeader(Article, lua_header,
              iam .. " F:" .. from .. " S:" .. subject)
    -- watch out for "arithmetic", ie. dashes in field names like >>Message-ID<<
    Article.s_header.mid = Article.s_header["message-id"] or "bogus-artificial-message-id"
    if Article.s_header.date then
        -- only the first result of ln_age() is kept
        Article.article_age = ln_age(Article.s_header.date)
    end
    Article.article_age = Article.article_age or 0
    logmess = logmess .. ", " .. Article.article_age .. " days old"
    ln_log(LNLOG_SNOTICE, LNLOG_CARTICLE, logmess)

    if #header_table < 1 then
        dprint("header_table is empty")
        ln_log(LNLOG_SERR, LNLOG_CARTICLE, "header_table is empty")
    end
    run_hooks("fn_headertable_hooks", match_list, score_fun, Group, Article, header_table)
    return { result = ok }
end

-- gets the header text as a string and stores it, unchanged, in
-- Article.header.
-- header_string: header text of the current article
-- returns: table {result=ok}
function fetchnews_headertxt(header_string)
    Article.header = header_string
    return { result = ok }
end

--[[--------------------------------------------------------------------

this is an alternative in case you don't want to look at headers and
body separately. you get the text of header and body as strings.

note that you can make modified copies of the strings and return them to
become the _changed_ news spooled versions of the original article. the
original is lost, however, so you might just want to _add_ or remove a
few lines.

this hook allows you to change the newsgroups the article finally gets
stored in.  NB: the newsgroups do not necessarily have to exist, but
the format is strict: "ng1[,ng2,...]", ie. newsgroup names WITHOUT ANY
SPACES AROUND THE COMMAS!

--------------------------------------------------------------------]]--

-- hook that gets header and body text as strings, runs the scoring
-- hooks over them and decides what happens to the article.
-- the final score is the group's score limit (select_match() on
-- Scorelimit, else max_scorelimit) plus Article.score:
--   score > 0                                  keep, counted as ham
--   score <= 0                                 redirect to spam_ngs
--   score <= scorelimit + reject_cutoff        redirect to reject_ngs
--   score <= scorelimit + reject_cutoff - 13   result is rej
-- header_string, body_string: article text as handed in by the caller
-- returns: table with fields result, header (stringifyHeader(Article)),
--          body (Article.body) and newsgroups (all whitespace removed)
function fetchnews_headertxt_bodytxt(header_string, body_string)
    local iam = "fetchnews_headertxt_bodytxt"
    -- articles scoring this far below the redirect threshold are
    -- rejected instead of being redirected to reject_ngs
    local hard_reject_margin = 13
    local logmess = iam .. ": " .. Group.name
    local decision = ok
    local scorelimit = select_match(Scorelimit, Group.name) or max_scorelimit

    logmess = logmess .. " limit " .. scorelimit
        .. ", header len=" .. s_len(header_string)
        .. ", body len=" .. s_len(body_string)
    ln_log(LNLOG_SINFO, LNLOG_CARTICLE, logmess)

    Article.header = header_string
    Article.body = body_string
    run_hooks("fn_headertxt_hooks", match_list, score_fun, Group, Article, header_string)
    run_hooks("ScoredMatches", match_list, score_fun, Group, Article, body_string)
    run_hooks("fn_bodytxt_hooks", match_list, score_fun, Group, Article, body_string)
    run_hooks("fn_headertxt_bodytxt_hooks", match_list, score_fun, Group, Article, header_string, body_string)

    local score = scorelimit + Article.score
    -- headers, especially newsgroups might have been changed by hooks.
    -- without any Newsgroups header fall back to the current group, a nil
    -- here would make the whitespace clean-up below fail
    local newsgroups = Article.s_header.newsgroups or Group.name
    addHeader(Article, lua_header,
              iam .. ": " .. Group.name .. ": limit: " .. scorelimit .. ", score: " .. score)
    logmess = iam .. ": " .. Group.name .. ", score " .. score

    if score <= 0 then
        local redirect_at = scorelimit + reject_cutoff
        if score <= redirect_at - hard_reject_margin then
            decision = rej
            logmess = logmess .. ", rejected"
        elseif score <= redirect_at then
            newsgroups = reject_ngs
            logmess = logmess .. ", redirected to " .. newsgroups
        else
            newsgroups = spam_ngs
            logmess = logmess .. ", redirected to " .. newsgroups
        end
        spam_total = spam_total + 1
        Group.spam = Group.spam + 1
    else
        ham_total = ham_total + 1
        Group.ham = Group.ham + 1
    end
    ln_log(LNLOG_SINFO, LNLOG_CARTICLE, logmess)

    -- parentheses drop the substitution count returned by s_gsub
    newsgroups = (s_gsub(newsgroups, "%s+", ""))
    return {
        result = decision,
        header = stringifyHeader(Article),
        body = Article.body,
        newsgroups = newsgroups
    }
end

