/*==============================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*/
#include <klib/rc.h>
#include <kfs/md5.h>
#include <kfs/bzip.h>
#include <kfs/gzip.h>

#include <stdlib.h>
#include <string.h>

#include "loader-file.h"
#include "debug.h"

/* Build-time configuration.
 *  SRALOADERFILE_TRACING    - non-zero enables the extra per-read / per-release
 *                             DEBUG_MSG output guarded by this macro below.
 *  SRALOADERFILE_BUFFERSIZE - size in bytes of the read buffer allocated per file.
 * In debugging builds both may be predefined (e.g. on the compiler command line);
 * in all other builds any predefined value is discarded and the defaults are forced.
 */
#if _DEBUGGING
#   ifndef SRALOADERFILE_TRACING
#       define SRALOADERFILE_TRACING 0
#   endif
#   ifndef SRALOADERFILE_BUFFERSIZE
#       define SRALOADERFILE_BUFFERSIZE (256 * 1024)
#   endif
#else
#   undef SRALOADERFILE_TRACING
#   define SRALOADERFILE_TRACING 0
#   undef SRALOADERFILE_BUFFERSIZE
#   define SRALOADERFILE_BUFFERSIZE (256 * 1024)
#endif

/* Progress accounting shared by all SRALoaderFile objects in this translation unit.
 * NOTE(review): plain statics with no locking - presumably used from a single thread; confirm.
 */
/* sum of the sizes of all files for which SRALoaderFile_Make could obtain a size */
static uint64_t g_total_size = 0;
/* total number of bytes returned by KFileRead across all files */
static uint64_t g_total_read = 0;
/* last percentage that was reported through the log by SRALoaderFileFill */
static uint8_t g_total_percent = 0;

/* SRALoaderFile
 *  buffered reader over a single input file; the underlying KFile is opened
 *  lazily on the first buffer fill
 */
struct SRALoaderFile
{
    /* directory the file is opened from; a reference is added in SRALoaderFile_Make
       and dropped in SRALoaderFile_Release */
    const KDirectory *dir;
    /* file name relative to dir; freed in SRALoaderFile_Release */
    char* filename;
    /* descriptors supplied to SRALoaderFile_Make; only read here, never freed */
    const DataBlock* data_block;
    const DataBlockFileAttr* file_attr;
    /* optional expected MD5 digest; when not NULL the file is wrapped in an MD5 reader on open */
    const uint8_t* md5_digest;

    /* current file */
    /* offset base: the current read offset is pos + (buffer_pos - buffer) */
    uint64_t pos;
    /* NULL until the first fill opens the file */
    const KFile *kfile;
    /* set when the last KFileRead returned zero bytes */
    bool eof;
    uint32_t eol; /* next line start in buffer (next symbol after previously detected eol) */
    /* number of eol-terminated lines returned by SRALoaderFileReadline so far */
    uint64_t line_no;

    /* file buffer */
    /* start of the unread data inside buffer */
    uint8_t *buffer_pos;
    /* number of unread bytes starting at buffer_pos */
    uint32_t avail;
    /* allocation of buffer_size bytes; freed in SRALoaderFile_Release */
    uint8_t *buffer;
    size_t buffer_size;
#if _DEBUGGING
    uint32_t small_reads; /* used to detect ineffective reads from file */
#endif
};

/* Open
 *  open self->filename in self->dir and store the resulting KFile in self->kfile
 *
 *  - if the name cannot be opened and ends in ".gz" or ".bz2", the open is
 *    retried with that extension stripped; a file opened this way is read
 *    as-is (no decompression, no MD5 check)
 *  - if an MD5 digest was supplied and the file opened under its own name,
 *    the file is wrapped in an MD5-verifying reader
 *  - ".gz" / ".bz2" files are wrapped in the matching decompressing reader
 *
 *  returns 0 on success; on failure the error is logged, any partially
 *  built KFile is released and self->kfile is left untouched
 */
static
rc_t SRALoaderFile_Open(SRALoaderFile* self)
{
    rc_t rc = 0;
    const KFile* kf = NULL;
    const uint8_t* md5 = self->md5_digest;
    const char* ext = strrchr(self->filename, '.');

    DEBUG_MSG (3, ("%s opening %s\n", __func__, self->filename));
    if( ext == NULL ) {
        /* no extension: point at the terminating NUL (not at a literal "")
           so that (ext - self->filename) below stays a valid length */
        ext = self->filename + strlen (self->filename);
    }
    /* the path argument is a printf-style format: pass the name through "%s"
       so that a '%' inside the file name is not interpreted as a conversion */
    if( (rc = KDirectoryOpenFileRead(self->dir, &kf, "%s", self->filename)) != 0 ) {
        if( strcmp(ext, ".gz") == 0 || strcmp(ext, ".bz2") == 0 ) {
            DEBUG_MSG (3, ("%s retry opening as %.*s\n", __func__, (int)(ext - self->filename),
                           self->filename));
            /* precision for "%.*s" must be an int, not a ptrdiff_t */
            rc = KDirectoryOpenFileRead(self->dir, &kf, "%.*s", (int)(ext - self->filename), self->filename);
        }
        /* whatever was opened (if anything) is not the named file:
           skip MD5 verification and decompression */
        md5 = NULL;
        ext = "";
    }
    if( rc == 0 && md5 != NULL ) {
        const KFile *md5File = NULL;
        if( (rc = KFileMakeMD5Read(&md5File, kf, self->md5_digest)) == 0) {
            /* NOTE(review): the source file is not released here - presumably the
               MD5 reader takes over the reference; confirm against kfs/md5.h */
            kf = md5File;
        }
    }
    if( rc == 0 ) {
        if( strcmp(ext, ".gz") == 0 ) {
            const KFile *z = NULL;
            if( (rc = KFileMakeGzipForRead(&z, kf)) == 0) {
                /* drop our reference to the wrapped file and continue with the wrapper */
                KFileRelease(kf);
                kf = z;
            }
        } else if( strcmp(ext, ".bz2") == 0 ) {
            const KFile *z = NULL;
            if( (rc = KFileMakeBzip2ForRead(&z, kf)) == 0) {
                KFileRelease(kf);
                kf = z;
            }
        }
    }
    if( rc != 0 ) {
        PLOGERR(klogErr, (klogErr, rc, "SRALoaderFile opening $(file)", PLOG_S(file), self->filename));
        KFileRelease(kf);
    } else {
        self->kfile = kf;
    }
    return rc;
}

/* Fill
 *  fill buffer as far as possible, shift unread data in buffer to buffer start
 *
 *  opens the file on first use; reads until the buffer is full, the file
 *  reports end of data (a read of zero bytes sets self->eof) or a read fails.
 *  after a successful call the overall progress percentage is logged
 *  whenever it has grown since the last report.
 *
 *  returns 0 on success or the rc of the failed open / read
 */
static
rc_t SRALoaderFileFill(SRALoaderFile *self)
{
    rc_t rc = 0;

    if (self->kfile == NULL) {
        rc = SRALoaderFile_Open(self);
    }
    if( rc == 0 ) {
        /* determine space in buffer available */
        size_t to_read = self->buffer_size - self->avail;
        if( to_read > 0 ) {
#if _DEBUGGING
            if( to_read < self->buffer_size * 0.5 ) {
                self->small_reads++;
                if( self->small_reads > 10 ) {
                    /* PLOG_U32 consumes a 32-bit argument: convert the size_t explicitly */
                    PLOGMSG(klogWarn, (klogWarn, "$(filename) INEFFECTIVE READING: $(times) times, now $(bytes) bytes",
                        PLOG_3(PLOG_S(filename),PLOG_U32(times),PLOG_U32(bytes)), self->filename, self->small_reads, (uint32_t)to_read));
                }
            }
#endif
            /* shift left unread data */
            memmove(self->buffer, self->buffer_pos, self->avail);
            /* skip read chunk in buffer */
            self->pos += self->buffer_pos - self->buffer;
            /* reset pointer */
            self->buffer_pos = self->buffer;
            do { /* fill buffer up to eof */
                size_t num_read = 0;
                if( (rc = KFileRead(self->kfile, self->pos + self->avail,
                                &self->buffer[self->avail], to_read, &num_read)) == 0 ) {
                    self->eof = (num_read == 0);
                    self->avail += num_read;
                    to_read -= num_read;
                    g_total_read += num_read;
#if SRALOADERFILE_TRACING
                    DEBUG_MSG (10, ("SRALoaderFile read %s from %ju %u bytes%s\n",
                                    self->filename, self->pos + self->avail - num_read,
                                    num_read, self->eof ? " EOF" : ""));
#endif
                }
            } while( rc == 0 && to_read > 0 && !self->eof );
        }
    }
    if( rc == 0 ) {
        /* keep the percentage in 64 bits until it has been range-checked:
           more bytes can be read than were counted in g_total_size (e.g. when
           reading through a decompressor), and narrowing to 8 bits first would
           let values above 255 wrap back into the 0..100 range */
        uint64_t percent = 100;
        if( g_total_size > 0 ) {
            percent = g_total_read * 100 / g_total_size;
        }
        if( percent > g_total_percent && percent <= 100 ) {
            g_total_percent = (uint8_t)percent;
            PLOGMSG(klogInt, (klogInfo, "processed $(percent)%; $(processed) of $(total) bytes",
                "severity=status,processed=%lu,total=%lu,percent=%hu",
                g_total_read, g_total_size, g_total_percent));
        }
    }
    return rc;
}

/* IsEof
 *  report through 'eof' whether all data has been consumed: the underlying
 *  file hit end of data and no unread bytes remain in the buffer
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFile_IsEof(const SRALoaderFile* cself, bool* eof)
{
    if( cself != NULL && eof != NULL ) {
        /* an empty buffer alone is not enough - the file itself must be exhausted */
        *eof = (cself->avail == 0) && cself->eof;
        return 0;
    }
    return RC(rcSRA, rcFile, rcConstructing, rcParam, rcNull);
}

/* LOG
 *  log a message and/or an rc, automatically adding the file name and the
 *  current position (line number once lines have been read, byte offset
 *  otherwise) to the log parameters
 *
 *  lvl  - log level
 *  rc   - error code; 0 logs a plain message, non-zero logs an error
 *  msg  - message template; may be NULL only when rc is non-zero
 *  fmt  - optional format for additional parameters consumed from '...'
 *
 *  returns rc as passed in, or an rcInvalid parameter error when self is NULL
 *  or there is neither a message nor an rc to log
 */
rc_t SRALoaderFile_LOG(const SRALoaderFile* cself, KLogLevel lvl, rc_t rc, const char *msg, const char *fmt, ...)
{
    /* nothing sensible to log */
    if( cself == NULL || (msg == NULL && rc == 0) ) {
        rc = RC(rcRuntime, rcFile, rcAccessing, rcParam, rcInvalid);
        LOGERR(klogErr, rc, __func__);
        return rc;
    }

    /* rc only (rc is known to be non-zero here): caller arguments are not used */
    if( msg == NULL ) {
        if( cself->line_no != 0 ) {
            PLOGERR(lvl, (lvl, rc, NULL, PLOG_2(PLOG_S(file),PLOG_U64(line)), cself->filename, cself->line_no));
        } else {
            PLOGERR(lvl, (lvl, rc, NULL, PLOG_2(PLOG_S(file),PLOG_U64(line)), cself->filename, cself->pos));
        }
        return rc;
    }

    {
        char xfmt[4096];
        va_list args;
        const char* caller_fmt = (fmt != NULL) ? fmt : "";
        const char* separator = (fmt != NULL) ? "," : "";

        va_start(args, fmt);
        /* prepend the location to the caller's format; if the combined format
           does not fit, the caller's format is used unchanged */
        if( cself->line_no != 0 ) {
            if( knprintf(xfmt, sizeof(xfmt), "file=%s,line=%" LU64 "%s%s", cself->filename, cself->line_no, separator, caller_fmt) < sizeof(xfmt) ) {
                fmt = xfmt;
            }
        } else {
            if( knprintf(xfmt, sizeof(xfmt), "file=%s,offset=%" LU64 "%s%s", cself->filename, cself->pos, separator, caller_fmt) < sizeof(xfmt) ) {
                fmt = xfmt;
            }
        }
        if( rc != 0 ) {
            VLOGERR(lvl, (lvl, rc, msg, fmt, args));
        } else {
            VLOGMSG(lvl, (lvl, msg, fmt, args));
        }
        va_end(args);
    }
    return rc;
}

/* Offset
 *  store in 'offset' the position of the first unread buffered byte:
 *  the offset base plus the amount already consumed from the buffer
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFile_Offset(const SRALoaderFile* cself, uint64_t* offset)
{
    if( cself != NULL && offset != NULL ) {
        *offset = cself->pos + (cself->buffer_pos - cself->buffer);
        return 0;
    }
    return RC(rcSRA, rcFile, rcConstructing, rcParam, rcNull);
}

/* AllocateBuffer
 *  allocate the read buffer of SRALOADERFILE_BUFFERSIZE bytes and point the
 *  read position at its start
 *
 *  returns 0 on success; on allocation failure the buffer is NULL, its size
 *  is 0 and an rcExhausted memory error is returned
 */
static
rc_t SRALoaderFileAllocateBuffer(SRALoaderFile *self)
{
    uint8_t *mem = malloc(SRALOADERFILE_BUFFERSIZE);

    self->buffer = mem;
    if( mem == NULL ) {
        self->buffer_size = 0;
        return RC(rcSRA, rcFile, rcConstructing, rcMemory, rcExhausted);
    }
    self->buffer_size = SRALOADERFILE_BUFFERSIZE;
    mem[0] = 0;
    self->buffer_pos = mem;
    return 0;
}

/* Readline
 *  return the next line of the file without copying it
 *
 *  buffer - receives a pointer into the internal buffer (not NUL-terminated);
 *           set to NULL when the end of the file has been reached
 *  length - receives the number of bytes of the line, excluding the line end
 *
 *  "\n", "\r" and "\r\n" are all accepted as line ends, including a "\r\n"
 *  pair that is split across two buffer fills. A final line without a line
 *  end is returned as-is. When the buffer is completely full and contains no
 *  line end, the buffered chunk is returned together with an rcTooLong error;
 *  the next call continues with the remainder of that line.
 *
 *  the returned pointer refers to the internal buffer, which later calls
 *  refill and shift
 */
rc_t SRALoaderFileReadline(const SRALoaderFile* cself, const void** buffer, size_t* length)
{
    rc_t rc = 0;

    if(cself == NULL || buffer == NULL || length == NULL) {
        rc = RC( rcSRA, rcFile, rcAccessing, rcParam, rcNull);
    } else {
        SRALoaderFile *self = (SRALoaderFile*)cself;
        uint8_t* nl = NULL;
        bool refill = true;
        while( rc == 0 ) {
            bool CR_last = false;
            int i, cnt = self->avail - self->eol;
            uint8_t* buf = &self->buffer_pos[self->eol];
            *buffer = buf;
            /* find first eol from current position */
            for(nl = NULL, i = 0; i < cnt && nl == NULL; i++) {
                if(buf[i] == '\n' || buf[i] == '\r') {
                    nl = &buf[i];
                }
            }
            /* stop when a line end was found or the buffer was already refilled once */
            if( !(nl == NULL && refill) ) {
                break;
            }
            refill = false;
            /* none found we need to push out processed portion and load full buffer */
            if( self->eol > 0 ) {
                /* mark that line ended on buffer end and last char in buffer is \r */
                CR_last = self->eol == self->avail && self->buffer_pos[self->eol - 1] == '\r';
                self->buffer_pos += self->eol;
                self->avail -= self->eol;
                self->eol = 0;
            }
            if( (rc = SRALoaderFileFill(self)) == 0 ) {
                /* only look at buffer_pos[0] when the fill actually delivered data,
                   otherwise it is a leftover byte from an earlier fill */
                if( CR_last && self->avail > 0 && self->buffer_pos[0] == '\n' ) {
                    /* in previous chunk last char was \r and in next chunk 1st char is \n
                    this is \r\n seq split by buffer, need to ignore \n */
                    self->eol++;
                }
            }
        }
        if( rc == 0 ) {
            if( nl == NULL ) {
                /* no line end in the buffered data: hand out what follows the
                   current line start (eol is 1 here when a split \n was skipped) */
                *length = self->avail - self->eol;
                self->eol = self->avail;
                if( self->buffer_size == self->avail ) {
                    /* buffer could be copied and next call will provide tail of line */
                    rc = RC( rcSRA, rcFile, rcReading, rcString, rcTooLong);
                } else if( (rc = SRALoaderFile_IsEof(cself, &refill)) == 0 && refill ) {
                    /* EOF */
                    *buffer = NULL;
                }
            } else {
                *length = nl - (uint8_t*)*buffer;
                self->eol = nl - self->buffer_pos + 1;
                /* look ahead for \n only inside the valid data: bytes past
                   buffer_pos + avail are stale or uninitialized, and counting
                   one of them would push eol beyond avail */
                if( *nl == '\r' && nl + 1 < self->buffer_pos + self->avail && *(nl + 1) == '\n' ) {
                    /* \r\n */
                    self->eol++;
                }
                self->line_no++;
            }
        }
    }
    return rc;
}

/* Read
 *  skip 'advance' bytes, then expose at least 'size' bytes of buffered data
 *
 *  advance - number of bytes to consume before reading
 *  size    - number of bytes requested; 0 means "whatever is buffered"
 *  buffer  - receives a pointer into the internal buffer, or NULL once the
 *            file is exhausted and nothing is buffered
 *  length  - receives 'size' when that many bytes are available (and size > 0),
 *            otherwise the number of bytes actually buffered
 *
 *  returns rcInsufficient when fewer than 'size' bytes could be buffered
 *  although the file is not at its end; a short result at end of file is
 *  not an error
 */
rc_t SRALoaderFileRead(const SRALoaderFile* cself, size_t advance, size_t size, const void** buffer, size_t* length)
{
    rc_t rc = 0;
    SRALoaderFile* self = (SRALoaderFile*)cself;

    if (cself == NULL || buffer == NULL || length == NULL ) {
        return RC(rcSRA, rcFile, rcPositioning, rcSelf, rcNull);
    }

    if( advance > 0 ) {
        if( advance < self->avail ) {
            /* skip inside the buffered data, keeping the line start in step */
            self->buffer_pos += advance;
            self->avail -= advance;
            if( self->eol > advance ) {
                self->eol -= advance;
            } else {
                self->eol = 0;
            }
        } else {
            /* skip past everything that is buffered */
            self->pos += advance;
            self->avail = 0;
            self->eol = 0;
        }
    }

    /* refill when the buffer is empty or holds less than requested */
    if( self->avail == 0 || size > self->avail ) {
        rc = SRALoaderFileFill(self);
        if( rc != 0 ) {
            return rc;
        }
    }

    *buffer = self->buffer_pos;
    *length = self->avail;
    if( self->eof && self->avail == 0 ) {
        *buffer = NULL;
    } else if( size > self->avail ) {
        if( !self->eof ) {
            rc = RC( rcSRA, rcFile, rcReading, rcBuffer, rcInsufficient);
        }
    } else if( size > 0 ) {
        *length = size;
    }
    return rc;
}

/* Name
 *  store the file name held by 'self' in 'name'; the string remains owned
 *  by the file object
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFileName(const SRALoaderFile *self, const char **name)
{
    if( self != NULL && name != NULL ) {
        *name = self->filename;
        return 0;
    }
    return RC(rcSRA, rcFile, rcAccessing, rcParam, rcNull);
}

/* BlockName
 *  store the 'name' field of the file's data block in 'block_name'
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFileBlockName ( const SRALoaderFile *self, const char **block_name )
{
    if( self != NULL && block_name != NULL ) {
        *block_name = self->data_block->name;
        return 0;
    }
    return RC(rcSRA, rcFile, rcAccessing, rcParam, rcNull);
}

/* MemberName
 *  store the 'member_name' field of the file's data block in 'member_name'
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFileMemberName(const SRALoaderFile *self, const char **member_name)
{
    if( self != NULL && member_name != NULL ) {
        *member_name = self->data_block->member_name;
        return 0;
    }
    return RC(rcSRA, rcFile, rcAccessing, rcParam, rcNull);
}

/* Sector
 *  store the 'sector' field of the file's data block in 'sector'
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFileSector( const SRALoaderFile *self, int64_t* sector)
{
    if( self != NULL && sector != NULL ) {
        *sector = self->data_block->sector;
        return 0;
    }
    return RC(rcSRA, rcFile, rcAccessing, rcParam, rcNull);
}

/* Region
 *  store the 'region' field of the file's data block in 'region'
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFileRegion( const SRALoaderFile *self, int64_t* region)
{
    if( self != NULL && region != NULL ) {
        *region = self->data_block->region;
        return 0;
    }
    return RC(rcSRA, rcFile, rcAccessing, rcParam, rcNull);
}

/* QualityScoringSystem
 *  store the 'quality_scoring_system' field of the file attributes in
 *  'quality_scoring_system'
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFile_QualityScoringSystem( const SRALoaderFile *self, ExperimentQualityType* quality_scoring_system )
{
    if( self != NULL && quality_scoring_system != NULL ) {
        *quality_scoring_system = self->file_attr->quality_scoring_system;
        return 0;
    }
    return RC(rcSRA, rcFile, rcAccessing, rcParam, rcNull);
}

/* QualityEncoding
 *  store the 'quality_encoding' field of the file attributes in
 *  'quality_encoding'
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFile_QualityEncoding( const SRALoaderFile *self, ExperimentQualityEncoding* quality_encoding )
{
    if( self != NULL && quality_encoding != NULL ) {
        *quality_encoding = self->file_attr->quality_encoding;
        return 0;
    }
    return RC(rcSRA, rcFile, rcAccessing, rcParam, rcNull);
}

/* AsciiOffset
 *  store the 'ascii_offset' field of the file attributes in 'ascii_offset'
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFile_AsciiOffset( const SRALoaderFile *self, uint8_t* ascii_offset )
{
    if( self != NULL && ascii_offset != NULL ) {
        *ascii_offset = self->file_attr->ascii_offset;
        return 0;
    }
    return RC(rcSRA, rcFile, rcAccessing, rcParam, rcNull);
}

/* FileType
 *  store the 'filetype' field of the file attributes in 'filetype'
 *
 *  returns a non-zero rc when either argument is NULL, 0 otherwise
 */
rc_t SRALoaderFile_FileType( const SRALoaderFile *self, ERunFileType* filetype )
{
    if( self != NULL && filetype != NULL ) {
        *filetype = self->file_attr->filetype;
        return 0;
    }
    return RC(rcSRA, rcFile, rcAccessing, rcParam, rcNull);
}

/* Release
 *  close the underlying file and destroy the object: drops the directory
 *  reference and frees the file name, the buffer and the object itself
 *
 *  a NULL 'cself' is accepted and ignored
 *
 *  returns the rc of releasing the underlying file (this is where an MD5
 *  mismatch may surface); the object is destroyed regardless of that rc.
 *  the number of bytes consumed is logged only when the release succeeded
 */
rc_t SRALoaderFile_Release(const SRALoaderFile* cself)
{
    SRALoaderFile* self = (SRALoaderFile*)cself;
    rc_t rc;

    if( self == NULL ) {
        return 0;
    }
#if SRALOADERFILE_TRACING
    DEBUG_MSG (2, ("SRALoaderFile release: '%s'\n", self->filename));
#endif
    /* may return md5 check error here */
    rc = KFileRelease(self->kfile);
    if( rc == 0 ) {
        PLOGMSG(klogInt, (klogInfo, "file $(file) $(bytes) bytes",
                "severity=file,file=%s,bytes=%lu",
                self->filename, self->pos + (self->buffer_pos - self->buffer)));
    }
    KDirectoryRelease(self->dir);
    free(self->filename);
    free(self->buffer);
    free(self);
    return rc;
}

/* Make
 *  create a buffered reader for 'filename' inside 'dir'; the file itself is
 *  not opened until data is first requested
 *
 *  file       - receives the new object on success, NULL on later failure
 *  dir        - directory to open the file from; a reference is added
 *  filename   - heap-allocated name; ownership passes to this function in
 *               every case: it is freed on failure and by
 *               SRALoaderFile_Release otherwise
 *  block      - data block descriptor, stored by pointer (not copied)
 *  fileattr   - file attributes, stored by pointer (not copied)
 *  md5_digest - optional expected MD5 digest, may be NULL
 *
 *  on success the file size, when it can be determined, is added to the
 *  total used for progress reporting
 *
 *  returns 0 on success, rcNull for a missing mandatory argument,
 *  rcExhausted when memory cannot be allocated, or the rc of the failed
 *  directory call
 */
rc_t SRALoaderFile_Make(const SRALoaderFile **file, const KDirectory* dir, char* filename,
                        const DataBlock* block, const DataBlockFileAttr* fileattr, const uint8_t* md5_digest)
{
    rc_t rc = 0;
    SRALoaderFile* obj;

    if( file == NULL || dir == NULL || filename == NULL || block == NULL || fileattr == NULL ) {
        free(filename);
        return RC(rcSRA, rcFile, rcConstructing, rcParam, rcNull);
    }
    *file = NULL;
    if( (obj = calloc(1, sizeof(*obj))) == NULL ) {
        free(filename);
        return RC(rcSRA, rcFile, rcConstructing, rcMemory, rcExhausted);
    }
    /* from here on the object owns filename; SRALoaderFile_Release frees it */
    obj->filename = filename;
    obj->data_block = block;
    obj->file_attr = fileattr;
    obj->md5_digest = md5_digest;

    if( (rc = KDirectoryAddRef(dir)) == 0 ) {
        obj->dir = dir;
    }
    if( rc == 0 ) {
        rc = SRALoaderFileAllocateBuffer(obj);
    }
    if( rc == 0 ) {
        uint64_t sz = 0;
        *file = obj;
        /* the path argument is a printf-style format: pass the name through "%s"
           so that a '%' inside the file name is not interpreted as a conversion.
           a failure to obtain the size is deliberately ignored - it only affects
           progress reporting */
        if( KDirectoryFileSize(dir, &sz, "%s", filename) == 0 ) {
            g_total_size += sz;
        }
    } else {
        SRALoaderFile_Release(obj);
    }
    return rc;
}
