/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.codegen;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import jcuda.Pointer;
import org.apache.sysds.hops.codegen.SpoofCompiler;
import org.apache.sysds.runtime.codegen.SpoofCUDACellwise;
import org.apache.sysds.runtime.codegen.SpoofOperator;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.instructions.cp.ScalarObject;
import org.apache.sysds.runtime.instructions.gpu.context.GPUObject;
import org.apache.sysds.runtime.matrix.data.LibMatrixCUDA;

/**
 * Contract for CUDA spoof operators of the codegen runtime, together with default helpers that
 * serialize an operator's arguments into a native staging buffer.
 *
 * <p>The serialized payload, as produced by {@link #packDataForTransfer}, consists of:
 * <ol>
 *   <li>a header of eight 32-bit ints ({@link #TRANSFERRED_DATA_HEADER_SIZE} bytes),</li>
 *   <li>one matrix descriptor ({@link #JNI_MAT_ENTRY_SIZE} bytes) per entry of the input list,</li>
 *   <li>one matrix descriptor for the output,</li>
 *   <li>one 64-bit double per scalar argument.</li>
 * </ol>
 */
public interface SpoofCUDAOperator {
    /**
     * Size in bytes of one matrix descriptor as written by
     * {@link #writeMatrixDescriptorToBuffer}: one long (nnz), two ints (rows, cols) and three
     * longs (row pointer, column index pointer, data pointer) = 8 + 4 + 4 + 3 * 8.
     */
    public static final int JNI_MAT_ENTRY_SIZE = 40;

    /** Size in bytes of the payload header written by {@link #packDataForTransfer}: 8 ints. */
    public static final int TRANSFERRED_DATA_HEADER_SIZE = 32;

    /**
     * Returns the operator name; the helpers in this interface pass it to the GPU allocation and
     * transpose calls.
     */
    public String getName();

    /**
     * Appends one matrix descriptor to {@code dst} at its current position.
     *
     * <p>Field order: {@code nnz} (long), {@code rows} (int), {@code cols} (int),
     * {@code row_ptr} (long), {@code col_idx_ptr} (long), {@code data_ptr} (long), i.e.
     * {@link #JNI_MAT_ENTRY_SIZE} bytes in total.
     *
     * @param dst         destination buffer; its position advances by the descriptor size
     * @param rows        number of rows
     * @param cols        number of columns
     * @param row_ptr     address written as the row pointer (callers in this interface pass 0 for dense data)
     * @param col_idx_ptr address written as the column index pointer (0 for dense data)
     * @param data_ptr    address of the values
     * @param nnz         number of non-zeros
     */
    public default void writeMatrixDescriptorToBuffer(ByteBuffer dst, int rows, int cols, long row_ptr, long col_idx_ptr, long data_ptr, long nnz) {
        dst.putLong(nnz);
        dst.putInt(rows);
        dst.putInt(cols);
        dst.putLong(row_ptr);
        dst.putLong(col_idx_ptr);
        dst.putLong(data_ptr);
    }

    /**
     * Writes the descriptor of {@code mo} into {@code buf}, using the GPU object of GPU context 0.
     *
     * <ul>
     *   <li>Sparse GPU object: row pointer, column index, value pointer and nnz are taken from
     *       {@code ec.getGPUSparsePointerAddress(mo)}.</li>
     *   <li>Dense and {@code tB1}: a new device buffer is allocated, filled via
     *       {@code LibMatrixCUDA.denseTranspose} and referenced by the descriptor.</li>
     *   <li>Dense otherwise: the descriptor references {@code ec.getGPUDensePointerAddress(mo)}.</li>
     * </ul>
     *
     * <p>Row and column counts are narrowed from long to int.
     *
     * @param buf destination buffer
     * @param ec  execution context providing the GPU context and pointer addresses
     * @param mo  matrix to describe
     * @param tB1 if {@code true} and the matrix is dense, describe a transposed copy instead of
     *            the matrix's own buffer; ignored for sparse matrices
     */
    public default void prepareMatrixPointers(ByteBuffer buf, ExecutionContext ec, MatrixObject mo, boolean tB1) {
        if (mo.getGPUObject(ec.getGPUContext(0)).isSparse()) {
            this.writeMatrixDescriptorToBuffer(buf,
                (int) mo.getNumRows(),
                (int) mo.getNumColumns(),
                GPUObject.getPointerAddress(ec.getGPUSparsePointerAddress(mo).rowPtr),
                GPUObject.getPointerAddress(ec.getGPUSparsePointerAddress(mo).colInd),
                GPUObject.getPointerAddress(ec.getGPUSparsePointerAddress(mo).val),
                ec.getGPUSparsePointerAddress(mo).nnz);
        } else if (tB1) {
            int rows = (int) mo.getNumRows();
            int cols = (int) mo.getNumColumns();
            Pointer b1 = mo.getGPUObject(ec.getGPUContext(0)).getDensePointer();
            // NOTE(review): this temporary buffer is not released anywhere in this method;
            // confirm who owns and frees it.
            Pointer ptr = ec.getGPUContext(0).allocate(this.getName(), (long) rows * cols * LibMatrixCUDA.sizeOfDataType, false);
            LibMatrixCUDA.denseTranspose(ec, ec.getGPUContext(0), this.getName(), b1, ptr, rows, cols);
            // The descriptor keeps the original (untransposed) rows/cols but points at the new buffer.
            this.writeMatrixDescriptorToBuffer(buf, rows, cols, 0L, 0L, GPUObject.getPointerAddress(ptr), mo.getNnz());
        } else {
            this.writeMatrixDescriptorToBuffer(buf, (int) mo.getNumRows(), (int) mo.getNumColumns(), 0L, 0L, ec.getGPUDensePointerAddress(mo), mo.getNnz());
        }
    }

    /**
     * Serializes all operator arguments into the native staging buffer obtained from
     * {@code SpoofOperator.getNativeStagingBuffer} for this operator's context.
     *
     * <p>Header (eight ints): total payload size, {@code ID}, {@code grix} narrowed to int,
     * {@code num_inputs}, {@code inputs.size() - num_inputs}, output flag (0 if {@code out_obj}
     * is {@code null}, else 1), number of scalars, and a trailing {@code -1} (presumably
     * padding; confirm against the native reader).
     *
     * <p>When {@code out_obj} is {@code null}, a device buffer is allocated for the result,
     * stored in {@code ptr[0]} and described as a 1x1 matrix.
     *
     * @param ec            execution context
     * @param inputs        all matrix inputs; one descriptor is written per element
     * @param scalarObjects scalar arguments, written as doubles after the matrix descriptors
     * @param out_obj       output matrix, or {@code null} if the output buffer is allocated here
     * @param num_inputs    count written to the header; also the index of the only input for
     *                      which {@code tB1} is honoured
     * @param ID            operator id written to the header
     * @param grix          value written to the header after narrowing to int
     * @param tB1           whether the input at index {@code num_inputs} is passed as a
     *                      transposed copy (dense inputs only)
     * @param ptr           out-parameter; {@code ptr[0]} receives the allocated output buffer when
     *                      {@code out_obj} is {@code null}, otherwise it is left untouched
     * @throws RuntimeException if the native staging buffer cannot be obtained
     */
    public default void packDataForTransfer(ExecutionContext ec, ArrayList<MatrixObject> inputs, ArrayList<ScalarObject> scalarObjects, MatrixObject out_obj, int num_inputs, int ID, long grix, boolean tB1, Pointer[] ptr) {
        // One descriptor per input plus one for the output, one double per scalar, plus the header.
        int op_data_size = (inputs.size() + 1) * JNI_MAT_ENTRY_SIZE
            + scalarObjects.size() * Double.BYTES
            + TRANSFERRED_DATA_HEADER_SIZE;
        Pointer staging = new Pointer();
        if (SpoofOperator.getNativeStagingBuffer(staging, this.getContext(), op_data_size) != 0) {
            throw new RuntimeException("Failed to get native staging buffer from spoof operator");
        }
        ByteBuffer buf = staging.getByteBuffer();
        if (buf == null) {
            // Fail with a clear message instead of a NullPointerException on the first put.
            throw new RuntimeException("Native staging buffer of spoof operator provides no ByteBuffer");
        }

        // Header: eight ints, TRANSFERRED_DATA_HEADER_SIZE bytes in total.
        buf.putInt(op_data_size);
        buf.putInt(ID);
        buf.putInt((int) grix);
        buf.putInt(num_inputs);
        buf.putInt(inputs.size() - num_inputs);
        buf.putInt(out_obj == null ? 0 : 1);
        buf.putInt(scalarObjects.size());
        buf.putInt(-1);

        // Input descriptors; only the input at index num_inputs may be passed transposed.
        for (int i = 0; i < inputs.size(); ++i) {
            this.prepareMatrixPointers(buf, ec, inputs.get(i), tB1 && i == num_inputs);
        }

        // Output descriptor.
        if (out_obj == null) {
            long num_blocks = 1L;
            if (this instanceof SpoofCUDACellwise) {
                // Ceiling division of the first input's cell count by 2 * NT; the buffer gets
                // one element per resulting block.
                // NOTE(review): NT presumably mirrors the kernel's threads-per-block; confirm.
                final int NT = 256;
                final long elementsPerBlock = NT * 2L;
                long N = inputs.get(0).getNumRows() * inputs.get(0).getNumColumns();
                num_blocks = (N + elementsPerBlock - 1L) / elementsPerBlock;
                ptr[0] = ec.getGPUContext(0).allocate(this.getName(), (long) LibMatrixCUDA.sizeOfDataType * num_blocks, false);
            } else {
                ptr[0] = ec.getGPUContext(0).allocate(this.getName(), (long) LibMatrixCUDA.sizeOfDataType * num_blocks, true);
            }
            this.writeMatrixDescriptorToBuffer(buf, 1, 1, 0L, 0L, GPUObject.getPointerAddress(ptr[0]), 1L);
        } else {
            this.prepareMatrixPointers(buf, ec, out_obj, false);
        }

        // Scalars follow the matrix descriptors, Double.BYTES each.
        for (ScalarObject scalarObject : scalarObjects) {
            buf.putDouble(scalarObject.getDoubleValue());
        }
    }

    /**
     * Executes the operator and returns a matrix result.
     *
     * @param var1 execution context
     * @param var2 matrix inputs
     * @param var3 scalar inputs
     * @param var4 string argument; presumably the output variable name (TODO confirm against implementers)
     * @return the resulting matrix object
     */
    public MatrixObject execute(ExecutionContext var1, ArrayList<MatrixObject> var2, ArrayList<ScalarObject> var3, String var4);

    /**
     * Executes the operator and returns a scalar result.
     *
     * @param var1 execution context
     * @param var2 matrix inputs
     * @param var3 scalar inputs
     * @return the resulting scalar object
     */
    public ScalarObject execute(ExecutionContext var1, ArrayList<MatrixObject> var2, ArrayList<ScalarObject> var3);

    /**
     * Execution entry point; by its name the double-precision variant (TODO confirm).
     *
     * @param var1 long argument; presumably the native context handle (TODO confirm)
     * @return an int status/result code defined by the implementer
     */
    public int execute_dp(long var1);

    /**
     * Execution entry point; by its name the single-precision variant (TODO confirm).
     *
     * @param var1 long argument; presumably the native context handle (TODO confirm)
     * @return an int status/result code defined by the implementer
     */
    public int execute_sp(long var1);

    /** Returns the context handle passed to {@code SpoofOperator.getNativeStagingBuffer}. */
    public long getContext();

    /**
     * Base class for objects that run a {@link SpoofCUDAOperator}; the class name suggests one
     * subclass per floating-point precision (TODO confirm against subclasses).
     */
    public static abstract class PrecisionProxy {
        /** Context handle registered in {@code SpoofCompiler.native_contexts} for the CUDA generator API. */
        protected final long ctx = SpoofCompiler.native_contexts.get(SpoofCompiler.GeneratorAPI.CUDA);

        /**
         * Runs the given operator.
         *
         * @param var1 operator to execute
         * @return an int status/result code defined by the subclass
         */
        public abstract int exec(SpoofCUDAOperator var1);
    }
}

