//                                               -*- C++ -*-
/**
 *  @file  CovarianceHMatrix.cxx
 *  @brief This class builds a covariance model as an H-Matrix
 *
 *  Copyright 2005-2015 Airbus-EDF-IMACS-Phimeca
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  @author schueller
 *  @date   2012-07-16 15:59:45 +0200 (Mon, 16 Jul 2012)
 */
#include "CovarianceHMatrix.hxx"
#include "OSS.hxx"

#ifdef OPENTURNS_HAVE_HMAT
//  HMat does not use C++ bindings of MPI, but including <mpi.h> with
//  a C++ compiler embeds useless C++ definitions.
#ifndef OMPI_SKIP_MPICXX
# define OMPI_SKIP_MPICXX
#endif
#ifndef MPICH_SKIP_MPICXX
# define MPICH_SKIP_MPICXX 1
#endif

#include <hmat/config.h>
#include <hmat/hmat.h>
#ifdef HMAT_HAVE_STARPU
# include <starpu_engine.hpp>
# define HMatEngine StarPUEngine
#elif defined(HMAT_HAVE_RUNTIME)
# include <runtime_engine.hpp>
# define HMatEngine RuntimeEngine
#else
# define HMatEngine DefaultEngine
#endif

#include <interaction.hpp>
#include <hmat_cpp_interface.hpp>
#include <cstring>
#include <cstdlib>
#include <vector>
#endif

BEGIN_NAMESPACE_OPENTURNS


// PIMPL idiom to not include HMatInterface in CovarianceHMatrix.hxx
struct CovarianceHMatrix::MatImpl
{
#ifdef OPENTURNS_HAVE_HMAT
  HMatInterface<NumericalScalar, HMatEngine> hmat;
  MatImpl(ClusterTree* rows, ClusterTree* cols): hmat(rows, cols) {}
#endif
};

#ifdef OPENTURNS_HAVE_HMAT
// First implementation, by using SimpleAssemblyFunction.
// This is going to be very slow, because each local
// covariance matrix of size 3x3 is computed, but a single
// coefficient is stored.
class CovarianceAssemblyFunction : public SimpleAssemblyFunction<NumericalScalar>
{
private:
  const CovarianceModel & covarianceModel_;
  const NumericalSample & vertices_;
  const UnsignedInteger covarianceDimension_;
  const double epsilon_;

public:
  CovarianceAssemblyFunction(const CovarianceModel & covarianceModel, const NumericalSample & vertices, double epsilon = 0.0)
    : SimpleAssemblyFunction<NumericalScalar>()
    , covarianceModel_(covarianceModel)
    , vertices_(vertices)
    , covarianceDimension_(covarianceModel.getDimension())
    , epsilon_(epsilon) {}

  NumericalScalar interaction(int i, int j) const
  {
    const UnsignedInteger rowIndex(i / covarianceDimension_), columnIndex(j / covarianceDimension_);
    const CovarianceMatrix localCovarianceMatrix(covarianceModel_( vertices_[rowIndex],  vertices_[columnIndex] ));
    const UnsignedInteger rowIndexLocal(i % covarianceDimension_), columnIndexLocal(j % covarianceDimension_);
    return localCovarianceMatrix(rowIndexLocal, columnIndexLocal) + (i != j ? 0.0 : epsilon_);
  }
};

// Second implementation, by using CovarianceBlockAssemblyFunction
// This requires lots of extra stuff

/**
  This structure contains data related to our problem.
*/
class problem_data_t
{
public:
  problem_data_t(const CovarianceModel & covarianceModel, const NumericalSample & vertices, const NumericalScalar epsilon)
    : covarianceModel_(covarianceModel)
    , vertices_(vertices)
    , epsilon_(epsilon) {}

  const CovarianceModel & covarianceModel_;
  const NumericalSample & vertices_;
  const NumericalScalar epsilon_;
};

/**
  This structure is a glue between hmat library and application.
*/
typedef struct
{
  int row_start;
  int row_count;
  int col_start;
  int* row_hmat2client;
  int* row_client2hmat;
  int* col_hmat2client;
  int* col_client2hmat;
  problem_data_t* user_context;
} block_data_t;

/**
  Auxiliary data structure to represente a couple of degrees of freedom.
  This data structure will be sorted by compare_couple_indices so that
  all couples which have the same (point_1,point_2) are stored together.
*/
typedef struct
{
  int point_1;
  int point_2;
  int dim_1;
  int dim_2;
} couple_data_t;

static bool compare_couple_indices (const couple_data_t& couple1, const couple_data_t& couple2)
{
  if (couple1.point_1 == couple2.point_1) return couple1.point_2 < couple2.point_2;
  return couple1.point_1 < couple2.point_1;
}

/**
  See prepare_func prototype in hmat/hmat.h

  \param row_start starting row
  \param row_count number of rows
  \param col_start starting column
  \param col_count number of columns
  \param row_hmat2client renumbered rows -> global row indices mapping
  \param row_client2hmat global row indices -> renumbered rows mapping
  \param col_hmat2client renumbered cols -> global col indices mapping
  \param col_client2hmat global col indices -> renumbered cols mapping
  \param context user provided data
  \param **data pointer to the opaque data related to this block. Has
  to be freed using \a free_block_data().
 */
static void
prepare_block(int row_start,
              int row_count,
              int col_start,
              int col_count,
              int *row_hmat2client,
              int *row_client2hmat,
              int *col_hmat2client,
              int *col_client2hmat,
              void *context,
              void **data)
{
  block_data_t* b_data = (block_data_t*) malloc(sizeof(block_data_t));
  // Copy only needed arguments
  b_data->row_start = row_start;
  b_data->col_start = col_start;
  b_data->row_hmat2client = row_hmat2client;
  b_data->row_client2hmat = row_client2hmat;
  b_data->col_hmat2client = col_hmat2client;
  b_data->col_client2hmat = col_client2hmat;
  b_data->user_context = (problem_data_t*) context;
  *data = b_data;
}

/**
  See compute_func prototype in hmat/hmat.h

  Compute all values of a block and store them into an array,
  which had already been allocated by hmat.  There is no
  padding, all values computed in this block are contiguous,
  and stored in a column-major order.
  This block is not necessarily identical to the one previously
  processed by prepare_hmat, it may be a sub-block.  In fact,
  it is either the full block, or a full column, or a full row.
*/
static void
compute_block(void *data,
              int rowBlockBegin,
              int rowBlockCount,
              int colBlockBegin,
              int colBlockCount,
              void *values)
{
  NumericalScalar *dValues = (NumericalScalar *) values;
  block_data_t* bdata = (block_data_t*) data;
  UnsignedInteger blockSize = rowBlockCount * colBlockCount;
  std::vector<couple_data_t> list_couples(blockSize);
  const NumericalSample & vertices(bdata->user_context->vertices_);
  const UnsignedInteger dimension(bdata->user_context->covarianceModel_.getDimension());
  const NumericalScalar epsilon(bdata->user_context->epsilon_);

  // Fill list_couples
  std::vector<couple_data_t>::iterator couple_it = list_couples.begin();
  for (int j = 0; j < colBlockCount; ++j)
  {
    const int c_dof_e = bdata->col_hmat2client[j + colBlockBegin + bdata->col_start];
    const int c_point_e = c_dof_e / dimension;
    const int c_dim_e   = c_dof_e % dimension;
    for (int i = 0; i < rowBlockCount; ++i, ++couple_it)
    {
      const int r_dof_e = bdata->row_hmat2client[i + rowBlockBegin + bdata->row_start];
      couple_it->point_1 = r_dof_e / dimension;
      couple_it->point_2 = c_point_e;
      couple_it->dim_1 = r_dof_e % dimension;
      couple_it->dim_2 = c_dim_e;
    }
  }
  // Sort couples
  std::sort(list_couples.begin(), list_couples.end(), compare_couple_indices);
  int lastPoint1 = -1, lastPoint2 = -1;
  CovarianceMatrix localCovarianceMatrix;
  const UnsignedInteger firstColumnIndex(colBlockBegin + bdata->col_start);
  const UnsignedInteger firstRowIndex(rowBlockBegin + bdata->row_start);
  const CovarianceModel & covarianceModel(bdata->user_context->covarianceModel_);
  for (std::vector<couple_data_t>::const_iterator cit = list_couples.begin(); cit != list_couples.end(); ++cit)
  {
    const int r_point_e = cit->point_1;
    const int c_point_e = cit->point_2;
    const UnsignedInteger r_dim_e = cit->dim_1;
    const UnsignedInteger c_dim_e = cit->dim_2;
    if (lastPoint1 != r_point_e || lastPoint2 != c_point_e)
    {
      localCovarianceMatrix = covarianceModel( vertices[r_point_e],  vertices[c_point_e]);
      lastPoint1 = r_point_e;
      lastPoint2 = c_point_e;
    }
    const int c_dof_i = bdata->col_client2hmat[dimension * c_point_e + c_dim_e];
    const int r_dof_i = bdata->row_client2hmat[dimension * r_point_e + r_dim_e];
    const int pos = (c_dof_i - firstColumnIndex) * rowBlockCount + (r_dof_i - firstRowIndex);
    dValues[pos] = localCovarianceMatrix(r_dim_e, c_dim_e) + (c_dof_i != r_dof_i ? 0.0 : epsilon);
  }
}

/**
  See free_func prototype in hmat/hmat.h

  Function to free our block_data_t structure.
*/
static void
free_data_block(void *data)
{
  free((block_data_t*) data);
}


class CovarianceBlockAssemblyFunction : public BlockAssemblyFunction<NumericalScalar>
{
public:
  CovarianceBlockAssemblyFunction(const ClusterData* rowData, const ClusterData* colData, void* user_context)
    : BlockAssemblyFunction<NumericalScalar>(rowData, colData, user_context, prepare_block, compute_block, free_data_block) {}
};

#endif

/**
 * @class CovarianceHMatrix
 */

CLASSNAMEINIT(CovarianceHMatrix);

/* Constructor without parameters */
CovarianceHMatrix::CovarianceHMatrix()
  : Object()
  , dimension_(0)
  , isInitialized_(false)
{
  // Nothing to do
}

/* Parameters constructor */
CovarianceHMatrix::CovarianceHMatrix(const CovarianceModel & covarianceModel, const Mesh & mesh, NumericalScalar epsilon)
  : Object()
  , dimension_(covarianceModel.getDimension() * mesh.getVerticesNumber())
  , isInitialized_(false)
{
#ifdef OPENTURNS_HAVE_HMAT
  try
  {
    assembleAndComputeCholesky(covarianceModel, mesh, epsilon);
    isInitialized_ = true;
  }
  catch (InternalException & ex)
  {
    // Warning already printed by assembleAndComputeCholesky
    throw ex;
  }
#else
  throw NotYetImplementedException(HERE) << "OpenTURNS had been compiled without HMat support";
#endif
}

/* Dimension accessor */
UnsignedInteger CovarianceHMatrix::getDimension() const
{
  return dimension_;
}

/* Set regularization factor */
void CovarianceHMatrix::assembleAndComputeCholesky(const CovarianceModel & covarianceModel, const Mesh & mesh, NumericalScalar epsilon)
{
#ifdef OPENTURNS_HAVE_HMAT
  const NumericalSample vertices(mesh.getVertices());
  const UnsignedInteger covarianceDimension(covarianceModel.getDimension());
  HMatInterface<NumericalScalar, HMatEngine>::init();
  HMatSettings& settings = HMatSettings::getInstance();
  const CompressionMethod methods[] = { Svd, AcaFull, AcaPartial, AcaPlus };
  const UnsignedInteger compMethod = ResourceMap::GetAsUnsignedInteger("CovarianceHMatrix-CompressionMethod");
  if (compMethod < sizeof(methods) / sizeof(methods[0]))
    settings.compressionMethod = methods[compMethod];
  else
  {
    LOGWARN( OSS() << "Resource \"CovarianceHMatrix-CompressionMethod\" " << compMethod << " is out-of-bounds, set it to 1");
    settings.compressionMethod = methods[1];
  }
  settings.useLu = false;
  settings.useLdlt = true;
  settings.cholesky = true;
  settings.admissibilityFactor = ResourceMap::GetAsNumericalScalar("CovarianceHMatrix-AdmissibilityFactor");
  settings.assemblyEpsilon = ResourceMap::GetAsNumericalScalar("CovarianceHMatrix-AssemblyEpsilon");
  settings.recompressionEpsilon = ResourceMap::GetAsNumericalScalar("CovarianceHMatrix-RecompressionEpsilon");
  settings.maxLeafSize = ResourceMap::GetAsUnsignedInteger("CovarianceHMatrix-MaxLeafSize");
  settings.maxParallelLeaves = ResourceMap::GetAsUnsignedInteger("CovarianceHMatrix-MaxParallelLeaves");
  settings.setParameters();
  {
    const UnsignedInteger size(covarianceDimension * vertices.getSize());
    DofCoordinate* points = new DofCoordinate[size];
    for (UnsignedInteger i = 0; i < size; ++i)
    {
      points[i].x = vertices[i / covarianceDimension][0];
      points[i].y = vertices[i / covarianceDimension][1];
      points[i].z = vertices[i / covarianceDimension][2];
    }
    ClusterTree* ct = createClusterTree(points, size);
    hmat_ = new MatImpl(ct, ct);
    HMatInterface<NumericalScalar, HMatEngine>* ptrHMat = &(hmat_->hmat);

    try
    {
      // Simple but inefficient variant:
      if (covarianceDimension == 1)
      {
        CovarianceAssemblyFunction simple(covarianceModel, vertices, epsilon);
        // 3rd argument is false, which means that there is no barrier between
        // assembly and factorization
        ptrHMat->assemble(simple, kLowerSymmetric, false);
        ptrHMat->factorize();
      }
      else
      {
        problem_data_t problem_data(covarianceModel, vertices, epsilon);
        CovarianceBlockAssemblyFunction block(&ct->data, &ct->data, &problem_data);
        // 3rd argument is false, which means that there is no barrier between
        // assembly and factorization
        ptrHMat->assemble(block, kLowerSymmetric, false);
        ptrHMat->factorize();
      }

      std::pair<size_t, size_t> compressionRatio = ptrHMat->compressionRatio();
      OSS oss;
      oss << "Compression Ratio = "
          << 100 * ((double) compressionRatio.first) / compressionRatio.second
          << "%";
    }
    catch(...)
    {
      delete[] points;
      throw InternalException(HERE) << "Error in assembly or factorization";
    }
    delete[] points;
  }
#else
  throw NotYetImplementedException(HERE) << "OpenTURNS had been compiled without HMat support";
#endif
}

/* String converter */
String CovarianceHMatrix::__repr__() const
{
  OSS oss(true);
  oss << "class= " << CovarianceHMatrix::GetClassName();
  return oss;
}

/* String converter */
String CovarianceHMatrix::__str__(const String & offset) const
{
  OSS oss(false);
  oss << offset << "class= " << CovarianceHMatrix::GetClassName();
  return oss;
}

NumericalPoint CovarianceHMatrix::gemvLower(const NumericalPoint& x)
{
#ifdef OPENTURNS_HAVE_HMAT
  if (!isInitialized_) throw InternalException(HERE) << "CovarianceHMatrix could not be factorized";

  if (!hmat_) throw InternalException(HERE) << "hmat_ is corrupted";
  HMatInterface<NumericalScalar, HMatEngine>* ptrHMat = &(hmat_->hmat);

  FullMatrix<NumericalScalar> rhs(x.getDimension(), 1);
  FullMatrix<NumericalScalar> lx(x.getDimension(), 1);
  memcpy(rhs.m, &x[0], x.getDimension() * sizeof(NumericalScalar));
  memset(lx.m, 0, x.getDimension() * sizeof(NumericalScalar));
  ptrHMat->gemv('N', 1., rhs, 0., lx);

  Collection<NumericalScalar> result(x.getDimension());
  memcpy(&result[0], lx.m, x.getDimension() * sizeof(NumericalScalar));
  return NumericalPoint(result);
#else
  throw NotYetImplementedException(HERE) << "OpenTURNS had been compiled without HMat support";
#endif
}

END_NAMESPACE_OPENTURNS
