Example Code: C++ and C for Python#

Generate a Cartesian grid of $51times51$ (01_grid.py).#

#!/usr/bin/env python3

from matplotlib import pyplot as plt

# [begin example]
import numpy as np

def make_grid():
    # Number of the grid points.
    nx = 51
    # Produce the x coordinates.
    x = np.linspace(0, 1, nx)
    # Create the two arrays for x and y coordinates of the grid.
    gx, gy = np.meshgrid(x, x)
    # Create the field and zero-initialize it.
    u = np.zeros_like(gx)
    # Set the boundary condition.
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    # Return all values.
    return nx, x, u

nx, x, uoriginal = make_grid()
# [end example]

def show_grid(size):
    fig, ax = plt.subplots(figsize=(size,size))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_xticks(x, minor=True)
    ax.set_yticks(x, minor=True)
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    ax.grid(True, which='major', linewidth=2)
    ax.grid(True, which='minor', linewidth=1)

show_grid(10)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:

Solve the Laplace equation using Python nested loops (01_solve_python_loop.py).#

#!/usr/bin/env python3

import numpy as np
from matplotlib import pyplot as plt

def make_grid():
    nx = 51
    x = np.linspace(0, 1, nx)
    gx, gy = np.meshgrid(x, x)
    u = np.zeros_like(gx)
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    return nx, x, u

nx, x, uoriginal = make_grid()

import time
import numpy as np

class Timer:
    def __enter__(self):
        self._t0 = time.time()
    def __exit__(self, *args, **kw):
        self._t1 = time.time()
        print("Wall time: {:g} s".format(self._t1 - self._t0))

# [begin example]
def solve_python_loop():
    u = uoriginal.copy()  # Input from outer scope
    un = u.copy()  # Create the buffer for the next time step
    converged = False
    step = 0
    # Outer loop.
    while not converged:
        step += 1
        # Inner loops. One for x and the other for y.
        for it in range(1, nx-1):
            for jt in range(1, nx-1):
                un[it,jt] = (u[it+1,jt] + u[it-1,jt] + u[it,jt+1] + u[it,jt-1]) / 4
        norm = np.abs(un-u).max()
        u[...] = un[...]
        converged = True if norm < 1.e-5 else False
    return u, step, norm
# [end example]

with Timer():
    u, step, norm = solve_python_loop()

print(step)

# Run the Python solver
def show_result(u, step, norm, size=7):
    print("step", step, "norm", norm)
    fig, ax = plt.subplots(figsize=(size,size))
    cs = ax.contour(x, x, u.T)
    ax.clabel(cs, inline=1, fontsize=10)

    ax.set_xticks(np.linspace(0,1,6))
    ax.set_yticks(np.linspace(0,1,6))
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    ax.grid(True, which='minor')

show_result(u, step, norm)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:

The analytical solution of the Laplace equation (01_solve_analytical.py).#

#!/usr/bin/env python3

import numpy as np

def make_grid():
    nx = 51
    x = np.linspace(0, 1, nx)
    gx, gy = np.meshgrid(x, x)
    u = np.zeros_like(gx)
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    return nx, x, u

nx, x, uoriginal = make_grid()

import numpy as np

# [begin example]
def solve_analytical():
    u = np.empty((len(x), len(x)), dtype='float64')
    for ix, cx in enumerate(x):
        u[ix, :] = np.sinh(np.pi*cx) / np.sinh(np.pi) * np.sin(np.pi*x)
    return u
# [end example]

def solve_python_loop():
    u = uoriginal.copy()  # Input from outer scope
    un = u.copy()  # Create the buffer for the next time step
    converged = False
    step = 0
    # Outer loop.
    while not converged:
        step += 1
        # Inner loops. One for x and the other for y.
        for it in range(1, nx-1):
            for jt in range(1, nx-1):
                un[it,jt] = (u[it+1,jt] + u[it-1,jt] + u[it,jt+1] + u[it,jt-1]) / 4
        norm = np.abs(un-u).max()
        u[...] = un[...]
        converged = True if norm < 1.e-5 else False
    return u, step, norm

u, step, norm = solve_python_loop()

# [begin pycon]
uanalytical = solve_analytical()
# Calculate the L inf norm.
print("Linf of difference is %f" % np.abs(u - uanalytical).max())
# [end pycon]

from matplotlib import pyplot as plt

def show_result(u, step, norm, size=7):
    print("step", step, "norm", norm)
    fig, ax = plt.subplots(figsize=(size,size))
    cs = ax.contour(x, x, u.T)
    ax.clabel(cs, inline=1, fontsize=10)

    ax.set_xticks(np.linspace(0,1,6))
    ax.set_yticks(np.linspace(0,1,6))
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    ax.grid(True, which='minor')

show_result(uanalytical, 0, 0)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:

Solve the Laplace equation using numpy (02_solve_array.py).#

#!/usr/bin/env python3

import numpy as np

def make_grid():
    nx = 51
    x = np.linspace(0, 1, nx)
    gx, gy = np.meshgrid(x, x)
    u = np.zeros_like(gx)
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    return nx, x, u

nx, x, uoriginal = make_grid()

import numpy as np

# [begin example]
def solve_array():
    u = uoriginal.copy()  # Input from outer scope
    un = u.copy()  # Create the buffer for the next time step
    converged = False
    step = 0
    while not converged:
        step += 1
        un[1:nx-1,1:nx-1] = (u[2:nx,1:nx-1] + u[0:nx-2,1:nx-1] +
                             u[1:nx-1,2:nx] + u[1:nx-1,0:nx-2]) / 4
        norm = np.abs(un-u).max()
        u[...] = un[...]
        converged = True if norm < 1.e-5 else False
    return u, step, norm
# [end example]

import time

class Timer:
    def __enter__(self):
        self._t0 = time.time()
    def __exit__(self, *args, **kw):
        self._t1 = time.time()
        print("Wall time: {:g} s".format(self._t1 - self._t0))

# [begin pycon]
# Run the Python solver
with Timer():
    u, step, norm = solve_array()
# [end pycon]

from matplotlib import pyplot as plt

def show_result(u, step, norm, size=7):
    print("step", step, "norm", norm)
    fig, ax = plt.subplots(figsize=(size,size))
    cs = ax.contour(x, x, u.T)
    ax.clabel(cs, inline=1, fontsize=10)

    ax.set_xticks(np.linspace(0,1,6))
    ax.set_yticks(np.linspace(0,1,6))
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    ax.grid(True, which='minor')

show_result(u, step, norm)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:

Solve the Laplace equation using the C++ code (solve_cpp.cpp).#

#include <pybind11/pybind11.h>
#include <modmesh/buffer/buffer.hpp>
#include <modmesh/buffer/pymod/buffer_pymod.hpp>

#include <vector>
#include <algorithm>
#include <tuple>
#include <iostream>

#include <pybind11/numpy.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

namespace modmesh
{

namespace python
{

void import_numpy()
{
    auto local_import_numpy = []()
    {
        import_array2("cannot import numpy", false); // or numpy c api segfault.
        return true;
    };
    if (!local_import_numpy())
    {
        throw pybind11::error_already_set();
    }
}

} /* end namespace python */

} /* end namespace modmesh */

template <typename T>
T calc_norm_amax(modmesh::SimpleArray<T> const & arr0, modmesh::SimpleArray<T> const & arr1)
{
    size_t const nelm = arr0.size();
    T ret = 0;
    for (size_t it = 0; it < nelm; ++it)
    {
        T const val = std::abs(arr0[it] - arr1[it]);
        if (val > ret)
        {
            ret = val;
        }
    }
    return ret;
}

// [begin example]
std::tuple<modmesh::SimpleArray<double>, size_t, double>
solve1(modmesh::SimpleArray<double> u)
{
    const size_t nx = u.shape(0);
    modmesh::SimpleArray<double> un = u;
    bool converged = false;
    size_t step = 0;
    double norm;
    while (!converged)
    {
        norm = 0.0;
        ++step;
        for (size_t it=1; it<nx-1; ++it)
        {
            for (size_t jt=1; jt<nx-1; ++jt)
            {
                un(it,jt) = (u(it+1,jt) + u(it-1,jt) + u(it,jt+1) + u(it,jt-1)) / 4;
                double const v = std::abs(un(it,jt) - u(it,jt));
                if (v > norm)
                {
                    norm = v;
                }
            }
        }
        // additional loop slows down: norm = calc_norm_amax(u, un);
        if (norm < 1.e-5) { converged = true; }
        u = un;
    }
    return std::make_tuple(u, step, norm);
}

// Expose the C++ helper to Python
PYBIND11_MODULE(solve_cpp, m)
{
    // Boilerplate for using the SimpleArray with Python
    {
        modmesh::python::import_numpy();
        modmesh::python::wrap_ConcreteBuffer(m);
        modmesh::python::wrap_SimpleArray(m);
    }
    m.def
    (
        "solve_cpp"
      , [](pybind11::array_t<double> & uin)
        {
            auto ret = solve1(modmesh::python::makeSimpleArray(uin));
            return std::make_tuple(
                modmesh::python::to_ndarray(std::get<0>(ret)),
                std::get<1>(ret),
                std::get<2>(ret));
        }
    );
}
// [end example]

// vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4:

The Python driver to solve the Laplace equation using the C++ code (03_solve_cpp.py).#

#!/usr/bin/env python3

import numpy as np

def make_grid():
    nx = 51
    x = np.linspace(0, 1, nx)
    gx, gy = np.meshgrid(x, x)
    u = np.zeros_like(gx)
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    return nx, x, u

nx, x, uoriginal = make_grid()

# [begin example]
import solve_cpp
# [end example]

import time

class Timer:
    def __enter__(self):
        self._t0 = time.time()
    def __exit__(self, *args, **kw):
        self._t1 = time.time()
        print("Wall time: {:g} s".format(self._t1 - self._t0))

# [begin pycon]
with Timer():
    u, step, norm = solve_cpp.solve_cpp(uoriginal)
# [end pycon]

from matplotlib import pyplot as plt

def show_result(u, step, norm, size=7):
    print("step", step, "norm", norm)
    fig, ax = plt.subplots(figsize=(size,size))
    cs = ax.contour(x, x, u.T)
    ax.clabel(cs, inline=1, fontsize=10)

    ax.set_xticks(np.linspace(0,1,6))
    ax.set_yticks(np.linspace(0,1,6))
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    ax.grid(True, which='minor')

show_result(u, step, norm)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:

C++ code for least-square regression to polynomial functions (data_prep.cpp).#

#include <pybind11/pybind11.h>
#include <modmesh/buffer/buffer.hpp>
#include <modmesh/buffer/pymod/buffer_pymod.hpp>

#include <pybind11/numpy.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#ifdef HASMKL
#include <mkl_lapack.h>
#include <mkl_lapacke.h>
#else // HASMKL
#ifdef __MACH__
#include <clapack.h>
#include <Accelerate/Accelerate.h>
#endif // __MACH__
#endif // HASMKL

#include <vector>
#include <algorithm>

namespace modmesh
{

namespace python
{

void import_numpy()
{
    auto local_import_numpy = []()
    {
        import_array2("cannot import numpy", false); // or numpy c api segfault.
        return true;
    };
    if (!local_import_numpy())
    {
        throw pybind11::error_already_set();
    }
}

} /* end namespace python */

} /* end namespace modmesh */

modmesh::SimpleArray<double> solve
(
    modmesh::SimpleArray<double> const & sarr
  , modmesh::SimpleArray<double> const & rhs
)
{
    // Populate the column major input by transposing
    modmesh::SimpleArray<double> mat(
        std::vector<size_t>{sarr.shape(1), sarr.shape(0)});
    for (size_t i = 0; i < mat.shape(0); ++i)
    {
        for (size_t j = 0; j < mat.shape(1); ++j)
        {
            mat(i, j) = sarr(j, i);
        }
    }

    modmesh::SimpleArray<double> b = rhs;
    int n = rhs.size();
    modmesh::SimpleArray<int> ipiv(n);

    int status;
    int nn = mat.shape(0);
    int bncol = 1;
    int bnrow = b.shape(0);
    int matnrow = mat.shape(1);

    // FIXME: This call is not yet validated. I am not sure about the
    // correctness of the solution.
    dgesv_( // column major.
        &nn // int * n: number of linear equation
      , &bncol // int * nrhs: number of RHS
      , mat.data() // double * a: array (lda, n)
      , &matnrow // int * lda: leading dimension of array a
      , ipiv.data() // int * ipiv: pivot indices
      , b.data() // double * b: array (ldb, nrhs)
      , &bnrow // int * ldb: leading dimension of array b
      , &status // for column major matrix, ldb remains the leading dimension.
    );

    return b;
}

// [begin example: single fit]
/**
 * This function calculates the least-square regression of a point cloud to a
 * polynomial function of a given order.
 */
modmesh::SimpleArray<double> fit_poly
(
    modmesh::SimpleArray<double> const & xarr
  , modmesh::SimpleArray<double> const & yarr
  , size_t start
  , size_t stop
  , size_t order
)
{
    if (xarr.size() != yarr.size())
    {
        throw std::runtime_error("xarr and yarr size mismatch");
    }

    // The rank of the linear map is (order+1).
    modmesh::SimpleArray<double> matrix(std::vector<size_t>{order+1, order+1});

    // Use the x coordinates to build the linear map for least-square
    // regression.
    for (size_t it=0; it<order+1; ++it)
    {
        for (size_t jt=0; jt<order+1; ++jt)
        {
            double & val = matrix(it, jt);
            val = 0;
            for (size_t kt=start; kt<stop; ++kt)
            {
                val += pow(xarr[kt], it+jt);
            }
        }
    }

    // Use the x and y coordinates to build the vector in the right-hand side
    // of the linear system.
    modmesh::SimpleArray<double> rhs(std::vector<size_t>{order+1});
    for (size_t jt=0; jt<order+1; ++jt)
    {
        rhs[jt] = 0;
        for (size_t kt=start; kt<stop; ++kt)
        {
            rhs[jt] += pow(xarr[kt], jt) * yarr[kt];
        }
    }

    // Solve the linear system for the least-square minimization.
    modmesh::SimpleArray<double> lhs = solve(matrix, rhs);
    std::reverse(lhs.begin(), lhs.end()); // to make numpy.poly1d happy.

    return lhs;
}
// [end example: single fit]

// [begin example: multiple fit]
/**
 * This function calculates the least-square regression of multiple sets of
 * point clouds to the corresponding polynomial functions of a given order.
 */
modmesh::SimpleArray<double> fit_polys
(
    modmesh::SimpleArray<double> const & xarr
  , modmesh::SimpleArray<double> const & yarr
  , size_t order
)
{
    size_t xmin = std::floor(*std::min_element(xarr.begin(), xarr.end()));
    size_t xmax = std::ceil(*std::max_element(xarr.begin(), xarr.end()));
    size_t ninterval = xmax - xmin;

    modmesh::SimpleArray<double> lhs(std::vector<size_t>{ninterval, order+1});
    std::fill(lhs.begin(), lhs.end(), 0); // sentinel.
    size_t start=0;
    for (size_t it=0; it<xmax; ++it)
    {
        // NOTE: We take advantage of the intrinsic features of the input data
        // to determine the grouping.  This is ad hoc and hard to maintain.  We
        // play this trick to demonstrate a hackish way of performing numerical
        // calculation.
        size_t stop;
        for (stop=start; stop<xarr.size(); ++stop)
        {
            if (xarr[stop]>=it+1) { break; }
        }

        // Use the single polynomial helper function.
        auto sub_lhs = fit_poly(xarr, yarr, start, stop, order);
        for (size_t jt=0; jt<order+1; ++jt)
        {
            lhs(it, jt) = sub_lhs[jt];
        }

        start = stop;
    }

    return lhs;
}
// [end example: multiple fit]

// [begin example: wrapping]
/**
 * The pybind11 wrapper for the helper functions for polynomial fitting.
 */
PYBIND11_MODULE(data_prep, m)
{
    // Boilerplate for using the SimpleArray with Python
    {
        modmesh::python::import_numpy();
        modmesh::python::wrap_ConcreteBuffer(m);
        modmesh::python::wrap_SimpleArray(m);
    }

    m.def
    (
        "fit_poly"
      , []
        (
            pybind11::array_t<double> & xarr_in
          , pybind11::array_t<double> & yarr_in
          , size_t order
        )
        {
            auto xarr = modmesh::python::makeSimpleArray(xarr_in);
            auto yarr = modmesh::python::makeSimpleArray(yarr_in);
            auto ret = fit_poly(xarr, yarr, 0, xarr.size(), order);
            return modmesh::python::to_ndarray(ret);
        }
    );
    m.def
    (
        "fit_polys"
      , []
        (
            pybind11::array_t<double> & xarr_in
          , pybind11::array_t<double> & yarr_in
          , size_t order
        )
        {
            auto xarr = modmesh::python::makeSimpleArray(xarr_in);
            auto yarr = modmesh::python::makeSimpleArray(yarr_in);
            auto ret = fit_polys(xarr, yarr, order);
            return modmesh::python::to_ndarray(ret);
        }
    );
}
// [end example: wrapping]

// vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4:

The Python driver to the least-square regression using the C++ code (04_fit_poly.py).#

#!/usr/bin/env python3

import numpy as np
from matplotlib import pyplot as plt

import time

class Timer:
    def __enter__(self):
        self._t0 = time.time()
    def __exit__(self, *args, **kw):
        self._t1 = time.time()
        print("Wall time: {:g} s".format(self._t1 - self._t0))

print("prep")
# [begin prep pycon]
with Timer():
    # np.unique returns a sorted array.
    xdata = np.unique(np.random.sample(1000000) * 1000)
    ydata = np.random.sample(len(xdata)) * 1000
# [end prep pycon]

# [begin fit_poly pycon]
import data_prep

plt.rc('figure', figsize=(12, 8))

def plot_poly_fitted(i):
    slct = (xdata>=i)&(xdata<(i+1))
    sub_x = xdata[slct]
    sub_y = ydata[slct]
    poly = data_prep.fit_poly(sub_x, sub_y, 3)
    print(poly)
    poly = np.poly1d(poly)
    xp = np.linspace(sub_x.min(), sub_x.max(), 100)
    plt.plot(sub_x, sub_y, '.', xp, poly(xp), '-')

plot_poly_fitted(10)
# [end fit_poly pycon]

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

print("lumped")
# [begin lumped pycon]
with Timer():
    # Do the calculation for the 1000 groups of points.
    polygroup = np.empty((1000, 3), dtype='float64')
    for i in range(1000):
        # Use numpy to build the point group.
        slct = (xdata>=i)&(xdata<(i+1))
        sub_x = xdata[slct]
        sub_y = ydata[slct]
        polygroup[i,:] = data_prep.fit_poly(sub_x, sub_y, 2)
# [end lumped pycon]

print("point build")
# [begin point build pycon]
with Timer():
    # Using numpy to build the point groups takes a lot of time.
    data_groups = []
    for i in range(1000):
        slct = (xdata>=i)&(xdata<(i+1))
        data_groups.append((xdata[slct], ydata[slct]))
# [end point build pycon]

print("fitting")
# [begin fitting pycon]
with Timer():
    # Fitting helper runtime is much less than building the point groups.
    polygroup = np.empty((1000, 3), dtype='float64')
    for it, (sub_x, sub_y) in enumerate(data_groups):
        polygroup[it,:] = data_prep.fit_poly(sub_x, sub_y, 2)
# [end fitting pycon]

print("batch")
# [begin batch pycon]
with Timer():
    rbatch = data_prep.fit_polys(xdata, ydata, 2)
# [end batch pycon]

print(rbatch.shape)
# Verify batch.
assert len(rbatch) == len(polygroup)
for i in range(1000):
    assert (rbatch[i] == polygroup[i]).all()

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:

ConcreteBuffer for memory control (ConcreteBuffer.hpp). See https://github.com/solvcon/modmesh for how it works in an application.#

#pragma once

/*
 * Copyright (c) 2019, Yung-Yu Chen <yyc@solvcon.net>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * - Neither the name of the copyright holder nor the names of its contributors
 *   may be used to endorse or promote products derived from this software
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <modmesh/buffer/small_vector.hpp>

#include <stdexcept>
#include <memory>
#include <algorithm>
#include <sstream>

namespace modmesh
{

namespace detail
{

// Take the remover and deleter classes outside ConcreteBuffer to work around
// https://bugzilla.redhat.com/show_bug.cgi?id=1569374

/**
 * The base class of memory deallocator for ConcreteBuffer.  When the object
 * exists in ConcreteBufferDataDeleter (the unique_ptr deleter), the deleter
 * calls it to release the memory of the ConcreteBuffer data buffer.
 */
struct ConcreteBufferRemover
{

    ConcreteBufferRemover() = default;
    ConcreteBufferRemover(ConcreteBufferRemover const &) = default;
    ConcreteBufferRemover(ConcreteBufferRemover &&) = default;
    ConcreteBufferRemover & operator=(ConcreteBufferRemover const &) = default;
    ConcreteBufferRemover & operator=(ConcreteBufferRemover &&) = default;
    virtual ~ConcreteBufferRemover() = default;

    // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays,readability-non-const-parameter)
    virtual void operator()(int8_t * p) const
    {
        // NOLINTNEXTLINE(cppcoreguidelines-owning-memory)
        delete[] p;
    }

}; /* end struct ConcreteBufferRemover */

struct ConcreteBufferNoRemove : public ConcreteBufferRemover
{

    // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays,readability-non-const-parameter)
    void operator()(int8_t *) const override {}

}; /* end struct ConcreteBufferNoRemove */

struct ConcreteBufferDataDeleter
{

    using remover_type = ConcreteBufferRemover;

    ConcreteBufferDataDeleter(ConcreteBufferDataDeleter const &) = delete;
    ConcreteBufferDataDeleter & operator=(ConcreteBufferDataDeleter const &) = delete;

    ConcreteBufferDataDeleter() = default;
    ConcreteBufferDataDeleter(ConcreteBufferDataDeleter &&) = default;
    ConcreteBufferDataDeleter & operator=(ConcreteBufferDataDeleter &&) = default;
    ~ConcreteBufferDataDeleter() = default;
    explicit ConcreteBufferDataDeleter(std::unique_ptr<remover_type> && remover_in)
        : remover(std::move(remover_in))
    {
    }

    // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays,readability-non-const-parameter)
    void operator()(int8_t * p) const
    {
        if (!remover)
        {
            // NOLINTNEXTLINE(cppcoreguidelines-owning-memory)
            delete[] p;
        }
        else
        {
            (*remover)(p);
        }
    }

    std::unique_ptr<remover_type> remover{nullptr};

}; /* end struct ConcreteBufferDataDeleter */

} /* end namespace detail */

/**
 * Untyped and unresizeable memory buffer for contiguous data storage.
 */
class ConcreteBuffer
    : public std::enable_shared_from_this<ConcreteBuffer>
{

private:

    struct ctor_passkey
    {
    };

    using data_deleter_type = detail::ConcreteBufferDataDeleter;

public:

    using remover_type = detail::ConcreteBufferRemover;

    static std::shared_ptr<ConcreteBuffer> construct(size_t nbytes)
    {
        return std::make_shared<ConcreteBuffer>(nbytes, ctor_passkey());
    }

    /*
     * This factory method is dangerous since the data pointer passed in will
     * not be owned by the ConcreteBuffer created.  It is an error if the
     * number of bytes of the externally owned buffer doesn't match the value
     * passed in (but we cannot know here).
     */
    static std::shared_ptr<ConcreteBuffer> construct(size_t nbytes, int8_t * data, std::unique_ptr<remover_type> && remover)
    {
        return std::make_shared<ConcreteBuffer>(nbytes, data, std::move(remover), ctor_passkey());
    }

    static std::shared_ptr<ConcreteBuffer> construct(size_t nbytes, void * data, std::unique_ptr<remover_type> && remover)
    {
        return construct(nbytes, static_cast<int8_t *>(data), std::move(remover));
    }

    static std::shared_ptr<ConcreteBuffer> construct() { return construct(0); }

    std::shared_ptr<ConcreteBuffer> clone() const
    {
        std::shared_ptr<ConcreteBuffer> ret = construct(nbytes());
        std::copy_n(data(), size(), (*ret).data());
        return ret;
    }

    /**
     * \param[in] nbytes
     *      Size of the memory buffer in bytes.
     */
    ConcreteBuffer(size_t nbytes, const ctor_passkey &)
        : m_nbytes(nbytes)
        , m_data(allocate(nbytes))
    {
    }

    /**
     * \param[in] nbytes
     *      Size of the memory buffer in bytes.
     * \param[in] data
     *      Pointer to the memory buffer that is not supposed to be owned by
     *      this ConcreteBuffer.
     * \param[in] remover
     *      The memory deallocator for the unowned data buffer passed in.
     */
    // NOLINTNEXTLINE(readability-non-const-parameter)
    ConcreteBuffer(size_t nbytes, int8_t * data, std::unique_ptr<remover_type> && remover, const ctor_passkey &)
        : m_nbytes(nbytes)
        , m_data(data, data_deleter_type(std::move(remover)))
    {
    }

    ~ConcreteBuffer() = default;

    ConcreteBuffer() = delete;
    ConcreteBuffer(ConcreteBuffer &&) = delete;
    ConcreteBuffer & operator=(ConcreteBuffer &&) = delete;
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wextra"
#endif
    // Avoid enabled_shared_from_this copy constructor
    // NOLINTNEXTLINE(bugprone-copy-constructor-init)
    ConcreteBuffer(ConcreteBuffer const & other)
        : m_nbytes(other.m_nbytes)
        , m_data(allocate(other.m_nbytes))
    {
        if (size() != other.size())
        {
            throw std::out_of_range("Buffer size mismatch");
        }
        std::copy_n(other.data(), size(), data());
    }
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
    ConcreteBuffer & operator=(ConcreteBuffer const & other)
    {
        if (this != &other)
        {
            if (size() != other.size())
            {
                throw std::out_of_range("Buffer size mismatch");
            }
            std::copy_n(other.data(), size(), data());
        }
        return *this;
    }

    explicit operator bool() const noexcept { return bool(m_data); }

    size_t nbytes() const noexcept { return m_nbytes; }
    size_t size() const noexcept { return nbytes(); }

    using iterator = int8_t *;
    using const_iterator = int8_t const *;

    iterator begin() noexcept { return data(); }
    iterator end() noexcept { return data() + size(); }
    const_iterator begin() const noexcept { return data(); }
    const_iterator end() const noexcept { return data() + size(); }
    const_iterator cbegin() const noexcept { return begin(); }
    const_iterator cend() const noexcept { return end(); }

    int8_t operator[](size_t it) const { return data(it); }
    int8_t & operator[](size_t it) { return data(it); }

    int8_t at(size_t it) const
    {
        validate_range(it);
        return data(it);
    }
    int8_t & at(size_t it)
    {
        validate_range(it);
        return data(it);
    }

    /* Backdoor */
    int8_t data(size_t it) const { return data()[it]; }
    int8_t & data(size_t it) { return data()[it]; }
    int8_t const * data() const noexcept { return data<int8_t>(); }
    int8_t * data() noexcept { return data<int8_t>(); }
    // clang-format off
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
    template <typename T> T const * data() const noexcept { return reinterpret_cast<T *>(m_data.get()); }
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
    template <typename T> T * data() noexcept { return reinterpret_cast<T *>(m_data.get()); }
    // clang-format on

    bool has_remover() const noexcept { return bool(m_data.get_deleter().remover); }
    remover_type const & get_remover() const { return *m_data.get_deleter().remover; }
    remover_type & get_remover() { return *m_data.get_deleter().remover; }

    // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
    using unique_ptr_type = std::unique_ptr<int8_t, data_deleter_type>;

    void validate_range(size_t it) const
    {
        if (it >= size())
        {
            std::ostringstream ms;
            ms << "ConcreteBuffer: index " << it << " is out of bounds with size " << size();
            throw std::out_of_range(ms.str());
        }
    }

    static unique_ptr_type allocate(size_t nbytes)
    {
        unique_ptr_type ret(nullptr, data_deleter_type());
        if (0 != nbytes)
        {
            ret = unique_ptr_type(new int8_t[nbytes], data_deleter_type());
        }
        return ret;
    }

    size_t m_nbytes;
    unique_ptr_type m_data;

}; /* end class ConcreteBuffer */

} /* end namespace modmesh */

/* vim: set et ts=4 sw=4: */

SimpleArray for multi-dimensional array (SimpleArray.hpp). See https://github.com/solvcon/modmesh for how it works in an application.#

#pragma once

/*
 * Copyright (c) 2019, Yung-Yu Chen <yyc@solvcon.net>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * - Neither the name of the copyright holder nor the names of its contributors
 *   may be used to endorse or promote products derived from this software
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <modmesh/buffer/ConcreteBuffer.hpp>

#include <stdexcept>

#if defined(_MSC_VER)
#include <BaseTsd.h>
typedef SSIZE_T ssize_t;
#endif

namespace modmesh
{

namespace detail
{

template <size_t D, typename S>
size_t buffer_offset_impl(S const &)
{
    return 0;
}

template <size_t D, typename S, typename Arg, typename... Args>
size_t buffer_offset_impl(S const & strides, Arg arg, Args... args)
{
    return arg * strides[D] + buffer_offset_impl<D + 1>(strides, args...);
}

} /* end namespace detail */

template <typename S, typename... Args>
size_t buffer_offset(S const & strides, Args... args)
{
    return detail::buffer_offset_impl<0>(strides, args...);
}

inline size_t buffer_offset(small_vector<size_t> const & stride, small_vector<size_t> const & idx)
{
    if (stride.size() != idx.size())
    {
        std::ostringstream ms;
        // clang-format off
        ms << "stride size " << stride.size() << " != " << "index size " << idx.size();
        // clang-format on
        throw std::out_of_range(ms.str());
    }
    size_t offset = 0;
    for (size_t it = 0; it < stride.size(); ++it)
    {
        offset += stride[it] * idx[it];
    }
    return offset;
}

/**
 * Simple array type for contiguous memory storage. Size does not change. The
 * copy semantics performs data copy. The move semantics invalidates the
 * existing memory buffer.
 */
template <typename T>
class SimpleArray
{

public:

    using value_type = T;
    using shape_type = small_vector<size_t>;
    using sshape_type = small_vector<ssize_t>;
    using buffer_type = ConcreteBuffer;

    static constexpr size_t ITEMSIZE = sizeof(value_type);

    static constexpr size_t itemsize() { return ITEMSIZE; }

    explicit SimpleArray(size_t length)
        : m_buffer(buffer_type::construct(length * ITEMSIZE))
        , m_shape{length}
        , m_stride{1}
        , m_body(m_buffer->data<T>())
    {
    }

    template <class InputIt>
    SimpleArray(InputIt first, InputIt last)
        : SimpleArray(last - first)
    {
        std::copy(first, last, data());
    }

    // NOLINTNEXTLINE(modernize-pass-by-value)
    explicit SimpleArray(small_vector<size_t> const & shape)
        : m_shape(shape)
        , m_stride(calc_stride(m_shape))
    {
        if (!m_shape.empty())
        {
            m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE);
            m_body = m_buffer->data<T>();
        }
    }

    // NOLINTNEXTLINE(modernize-pass-by-value)
    explicit SimpleArray(small_vector<size_t> const & shape, value_type const & value)
        : SimpleArray(shape)
    {
        std::fill(begin(), end(), value);
    }

    explicit SimpleArray(std::vector<size_t> const & shape)
        : m_shape(shape)
        , m_stride(calc_stride(m_shape))
    {
        if (!m_shape.empty())
        {
            m_buffer = buffer_type::construct(m_shape[0] * m_stride[0] * ITEMSIZE);
            m_body = m_buffer->data<T>();
        }
    }

    explicit SimpleArray(std::vector<size_t> const & shape, value_type const & value)
        : SimpleArray(shape)
    {
        std::fill(begin(), end(), value);
    }

    explicit SimpleArray(std::shared_ptr<buffer_type> const & buffer)
    {
        if (buffer)
        {
            const size_t nitem = buffer->nbytes() / ITEMSIZE;
            if (buffer->nbytes() != nitem * ITEMSIZE)
            {
                throw std::runtime_error("SimpleArray: input buffer size must be divisible");
            }
            m_shape = shape_type{nitem};
            m_stride = shape_type{1};
            m_buffer = buffer;
            m_body = m_buffer->data<T>();
        }
        else
        {
            throw std::runtime_error("SimpleArray: buffer cannot be null");
        }
    }

    explicit SimpleArray(small_vector<size_t> const & shape, std::shared_ptr<buffer_type> const & buffer)
        : SimpleArray(buffer)
    {
        if (buffer)
        {
            m_shape = shape;
            m_stride = calc_stride(m_shape);
            const size_t nbytes = m_shape[0] * m_stride[0] * ITEMSIZE;
            if (nbytes != buffer->nbytes())
            {
                std::ostringstream ms;
                ms << "SimpleArray: shape byte count " << nbytes << " differs from buffer " << buffer->nbytes();
                throw std::runtime_error(ms.str());
            }
        }
    }

    SimpleArray(std::initializer_list<T> init)
        : SimpleArray(init.size())
    {
        std::copy_n(init.begin(), init.size(), data());
    }

    SimpleArray()
        : SimpleArray(0)
    {
    }

    SimpleArray(SimpleArray const & other)
        : m_buffer(other.m_buffer->clone())
        , m_shape(other.m_shape)
        , m_stride(other.m_stride)
        , m_nghost(other.m_nghost)
        , m_body(calc_body(m_buffer->data<T>(), m_stride, other.m_nghost))
    {
    }

    SimpleArray(SimpleArray && other) noexcept
        : m_buffer(std::move(other.m_buffer))
        , m_shape(std::move(other.m_shape))
        , m_stride(std::move(other.m_stride))
        , m_nghost(other.m_nghost)
        , m_body(other.m_body)
    {
    }

    SimpleArray & operator=(SimpleArray const & other)
    {
        if (this != &other)
        {
            *m_buffer = *(other.m_buffer); // Size is checked inside.
            m_shape = other.m_shape;
            m_stride = other.m_stride;
            m_nghost = other.m_nghost;
            m_body = calc_body(m_buffer->data<T>(), m_stride, other.m_nghost);
        }
        return *this;
    }

    SimpleArray & operator=(SimpleArray && other) noexcept
    {
        if (this != &other)
        {
            m_buffer = std::move(other.m_buffer);
            m_shape = std::move(other.m_shape);
            m_stride = std::move(other.m_stride);
            m_nghost = other.m_nghost;
            m_body = other.m_body;
        }
        return *this;
    }

    ~SimpleArray() = default;

    template <typename... Args>
    SimpleArray & remake(Args &&... args)
    {
        SimpleArray(args...).swap(*this);
        return *this;
    }

    static shape_type calc_stride(shape_type const & shape)
    {
        shape_type stride(shape.size());
        if (!shape.empty())
        {
            stride[shape.size() - 1] = 1;
            for (size_t it = shape.size() - 1; it > 0; --it)
            {
                stride[it - 1] = stride[it] * shape[it];
            }
        }
        return stride;
    }

    static T * calc_body(T * data, shape_type const & stride, size_t nghost)
    {
        if (nullptr == data || stride.empty() || 0 == nghost)
        {
            // Do nothing.
        }
        else
        {
            shape_type shape(stride.size(), 0);
            shape[0] = nghost;
            data += buffer_offset(stride, shape);
        }
        return data;
    }

    explicit operator bool() const noexcept { return bool(m_buffer) && bool(*m_buffer); }

    size_t nbytes() const noexcept { return m_buffer ? m_buffer->nbytes() : 0; }
    size_t size() const noexcept { return nbytes() / ITEMSIZE; }

    using iterator = T *;
    using const_iterator = T const *;

    iterator begin() noexcept { return data(); }
    iterator end() noexcept { return data() + size(); }
    const_iterator begin() const noexcept { return data(); }
    const_iterator end() const noexcept { return data() + size(); }
    const_iterator cbegin() const noexcept { return begin(); }
    const_iterator cend() const noexcept { return end(); }

    value_type const & operator[](size_t it) const noexcept { return data(it); }
    value_type & operator[](size_t it) noexcept { return data(it); }

    value_type const & at(size_t it) const
    {
        validate_range(it);
        return data(it);
    }
    value_type & at(size_t it)
    {
        validate_range(it);
        return data(it);
    }

    value_type const & at(ssize_t it) const
    {
        validate_range(it);
        it += m_nghost;
        return data(it);
    }
    value_type & at(ssize_t it)
    {
        validate_range(it);
        it += m_nghost;
        return data(it);
    }

    value_type const & at(std::vector<size_t> const & idx) const { return at(shape_type(idx)); }
    value_type & at(std::vector<size_t> const & idx) { return at(shape_type(idx)); }

    value_type const & at(shape_type const & idx) const
    {
        const size_t offset = buffer_offset(m_stride, idx);
        validate_range(offset);
        return data(offset);
    }
    value_type & at(shape_type const & idx)
    {
        const size_t offset = buffer_offset(m_stride, idx);
        validate_range(offset);
        return data(offset);
    }

    value_type const & at(std::vector<ssize_t> const & idx) const { return at(sshape_type(idx)); }
    value_type & at(std::vector<ssize_t> const & idx) { return at(sshape_type(idx)); }

    value_type const & at(sshape_type sidx) const
    {
        validate_shape(sidx);
        sidx[0] += m_nghost;
        shape_type const idx(sidx.begin(), sidx.end());
        const size_t offset = buffer_offset(m_stride, idx);
        return data(offset);
    }
    value_type & at(sshape_type sidx)
    {
        validate_shape(sidx);
        sidx[0] += m_nghost;
        shape_type const idx(sidx.begin(), sidx.end());
        const size_t offset = buffer_offset(m_stride, idx);
        return data(offset);
    }

    size_t ndim() const noexcept { return m_shape.size(); }
    shape_type const & shape() const { return m_shape; }
    size_t shape(size_t it) const noexcept { return m_shape[it]; }
    size_t & shape(size_t it) noexcept { return m_shape[it]; }
    shape_type const & stride() const { return m_stride; }
    size_t stride(size_t it) const noexcept { return m_stride[it]; }
    size_t & stride(size_t it) noexcept { return m_stride[it]; }

    size_t nghost() const { return m_nghost; }
    size_t nbody() const { return m_shape.empty() ? 0 : m_shape[0] - m_nghost; }
    bool has_ghost() const { return m_nghost != 0; }
    void set_nghost(size_t nghost)
    {
        if (0 != nghost)
        {
            if (0 == ndim())
            {
                std::ostringstream ms;
                ms << "SimpleArray: cannot set nghost " << nghost << " > 0 to an empty array";
                throw std::out_of_range(ms.str());
            }
            if (nghost > shape(0))
            {
                std::ostringstream ms;
                ms << "SimpleArray: cannot set nghost " << nghost << " > shape(0) " << shape(0);
                throw std::out_of_range(ms.str());
            }
        }
        m_nghost = nghost;
        if (bool(*this))
        {
            m_body = calc_body(m_buffer->data<T>(), m_stride, m_nghost);
        }
    }

    template <typename U>
    SimpleArray<U> reshape(shape_type const & shape) const
    {
        return SimpleArray<U>(shape, m_buffer);
    }

    SimpleArray reshape(shape_type const & shape) const
    {
        return SimpleArray(shape, m_buffer);
    }

    SimpleArray reshape() const
    {
        return SimpleArray(m_shape, m_buffer);
    }

    void swap(SimpleArray & other) noexcept
    {
        if (this != &other)
        {
            std::swap(m_buffer, other.m_buffer);
            std::swap(m_shape, other.m_shape);
            std::swap(m_stride, other.m_stride);
            std::swap(m_nghost, other.m_nghost);
            std::swap(m_body, other.m_body);
        }
    }

    template <typename... Args>
    value_type const & operator()(Args... args) const { return *vptr(args...); }
    template <typename... Args>
    value_type & operator()(Args... args) { return *vptr(args...); }

    template <typename... Args>
    value_type const * vptr(Args... args) const { return m_body + buffer_offset(m_stride, args...); }
    template <typename... Args>
    value_type * vptr(Args... args) { return m_body + buffer_offset(m_stride, args...); }

    /* Backdoor */
    value_type const & data(size_t it) const { return data()[it]; }
    value_type & data(size_t it) { return data()[it]; }
    value_type const * data() const { return buffer().template data<value_type>(); }
    value_type * data() { return buffer().template data<value_type>(); }

    buffer_type const & buffer() const { return *m_buffer; }
    buffer_type & buffer() { return *m_buffer; }

    value_type const * body() const { return m_body; }
    value_type * body() { return m_body; }

private:

    void validate_range(ssize_t it) const
    {
        if (m_nghost != 0 && ndim() != 1)
        {
            std::ostringstream ms;
            ms << "SimpleArray::validate_range(): cannot handle "
               << ndim() << "-dimensional (more than 1) array with non-zero nghost: " << m_nghost;
            throw std::out_of_range(ms.str());
        }
        if (it < -static_cast<ssize_t>(m_nghost))
        {
            std::ostringstream ms;
            ms << "SimpleArray: index " << it << " < -nghost: " << -static_cast<ssize_t>(m_nghost);
            throw std::out_of_range(ms.str());
        }
        if (it >= static_cast<ssize_t>(size() - m_nghost))
        {
            std::ostringstream ms;
            ms << "SimpleArray: index " << it << " >= " << size() - m_nghost
               << " (size: " << size() << " - nghost: " << m_nghost << ")";
            throw std::out_of_range(ms.str());
        }
    }

    void validate_shape(small_vector<ssize_t> const & idx) const
    {
        auto index2string = [&idx]()
        {
            std::ostringstream ms;
            ms << "[";
            for (size_t it = 0; it < idx.size(); ++it)
            {
                ms << idx[it];
                if (it != idx.size() - 1)
                {
                    ms << ", ";
                }
            }
            ms << "]";
            return ms.str();
        };

        // Test for the "index shape".
        if (idx.empty())
        {
            throw std::out_of_range("SimpleArray::validate_shape(): empty index");
        }
        if (idx.size() != m_shape.size())
        {
            std::ostringstream ms;
            ms << "SimpleArray: dimension of input indices " << index2string() << " != array dimension " << m_shape.size();
            throw std::out_of_range(ms.str());
        }

        // Test the first dimension.
        if (idx[0] < -static_cast<ssize_t>(m_nghost))
        {
            std::ostringstream ms;
            ms << "SimpleArray: dim 0 in " << index2string() << " < -nghost: " << -static_cast<ssize_t>(m_nghost);
            throw std::out_of_range(ms.str());
        }
        if (idx[0] >= static_cast<ssize_t>(nbody()))
        {
            std::ostringstream ms;
            // clang-format off
            ms << "SimpleArray: dim 0 in " << index2string() << " >= nbody: " << nbody()
                << " (shape[0]: " << m_shape[0] << " - nghost: " << nghost() << ")";
            // clang-format on
            throw std::out_of_range(ms.str());
        }

        // Test the rest of the dimensions.
        for (size_t it = 1; it < m_shape.size(); ++it)
        {
            if (idx[it] < 0)
            {
                std::ostringstream ms;
                ms << "SimpleArray: dim " << it << " in " << index2string() << " < 0";
                throw std::out_of_range(ms.str());
            }
            if (idx[it] >= static_cast<ssize_t>(m_shape[it]))
            {
                std::ostringstream ms;
                ms << "SimpleArray: dim " << it << " in " << index2string()
                   << " >= shape[" << it << "]: " << m_shape[it];
                throw std::out_of_range(ms.str());
            }
        }
    }

    /// Contiguous data buffer for the array.
    std::shared_ptr<buffer_type> m_buffer;
    /// Each element in this vector is the number of element in the
    /// corresponding dimension.
    shape_type m_shape;
    /// Each element in this vector is the number of elements (not number of
    /// bytes) to skip for advancing an index in the corresponding dimension.
    shape_type m_stride;

    size_t m_nghost = 0;
    value_type * m_body = nullptr;

}; /* end class SimpleArray */

template <typename S>
using is_simple_array = std::is_same<
    std::remove_reference_t<S>,
    SimpleArray<typename std::remove_reference_t<S>::value_type>>;

template <typename S>
inline constexpr bool is_simple_array_v = is_simple_array<S>::value;

} /* end namespace modmesh */

/* vim: set et ts=4 sw=4: */

Small vector optimization (small_vector.hpp). See https://github.com/solvcon/modmesh for how it works in an application.#

#pragma once

/*
 * Copyright (c) 2020, Yung-Yu Chen <yyc@solvcon.net>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * - Neither the name of the copyright holder nor the names of its contributors
 *   may be used to endorse or promote products derived from this software
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdexcept>
#include <array>
#include <vector>
#include <algorithm>

namespace modmesh
{

template <typename T, size_t N = 3>
class small_vector
{

public:

    using value_type = T;
    using iterator = T *;
    using const_iterator = T const *;

    explicit small_vector(size_t size)
        : m_size(static_cast<unsigned int>(size))
    {
        if (m_size > N)
        {
            m_capacity = m_size;
            // NOLINTNEXTLINE(cppcoreguidelines-owning-memory)
            m_head = new T[m_capacity];
        }
        else
        {
            m_capacity = N;
            m_head = m_data.data();
        }
    }

    explicit small_vector(size_t size, T const & v)
        : small_vector(size)
    {
        std::fill(begin(), end(), v);
    }

    explicit small_vector(std::vector<T> const & vector)
        : small_vector(vector.size())
    {
        std::copy_n(vector.begin(), m_size, begin());
    }

    template <class InputIt>
    small_vector(InputIt first, InputIt last)
        : small_vector(last - first)
    {
        std::copy(first, last, begin());
    }

    small_vector(std::initializer_list<T> init)
        : small_vector(init.size())
    {
        std::copy_n(init.begin(), m_size, begin());
    }

    small_vector() { m_head = m_data.data(); }

    small_vector(small_vector const & other)
        : m_size(other.m_size)
    {
        if (other.m_head == other.m_data.data())
        {
            m_capacity = N;
            m_head = m_data.data();
        }
        else
        {
            m_capacity = m_size;
            // NOLINTNEXTLINE(cppcoreguidelines-owning-memory)
            m_head = new T[m_capacity];
        }
        std::copy_n(other.m_head, m_size, m_head);
    }

    small_vector(small_vector && other) noexcept
        : m_size(other.m_size)
    {
        if (other.m_head == other.m_data.data())
        {
            m_capacity = N;
            std::copy_n(other.m_data.begin(), m_size, m_data.begin());
            m_head = m_data.data();
        }
        else
        {
            m_capacity = m_size;
            m_head = other.m_head;
            other.m_size = 0;
            other.m_capacity = N;
            other.m_head = other.m_data.data();
        }
    }

    small_vector & operator=(small_vector const & other)
    {
        if (this != &other)
        {
            if (other.m_head == other.m_data.data())
            {
                if (m_head != m_data.data())
                {
                    delete[] m_head;
                    m_head = m_data.data();
                }
                m_size = other.m_size;
                m_capacity = N;
            }
            else
            {
                if (m_capacity < other.m_size)
                {
                    delete[] m_head;
                    m_head = nullptr;
                }
                if (m_head == m_data.data() || m_head == nullptr)
                {
                    m_capacity = other.m_size;
                    // NOLINTNEXTLINE(cppcoreguidelines-owning-memory)
                    m_head = new T[m_capacity];
                }
                m_size = other.m_size;
            }
            std::copy_n(other.m_head, m_size, m_head);
        }
        return *this;
    }

    small_vector & operator=(small_vector && other) noexcept
    {
        if (this != &other)
        {
            if (other.m_head == other.m_data.data())
            {
                if (m_head != m_data.data())
                {
                    delete[] m_head;
                    m_head = m_data.data();
                }
                m_size = other.m_size;
                m_capacity = N;
                std::copy_n(other.m_data.begin(), m_size, m_data.begin());
            }
            else
            {
                m_size = other.m_size;
                m_capacity = other.m_capacity;
                m_head = other.m_head;
                other.m_size = 0;
                other.m_capacity = N;
                other.m_head = other.m_data.data();
            }
        }
        return *this;
    }

    small_vector & operator=(std::vector<T> const & other)
    {
        if (size() < other.size())
        {
            std::copy(other.begin(), other.begin() + size(), begin());
            for (size_t it = size(); it < other.size(); ++it)
            {
                push_back(other[it]);
            }
        }
        else
        {
            std::copy(other.begin(), other.end(), begin());
            m_size = static_cast<unsigned int>(other.size());
        }
        return *this;
    }

    ~small_vector()
    {
        if (m_head != m_data.data() && m_head != nullptr)
        {
            delete[] m_head;
            m_head = nullptr;
        }
    }

    size_t size() const noexcept { return m_size; }
    size_t capacity() const noexcept { return m_capacity; }
    bool empty() const noexcept { return 0 == m_size; }

    iterator begin() noexcept { return m_head; }
    iterator end() noexcept { return m_head + m_size; }
    const_iterator begin() const noexcept { return m_head; }
    const_iterator end() const noexcept { return m_head + m_size; }
    const_iterator cbegin() const noexcept { return begin(); }
    const_iterator cend() const noexcept { return end(); }

    T const & operator[](size_t it) const { return m_head[it]; }
    T & operator[](size_t it) { return m_head[it]; }

    T const & at(size_t it) const
    {
        validate_range(it);
        return (*this)[it];
    }
    T & at(size_t it)
    {
        validate_range(it);
        return (*this)[it];
    }

    T const * data() const { return m_head; }
    T * data() { return m_head; }

    void clear() noexcept
    {
        if (m_head != m_data.data())
        {
            delete[] m_head;
            m_head = m_data.data();
        }
        m_size = 0;
        m_capacity = N;
    }

    void push_back(T const & value)
    {
        if (m_size == m_capacity)
        {
            m_capacity *= 2;
            // NOLINTNEXTLINE(cppcoreguidelines-owning-memory)
            T * storage = new T[m_capacity];
            std::copy_n(m_head, m_size, storage);
            if (m_head != m_data.data())
            {
                delete[] m_head;
            }
            m_head = storage;
        }
        m_head[m_size++] = value;
    }

private:

    void validate_range(size_t it) const
    {
        if (it >= size())
        {
            throw std::out_of_range("small_vector: index out of range");
        }
    }

    T * m_head = nullptr;
    unsigned int m_size = 0;
    unsigned int m_capacity = N;
    std::array<T, N> m_data;

}; /* end class small_vector */

template <typename T>
bool operator==(small_vector<T> const & lhs, small_vector<T> const & rhs)
{
    return std::equal(lhs.begin(), lhs.end(), rhs.begin());
}

static_assert(sizeof(small_vector<size_t>) == 40, "small_vector<size_t> should use 40 bytes");

} /* end namespace modmesh */

/* vim: set et ts=4 sw=4: */