Example Code: C++ and C for Python#

Generate a Cartesian grid of $51times51$ (01_grid.py).#
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python3

from matplotlib import pyplot as plt

# [begin example]
import numpy as np

def make_grid():
    # Number of the grid points.
    nx = 51
    # Produce the x coordinates.
    x = np.linspace(0, 1, nx)
    # Create the two arrays for x and y coordinates of the grid.
    gx, gy = np.meshgrid(x, x)
    # Create the field and zero-initialize it.
    u = np.zeros_like(gx)
    # Set the boundary condition.
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    # Return all values.
    return nx, x, u

nx, x, uoriginal = make_grid()
# [end example]

def show_grid(size):
    fig, ax = plt.subplots(figsize=(size,size))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_xticks(x, minor=True)
    ax.set_yticks(x, minor=True)
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    ax.grid(True, which='major', linewidth=2)
    ax.grid(True, which='minor', linewidth=1)

show_grid(10)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:
Solve the Laplace equation using Python nested loops (01_solve_python_loop.py).#
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3

import numpy as np
from matplotlib import pyplot as plt

def make_grid():
    nx = 51
    x = np.linspace(0, 1, nx)
    gx, gy = np.meshgrid(x, x)
    u = np.zeros_like(gx)
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    return nx, x, u

nx, x, uoriginal = make_grid()

# [begin example]
import time
import numpy as np

class Timer:
    def __enter__(self):
        self._t0 = time.time()
    def __exit__(self, *args, **kw):
        self._t1 = time.time()
        print("Wall time: {:g} s".format(self._t1 - self._t0))

def solve_python_loop():
    u = uoriginal.copy()
    un = u.copy()
    converged = False
    step = 0
    # Outer loop.
    while not converged:
        step += 1
        # Inner loops. One for x and the other for y.
        for it in range(1, nx-1):
            for jt in range(1, nx-1):
                un[it,jt] = (u[it+1,jt] + u[it-1,jt] + u[it,jt+1] + u[it,jt-1]) / 4
        norm = np.abs(un-u).max()
        u[...] = un[...]
        converged = True if norm < 1.e-5 else False
    return u, step, norm

with Timer():
    u, step, norm = solve_python_loop()

print(step)
# [end example]

# Run the Python solver
def show_result(u, step, norm, size=7):
    print("step", step, "norm", norm)
    fig, ax = plt.subplots(figsize=(size,size))
    cs = ax.contour(x, x, u.T)
    ax.clabel(cs, inline=1, fontsize=10)

    ax.set_xticks(np.linspace(0,1,6))
    ax.set_yticks(np.linspace(0,1,6))
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    ax.grid(True, which='minor')

show_result(u, step, norm)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:
The analytical solution of the Laplace equation (01_solve_analytical.py).#
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3

import numpy as np

def make_grid():
    nx = 51
    x = np.linspace(0, 1, nx)
    gx, gy = np.meshgrid(x, x)
    u = np.zeros_like(gx)
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    return nx, x, u

nx, x, uoriginal = make_grid()

# [begin example]
import numpy as np

def solve_analytical():
    u = np.empty((len(x), len(x)), dtype='float64')
    for ix, cx in enumerate(x):
        u[ix, :] = np.sinh(np.pi*cx) / np.sinh(np.pi) * np.sin(np.pi*x)
    return u
# [end example]

def solve_python_loop():
    u = uoriginal.copy()
    un = u.copy()
    converged = False
    step = 0
    # Outer loop.
    while not converged:
        step += 1
        # Inner loops. One for x and the other for y.
        for it in range(1, nx-1):
            for jt in range(1, nx-1):
                un[it,jt] = (u[it+1,jt] + u[it-1,jt] + u[it,jt+1] + u[it,jt-1]) / 4
        norm = np.abs(un-u).max()
        u[...] = un[...]
        converged = True if norm < 1.e-5 else False
    return u, step, norm

u, step, norm = solve_python_loop()

# [begin pycon]
uanalytical = solve_analytical()
# Calculate the L inf norm.
print("Linf of difference is %f" % np.abs(u - uanalytical).max())
# [end pycon]

from matplotlib import pyplot as plt

def show_result(u, step, norm, size=7):
    print("step", step, "norm", norm)
    fig, ax = plt.subplots(figsize=(size,size))
    cs = ax.contour(x, x, u.T)
    ax.clabel(cs, inline=1, fontsize=10)

    ax.set_xticks(np.linspace(0,1,6))
    ax.set_yticks(np.linspace(0,1,6))
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    ax.grid(True, which='minor')

show_result(uanalytical, 0, 0)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:
Solve the Laplace equation using numpy (02_solve_array.py).#
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python3

import numpy as np

def make_grid():
    nx = 51
    x = np.linspace(0, 1, nx)
    gx, gy = np.meshgrid(x, x)
    u = np.zeros_like(gx)
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    return nx, x, u

nx, x, uoriginal = make_grid()

# [begin example]
import numpy as np

def solve_array():
    u = uoriginal.copy()
    un = u.copy()
    converged = False
    step = 0
    while not converged:
        step += 1
        un[1:nx-1,1:nx-1] = (u[2:nx,1:nx-1] + u[0:nx-2,1:nx-1] +
                             u[1:nx-1,2:nx] + u[1:nx-1,0:nx-2]) / 4
        norm = np.abs(un-u).max()
        u[...] = un[...]
        converged = True if norm < 1.e-5 else False
    return u, step, norm
# [end example]

import time

class Timer:
    def __enter__(self):
        self._t0 = time.time()
    def __exit__(self, *args, **kw):
        self._t1 = time.time()
        print("Wall time: {:g} s".format(self._t1 - self._t0))

# [begin pycon]
# Run the Python solver
with Timer():
    u, step, norm = solve_array()
# [end pycon]

from matplotlib import pyplot as plt

def show_result(u, step, norm, size=7):
    print("step", step, "norm", norm)
    fig, ax = plt.subplots(figsize=(size,size))
    cs = ax.contour(x, x, u.T)
    ax.clabel(cs, inline=1, fontsize=10)

    ax.set_xticks(np.linspace(0,1,6))
    ax.set_yticks(np.linspace(0,1,6))
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    ax.grid(True, which='minor')

show_result(u, step, norm)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:
Solve the Laplace equation using the C++ code (solve_cpp.cpp).#
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#include <pybind11/pybind11.h>
#define FORCE_IMPORT_ARRAY
#include <xtensor-python/pyarray.hpp>

#include <vector>
#include <algorithm>
#include <tuple>
#include <iostream>

#include <xtensor/xarray.hpp>
#include <xtensor/xadapt.hpp>
#include <xtensor/xview.hpp>

// [begin example]
std::tuple<xt::xarray<double>, size_t, double>
solve1(xt::xarray<double> u)
{
    const size_t nx = u.shape(0);
    xt::xarray<double> un = u;
    bool converged = false;
    size_t step = 0;
    double norm;
    while (!converged)
    {
        ++step;
        for (size_t it=1; it<nx-1; ++it)
        {
            for (size_t jt=1; jt<nx-1; ++jt)
            {
                un(it,jt) = (u(it+1,jt) + u(it-1,jt) + u(it,jt+1) + u(it,jt-1)) / 4;
            }
        }
        norm = xt::amax(xt::abs(un-u))();
        if (norm < 1.e-5) { converged = true; }
        u = un;
    }
    return std::make_tuple(u, step, norm);
}

PYBIND11_MODULE(solve_cpp, m)
{
    xt::import_numpy();
    m.def
    (
        "solve_cpp"
      , [](xt::pyarray<double> & uin) { return solve1(xt::xarray<double>(uin)); }
    );
}
// [end example]

// vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4:
The Python driver to solve the Laplace equation using the C++ code (03_solve_cpp.py).#
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python3

import numpy as np

def make_grid():
    nx = 51
    x = np.linspace(0, 1, nx)
    gx, gy = np.meshgrid(x, x)
    u = np.zeros_like(gx)
    u[0,:] = 0
    u[-1,:] = 1 * np.sin(np.linspace(0,np.pi,nx))
    u[:,0] = 0
    u[:,-1] = 0
    return nx, x, u

nx, x, uoriginal = make_grid()

# [begin example]
import solve_cpp
# [end example]

import time

class Timer:
    def __enter__(self):
        self._t0 = time.time()
    def __exit__(self, *args, **kw):
        self._t1 = time.time()
        print("Wall time: {:g} s".format(self._t1 - self._t0))

# [begin pycon]
with Timer():
    u, step, norm = solve_cpp.solve_cpp(uoriginal)
# [end pycon]

from matplotlib import pyplot as plt

def show_result(u, step, norm, size=7):
    print("step", step, "norm", norm)
    fig, ax = plt.subplots(figsize=(size,size))
    cs = ax.contour(x, x, u.T)
    ax.clabel(cs, inline=1, fontsize=10)

    ax.set_xticks(np.linspace(0,1,6))
    ax.set_yticks(np.linspace(0,1,6))
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    ax.grid(True, which='minor')

show_result(u, step, norm)

plt.rc('figure', figsize=(8, 8))

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79:
C++ code for least-square regression to polynomial functions (data_prep.cpp).#
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#include <pybind11/pybind11.h>
#define FORCE_IMPORT_ARRAY
#include <xtensor-python/pyarray.hpp>

#include <vector>
#include <algorithm>

#include <xtensor/xarray.hpp>
#include <xtensor/xadapt.hpp>
#include <xtensor/xview.hpp>
#include <xtensor-blas/xlinalg.hpp>

// [begin example: single fit]
/**
 * This function calculates the least-square regression of a point cloud to a
 * polynomial function of a given order.
 */
template <class AT>
xt::xarray<double> fit_poly(AT & xarr, AT & yarr, size_t order)
{
    if (xarr.size() != yarr.size())
    {
        throw std::runtime_error("xarr and yarr size mismatch");
    }

    // The rank of the linear map is (order+1).
    xt::xarray<double> matrix(std::vector<size_t>{order+1, order+1});

    // Use the x coordinates to build the linear map for least-square
    // regression.
    for (size_t it=0; it<order+1; ++it)
    {
        for (size_t jt=0; jt<order+1; ++jt)
        {
            double & val = matrix(it, jt);
            val = 0;
            for (size_t kt=0; kt<xarr.size(); ++kt)
            {
                val += pow(xarr[kt], it+jt);
            }
        }
    }

    // Use the x and y coordinates to build the vector in the right-hand side
    // of the linear system.
    xt::xarray<double> rhs(std::vector<size_t>{order+1});
    for (size_t jt=0; jt<order+1; ++jt)
    {
        rhs[jt] = 0;
        for (size_t kt=0; kt<yarr.size(); ++kt)
        {
            rhs[jt] += pow(xarr[kt], jt) * yarr[kt];
        }
    }

    // Solve the linear system for the least-square minimization.
    xt::xarray<double> lhs = xt::linalg::solve(matrix, rhs);
    std::reverse(lhs.begin(), lhs.end()); // to make numpy.poly1d happy.

    return lhs;
}
// [end example: single fit]

// [begin example: multiple fit]
/**
 * This function calculates the least-square regression of multiple sets of
 * point clouds to the corresponding polynomial functions of a given order.
 */
template <class AT>
xt::xarray<double> fit_polys
(
    xt::xarray<double> & xarr, xt::xarray<double> & yarr, size_t order
)
{
    size_t xmin = std::floor(*std::min_element(xarr.begin(), xarr.end()));
    size_t xmax = std::ceil(*std::max_element(xarr.begin(), xarr.end()));
    size_t ninterval = xmax - xmin;

    xt::xarray<double> lhs(std::vector<size_t>{ninterval, order+1});
    lhs.fill(0); // sentinel.
    size_t start=0;
    for (size_t it=0; it<xmax; ++it)
    {
        // NOTE: We take advantage of the intrinsic features of the input data
        // to determine the grouping.  This is ad hoc and hard to maintain.  We
        // play this trick to demonstrate a hackish way of performing numerical
        // calculation.
        size_t stop;
        for (stop=start; stop<xarr.size(); ++stop)
        {
            if (xarr[stop]>=it+1) { break; }
        }

        // Obtain the slice of the input that is to be processed in this
        // iteration.
        AT const sub_x = xt::view(xarr, xt::range(start, stop));
        AT const sub_y = xt::view(yarr, xt::range(start, stop));

        // Use the single polynomial helper function.
        xt::xarray<double> sub_lhs = fit_poly(sub_x, sub_y, order);
        xt::view(lhs, it, xt::all()) = sub_lhs;

        start = stop;
    }

    return lhs;
}
// [end example: multiple fit]

// [begin example: wrapping]
/**
 * The pybind11 wrapper for the helper functions for polynomial fitting.
 */
PYBIND11_MODULE(data_prep, m)
{
    using array_type = xt::xarray<double>;

    xt::import_numpy();
    m.def
    (
        "fit_poly"
      , []
        (
            xt::pyarray<double> & xarr_in
          , xt::pyarray<double> & yarr_in
          , size_t order
        )
        {
            std::vector<size_t> xarr_shape(xarr_in.shape().begin()
                                         , xarr_in.shape().end());
            xt::xarray<double> xarr = xt::adapt(xarr_in.data(), xarr_shape);

            std::vector<size_t> yarr_shape(yarr_in.shape().begin()
                                         , yarr_in.shape().end());
            xt::xarray<double> yarr = xt::adapt(yarr_in.data(), yarr_shape);

            return fit_poly(xarr, yarr, order);
        }
    );
    m.def
    (
        "fit_polys"
      , []
        (
            xt::pyarray<double> & xarr_in
          , xt::pyarray<double> & yarr_in
          , size_t order
        )
        {
            std::vector<size_t> xarr_shape(xarr_in.shape().begin()
                                         , xarr_in.shape().end());
            xt::xarray<double> xarr = xt::adapt(xarr_in.data(), xarr_shape);
            std::vector<size_t> yarr_shape(yarr_in.shape().begin()
                                         , yarr_in.shape().end());
            xt::xarray<double> yarr = xt::adapt(yarr_in.data(), yarr_shape);
            return fit_polys<array_type>(xarr, yarr, order);
        }
    );
}
// [end example: wrapping]

// vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4:
The Python driver to the least-square regression using the C++ code (04_fit_poly.py).#
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python3

import numpy as np
from matplotlib import pyplot as plt

import time

class Timer:
    def __enter__(self):
        self._t0 = time.time()
    def __exit__(self, *args, **kw):
        self._t1 = time.time()
        print("Wall time: {:g} s".format(self._t1 - self._t0))

# [begin prep pycon]
with Timer():
    # np.unique returns a sorted array.
    xdata = np.unique(np.random.sample(1000000) * 1000)
    ydata = np.random.sample(len(xdata)) * 1000
# [end prep pycon]

# [begin fit_poly pycon]
import data_prep

plt.rc('figure', figsize=(12, 8))

def plot_poly_fitted(i):
    slct = (xdata>=i)&(xdata<(i+1))
    sub_x = xdata[slct]
    sub_y = ydata[slct]
    poly = data_prep.fit_poly(sub_x, sub_y, 3)
    print(poly)
    poly = np.poly1d(poly)
    xp = np.linspace(sub_x.min(), sub_x.max(), 100)
    plt.plot(sub_x, sub_y, '.', xp, poly(xp), '-')

plot_poly_fitted(10)
# [end fit_poly pycon]

import os
imagedir = os.path.join(os.path.dirname(__file__), '..', 'image')
imagebase = os.path.splitext(os.path.basename(__file__))[0] + '.png'
imagepath = os.path.join(imagedir, imagebase)
print('write to {}'.format(imagepath))
plt.savefig(imagepath, dpi=150)

# [begin lumped pycon]
with Timer():
    # Do the calculation for the 1000 groups of points.
    polygroup = np.empty((1000, 3), dtype='float64')
    for i in range(1000):
        # Use numpy to build the point group.
        slct = (xdata>=i)&(xdata<(i+1))
        sub_x = xdata[slct]
        sub_y = ydata[slct]
        polygroup[i,:] = data_prep.fit_poly(sub_x, sub_y, 2)
# [end lumped pycon]

# [begin point build pycon]
with Timer():
    # Using numpy to build the point groups takes a lot of time.
    data_groups = []
    for i in range(1000):
        slct = (xdata>=i)&(xdata<(i+1))
        data_groups.append((xdata[slct], ydata[slct]))
# [end point build pycon]

# [begin fitting pycon]
with Timer():
    # Fitting helper runtime is much less than building the point groups.
    polygroup = np.empty((1000, 3), dtype='float64')
    for it, (sub_x, sub_y) in enumerate(data_groups):
        polygroup[it,:] = data_prep.fit_poly(sub_x, sub_y, 2)
# [end fitting pycon]

# [begin batch pycon]
with Timer():
    rbatch = data_prep.fit_polys(xdata, ydata, 2)
# [end batch pycon]

print(rbatch.shape)
# Verify batch.
assert (rbatch[i] == polygroup[i]).all()

# vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4 tw=79: