diff options
Diffstat (limited to 'libprakipp')
-rw-r--r-- | libprakipp/compile_flags.txt | 2 | ||||
-rw-r--r-- | libprakipp/include/prakcommon.hpp | 94 | ||||
-rw-r--r-- | libprakipp/include/prakmath.hpp | 289 | ||||
-rw-r--r-- | libprakipp/include/prakmatrix.hpp | 185 | ||||
-rw-r--r-- | libprakipp/include/praktable.hpp | 345 | ||||
-rw-r--r-- | libprakipp/include/tests.cpp | 31 | ||||
-rw-r--r-- | libprakipp/src/prakmath.cpp | 0 | ||||
-rw-r--r-- | libprakipp/test_data | 21 | ||||
-rw-r--r-- | libprakipp/tests.cpp | 66 |
9 files changed, 1033 insertions, 0 deletions
diff --git a/libprakipp/compile_flags.txt b/libprakipp/compile_flags.txt new file mode 100644 index 0000000..909d9cc --- /dev/null +++ b/libprakipp/compile_flags.txt @@ -0,0 +1,2 @@ +-std=c++2c +-Iinclude diff --git a/libprakipp/include/prakcommon.hpp b/libprakipp/include/prakcommon.hpp new file mode 100644 index 0000000..5fee864 --- /dev/null +++ b/libprakipp/include/prakcommon.hpp @@ -0,0 +1,94 @@ +#pragma once + +#include <cmath> +#include <vector> +#include <iostream> + +#if defined(_MSC_VER) || !defined(__cpp_multidimensional_subscript) || __cplusplus < 202110L +#warning "can not use multidimentional subscript operator: falling back to `operator()`" +#undef MDSUBSCRIPT +#else +#define MDSUBSCRIPT +#endif + +#ifdef NO_MDSUBSCRIPT +#undef MDSUBSCRIPT +#endif + +#ifdef MDSUBSCRIPT + #define SUBSCR_OPN [ + #define SUBSCR_CLS ] + #define SUBSCR_OPRTR operator[] +#else + #define SUBSCR_OPN ( + #define SUBSCR_CLS ) + #define SUBSCR_OPRTR operator() +#endif + +namespace prak { + +/// stolen from [cppreference.com](https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon) +/// Compares 2 floating-point values up to `ulps` ULPS (units in the last place) +template <class T> +std::enable_if_t<not std::numeric_limits<T>::is_integer, bool> +fequal(T x, T y, std::size_t ulps = 1) +{ + // Since `epsilon()` is the gap size (ULP, unit in the last place) + // of floating-point numbers in interval [1, 2), we can scale it to + // the gap size in interval [2^e, 2^{e+1}), where `e` is the exponent + // of `x` and `y`. + + // If `x` and `y` have different gap sizes (which means they have + // different exponents), we take the smaller one. Taking the bigger + // one is also reasonable, I guess. + const T m = std::min(std::fabs(x), std::fabs(y)); + + // Subnormal numbers have fixed exponent, which is `min_exponent - 1`. + const int exp = m < std::numeric_limits<T>::min() + ? std::numeric_limits<T>::min_exponent - 1 + : std::ilogb(m); + + // We consider `x` and `y` equal if the difference between them is + // within `n` ULPs. + return std::fabs(x - y) <= ulps * std::ldexp(std::numeric_limits<T>::epsilon(), exp); +} + + +/// prints a vector +template <typename T> +void printv(const T &vec) { + std::cout << "std::vector { "; + for (const auto &x : vec) { + std::cout << x << ' '; + } + std::cout << '}' << std::endl; +} + +/// An allocator that aligns memory to 64 bytes. Needed for AVX instructions. +/// C++17 required for std::align_val_t +template <class T> +struct align_alloc : public std::allocator<T> { + constexpr T* allocate( std::size_t n ) { + return static_cast<T*>(::operator new(sizeof (T) * n, std::align_val_t{64})); + } + + constexpr void deallocate( T* p, std::size_t n ) { + ::operator delete(p, std::align_val_t{64}); + } +};; + +/// alias prak::vector that is the same as std::vector, but uses aligned allocator +template <typename T> +using vector = std::vector<T, align_alloc<T>>; + +/// prak value / pair value: a value with an error +template <typename T> +struct pvalue { T val, err; }; + +template <typename T> +std::ostream &operator<<(std::ostream &os, const struct pvalue<T> &p) { + /* return os << "value {" << p.val << "±" << p.err << "}"; */ + return os << p.val << "±" << p.err; +} + +} // namespace prak diff --git a/libprakipp/include/prakmath.hpp b/libprakipp/include/prakmath.hpp new file mode 100644 index 0000000..342c0cd --- /dev/null +++ b/libprakipp/include/prakmath.hpp @@ -0,0 +1,289 @@ +#pragma once + +#include <cmath> +#include <concepts> + +#ifdef __x86_64__ +#include <immintrin.h> +#endif + +#include "prakcommon.hpp" +#include "prakmatrix.hpp" + +namespace prak { + +// :) +constexpr const double PI = 3.141592657; +constexpr const float fPI = 3.141592657f; + +/// defines a type that supports arithmetic operation +template <typename T> +concept arithmetic = requires(T a) { + a + a; + a * a; + a - a; + a / a; +}; + +/// TODO: remove +enum struct operation { mul, div, add, sub }; + +/// vector multiply: fallback for non-floating-point types +template <arithmetic T> +void vmul(const T *op1, const T *op2, T *dest, size_t s) { + for (size_t i = 0; i < s; ++i) { + dest[i] = op1[i] * op2[i]; break; + } +} + +/// vector multiply: float implementation +template <> +inline void vmul(const float *op1, const float *op2, float *dest, size_t s) { + if (s < 8) goto scalar; + __m256 b1; + __m256 b2; + + for (size_t i = 0; i < s / 8; ++i) { + b1 = _mm256_load_ps(op1 + 8*i); + b2 = _mm256_load_ps(op2 + 8*i); + b1 = _mm256_mul_ps(b1, b2); + _mm256_store_ps(dest + 8*i, b1); + } +scalar: + for (size_t i = s - s % 8; i < s; ++i) { + dest[i] = op1[i] * op2[i]; + } +} + +/// vector multiply: double implementation +template <> +inline void vmul(const double *op1, const double *op2, double *dest, size_t s) { + if (s < 4) goto scalar; + __m256d b1; + __m256d b2; + + for (size_t i = 0; i < s / 4; ++i) { + b1 = _mm256_load_pd(op1 + 4*i); + b2 = _mm256_load_pd(op2 + 4*i); + b1 = _mm256_mul_pd(b1, b2); + _mm256_store_pd(dest + 4*i, b1); + } +scalar: + for (size_t i = s - s % 4; i < s; ++i) { + dest[i] = op1[i] * op2[i]; + } +} + +/// vector division: non-floating-point types fallback +template <arithmetic T> +void vdiv(const T *op1, const T *op2, T *dest, size_t s) { + for (size_t i = 0; i < s; ++i) { + dest[i] = op1[i] / op2[i]; break; + } +} + +/// vector division: floating point: single precision +template <> +inline void vdiv(const float *op1, const float *op2, float *dest, size_t s) { + if (s < 8) goto scalar; + __m256 b1; + __m256 b2; + + for (size_t i = 0; i < s / 8; ++i) { + b1 = _mm256_load_ps(op1 + 8*i); + b2 = _mm256_load_ps(op2 + 8*i); + b1 = _mm256_div_ps(b1, b2); + _mm256_store_ps(dest + 8*i, b1); + } +scalar: + for (size_t i = s - s % 8; i < s; ++i) { + dest[i] = op1[i] / op2[i]; + } +} + +/// vector division: floating point: double precision +template <> +inline void vdiv(const double *op1, const double *op2, double *dest, size_t s) { + if (s < 4) goto scalar; + __m256d b1; + __m256d b2; + + for (size_t i = 0; i < s / 4; ++i) { + b1 = _mm256_load_pd(op1 + 4*i); + b2 = _mm256_load_pd(op2 + 4*i); + b1 = _mm256_div_pd(b1, b2); + _mm256_store_pd(dest + 4*i, b1); + } +scalar: + for (size_t i = s - s % 4; i < s; ++i) { + dest[i] = op1[i] / op2[i]; + } +} + +inline float finalize(__m256 reg) { + // reg: x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 + __m128 extr = _mm256_extractf128_ps(reg, 1); + // extr: x0 | x1 | x2 | x3 + // __mm256_castps256_ps128: x4 | x5 | x6 | x7 + extr = _mm_add_ps(extr, _mm256_castps256_ps128(reg)); + // extr: x0+x4 | x1 + x5 | x2 + x6 | x3 + x7 + extr = _mm_hadd_ps(extr, extr); + // extr: x0 + x1 + x2 + x5 | x2 + x3 + x6 x7 | ... | ... + return extr[0] + extr[1]; +} + +inline double finalize(__m256d reg) { + // reg: x0 | x1 | x2 | x3 + reg = _mm256_hadd_pd(reg, reg); + // reg: x0 + x1 | x2 + x3 | ... | ... + return reg[0] + reg[1]; +} + +/// Fallback generic method to sum a vector +template <arithmetic T> +T vsum(const T *op, size_t size) { + T res; + for (size_t i = 0; i < size; ++i) + res += op[i]; + return res; +} + +/// Sum a vector, float implementation +template <> +inline float vsum(const float *op, size_t size) { + __m256 buf; + buf = _mm256_setzero_ps(); + if (size < 8) goto scalar; + for (size_t i = 0; i < size / 8; ++i) { + __m256 next = _mm256_load_ps(op + 8*i); + buf = _mm256_add_ps(buf, next); + } +scalar: + float linear = 0; + for (size_t i = size - size%8; i < size; ++i) { + linear += op[i]; + } + return linear + finalize(buf); +} + +/// Sum a vector, double implementation +template <> +inline double vsum(const double *op, size_t size) { + __m256d buf; + buf = _mm256_setzero_pd(); + if (size < 4) goto scalar; + for (size_t i = 0; i < size / 4; ++i) { + __m256d next = _mm256_load_pd(op + 4*i); + buf = _mm256_add_pd(buf, next); + } +scalar: + double linear = 0; + for (size_t i = size - size%8; i < size; ++i) { + linear += op[i]; + } + return linear + finalize(buf); +} + +/// calculate least-squares linear approximation to fit data +/// ax+b = y (ss is an error of Y value) +template <std::floating_point T> +void least_squares_linear( + const prak::vector<T> &xs, + const prak::vector<T> &ys, + const prak::vector<T> &ss, + struct pvalue<T> &a, + struct pvalue<T> &b) +{ + if (xs.size() != ys.size() || xs.size() != ss.size()) { + std::cout << "x.size() = " << xs.size() + << ", y.size() = " << ys.size() + << ", s.size() = " << ss.size() << std::endl; + return; + } + [[assume(xs.size() == ys.size() && ys.size() == ss.size())]]; + size_t sz = xs.size(); + + prak::vector<T> ones(sz); + prak::vector<T> ssq(sz); + prak::vector<T> ssq_m1(sz); + prak::vector<T> buf(sz); + prak::vector<T> buf_xs_ssq_m1(sz); + std::fill(ones.begin(), ones.end(), (T)1); // ones: [1] + vmul(ss.data(), ss.data(), ssq.data(), sz); // ssq: [ss*ss] + vdiv(ones.data(), ssq.data(), ssq_m1.data(), sz); // ssq_m1: [1/ss^2] + vmul(xs.data(), ssq_m1.data(), buf_xs_ssq_m1.data(), sz); // [xs / ss^2] + + const T *ysd = ys.data(), + *xsd = xs.data(); + T *ssqd = ssq.data(), + *ssq_m1d = ssq_m1.data(), + *onesd = ones.data(), + *bufd = buf.data(), + *buf_xs_ssq_m1d = buf_xs_ssq_m1.data(); + + vmul(buf_xs_ssq_m1d, xsd, bufd, sz); // buf: [xs^2 / ss^2] + T d1 = vsum(bufd, sz); // sum([xs^2 / ss^2]) + T ssq_m1sum = vsum(ssq_m1d, sz); // sum(1 / ss^2) + vmul(xsd, ssq_m1d, bufd, sz); // buf: [xs / ss^2] + T d2 = vsum(buf_xs_ssq_m1d, sz); // sum([xs / ss^2]) + T D = d1 * ssq_m1sum - d2*d2; // sum((xs/ss)^2) * sum(1/ss^2) - sum(xs/ss^2)^2 + + vmul(ysd, buf_xs_ssq_m1d, bufd, sz); // buf: [ys*xs/ss^2] + T da1 = vsum(bufd, sz); // sum([ys*xs/ss^2]) + vmul(ysd, ssq_m1d, bufd, sz); // buf: [ys/ss^2] + T da2 = vsum(bufd, sz); // sum([ys/ss^2]) + T DA = da1 * ssq_m1sum - da2 * d2; // sum([ys*xs/ss^2]) * sum([1/ss^2]) - sum([ys/ss^2]) * sum(xs/ss^2) + + T DB = d1 * da2 - d2 * da1; // sum([xs^2/ss^2]) * sum([ys/ss^2]) - sum([xs/ss^2]) * sum(ys*xs/ss^2) + + a.val = DA/D; + a.err = sqrt(ssq_m1sum / D); + + b.val = DB/D; + b.err = sqrt(d1 / D); +} + + +/// May throw std::bad_optional_access +template <typename T> +std::enable_if<std::is_arithmetic_v<T>, std::vector<pvalue<T>>> +polynomial_regression( + size_t degree, + std::vector<T> data_x, + std::vector<T> data_y, + std::optional<std::vector<T>> data_errors = std::nullopt) +{ + ++degree; // hack) + size_t data_size = data_x.size(); + if (data_size != data_y.size() || (data_errors.has_value() && data_errors->size() != data_size)) + throw dimension_error("Xs, Ys or Sigmas do not match sizes"); + struct matrix<T> X(data_size, degree), + Y(data_size, 1), + B(degree, 1), + W = matrix<T>::identity(data_size); + // initialize X + for (size_t row = 0; row < X.rows; ++row) { + X.SUBSCR_OPRTR(row, 0) = 1; + for (size_t col = 1; col < X.cols; ++col) { + X.SUBSCR_OPRTR(row, col) = X.SUBSCR_OPRTR(row, col-1) * data_x[row]; + } + } + // initialize Y + std::memcpy(Y.data(), data_y.data(), sizeof (T) * data_size); + // initialize W + if (data_errors.has_value()) { + std::vector<T> &err_value = *data_errors; + for (size_t i = 0; i < err_value.size(); ++i) + W.data()[i * (data_size + 1)] = 1 / err_value[i] / err_value[i]; + } + std::cerr << X << '\n' << Y << '\n' << W << '\n'; + matrix<T> X_T_W = X.tr() * W; + matrix<T> tmp1 = (X_T_W * X).inv().value(); + B = tmp1 * X_T_W * Y; + B.print(); + // TODO: FINISH (with covariation matrix) + return {}; +} + + +} // namespace prak diff --git a/libprakipp/include/prakmatrix.hpp b/libprakipp/include/prakmatrix.hpp new file mode 100644 index 0000000..87b38b4 --- /dev/null +++ b/libprakipp/include/prakmatrix.hpp @@ -0,0 +1,185 @@ +#pragma once + +#include "prakcommon.hpp" + +#include <cstring> +#include <iomanip> +#include <ostream> +#include <vector> + +namespace prak { + +struct dimension_error : public std::logic_error { + explicit dimension_error(const char *what): std::logic_error(what) {} +}; + +/// A class to represent a matrix. Works as an extension to std::vector, so all methods of std::vector are available on this class +/// BIG TODO: replave `throw` with `std::optional` return type +template <typename T> +struct matrix : public std::vector<T> { + size_t rows, cols; + size_t feq_precision = 10; + const T &SUBSCR_OPRTR(size_t row, size_t col) const { + return this->at(row * cols + col); + } + T &SUBSCR_OPRTR(size_t row, size_t col) { + return this->at(row * cols + col); + } + matrix(void) = default; + matrix(struct matrix<T> &&) = default; + matrix(const struct matrix<T> &) = default; + struct matrix<T> &operator=(const struct matrix<T> &) = default; + explicit matrix(size_t rows, size_t cols, T def) + : rows{rows}, cols{cols}, std::vector<T>(rows * cols, def) {} + explicit matrix(size_t rows, size_t cols, T *data = nullptr) + : rows{rows}, cols{cols}, std::vector<T>(rows * cols) { + if (data != nullptr) std::memcpy(this->data(), data, rows * cols); + } + explicit matrix (size_t rows, size_t cols, std::vector<T> &&data) noexcept(false) + : rows{rows}, cols{cols}, std::vector<T>(std::move(data)) { + if (this->size() != rows * cols) throw std::out_of_range("data.size() != rows*cols"); + } + /// add 2 matrixes + struct matrix<T> operator+(const struct matrix<T> &other) noexcept(false) { + if (other.rows != rows || other.cols != cols) + throw dimension_error("matrix add: dimensions of 'other' do not match dimensions of 'this'"); + struct matrix<T> ret(rows, cols); + for (size_t i = 0; i < this->size(); ++i) { + ret.data()[i] = this->data()[i] + other.data()[i]; + } + return ret; + } + /// perform matrix multiplication + struct matrix<T> operator*(const struct matrix<T> &other) noexcept(false) { + if (cols != other.rows) + throw dimension_error("Matrix mul: 'other.rows' does not match 'this.cols'"); + struct matrix<T> ret(rows, other.cols, T{}); + for (size_t i = 0; i < rows; ++i) { + for (size_t j = 0; j < other.cols; ++j) { + for (size_t k = 0; k < cols; ++k) + ret.SUBSCR_OPRTR(i, j) += SUBSCR_OPRTR(i, k) * other.SUBSCR_OPRTR(k, j); + } + } + return ret; + } + /// multiply matrix by a vector [other.size() x 1] + std::enable_if<std::is_arithmetic_v<T>, std::vector<T>> + operator*(const std::vector<T> &other) noexcept(false) { + if (cols != other.size()) throw dimension_error("Matrix mul: 'other.rows' does not match 'this.cols'"); + std::vector<T> ret(rows, T{}); + for (size_t row = 0; row < rows; ++rows) { + for (size_t k = 0; k < cols; ++k) + ret[row] += SUBSCR_OPRTR(row, k) * other[k]; + } + return ret; + } + /// multiply matrix by a scalar + template <typename Arg> + std::enable_if_t<std::is_arithmetic_v<Arg>, struct matrix<T>> + operator*(const Arg other) const { + struct matrix<T> ret(rows, cols); + for (size_t i = 0; i < this->size(); ++i) + ret.data()[i] = this->data()[i] * other; + return ret; + } + /// Tries to find an inverse of the matrix by using gaussian elimination + /// TODO: handle the case where the inverse does not exist + std::enable_if_t<std::is_floating_point_v<T>, std::optional<struct matrix<T>>> + inv(void) const noexcept(false) { + if (rows != cols) throw dimension_error("Matrix must be square to be inverted"); + struct matrix<T> gaussian(rows, cols * 2, T{}); + for (size_t row = 0; row < rows; ++row) { + std::memcpy(gaussian.data() + (row * cols * 2 ), this->data() + row * cols, cols * sizeof (T)); + gaussian.data()[row * cols * 2 + cols + row] = 1; + } + for (size_t row = 0; row < rows; ++row) { + T &fst_row_v = gaussian.SUBSCR_OPRTR(row, row); + bool row_found = true; + if (fequal<T>(fst_row_v, 0, feq_precision)) row_found = gaussian.find_suitable_row(row); + if (!row_found) return std::nullopt; + gaussian.mul_row(row, 1/gaussian.SUBSCR_OPRTR(row, row)); + for (size_t pr_row = 0; pr_row < rows; ++pr_row) { + if (row == pr_row) continue; + T& row_n_v = gaussian.SUBSCR_OPRTR(pr_row, row); + if (fequal<T>(row_n_v, 0, feq_precision)) continue; + gaussian.muladd_row(row, pr_row, -row_n_v/fst_row_v); + } + } + struct matrix<T> ret(rows, cols); + for (size_t row = 0; row < rows; ++row) { + std::memcpy(ret.data() + row * cols, gaussian.data() + row * 2*cols + cols, cols * sizeof (T)); + } + return ret; + + } + struct matrix<T> tr(void) const { + struct matrix<T> ret(cols, rows); + for (size_t row = 0; row < rows; ++row) + for (size_t col = 0; col < cols; ++col) + ret.SUBSCR_OPRTR(col, row) = SUBSCR_OPRTR(row, col); + return ret; + } + /// swaps rows `r1`, `r2` + void swap_rows(size_t r1, size_t r2) { + T* tmpbuf = new T[cols]; + size_t rowsize = cols * sizeof (T); + std::memcpy(tmpbuf, this->data() + cols * r1, rowsize); + std::memcpy(this->data() + cols * r1, this->data() + cols * r2, rowsize); + std::memcpy(this->data() + cols * r2, tmpbuf, rowsize); + delete[] tmpbuf; + } + /// multiplies `r_src` row by number `mul` and adds it ro row `r_dest` + void muladd_row(size_t r_src, size_t r_dest, T mul) { + for (size_t col = 0; col < cols; ++col) { + size_t r_src_ofs = cols * r_src + col; + size_t r_dest_ofs = cols * r_dest + col; + this->data()[r_dest_ofs] += this->data()[r_src_ofs] * mul; + } + } + /// multiplies a row by a number + void mul_row(size_t r1, T mul) { + for (size_t ind = r1 * cols; ind < (r1 + 1) * cols; ++ind) + this->data()[ind] *= mul; + } + /// finds a row that contains non-zero number at index [row = i > `row`, column=`row`]. + /// will only search for the rows below `row` and swap them + /// returns whether the swap occured (whether the row was found) + bool find_suitable_row(size_t row) { + for (size_t i = row + 1; i < rows; ++i) { + if (SUBSCR_OPRTR(i, row) != 0) { + swap_rows(i, row); + return true; + } + } + return false; + } + + static struct matrix<T> identity(size_t rank) { + struct matrix<T> ret(rank, rank); + for (size_t i = 0; i < rank * rank; i += (rank + 1)) { + ret.data()[i] = 1; + } + return ret; + } + + std::ostream &print(std::ostream &out = std::cout, size_t width = 10) const { + for (size_t row = 0; row < rows; ++row) { + for (size_t col = 0; col < cols; ++col) + out << "|" << std::setw(width) << SUBSCR_OPRTR(row, col); + std::cout << "|\n"; + } + return out; + } +}; + +template<typename T> +void operator/(const int x, const struct matrix<T> &m) { + std::cout << __PRETTY_FUNCTION__ << std::endl; +} + +template <typename T> +std::ostream &operator<<(std::ostream &out, const struct matrix<T> &m) { + return m.print(out); +} + +} // namespace prak diff --git a/libprakipp/include/praktable.hpp b/libprakipp/include/praktable.hpp new file mode 100644 index 0000000..53519a5 --- /dev/null +++ b/libprakipp/include/praktable.hpp @@ -0,0 +1,345 @@ +#pragma once + +#include <cstddef> +#include <functional> +#include <algorithm> +#include <optional> +#include <iostream> +#include <fstream> +#include <iomanip> +#include <ostream> +#include <string> +#include <vector> +#include <cmath> + + +#include "prakcommon.hpp" +#include "prakmath.hpp" + +namespace prak { + +/// truncate a string to n characters, or return in unmodified +inline std::string truncate_or(const std::string &str, size_t n) { + if (str.size() >= n) return str.substr(0, n); + return str; +} + + +/// Unused for now. +/// TODO: Remove +template <typename T> +struct opt_value { + enum struct t: unsigned char { + val = 0, + up = 1 << 0, + left = 1 << 1, + down = 1 << 2, + right = 1 << 3, + } tag; + T val; + + explicit opt_value(const T &v): tag{t::val}, val{v} {} + explicit opt_value(const opt_value::t t): tag{t} {} + opt_value &operator=(const T &v) { + val = v; + tag = t::val; + return *this; + } + opt_value &operator=(const opt_value::t t) { + tag = t; + return *this; + } + +}; + + +/// Class that can store, print, and apply operations to a table used by lab works +template <typename dtype> +class table { + std::vector<dtype> data; + + using function_type = std::function<dtype(const std::vector<dtype> &)>; + using stringvec = std::vector<std::string>; + + size_t index(const std::string &str) const { + return std::distance(names.cbegin(), std::find(names.cbegin(), names.cend(), str)); + } + + FILE* open_gnuplot() { +#if defined(_POSIX_VERSION) || defined(__unix) + FILE* = popen("gnuplot", "w"); +#else +#warning Not implemented for non-unix operating systems + return NULL; +#endif + } + +public: + size_t rows = 0, columns = 0; + friend class iterator; + struct iterator { + table *parent; + size_t columns; + size_t col_index = 0, data_index = 0; + using iterator_category = std::forward_iterator_tag; + using value_type = dtype; + using difference_type = ptrdiff_t; + using pointer = dtype *; + using reference = dtype &; + iterator(table *new_parent, const std::string &column, size_t row_idx = 0) + : parent{new_parent}, columns(new_parent->columns) + { + col_index = parent->index(column); + data_index = col_index + row_idx * new_parent->columns; + } + iterator &operator++() { data_index += columns; return *this; } + iterator operator++(int) { iterator ret = *this; ++(*this); return ret; } + bool operator==(iterator other) { return data_index == other.data_index && parent == other.parent && col_index == other.col_index; } + bool operator!=(iterator other) { return data_index != other.data_index || parent != other.parent || col_index != other.col_index; } + value_type operator*() { return parent->data[data_index]; }; + }; + /// Optional rownames: names of rows + std::vector<std::string> opt_rownames; + /// Mandatory columnnames: names of columns + std::vector<std::string> names; + /// width used for printing, defaults to 8 + size_t column_width = 8; + + /// default constructor + table() = default; + + /// Data: array of ararys, not freed automatically. + explicit table(const std::vector<std::string> &strings, dtype **data, size_t rows /*size_t columns = strings.size()*/) + : rows{rows}, columns{strings.size()} + { + names = strings; + + data = std::vector<dtype>(rows * strings.size()); + for (size_t i = 0; i < rows; ++i) + for (size_t j = 0; j < strings.size(); ++j) + data[i * strings.size() + j] = data[i][j]; + } + + /// Strings: names for columns + /// `new_data` format: { { entry1_a, entry2_a, ...} { entry1_b, entry2_b, ... }, ... } + /// where `a`, `b`, ... are columns + explicit table(std::vector<std::string> &&strings, std::vector<std::vector<dtype>> &&new_data) + : rows{new_data.size() ? new_data[0].size() : 0}, columns{strings.size()} + { + names = strings; + data = std::vector<dtype>(new_data.size() * rows); + auto dit = data.begin(); + for (size_t j = 0; j < rows; ++j) { + for (size_t i = 0; i < new_data.size(); ++i) { + *dit = new_data[i][j]; + ++dit; + } + } + } + + iterator begin(std::string column) { return iterator(this, column); } + iterator end(std::string column) { return iterator(this, column, rows); } + + dtype & SUBSCR_OPRTR (const std::string &column, size_t row) { + return data.at(names.size() * row + index(column)); + } + + dtype & SUBSCR_OPRTR (size_t column, size_t row) { + return data.at(names.size() * row + column); + } + + /// prints a table. defaults to using std::cout, but any std::ostream can be passed in it. + void print(std::ostream &stream = std::cout) const { + stream << "columns: " << columns << ", rows: " << rows << std::endl; + bool print_names = opt_rownames.size() == data.size() / columns; + size_t topchars = (column_width + 1) * (columns + print_names) + 1; + + std::string rowsep(column_width, '_'); + for (size_t i = 0; i < columns + print_names - 1; ++i) { + rowsep.append(1, '|'); + rowsep.append(column_width, '_'); + } + + stream << std::left << std::string(topchars, '_') << std::endl; + if (print_names) stream << '|' << std::setw(column_width) << ' '; + + for (const auto &s : names) + stream << '|' << std::setw(column_width) << truncate_or(s, column_width-1); + + for (size_t i = 0; auto x : data) { + if (i % columns == 0) { + stream << '|' << std::endl; + stream << '|' << rowsep << '|' << std::endl; + if (print_names) + stream << '|' << std::setw(column_width) << truncate_or(opt_rownames[i / columns], column_width); + } + stream << '|' << std::setw(column_width) << x; + ++i; + } + stream << '|' << std::endl << '|' << rowsep << '|' << std::endl; + } + + /// Returns whether the amount of names is correct + /// If it is incorrect, names won't be displayed during printing + bool set_rownames(std::vector<std::string> &&names) { + opt_rownames = names; + return opt_rownames.size() == data.size() / names.size(); + } + + /// apply a function to several columns and store result in another column + /// function must accept std::vector or arguments + table &apply(function_type function, stringvec args, std::optional<std::string> result) { + size_t result_index = result.has_value() ? index(*result) : 0; + for (size_t i = 0; i < rows; ++i) { + std::vector<dtype> v(args.size()); + for (size_t j = 0; j < args.size(); ++j) + v[j] = SUBSCR_OPRTR(args[j], i); + if (result.has_value()) data[columns * i + result_index] = function(v); + else (void)function(v); + } + return *this; + } + + /// adds a column with name `name` and data `column_data` + void add_column(std::string name, std::vector<dtype> column_data) { + std::vector<dtype> data_new(rows * (++columns)); + + for (size_t row = 0; row < rows; ++row) { + for (size_t column = 0; column < names.size(); ++column) // columns variable is incremented already + data_new[row * columns + column] = data[row * names.size() + column]; + data_new[(row + 1) * columns - 1] = column_data[row]; + } + data = std::move(data_new); + names.push_back(name); + } + + /// Appends a column to the table. if name is set, appends it to `opt_rownames` + void add_row(std::vector<dtype> values, std::optional<std::string> name = std::nullopt) { + data.resize(columns * (++rows)); + std::copy_n(values.cbegin(), columns, data.end() - columns); + if (name.has_value()) opt_rownames.push_back(*name); + } + + friend std::ostream& operator<<(std::ostream &os, table<dtype> &t) { + t.print(os); + return os; + } + + /// Reads a table from a file in a format: + /// ``` + /// col1 col2 col3 ... + /// val1 val2 val3 ... + /// val4 val5 val6 ... + /// ... + /// ``` + /// Note tha `val` may either be a real number or a question mark, denoting that the value is unknown + /// `col` may be any string without whitespaeces. + /// if the first column is named "__name__" (as in python), first val in each row is a string used as + /// a row name. + void read(std::ifstream& f) { + std::string header; + std::getline(f >> std::ws, header); + std::istringstream h_stream(header); + std::string buffer; + + bool read_names = false; + h_stream >> buffer; + if (buffer == "__name__") read_names = true; + else names.push_back(buffer); + + for (size_t i = read_names ? 0 : 1; h_stream >> buffer; ++i) + names.push_back(buffer); + + std::vector<dtype> tmp_row(names.size()); + while (!(f >> std::ws).eof()) { + if (read_names) { + f >> buffer; + opt_rownames.push_back(buffer); + } + for (auto it = tmp_row.begin(); it != tmp_row.end(); ++it) { + if ((f >> std::ws).peek() == '?') { + *it = NAN; + f >> buffer; + } + else f >> *it; + } + data.resize(data.size() + names.size()); + std::copy_n(tmp_row.cbegin(), names.size(), data.end() - names.size()); + ++rows; + } + columns = names.size(); + } + + /// returns an std::pair with coefficients A and B in that order + std::pair<prak::pvalue<dtype>, prak::pvalue<dtype>> least_squares_linear(std::string x, std::string y, std::optional<std::string> sigma = std::nullopt) { + prak::vector<dtype> _x(rows); + prak::vector<dtype> _y(rows); + prak::vector<dtype> _s(rows); + + std::copy(begin(x), end(x), _x.begin()); + std::copy(begin(y), end(y), _y.begin()); + if (sigma.has_value()) std::copy(begin(*sigma), end(*sigma), _s.begin()); + else _s = prak::vector<dtype>(rows, static_cast<dtype>(1)); + + std::pair<prak::pvalue<dtype>, prak::pvalue<dtype>> ret; + prak::least_squares_linear<dtype>(_x, _y, _s, ret.first, ret.second); + return ret; + } + + /// Serialize data in format `data[args[0]][i] data[args[1]][i] data[args[2]][i]...` + void print_plot(const stringvec &args, std::ostream &out = std::cout) const { + std::vector<size_t> offsets(args.size()); + for (size_t i = 0; i < args.size(); ++i) { + offsets[i] = index(args[i]); + } + for (size_t row = 0; row < rows; ++row) { + size_t row_offset = columns * row; + for (const auto column_offset : offsets) + std::cout << data[row_offset + column_offset] << ' '; + std::cout << std::endl; + } + } + + /// Serialize data in format `data[xs][i] data[ys][i] <data[ss][i]>`, readable by gnuplot with yerrorbars + void write_plot(const std::string &xs, const std::string &ys, std::optional<std::string> yss = std::nullopt, std::ostream &out = std::cout) const { + size_t nosigma = std::numeric_limits<size_t>::max(); + size_t xsi = index(xs), ysi = index(ys), ssi = nosigma; + if (yss.has_value()) ssi = index(*yss); + for (size_t row = 0; row < rows; ++row) { + size_t offset = columns * row; + out << data[offset + xsi] << ' ' << data[offset + ysi]; + if (ssi != nosigma) out << ' ' << data[offset+ssi]; + out << std::endl; + } + } + + void plot_png( + const std::string output_filename, + const std::string &x, + const std::string &y, + const std::optional<std::string> &xlabel = std::nullopt, + const std::optional<std::string> &ylabel = std::nullopt, + const std::optional<std::string> &sigma = std::nullopt) { + // TODO: Finish + } + + struct plot { + std::string x; + std::string y; + std::optional<std::string> sigma; + bool plot_points = true; + std::optional<std::string> label; + }; + + void plot_png(const std::string output, const std::vector<struct plot> &plots) { + // TODO: Finish later + /* FILE *gnuplot = open_gnuplot(); */ + /* fprintf(gnuplot, */ + /* "set terminal pngcairo enhanced size 800,600 dpi 300\n" */ + /* "set output '%s'\n" */ + /* , output.data() */ + /* ); */ + } +}; + +} // namespace prak diff --git a/libprakipp/include/tests.cpp b/libprakipp/include/tests.cpp new file mode 100644 index 0000000..af61860 --- /dev/null +++ b/libprakipp/include/tests.cpp @@ -0,0 +1,31 @@ +#include "prakmath.hpp" +#include "prakmatrix.hpp" + +int main() { + using namespace prak; + matrix<double> A(2, 1, {-1, 1}); + matrix<double> B(1, 2, {1, -2}); + matrix<double> C = A*B * (1.0/1.0); + std::cout << A << '\n' << B << '\n' << C << std::endl; + matrix<double> D = C + matrix<double>(2, 2, {-1, -2, -3, -4}); + std::getchar(); + D.feq_precision = 10; + matrix<double> A_1 = D.inv().value(); + std::cout << A_1 << std::endl; + std::cout << A_1*D << D*A_1 << std::endl; + matrix<double> E(2, 3, {1, 2, 3, 4, 5, 6}); + std::cout << E << '\n' << E.tr() << std::endl; + std::vector<double> xs = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector<double> ys = {1, 2.9, 5.2, 7.1, 9, 11.05, 12.7, 14}; + /* std::vector<double> ss = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8}; */ + std::vector<double> ss = {0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1}; + /* std::vector<double> xs = {0, 1, 2}; */ + /* std::vector<double> ys = {1, 2.9, 5.2}; */ + /* std::vector<double> ss = {0.1, 0.2, 0.3}; */ + polynomial_regression(2, xs, ys, std::make_optional(ss)); + + matrix<double> T1 (3, 3, {1, 2, 3, 0, 1, 2, 0, 0, 1}); + matrix<double> T2 (3, 3, {4, 5, 6, 0, 4, 5, 0, 0, 6}); + std::cout << T1 << '\n' << T2 << '\n' << T1 * T1 << std::endl; + +} diff --git a/libprakipp/src/prakmath.cpp b/libprakipp/src/prakmath.cpp new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/libprakipp/src/prakmath.cpp diff --git a/libprakipp/test_data b/libprakipp/test_data new file mode 100644 index 0000000..a68190d --- /dev/null +++ b/libprakipp/test_data @@ -0,0 +1,21 @@ +index steel_n_up steel_n_down steel_n copper_n_up copper_n_down copper_n +0 69.8048106744458 69.8048106744458 ? 0 0 ? +1 59.538400172313104 59.538400172313104 ? 0 0 ? +2 49.77235665705151 49.77235665705151 ? 0 0 ? +3 39.003794962183754 39.003794962183754 ? 0 0 ? +4 29.41454165597367 29.41454165597367 ? 0 0 ? +5 19.85284784036345 19.85284784036345 ? 0 0 ? +6 9.621038299065232 9.621038299065232 ? 0 0 ? +7 -0.9475724262480807 -0.9475724262480807 ? 0 0 ? +8 -10.349127476774775 -10.349127476774775 ? 0 0 ? +9 -20.602089970343936 -20.602089970343936 ? 0 0 ? +10 -30.69633194493392 -30.69633194493392 ? 0 0 ? +11 -40.16111422807451 -40.16111422807451 ? 0 0 ? +12 -50.86998825863326 -50.86998825863326 ? 0 0 ? +13 -60.712258771120425 -60.712258771120425 ? 0 0 ? +14 -70.75473619559727 -70.75473619559727 ? 0 0 ? +15 -80.96959205323306 -80.96959205323306 ? 0 0 ? +16 -90.85022462488773 -90.85022462488773 ? 0 0 ? +17 -100.67744328024175 -100.67744328024175 ? 0 0 ? +18 -110.14378705377496 -110.14378705377496 ? 0 0 ? +19 -120.3377964523638 -120.3377964523638 ? 0 0 ? diff --git a/libprakipp/tests.cpp b/libprakipp/tests.cpp new file mode 100644 index 0000000..3dda25a --- /dev/null +++ b/libprakipp/tests.cpp @@ -0,0 +1,66 @@ + +#include <iostream> +#include <vector> + +#include "praktable.hpp" +#include "prakmath.hpp" + +double avg(const std::vector<double> &v) { + return (v[0] + v[1]) / 2 ; +} + +int main_1(int argc, char *argv[]) { + prak::table<double> t; + std::ifstream f("test_data"); + t.read(f); + t.column_width = 16; + std::cout << t; + t.apply(&avg, {"steel_n_up", "steel_n_down"}, "steel_n"); + t.apply(&avg, {"copper_n_up", "copper_n_down"}, "copper_n"); + std::cout << t; + std::cout << "a[\"copper_n\"][5] = " << t SUBSCR_OPN "copper_n", 0 SUBSCR_CLS << std::endl; + for (auto it = t.begin("steel_n"); it != t.end("steel_n"); ++it) { + std::cout << *it << ' '; + } + auto [a, b] = t.least_squares_linear("index", "steel_n"); + std::printf("\na = %lf %lf; b = %lf %lf\n", a.val, a.err, b.val, b.err); + + return 0; +} + +int main_2(int argc, char *argv[]) { + main_1(argc, argv); + return 0; + + prak::vector<double> x = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + prak::vector<double> y = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + prak::vector<double> s = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + struct prak::pvalue<double> val; + prak::least_squares_linear(x, y, s, val, val); + prak::printv(x); + prak::vmul(x.data(), x.data(), x.data(), (size_t)x.size()); + prak::printv(x); + return 0; +} + +int main_3(int argc, char *argv[]) { + volatile __m256 i = {10, 1, 10, 1, 10, 10, 11, 11}; + volatile __m256d j = {100, 200, -100, 400}; + printf("sum(i) = %f\n", prak::finalize(i)); + printf("sum(j) = %lf\n", prak::finalize(j)); + return 0; +} + +int main_4(int argc, char *argv[]) { + prak::vector<double> xs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + prak::vector<double> ys = {0, 2, 2, 8, 8, 10, 13, 15, 16, 18, 20}; + prak::vector<double> ss = {1, 1, 4, 4, 1, 1, 2, 2, 1, 1, 1}; + prak::pvalue<double> a, b; + prak::least_squares_linear(xs, ys, ss, a, b); + std::printf("a: {%lf, %lf}; b: {%lf, %lf}\n", a.val, a.err, b.val, b.err); + return 0; +} + +int main(int argc, char *argv[]) { + main_1(argc, argv); +} |