9 files changed, 1033 insertions, 0 deletions
diff --git a/libprakipp/compile_flags.txt b/libprakipp/compile_flags.txt
new file mode 100644
index 0000000..909d9cc
--- /dev/null
+++ b/libprakipp/compile_flags.txt
@@ -0,0 +1,2 @@
+-std=c++2c 
+-Iinclude
diff --git a/libprakipp/include/prakcommon.hpp b/libprakipp/include/prakcommon.hpp
new file mode 100644
index 0000000..5fee864
--- /dev/null
+++ b/libprakipp/include/prakcommon.hpp
@@ -0,0 +1,94 @@
+#pragma once
+
+#include <cmath>
+#include <vector>
+#include <iostream>
+
+#if defined(_MSC_VER) || !defined(__cpp_multidimensional_subscript) || __cplusplus < 202110L
+#warning "can not use multidimentional subscript operator: falling back to `operator()`"
+#undef MDSUBSCRIPT
+#else
+#define MDSUBSCRIPT
+#endif
+
+#ifdef NO_MDSUBSCRIPT
+#undef MDSUBSCRIPT
+#endif
+
+#ifdef MDSUBSCRIPT
+	#define SUBSCR_OPN [
+	#define SUBSCR_CLS ]
+	#define SUBSCR_OPRTR operator[]
+#else	
+	#define SUBSCR_OPN (
+	#define SUBSCR_CLS )
+	#define SUBSCR_OPRTR operator()
+#endif
+	
+namespace prak {
+
+/// stolen from [cppreference.com](https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon)
+/// Compares 2 floating-point values up to `ulps` ULPS (units in the last place)
+template <class T>
+std::enable_if_t<not std::numeric_limits<T>::is_integer, bool>
+fequal(T x, T y, std::size_t ulps = 1)
+{
+    // Since `epsilon()` is the gap size (ULP, unit in the last place)
+    // of floating-point numbers in interval [1, 2), we can scale it to
+    // the gap size in interval [2^e, 2^{e+1}), where `e` is the exponent
+    // of `x` and `y`.
+ 
+    // If `x` and `y` have different gap sizes (which means they have
+    // different exponents), we take the smaller one. Taking the bigger
+    // one is also reasonable, I guess.
+    const T m = std::min(std::fabs(x), std::fabs(y));
+ 
+    // Subnormal numbers have fixed exponent, which is `min_exponent - 1`.
+    const int exp = m < std::numeric_limits<T>::min()
+                  ? std::numeric_limits<T>::min_exponent - 1
+                  : std::ilogb(m);
+ 
+    // We consider `x` and `y` equal if the difference between them is
+    // within `n` ULPs.
+    return std::fabs(x - y) <= ulps * std::ldexp(std::numeric_limits<T>::epsilon(), exp);
+}
+
+
+/// prints a vector
+template <typename T>
+void printv(const T &vec) {
+	std::cout << "std::vector { ";
+	for (const auto &x : vec) {
+		std::cout << x << ' ';
+	}
+	std::cout << '}' << std::endl;
+}
+
+/// An allocator that aligns memory to 64 bytes. Needed for AVX instructions.
+/// C++17 required for std::align_val_t 
+template <class T>
+struct align_alloc : public std::allocator<T> {
+	constexpr T* allocate( std::size_t n ) {
+		return static_cast<T*>(::operator new(sizeof (T) * n, std::align_val_t{64}));
+	}
+
+	constexpr void deallocate( T* p, std::size_t n ) {
+		::operator delete(p, std::align_val_t{64});
+	}
+};;
+
+/// alias prak::vector that is the same as std::vector, but uses aligned allocator 
+template <typename T>
+using vector = std::vector<T, align_alloc<T>>;
+
+/// prak value / pair value: a value with an error
+template <typename T>
+struct pvalue {	T val, err; };
+
+template <typename T>
+std::ostream &operator<<(std::ostream &os, const struct pvalue<T> &p) {
+	/* return os << "value {" << p.val << "±" << p.err << "}"; */
+	return os << p.val << "±" << p.err;
+}
+
+} // namespace prak
diff --git a/libprakipp/include/prakmath.hpp b/libprakipp/include/prakmath.hpp
new file mode 100644
index 0000000..342c0cd
--- /dev/null
+++ b/libprakipp/include/prakmath.hpp
@@ -0,0 +1,289 @@
+#pragma once
+
+#include <cmath>
+#include <concepts>
+
+#ifdef __x86_64__
+#include <immintrin.h>
+#endif
+
+#include "prakcommon.hpp"
+#include "prakmatrix.hpp"
+
+namespace prak {
+
+// :)
+constexpr const double PI = 3.141592657;
+constexpr const float fPI = 3.141592657f;
+
+/// defines a type that supports arithmetic operation
+template <typename T>
+concept arithmetic = requires(T a) {
+	a + a; 
+	a * a; 
+	a - a; 
+	a / a; 
+};
+
+/// TODO: remove
+enum struct operation { mul, div, add, sub };
+
+/// vector multiply: fallback for non-floating-point types
+template <arithmetic T>
+void vmul(const T *op1, const T *op2, T *dest, size_t s) {
+	for (size_t i = 0; i < s; ++i) {
+		dest[i] = op1[i] * op2[i]; break;
+	}
+}
+
+/// vector multiply: float implementation
+template <>
+inline void vmul(const float *op1, const float *op2, float *dest, size_t s) {
+	if (s < 8) goto scalar; 
+	__m256 b1;
+	__m256 b2;
+
+	for (size_t i = 0; i < s / 8; ++i) {
+		b1 = _mm256_load_ps(op1 + 8*i);
+		b2 = _mm256_load_ps(op2 + 8*i);
+		b1 = _mm256_mul_ps(b1, b2);
+		_mm256_store_ps(dest + 8*i, b1);
+	}
+scalar:
+	for (size_t i = s - s % 8; i < s; ++i) {
+		dest[i] = op1[i] * op2[i];
+	}
+}
+
+/// vector multiply: double implementation
+template <>
+inline void vmul(const double *op1, const double *op2, double *dest, size_t s) {
+	if (s < 4) goto scalar;
+	__m256d b1;
+	__m256d b2;
+	
+	for (size_t i = 0; i < s / 4; ++i) {
+		b1 = _mm256_load_pd(op1 + 4*i);
+		b2 = _mm256_load_pd(op2 + 4*i);
+		b1 = _mm256_mul_pd(b1, b2);
+		_mm256_store_pd(dest + 4*i, b1);
+	}
+scalar:
+	for (size_t i = s - s % 4; i < s; ++i) {
+		dest[i] = op1[i] * op2[i];
+	}
+}
+
+/// vector division: non-floating-point types fallback
+template <arithmetic T>
+void vdiv(const T *op1, const T *op2, T *dest, size_t s) {
+	for (size_t i = 0; i < s; ++i) {
+		dest[i] = op1[i] / op2[i]; break;
+	}
+}
+
+/// vector division: floating point: single precision
+template <>
+inline void vdiv(const float *op1, const float *op2, float *dest, size_t s) {
+	if (s < 8) goto scalar;
+	__m256 b1;
+	__m256 b2;
+
+	for (size_t i = 0; i < s / 8; ++i) {
+		b1 = _mm256_load_ps(op1 + 8*i);
+		b2 = _mm256_load_ps(op2 + 8*i);
+		b1 = _mm256_div_ps(b1, b2);
+		_mm256_store_ps(dest + 8*i, b1);
+	}
+scalar:
+	for (size_t i = s - s % 8; i < s; ++i) {
+		dest[i] = op1[i] / op2[i];
+	}
+}
+
+/// vector division: floating point: double precision
+template <>
+inline void vdiv(const double *op1, const double *op2, double *dest, size_t s) {
+	if (s < 4) goto scalar;
+	__m256d b1;
+	__m256d b2;
+
+	for (size_t i = 0; i < s / 4; ++i) {
+		b1 = _mm256_load_pd(op1 + 4*i);
+		b2 = _mm256_load_pd(op2 + 4*i);
+		b1 = _mm256_div_pd(b1, b2);
+		_mm256_store_pd(dest + 4*i, b1);
+	}
+scalar:
+	for (size_t i = s - s % 4; i < s; ++i) {
+		dest[i] = op1[i] / op2[i];
+	}
+}
+
+inline float finalize(__m256 reg) {
+	// reg: x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 
+	__m128 extr = _mm256_extractf128_ps(reg, 1);
+	// extr: x0 | x1 | x2 | x3
+	// __mm256_castps256_ps128: x4 | x5 | x6 | x7
+	extr = _mm_add_ps(extr, _mm256_castps256_ps128(reg));
+	// extr: x0+x4 | x1 + x5 | x2 + x6 | x3 + x7
+	extr = _mm_hadd_ps(extr, extr);
+	// extr: x0 + x1 + x2 + x5 | x2 + x3 + x6 x7 | ... | ...
+	return extr[0] + extr[1];
+}
+
+inline double finalize(__m256d reg) {
+	// reg: x0 | x1 | x2 | x3
+	reg = _mm256_hadd_pd(reg, reg);
+	// reg: x0 + x1 | x2 + x3 | ... | ...
+	return reg[0] + reg[1];
+}
+
+/// Fallback generic method to sum a vector
+template <arithmetic T>
+T vsum(const T *op, size_t size) {
+	T res;
+	for (size_t i = 0; i < size; ++i)
+		res += op[i];
+	return res;
+}
+
+/// Sum a vector, float implementation
+template <>
+inline float vsum(const float *op, size_t size) {
+	__m256 buf;
+	buf = _mm256_setzero_ps();
+	if (size < 8) goto scalar;
+	for (size_t i = 0; i < size / 8; ++i) {
+		__m256 next = _mm256_load_ps(op + 8*i);
+		buf = _mm256_add_ps(buf, next);
+	}
+scalar:
+	float linear = 0;
+	for (size_t i = size - size%8; i < size; ++i) {
+		linear += op[i];
+	}
+	return linear + finalize(buf);
+}
+
+/// Sum a vector, double implementation
+template <>
+inline double vsum(const double *op, size_t size) {
+	__m256d buf;
+	buf = _mm256_setzero_pd();
+	if (size < 4) goto scalar;
+	for (size_t i = 0; i < size / 4; ++i) {
+		__m256d next = _mm256_load_pd(op + 4*i);
+		buf = _mm256_add_pd(buf, next);
+	}
+scalar:
+	double linear = 0;
+	for (size_t i = size - size%8; i < size; ++i) {
+		linear += op[i];
+	}
+	return linear + finalize(buf);
+}
+
+/// calculate least-squares linear approximation to fit data
+/// ax+b = y (ss is an error of Y value)
+template <std::floating_point T>
+void least_squares_linear(
+	const prak::vector<T> &xs, 
+	const prak::vector<T> &ys, 
+	const prak::vector<T> &ss, 
+	struct pvalue<T> &a, 
+	struct pvalue<T> &b)
+{
+	if (xs.size() != ys.size() || xs.size() != ss.size()) {
+		std::cout <<   "x.size() = " << xs.size() 
+			  << ", y.size() = " << ys.size()
+			  << ", s.size() = " << ss.size() << std::endl;
+		return;
+	}
+	[[assume(xs.size() == ys.size() && ys.size() == ss.size())]];
+	size_t sz = xs.size();
+
+	prak::vector<T> ones(sz);
+	prak::vector<T> ssq(sz);
+	prak::vector<T> ssq_m1(sz);
+	prak::vector<T> buf(sz);
+	prak::vector<T> buf_xs_ssq_m1(sz);
+	std::fill(ones.begin(), ones.end(), (T)1); // ones: [1]
+	vmul(ss.data(), ss.data(), ssq.data(), sz); // ssq: [ss*ss]
+	vdiv(ones.data(), ssq.data(), ssq_m1.data(), sz); // ssq_m1: [1/ss^2]
+	vmul(xs.data(), ssq_m1.data(), buf_xs_ssq_m1.data(), sz); // [xs / ss^2] 
+
+	const T *ysd = ys.data(),
+		*xsd = xs.data();
+	T *ssqd = ssq.data(),
+	  *ssq_m1d = ssq_m1.data(),
+	  *onesd = ones.data(),
+	  *bufd = buf.data(),
+	  *buf_xs_ssq_m1d = buf_xs_ssq_m1.data();
+
+	vmul(buf_xs_ssq_m1d, xsd, bufd, sz);	// buf: [xs^2 / ss^2]
+	T d1 = vsum(bufd, sz); 			// sum([xs^2 / ss^2])
+	T ssq_m1sum = vsum(ssq_m1d, sz);	// sum(1 / ss^2)
+	vmul(xsd, ssq_m1d, bufd, sz);		// buf: [xs / ss^2]
+	T d2 = vsum(buf_xs_ssq_m1d, sz);	// sum([xs / ss^2])
+	T D = d1 * ssq_m1sum - d2*d2;		// sum((xs/ss)^2) * sum(1/ss^2) - sum(xs/ss^2)^2
+	
+	vmul(ysd, buf_xs_ssq_m1d, bufd, sz);	// buf: [ys*xs/ss^2]
+	T da1 = vsum(bufd, sz);			// sum([ys*xs/ss^2])
+	vmul(ysd, ssq_m1d, bufd, sz);		// buf: [ys/ss^2]
+	T da2 = vsum(bufd, sz);			// sum([ys/ss^2])
+	T DA = da1 * ssq_m1sum - da2 * d2;	// sum([ys*xs/ss^2]) * sum([1/ss^2]) - sum([ys/ss^2]) * sum(xs/ss^2)
+	
+	T DB = d1 * da2 - d2 * da1;		// sum([xs^2/ss^2]) * sum([ys/ss^2]) - sum([xs/ss^2]) * sum(ys*xs/ss^2)
+	
+	a.val = DA/D;
+	a.err = sqrt(ssq_m1sum / D);
+
+	b.val = DB/D;
+	b.err = sqrt(d1 / D);
+}
+
+
+/// May throw std::bad_optional_access
+template <typename T>
+std::enable_if<std::is_arithmetic_v<T>, std::vector<pvalue<T>>> 
+polynomial_regression(
+		size_t degree, 
+		std::vector<T> data_x, 
+		std::vector<T> data_y, 
+		std::optional<std::vector<T>> data_errors = std::nullopt)
+{
+	++degree; // hack)
+	size_t data_size = data_x.size();
+	if (data_size != data_y.size() || (data_errors.has_value() && data_errors->size() != data_size)) 
+		throw dimension_error("Xs, Ys or Sigmas do not match sizes");
+	struct matrix<T> X(data_size, degree),
+			 Y(data_size, 1),
+			 B(degree, 1),
+			 W = matrix<T>::identity(data_size);
+	// initialize X
+	for (size_t row = 0; row < X.rows; ++row) {
+		X.SUBSCR_OPRTR(row, 0) = 1;
+		for (size_t col = 1; col < X.cols; ++col) {
+			X.SUBSCR_OPRTR(row, col) = X.SUBSCR_OPRTR(row, col-1) * data_x[row]; 
+		}
+	}
+	// initialize Y
+	std::memcpy(Y.data(), data_y.data(), sizeof (T) * data_size);
+	// initialize W
+	if (data_errors.has_value()) {
+		std::vector<T> &err_value = *data_errors;
+		for (size_t i = 0; i < err_value.size(); ++i)
+			W.data()[i * (data_size + 1)] = 1 / err_value[i] / err_value[i];
+	}
+	std::cerr << X << '\n' << Y << '\n' << W << '\n';
+	matrix<T> X_T_W = X.tr() * W;
+	matrix<T> tmp1 = (X_T_W * X).inv().value();
+	B = tmp1 * X_T_W * Y;
+	B.print();
+	// TODO: FINISH (with covariation matrix)
+	return {};
+}
+
+
+} // namespace prak
diff --git a/libprakipp/include/prakmatrix.hpp b/libprakipp/include/prakmatrix.hpp
new file mode 100644
index 0000000..87b38b4
--- /dev/null
+++ b/libprakipp/include/prakmatrix.hpp
@@ -0,0 +1,185 @@
+#pragma once
+
+#include "prakcommon.hpp"
+
+#include <cstring>
+#include <iomanip>
+#include <ostream>
+#include <vector>
+
+namespace prak {
+
+struct dimension_error : public std::logic_error { 
+	explicit dimension_error(const char *what): std::logic_error(what) {} 
+};
+
+/// A class to represent a matrix. Works as an extension to std::vector, so all methods of std::vector are available on this class
+/// BIG TODO: replave `throw` with `std::optional` return type
+template <typename T>
+struct matrix : public std::vector<T> {
+	size_t rows, cols;
+	size_t feq_precision = 10;
+	const T &SUBSCR_OPRTR(size_t row, size_t col) const {
+		return this->at(row * cols + col);
+	}
+	T &SUBSCR_OPRTR(size_t row, size_t col) {
+		return this->at(row * cols + col);
+	}
+	matrix(void) = default;
+	matrix(struct matrix<T> &&) = default;
+	matrix(const struct matrix<T> &) = default;
+	struct matrix<T> &operator=(const struct matrix<T> &) = default;
+	explicit matrix(size_t rows, size_t cols, T def)
+		: rows{rows}, cols{cols}, std::vector<T>(rows * cols, def) {}
+	explicit matrix(size_t rows, size_t cols, T *data = nullptr)
+		: rows{rows}, cols{cols}, std::vector<T>(rows * cols) {
+		if (data != nullptr) std::memcpy(this->data(), data, rows * cols);
+	}
+	explicit matrix (size_t rows, size_t cols, std::vector<T> &&data) noexcept(false)
+		: rows{rows}, cols{cols}, std::vector<T>(std::move(data)) {
+		if (this->size() != rows * cols) throw std::out_of_range("data.size() != rows*cols");
+	}
+	/// add 2 matrixes
+	struct matrix<T> operator+(const struct matrix<T> &other) noexcept(false) {
+		if (other.rows != rows || other.cols != cols) 
+			throw dimension_error("matrix add: dimensions of 'other' do not match dimensions of 'this'");
+		struct matrix<T> ret(rows, cols);
+		for (size_t i = 0; i < this->size(); ++i) {
+			ret.data()[i] = this->data()[i] + other.data()[i];
+		}
+		return ret;
+	}
+	/// perform matrix multiplication
+	struct matrix<T> operator*(const struct matrix<T> &other) noexcept(false) {
+		if (cols != other.rows) 
+			throw dimension_error("Matrix mul: 'other.rows' does not match 'this.cols'");
+		struct matrix<T> ret(rows, other.cols, T{});
+		for (size_t i = 0; i < rows; ++i) {
+			for (size_t j = 0; j < other.cols; ++j) {
+				for (size_t k = 0; k < cols; ++k) 
+					ret.SUBSCR_OPRTR(i, j) += SUBSCR_OPRTR(i, k) * other.SUBSCR_OPRTR(k, j);
+			}
+		}
+		return ret;
+	}
+	/// multiply matrix by a vector [other.size() x 1]
+	std::enable_if<std::is_arithmetic_v<T>, std::vector<T>>
+	operator*(const std::vector<T> &other) noexcept(false) {
+		if (cols != other.size()) throw dimension_error("Matrix mul: 'other.rows' does not match 'this.cols'");
+		std::vector<T> ret(rows, T{});
+		for (size_t row = 0; row < rows; ++rows) {
+			for (size_t k = 0; k < cols; ++k)
+				ret[row] += SUBSCR_OPRTR(row, k) * other[k];
+		}
+		return ret;
+	}
+	/// multiply matrix by a scalar
+	template <typename Arg>
+	std::enable_if_t<std::is_arithmetic_v<Arg>, struct matrix<T>> 
+	operator*(const Arg other) const {
+		struct matrix<T> ret(rows, cols);
+		for (size_t i = 0; i < this->size(); ++i)
+			ret.data()[i] = this->data()[i] * other;
+		return ret;
+	}
+	/// Tries to find an inverse of the matrix by using gaussian elimination
+	/// TODO: handle the case where the inverse does not exist
+	std::enable_if_t<std::is_floating_point_v<T>, std::optional<struct matrix<T>>>
+	inv(void) const noexcept(false) {
+		if (rows != cols) throw dimension_error("Matrix must be square to be inverted");
+		struct matrix<T> gaussian(rows, cols * 2, T{});
+		for (size_t row = 0; row < rows; ++row) {
+			std::memcpy(gaussian.data() + (row * cols * 2 ), this->data() + row * cols, cols * sizeof (T));
+			gaussian.data()[row * cols * 2 + cols + row] = 1;
+		}
+		for (size_t row = 0; row < rows; ++row) {
+			T &fst_row_v = gaussian.SUBSCR_OPRTR(row, row);
+			bool row_found = true;
+			if (fequal<T>(fst_row_v, 0, feq_precision)) row_found = gaussian.find_suitable_row(row);
+			if (!row_found) return std::nullopt;
+			gaussian.mul_row(row, 1/gaussian.SUBSCR_OPRTR(row, row));
+			for (size_t pr_row = 0; pr_row < rows; ++pr_row) {
+				if (row == pr_row) continue;
+				T& row_n_v = gaussian.SUBSCR_OPRTR(pr_row, row);
+				if (fequal<T>(row_n_v, 0, feq_precision)) continue;
+				gaussian.muladd_row(row, pr_row, -row_n_v/fst_row_v);
+			}
+		}
+		struct matrix<T> ret(rows, cols);
+		for (size_t row = 0; row < rows; ++row) {
+			std::memcpy(ret.data() + row * cols, gaussian.data() + row * 2*cols + cols, cols * sizeof (T));
+		}
+		return ret;
+
+	}
+	struct matrix<T> tr(void) const {
+		struct matrix<T> ret(cols, rows);
+		for (size_t row = 0; row < rows; ++row)
+			for (size_t col = 0; col < cols; ++col)
+				ret.SUBSCR_OPRTR(col, row) = SUBSCR_OPRTR(row, col);
+		return ret;
+	}
+	/// swaps rows `r1`, `r2`
+	void swap_rows(size_t r1, size_t r2) {
+		T* tmpbuf = new T[cols];
+		size_t rowsize = cols * sizeof (T);
+		std::memcpy(tmpbuf, this->data() + cols * r1, rowsize);
+		std::memcpy(this->data() + cols * r1, this->data() + cols * r2, rowsize);
+		std::memcpy(this->data() + cols * r2, tmpbuf, rowsize);
+		delete[] tmpbuf;
+	}
+	/// multiplies `r_src` row by number `mul` and adds it ro row `r_dest`
+	void muladd_row(size_t r_src, size_t r_dest, T mul) {
+		for (size_t col = 0; col < cols; ++col) {
+			size_t r_src_ofs = cols * r_src + col;
+			size_t r_dest_ofs = cols * r_dest + col;
+			this->data()[r_dest_ofs] += this->data()[r_src_ofs] * mul;
+		}
+	}
+	/// multiplies a row by a number
+	void mul_row(size_t r1, T mul) {
+		for (size_t ind = r1 * cols; ind < (r1 + 1) * cols; ++ind)
+			this->data()[ind] *= mul;
+	}
+	/// finds a row that contains non-zero number at index [row = i > `row`, column=`row`].
+	/// will only search for the rows below `row` and swap them
+	/// returns whether the swap occured (whether the row was found)
+	bool find_suitable_row(size_t row) {
+		for (size_t i = row + 1; i < rows; ++i) {
+			if (SUBSCR_OPRTR(i, row) != 0) { 
+				swap_rows(i, row);
+				return true;
+			}
+		}
+		return false;
+	}
+	
+	static struct matrix<T> identity(size_t rank)  {
+		struct matrix<T> ret(rank, rank);
+		for (size_t i = 0; i < rank * rank; i += (rank + 1)) {
+			ret.data()[i] = 1;
+		}
+		return ret;
+	}
+
+	std::ostream &print(std::ostream &out = std::cout, size_t width = 10) const {
+		for (size_t row = 0; row < rows; ++row) {
+			for (size_t col = 0; col < cols; ++col)
+				out << "|" << std::setw(width) << SUBSCR_OPRTR(row, col); 
+			std::cout << "|\n";
+		}
+		return out;
+	}
+};
+
+template<typename T>
+void operator/(const int x, const struct matrix<T> &m) {
+	std::cout << __PRETTY_FUNCTION__ << std::endl;
+}
+
+template <typename T>
+std::ostream &operator<<(std::ostream &out, const struct matrix<T> &m) {
+	return m.print(out);
+}
+
+} // namespace prak
diff --git a/libprakipp/include/praktable.hpp b/libprakipp/include/praktable.hpp
new file mode 100644
index 0000000..53519a5
--- /dev/null
+++ b/libprakipp/include/praktable.hpp
@@ -0,0 +1,345 @@
+#pragma once
+
+#include <cstddef>
+#include <functional>
+#include <algorithm>
+#include <optional>
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <ostream>
+#include <string>
+#include <vector>
+#include <cmath>
+
+
+#include "prakcommon.hpp"
+#include "prakmath.hpp"
+
+namespace prak {
+
+/// truncate a string to n characters, or return in unmodified
+inline std::string truncate_or(const std::string &str, size_t n) {
+	if (str.size() >= n) return str.substr(0, n);
+	return str;
+}
+
+
+/// Unused for now.
+/// TODO: Remove
+template <typename T> 
+struct opt_value {
+	enum struct t: unsigned char {
+		val	= 0,
+		up	= 1 << 0, 
+		left	= 1 << 1, 
+		down	= 1 << 2, 
+		right	= 1 << 3,
+	} tag;
+	T val;
+
+	explicit opt_value(const T &v): tag{t::val}, val{v} {}
+	explicit opt_value(const opt_value::t t): tag{t} {}
+	opt_value &operator=(const T &v) {
+		val = v;
+		tag = t::val;
+		return *this;
+	}
+	opt_value &operator=(const opt_value::t t) {
+		tag = t;
+		return *this;
+	}
+
+};
+
+
+/// Class that can store, print, and apply operations to a table used by lab works
+template <typename dtype>
+class table {
+	std::vector<dtype> data;
+
+	using function_type = std::function<dtype(const std::vector<dtype> &)>;
+	using stringvec = std::vector<std::string>;
+
+	size_t index(const std::string &str) const {
+		return std::distance(names.cbegin(), std::find(names.cbegin(), names.cend(), str));
+	}
+
+	FILE* open_gnuplot() {
+#if defined(_POSIX_VERSION) || defined(__unix)
+		FILE*  = popen("gnuplot", "w");
+#else
+#warning Not implemented for non-unix operating systems
+		return NULL;
+#endif
+	}
+
+public:
+	size_t rows = 0, columns = 0;
+	friend class iterator;
+	struct iterator {
+		table *parent;
+		size_t columns;
+		size_t col_index = 0, data_index = 0;
+		using iterator_category = std::forward_iterator_tag;
+		using value_type = dtype;
+		using difference_type = ptrdiff_t;
+		using pointer = dtype *;
+		using reference = dtype &;
+		iterator(table *new_parent, const std::string &column, size_t row_idx = 0) 
+		 	: parent{new_parent}, columns(new_parent->columns) 
+		{
+			col_index = parent->index(column);
+			data_index = col_index + row_idx * new_parent->columns;
+		}
+		iterator &operator++() { data_index += columns; return *this; }
+		iterator operator++(int) { iterator ret = *this; ++(*this); return ret; }
+		bool operator==(iterator other) { return data_index == other.data_index && parent == other.parent && col_index == other.col_index; }
+		bool operator!=(iterator other) { return data_index != other.data_index || parent != other.parent || col_index != other.col_index; }
+		value_type operator*() { return parent->data[data_index]; };
+	};
+	/// Optional rownames: names of rows
+	std::vector<std::string> opt_rownames;
+	/// Mandatory columnnames: names of columns
+	std::vector<std::string> names;
+	/// width used for printing, defaults to 8
+	size_t column_width = 8;
+
+	/// default constructor
+	table() = default;
+
+	/// Data: array of ararys, not freed automatically.
+	explicit table(const std::vector<std::string> &strings, dtype **data, size_t rows /*size_t columns = strings.size()*/)
+		: rows{rows}, columns{strings.size()} 
+	{
+		names = strings;
+
+		data = std::vector<dtype>(rows * strings.size());
+		for (size_t i = 0; i < rows; ++i)
+			for (size_t j = 0; j < strings.size(); ++j)
+				data[i * strings.size() + j] = data[i][j];
+	}
+	
+	/// Strings: names for columns
+	/// `new_data` format: { { entry1_a, entry2_a, ...} { entry1_b, entry2_b, ... }, ... }
+	/// where `a`, `b`, ... are columns
+	explicit table(std::vector<std::string> &&strings, std::vector<std::vector<dtype>> &&new_data) 
+		: rows{new_data.size() ? new_data[0].size() : 0}, columns{strings.size()} 
+	{
+		names = strings;
+		data = std::vector<dtype>(new_data.size() * rows);
+		auto dit = data.begin();
+		for (size_t j = 0; j < rows; ++j) {
+			for (size_t i = 0; i < new_data.size(); ++i) {
+				*dit = new_data[i][j];
+				++dit;
+			}
+		}
+	}
+
+	iterator begin(std::string column) { return iterator(this, column); }
+	iterator end(std::string column) { return iterator(this, column, rows); }
+
+	dtype & SUBSCR_OPRTR (const std::string &column, size_t row) {
+		return data.at(names.size() * row + index(column));
+	}
+
+	dtype & SUBSCR_OPRTR (size_t column, size_t row) {
+		return data.at(names.size() * row + column);
+	}
+
+	/// prints a table. defaults to using std::cout, but any std::ostream can be passed in it.
+	void print(std::ostream &stream = std::cout) const {
+		stream << "columns: " << columns << ", rows: " << rows << std::endl;
+		bool print_names = opt_rownames.size() == data.size() / columns;
+		size_t topchars = (column_width + 1) * (columns + print_names) + 1;
+
+		std::string rowsep(column_width, '_');
+		for (size_t i = 0; i < columns + print_names - 1; ++i) {
+			rowsep.append(1, '|');
+			rowsep.append(column_width, '_');
+		}
+
+		stream << std::left << std::string(topchars, '_') << std::endl;
+		if (print_names) stream << '|' << std::setw(column_width) << ' ';
+
+		for (const auto &s : names)
+			stream << '|' << std::setw(column_width) << truncate_or(s, column_width-1);
+
+		for (size_t i = 0; auto x : data) {
+			if (i % columns == 0) {
+				stream << '|' << std::endl;
+				stream << '|' << rowsep << '|' << std::endl;
+				if (print_names) 
+					stream << '|' << std::setw(column_width) << truncate_or(opt_rownames[i / columns], column_width);
+			}
+			stream << '|' << std::setw(column_width) << x;
+			++i;
+		}
+		stream << '|' << std::endl << '|' << rowsep << '|' << std::endl;
+	}
+
+	/// Returns whether the amount of names is correct
+	/// If it is incorrect, names won't be displayed during printing
+	bool set_rownames(std::vector<std::string> &&names) {
+		opt_rownames = names;
+		return opt_rownames.size() == data.size() / names.size();
+	}
+
+	/// apply a function to several columns and store result in another column
+	/// function must accept std::vector or arguments
+	table &apply(function_type function, stringvec args, std::optional<std::string> result) {
+		size_t result_index = result.has_value() ? index(*result) : 0;
+		for (size_t i = 0; i < rows; ++i) {
+			std::vector<dtype> v(args.size());
+			for (size_t j = 0; j < args.size(); ++j) 
+				v[j] = SUBSCR_OPRTR(args[j], i);
+			if (result.has_value()) data[columns * i + result_index] = function(v);
+			else (void)function(v);
+		}	
+		return *this;
+	}
+	
+	/// adds a column with name `name` and data `column_data`
+	void add_column(std::string name, std::vector<dtype> column_data) {
+		std::vector<dtype> data_new(rows * (++columns));
+
+		for (size_t row = 0; row < rows; ++row) {
+			for (size_t column = 0; column < names.size(); ++column) // columns variable is incremented already
+				data_new[row * columns + column] = data[row * names.size() + column];
+			data_new[(row + 1) * columns - 1] = column_data[row];
+		}
+		data = std::move(data_new);
+		names.push_back(name);
+	}
+
+	/// Appends a column to the table. if name is set, appends it to `opt_rownames`
+	void add_row(std::vector<dtype> values, std::optional<std::string> name = std::nullopt) {
+		data.resize(columns * (++rows));
+		std::copy_n(values.cbegin(), columns, data.end() - columns);
+		if (name.has_value()) opt_rownames.push_back(*name);
+	}
+
+	friend std::ostream& operator<<(std::ostream &os, table<dtype> &t) {
+		t.print(os);
+		return os;
+	}
+	
+	/// Reads a table from a file in a format:
+	/// ```
+	/// col1 col2 col3 ...
+	/// val1 val2 val3 ...
+	/// val4 val5 val6 ...
+	/// ...
+	/// ```
+	/// Note tha `val` may either be a real number or a question mark, denoting that the value is unknown
+	/// `col` may be any string without whitespaeces.
+	/// if the first column is named "__name__" (as in python), first val in each row is a string used as
+	/// a row name.
+	void read(std::ifstream& f) {
+		std::string header;
+		std::getline(f >> std::ws, header);
+		std::istringstream h_stream(header);
+		std::string buffer;
+
+		bool read_names = false;
+		h_stream >> buffer;
+		if (buffer == "__name__") read_names = true;
+		else names.push_back(buffer);
+
+		for (size_t i = read_names ? 0 : 1; h_stream >> buffer; ++i) 
+			names.push_back(buffer);
+		
+		std::vector<dtype> tmp_row(names.size());
+		while (!(f >> std::ws).eof()) {
+			if (read_names) { 
+				f >> buffer;
+				opt_rownames.push_back(buffer);
+			}
+			for (auto it = tmp_row.begin(); it != tmp_row.end(); ++it) {
+				if ((f >> std::ws).peek() == '?') {
+					*it = NAN;
+					f >> buffer;
+				}
+				else f >> *it;
+			}
+			data.resize(data.size() + names.size());
+			std::copy_n(tmp_row.cbegin(), names.size(), data.end() - names.size());
+			++rows;
+		}
+		columns = names.size();
+	}
+
+	/// returns an std::pair with coefficients A and B in that order
+	std::pair<prak::pvalue<dtype>, prak::pvalue<dtype>> least_squares_linear(std::string x, std::string y, std::optional<std::string> sigma = std::nullopt) {
+		prak::vector<dtype> _x(rows);
+		prak::vector<dtype> _y(rows);
+		prak::vector<dtype> _s(rows);
+
+		std::copy(begin(x), end(x), _x.begin());
+		std::copy(begin(y), end(y), _y.begin());
+		if (sigma.has_value()) std::copy(begin(*sigma), end(*sigma), _s.begin());
+		else _s = prak::vector<dtype>(rows, static_cast<dtype>(1));
+
+		std::pair<prak::pvalue<dtype>, prak::pvalue<dtype>> ret;
+		prak::least_squares_linear<dtype>(_x, _y, _s, ret.first, ret.second);
+		return ret;
+	}
+
+	/// Serialize data in format `data[args[0]][i] data[args[1]][i] data[args[2]][i]...`
+	void print_plot(const stringvec &args, std::ostream &out = std::cout) const {
+		std::vector<size_t> offsets(args.size());
+		for (size_t i = 0; i < args.size(); ++i) {
+			offsets[i] = index(args[i]);
+		}
+		for (size_t row = 0; row < rows; ++row) {
+			size_t row_offset = columns * row;
+			for (const auto column_offset : offsets)
+				std::cout << data[row_offset + column_offset] << ' ';
+			std::cout << std::endl;
+		}
+	}
+
+	/// Serialize data in format `data[xs][i] data[ys][i] <data[ss][i]>`, readable by gnuplot with yerrorbars
+	void write_plot(const std::string &xs, const std::string &ys, std::optional<std::string> yss = std::nullopt, std::ostream &out = std::cout) const {
+		size_t nosigma = std::numeric_limits<size_t>::max();
+		size_t xsi = index(xs), ysi = index(ys), ssi = nosigma;
+		if (yss.has_value()) ssi = index(*yss);
+		for (size_t row = 0; row < rows; ++row) {
+			size_t offset = columns * row;
+			out << data[offset + xsi] << ' ' << data[offset + ysi];
+			if (ssi != nosigma) out << ' ' << data[offset+ssi];
+			out << std::endl;
+		}
+	}
+	
+	void plot_png(
+		const std::string output_filename, 
+		const std::string &x, 
+		const std::string &y, 
+		const std::optional<std::string> &xlabel = std::nullopt,
+		const std::optional<std::string> &ylabel = std::nullopt,
+		const std::optional<std::string> &sigma = std::nullopt) {
+		// TODO: Finish	
+	}	
+	
+	struct plot {
+		std::string x;
+		std::string y;
+		std::optional<std::string> sigma;
+		bool plot_points = true;
+		std::optional<std::string> label;
+	};
+
+	void plot_png(const std::string output, const std::vector<struct plot> &plots) {
+		// TODO: Finish later
+		/* FILE *gnuplot = open_gnuplot(); */
+		/* fprintf(gnuplot, */ 
+		/* 	"set terminal pngcairo enhanced size 800,600 dpi 300\n" */
+		/* 	"set output '%s'\n" */
+		/* 	, output.data() */
+		/* ); */
+	}
+};
+
+} // namespace prak
diff --git a/libprakipp/include/tests.cpp b/libprakipp/include/tests.cpp
new file mode 100644
index 0000000..af61860
--- /dev/null
+++ b/libprakipp/include/tests.cpp
@@ -0,0 +1,31 @@
+#include "prakmath.hpp"
+#include "prakmatrix.hpp"
+
+int main() {
+	using namespace prak;
+	matrix<double> A(2, 1, {-1, 1});
+	matrix<double> B(1, 2, {1, -2});
+	matrix<double> C = A*B * (1.0/1.0);
+	std::cout << A << '\n' << B << '\n' << C << std::endl;
+	matrix<double> D = C + matrix<double>(2, 2, {-1, -2, -3, -4});
+	std::getchar();
+	D.feq_precision = 10;
+	matrix<double> A_1 = D.inv().value();
+	std::cout << A_1 << std::endl;
+	std::cout << A_1*D << D*A_1 << std::endl;
+	matrix<double> E(2, 3, {1, 2, 3, 4, 5, 6});
+	std::cout << E << '\n' << E.tr() << std::endl;
+	std::vector<double> xs = {0, 1, 2, 3, 4, 5, 6, 7};
+	std::vector<double> ys = {1, 2.9, 5.2, 7.1, 9, 11.05, 12.7, 14};
+	/* std::vector<double> ss = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8}; */
+	std::vector<double> ss = {0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1};
+	/* std::vector<double> xs = {0, 1, 2}; */
+	/* std::vector<double> ys = {1, 2.9, 5.2}; */
+	/* std::vector<double> ss = {0.1, 0.2, 0.3}; */
+	polynomial_regression(2, xs, ys, std::make_optional(ss));
+
+	matrix<double> T1 (3, 3, {1, 2, 3, 0, 1, 2, 0, 0, 1});
+	matrix<double> T2 (3, 3, {4, 5, 6, 0, 4, 5, 0, 0, 6});
+	std::cout << T1 << '\n' << T2 << '\n' << T1 * T1 << std::endl; 
+
+}
diff --git a/libprakipp/src/prakmath.cpp b/libprakipp/src/prakmath.cpp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/libprakipp/src/prakmath.cpp
diff --git a/libprakipp/test_data b/libprakipp/test_data
new file mode 100644
index 0000000..a68190d
--- /dev/null
+++ b/libprakipp/test_data
@@ -0,0 +1,21 @@
+index steel_n_up steel_n_down steel_n copper_n_up copper_n_down copper_n
+0 69.8048106744458 69.8048106744458 ? 0 0 ?
+1 59.538400172313104 59.538400172313104 ? 0 0 ?
+2 49.77235665705151 49.77235665705151 ? 0 0 ?
+3 39.003794962183754 39.003794962183754 ? 0 0 ?
+4 29.41454165597367 29.41454165597367 ? 0 0 ?
+5 19.85284784036345 19.85284784036345 ? 0 0 ?
+6 9.621038299065232 9.621038299065232 ? 0 0 ?
+7 -0.9475724262480807 -0.9475724262480807 ? 0 0 ?
+8 -10.349127476774775 -10.349127476774775 ? 0 0 ?
+9 -20.602089970343936 -20.602089970343936 ? 0 0 ?
+10 -30.69633194493392 -30.69633194493392 ? 0 0 ?
+11 -40.16111422807451 -40.16111422807451 ? 0 0 ?
+12 -50.86998825863326 -50.86998825863326 ? 0 0 ?
+13 -60.712258771120425 -60.712258771120425 ? 0 0 ?
+14 -70.75473619559727 -70.75473619559727 ? 0 0 ?
+15 -80.96959205323306 -80.96959205323306 ? 0 0 ?
+16 -90.85022462488773 -90.85022462488773 ? 0 0 ?
+17 -100.67744328024175 -100.67744328024175 ? 0 0 ?
+18 -110.14378705377496 -110.14378705377496 ? 0 0 ?
+19 -120.3377964523638 -120.3377964523638 ? 0 0 ?
diff --git a/libprakipp/tests.cpp b/libprakipp/tests.cpp
new file mode 100644
index 0000000..3dda25a
--- /dev/null
+++ b/libprakipp/tests.cpp
@@ -0,0 +1,66 @@
+
+#include <iostream>
+#include <vector>
+
+#include "praktable.hpp"
+#include "prakmath.hpp"
+
+double avg(const std::vector<double> &v) {
+	return (v[0] + v[1]) / 2 ;
+}
+
+int main_1(int argc, char *argv[]) {
+	prak::table<double> t;
+	std::ifstream f("test_data");
+	t.read(f);
+	t.column_width = 16;
+	std::cout << t;
+	t.apply(&avg, {"steel_n_up", "steel_n_down"}, "steel_n");
+	t.apply(&avg, {"copper_n_up", "copper_n_down"}, "copper_n");
+	std::cout << t;
+	std::cout << "a[\"copper_n\"][5] = " << t SUBSCR_OPN "copper_n", 0 SUBSCR_CLS << std::endl;
+	for (auto it = t.begin("steel_n"); it != t.end("steel_n"); ++it) {
+		std::cout << *it << ' ';
+	}
+	auto [a, b] = t.least_squares_linear("index", "steel_n");
+	std::printf("\na = %lf %lf; b = %lf %lf\n", a.val, a.err, b.val, b.err);
+
+	return 0;
+}
+
+int main_2(int argc, char *argv[]) {
+	main_1(argc, argv);
+	return 0;
+
+	prak::vector<double> x = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+	prak::vector<double> y = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+	prak::vector<double> s = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+	struct prak::pvalue<double> val;
+	prak::least_squares_linear(x, y, s, val, val);
+	prak::printv(x);
+	prak::vmul(x.data(), x.data(), x.data(), (size_t)x.size());
+	prak::printv(x);
+	return 0;
+}
+
+int main_3(int argc, char *argv[]) {
+	volatile __m256 i = {10, 1, 10, 1, 10, 10, 11, 11};
+	volatile __m256d j = {100, 200, -100, 400};
+	printf("sum(i) = %f\n", prak::finalize(i));
+	printf("sum(j) = %lf\n", prak::finalize(j));
+	return 0;
+}
+
+int main_4(int argc, char *argv[]) {
+	prak::vector<double> xs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+	prak::vector<double> ys = {0, 2, 2, 8, 8, 10, 13, 15, 16, 18, 20};
+	prak::vector<double> ss = {1, 1, 4, 4, 1, 1, 2, 2, 1, 1, 1};
+	prak::pvalue<double> a, b;
+	prak::least_squares_linear(xs, ys, ss, a, b);
+	std::printf("a: {%lf, %lf}; b: {%lf, %lf}\n", a.val, a.err, b.val, b.err);
+	return 0;
+}
+
+int main(int argc, char *argv[]) {
+	main_1(argc, argv);
+}