#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include "prakcommon.hpp" #include "prakmath.hpp" namespace prak { // truncate a string to n characters, or return in unmodified inline std::string truncate_or(const std::string &str, size_t n) { if (str.size() >= n) return str.substr(0, n); return str; } // Unused for now. // TODO: Remove template struct opt_value { enum struct t: unsigned char { val = 0, up = 1 << 0, left = 1 << 1, down = 1 << 2, right = 1 << 3, } tag; T val; explicit opt_value(const T &v): tag{t::val}, val{v} {} explicit opt_value(const opt_value::t t): tag{t} {} opt_value &operator=(const T &v) { val = v; tag = t::val; return *this; } opt_value &operator=(const opt_value::t t) { tag = t; return *this; } }; // Class that can store, print, and apply operations to a table used by lab works template class table { std::vector data; using function_type = function_t; using stringvec = std::vector; size_t index(const std::string &str) const noexcept(false) { auto ret = std::distance(names.cbegin(), std::find(names.cbegin(), names.cend(), str)); if (ret == columns) throw std::out_of_range("Column " + str + " does not exist"); return ret; } FILE* open_gnuplot() { #if defined(_POSIX_VERSION) || defined(__unix) return popen("gnuplot", "w"); #else #warning Not implemented for non-unix operating systems return NULL; #endif } public: size_t rows = 0, columns = 0; friend class iterator; struct iterator { table *parent; size_t columns; size_t col_index = 0, data_index = 0; using iterator_category = std::bidirectional_iterator_tag; using value_type = dtype; using difference_type = ptrdiff_t; using pointer = dtype *; using reference = dtype &; iterator() = default; iterator(const iterator& other) = default; iterator& operator=(const iterator& other) = default; iterator(table *new_parent, const std::string &column, size_t row_idx = 0) : parent{new_parent}, columns(new_parent->columns) { col_index = parent->index(column); data_index = col_index + row_idx * new_parent->columns; } iterator &operator++() { data_index += columns; return *this; } iterator &operator--() { data_index -= columns; return *this; } iterator operator++(int) { iterator ret = *this; ++(*this); return ret; } iterator operator--(int) { iterator ret = *this; --(*this); return ret; } iterator &operator+(int x) { data_index += columns * x; return *this; } iterator &operator-(int x) { data_index -= columns * x; return *this; } std::strong_ordering operator<=>(const iterator& other) const { return parent == other.parent && col_index == other.col_index && data_index <=> other.data_index; } bool operator==(const iterator& other) const { return parent == other.parent && col_index == other.col_index && data_index == other.data_index; } reference operator*() { return parent->data[data_index]; }; }; struct const_iterator { const table *parent; size_t columns; size_t col_index = 0, data_index = 0; using iterator_category = std::bidirectional_iterator_tag; using value_type = dtype; using difference_type = ptrdiff_t; using pointer = const dtype *; using reference = const dtype &; const_iterator() = default; const_iterator(const const_iterator& other) = default; const_iterator& operator=(const const_iterator& other) = default; const_iterator(const table *new_parent, const std::string &column, size_t row_idx = 0) : parent{new_parent}, columns(new_parent->columns) { col_index = parent->index(column); data_index = col_index + row_idx * new_parent->columns; } const_iterator &operator++() { data_index += columns; return *this; } const_iterator &operator--() { data_index -= columns; return *this; } const_iterator operator++(int) { const_iterator ret = *this; ++(*this); return ret; } const_iterator operator--(int) { const_iterator ret = *this; --(*this); return ret; } const_iterator &operator+(int x) { data_index += columns * x; return *this; } const_iterator &operator-(int x) { data_index -= columns * x; return *this; } std::strong_ordering operator<=>(const const_iterator& other) const { return parent == other.parent && col_index == other.col_index && data_index <=> other.data_index; } bool operator==(const const_iterator& other) const { return parent == other.parent && col_index == other.col_index && data_index == other.data_index; } reference operator*() const { return parent->data[data_index]; }; }; // Optional rownames: names of rows std::vector opt_rownames; // Mandatory columnnames: names of columns std::vector names; // width used for printing, defaults to 8 size_t column_width = 12; // default constructor table() = default; table(const std::vector &cols, std::vector iters, size_t rows) : rows{rows}, columns{cols.size()} { names = cols; data = std::vector(rows * cols.size()); for (size_t row = 0; row < rows; ++row) { for (size_t col = 0; col < columns; ++col) { data[row * columns + col] = *iters[col]; ++iters[col]; } } } explicit table(const std::vector &columns, size_t rows, const dtype &deflt) : rows{rows}, columns{columns.size()} { names = columns; data = std::vector(rows * columns.size(), deflt); } // Data: array of ararys, not freed automatically. explicit table(const std::vector &columns, dtype **data, size_t rows /*size_t columns = strings.size()*/) : rows{rows}, columns{columns.size()} { names = columns; data = std::vector(rows * columns.size()); for (size_t i = 0; i < rows; ++i) for (size_t j = 0; j < columns.size(); ++j) data[i * columns.size() + j] = data[i][j]; } // Strings: names for columns // `new_data` format: { { entry1_a, entry2_a, ...} { entry1_b, entry2_b, ... }, ... } // where `a`, `b`, ... are columns explicit table(std::vector &&strings, std::vector> &&new_data) : rows{new_data.size() ? new_data[0].size() : 0}, columns{strings.size()} { names = strings; data = std::vector(new_data.size() * rows); auto dit = data.begin(); for (size_t j = 0; j < rows; ++j) { for (size_t i = 0; i < new_data.size(); ++i) { *dit = new_data[i][j]; ++dit; } } } explicit table(const std::string &file) { read(file); } iterator begin(std::string column) { return iterator(this, column); } const_iterator cbegin(std::string column) const { return const_iterator(this, column); } iterator end(std::string column) { return iterator(this, column, rows); } const_iterator cend(std::string column) const { return const_iterator(this, column, rows); } dtype & SUBSCR_OPRTR (const std::string &column, size_t row) noexcept(false) { size_t i = index(column); return data.at(names.size() * row + index(column)); } dtype & SUBSCR_OPRTR (size_t column, size_t row) { return data.at(names.size() * row + column); } const dtype & SUBSCR_OPRTR (const std::string &column, size_t row) const noexcept(false) { size_t i = index(column); return data.at(names.size() * row + index(column)); } const dtype & SUBSCR_OPRTR (size_t column, size_t row) const { return data.at(names.size() * row + column); } // prints a table. defaults to using std::cout, but any std::ostream can be passed in it. void print(std::ostream &stream = std::cout) const { stream << "columns: " << columns << ", rows: " << rows << std::endl; bool print_names = opt_rownames.size() == data.size() / columns; size_t topchars = (column_width + 1) * (columns + print_names) + 1; std::string rowsep(column_width, '_'); for (size_t i = 0; i < columns + print_names - 1; ++i) { rowsep.append(1, '|'); rowsep.append(column_width, '_'); } stream << std::left << std::string(topchars, '_') << std::endl; if (print_names) stream << '|' << std::setw(column_width) << ' '; for (const auto &s : names) stream << '|' << std::setw(column_width) << truncate_or(s, column_width-1); for (size_t i = 0; auto x : data) { if (i % columns == 0) { stream << '|' << std::endl; stream << '|' << rowsep << '|' << std::endl; if (print_names) stream << '|' << std::setw(column_width) << truncate_or(opt_rownames[i / columns], column_width); } stream << '|' << std::setw(column_width) << x; ++i; } stream << '|' << std::endl << '|' << rowsep << '|' << std::endl; } // Returns whether the amount of names is correct // If it is incorrect, names won't be displayed during printing bool set_rownames(std::vector &&names) { opt_rownames = names; return opt_rownames.size() == data.size() / names.size(); } // apply a function to several columns and store result in another column // function must accept std::vector or arguments table &apply(function_type function, std::initializer_list _args, std::optional result) { std::vector args = _args; size_t result_index = result.has_value() ? index(*result) : 0; for (size_t i = 0; i < rows; ++i) { std::vector v(args.size()); for (size_t j = 0; j < args.size(); ++j) v[j] = SUBSCR_OPRTR(args[j], i); if (result.has_value()) data[columns * i + result_index] = function(v); else (void)function(v); } return *this; } table &apply(function_type function, stringvec args, std::optional result) { size_t result_index = result.has_value() ? index(*result) : 0; for (size_t i = 0; i < rows; ++i) { std::vector v(args.size()); for (size_t j = 0; j < args.size(); ++j) v[j] = SUBSCR_OPRTR(args[j], i); if (result.has_value()) data[columns * i + result_index] = function(v); else (void)function(v); } return *this; } table& apply(function_type function, const std::string& arg, std::optional result) { size_t result_index = result.has_value() ? index(*result) : 0; for (size_t i = 0; i < rows; ++i) { const std::vector v(1, SUBSCR_OPRTR(arg, i)); if (result.has_value()) data[columns * i + result_index] = function(v); else (void)function(v); } return *this; } table &apply_n(function_type function, std::vector cols, size_t n, std::optional result) { size_t result_index = result.has_value() ? index(*result) : 0; for (size_t i = 0; i < n; ++i) { std::vector v(cols.size()); for (size_t j = 0; j < cols.size(); ++j) v[j] = *cols[j]++; if (result.has_value()) data[columns * i + result_index] = function(v); else (void)function(v); } return *this; } table &apply_function( function_type function, const std::vector cols, const std::vector sgms, const std::string& res, const std::string& ress) { std::vector __cols(cols.size()), __sgms(sgms.size()); for (size_t i = 0; i < __cols.size() && i < __sgms.size(); ++i) { __cols[i] = begin(cols[i]); __sgms[i] = begin(sgms[i]); } return apply_function_n(function, __cols, __sgms, rows, res, ress); } table &apply_function_n( function_type function, std::vector cols, std::vector sigma_cols, size_t n, const std::string &resval, const std::string &ressigma) { if (cols.size() != sigma_cols.size()) throw dimension_error("cols.size() is not equal to sigma_cols.size()"); size_t val_index = index(resval), sgm_index = index(ressigma); for (size_t i = 0; i < n; ++i) { std::vector v(cols.size()); std::vector s(cols.size()); for (size_t j = 0; j < cols.size(); ++j) { v[j] = *cols[j]++; s[j] = *sigma_cols[j]++; } data[columns * i + val_index] = function(v); data[columns * i + sgm_index] = sigma(function, v, s); } return *this; } // adds a column with name `name` and data `column_data` table &add_column(const std::string &name, std::vector column_data) { if (column_data.size() == 0) column_data = std::vector(rows, dtype{}); std::vector data_new(rows * (++columns)); for (size_t row = 0; row < rows; ++row) { for (size_t column = 0; column < names.size(); ++column) // columns variable is incremented already data_new[row * columns + column] = data[row * names.size() + column]; data_new[(row + 1) * columns - 1] = column_data[row]; } data = std::move(data_new); names.push_back(name); return *this; } table &add_column(const std::string &name, dtype dflt = dtype{}) { return add_column(name, std::vector(rows, dflt)); } table &add_columns(const std::vector &cols, dtype dflt = dtype{}) { for (const auto &str : cols) add_column(str, dflt); return *this; } // Deletes a column from a table. table &delete_col(const std::string &colname) { std::vector data_new(rows * (--columns)); size_t idx = index(colname); for (size_t column = 0; column < names.size(); ++column) { if (column == idx) continue; size_t _col = column - (column > idx); for (size_t row = 0; row < rows; ++row) data_new[row * columns + _col] = data[row * names.size() + column]; } data = std::move(data_new); names.erase(names.begin() + idx); return *this; } // Deletes several columns table &delete_cols(const stringvec &cols) noexcept(false) { size_t columns_new = columns - cols.size(); std::vector data_new(rows * columns_new); std::set idxs; for (const std::string &col : cols) idxs.insert(index(col)); size_t skipped = 0; for (size_t column = 0; column < columns; ++column) { if (idxs.count(column) != 0) { ++skipped; continue; } size_t _col = column - skipped; for (size_t row = 0; row < rows; ++row) data_new[row * columns_new + _col] = data[row * columns + column]; } stringvec names_new = stringvec(columns_new); for (size_t i = 0; const std::string &name : names) if (idxs.count(index(name)) == 0) names_new[i++] = name; names = std::move(names_new); data = std::move(data_new); columns = columns_new; return *this; } // Appends a row to the table. if name is set, appends it to `opt_rownames` table& add_row(std::vector values, std::optional name = std::nullopt) { if (values.size() == 0) values = std::vector(columns, dtype{}); data.resize(columns * (++rows)); std::copy_n(values.cbegin(), columns, data.end() - columns); if (name.has_value()) opt_rownames.push_back(*name); return *this; } friend std::ostream& operator<<(std::ostream &os, table &t) { t.print(os); return os; } // Reads a table from a file in a format: // ``` // col1 col2 col3 ... // val1 val2 val3 ... // val4 val5 val6 ... // ... // ``` // Note tha `val` may either be a real number or a question mark, denoting that the value is unknown // `col` may be any string without whitespaeces. // if the first column is named "__name__" (as in python), first val in each row is a string used as // a row name. void read(std::ifstream& f) { std::string header; std::getline(f >> std::ws, header); std::istringstream h_stream(header); std::string buffer; bool read_names = false; h_stream >> buffer; if (buffer == "__name__") read_names = true; else names.push_back(buffer); for (size_t i = read_names ? 0 : 1; h_stream >> buffer; ++i) names.push_back(buffer); std::vector tmp_row(names.size()); int __i = 0; while (!(f >> std::ws).eof()) { ++__i; if (read_names) { f >> buffer; opt_rownames.push_back(buffer); } for (auto it = tmp_row.begin(); it != tmp_row.end(); ++it) { if ((f >> std::ws).peek() == '?') { *it = NAN; f >> buffer; } else f >> *it; } data.resize(data.size() + names.size()); std::copy_n(tmp_row.cbegin(), names.size(), data.end() - names.size()); ++rows; } columns = names.size(); } // Reads a table from a file specified by `path`. // For details, refer to documentation of `void read(std::ifstream&)` overload void read(const std::string &path) { std::ifstream f(path); read(f); } // Fills a specified column with the same value `v` table &fill_column(const std::string &column, dtype v) { apply([&v](const std::vector& _) -> dtype { return v; }, std::vector{}, column); return *this; } table ©_column(const std::string &src, const std::string dest) { size_t si = index(src), di = index(dest); for (size_t i = 0; i < rows; ++i) data[i*columns + di] = data[i*columns + si]; return *this; } template table &multiply_column(const std::string &column, mult s) { size_t i; for (i = index(column); i < rows * columns; i += columns) data[i] *= s; return *this; } // returns an std::pair with coefficients A and B in that order std::pair, prak::pvalue> least_squares_linear(std::string x, std::string y, std::optional sigma, std::optional sigma_fixed) const noexcept(false) { if (sigma.has_value() == sigma_fixed.has_value()) throw std::invalid_argument("sigma and sigma_fixed can't both have (no) value"); prak::vector _x(rows); prak::vector _y(rows); prak::vector _s(rows); std::copy(cbegin(x), cend(x), _x.begin()); std::copy(cbegin(y), cend(y), _y.begin()); if (sigma.has_value()) std::copy(cbegin(*sigma), cend(*sigma), _s.begin()); else _s = prak::vector(rows, static_cast(*sigma_fixed)); std::pair, prak::pvalue> ret; prak::least_squares_linear(_x, _y, _s, ret.first, ret.second); return ret; } prak::pvalue least_squares_prop(std::string x, std::string y, std::optional sigma, std::optional sigma_fixed) const { if (sigma.has_value() == sigma_fixed.has_value()) throw std::invalid_argument("sigma and sigma_fixed can't both have (no) value"); prak::vector _x(rows); prak::vector _y(rows); prak::vector _s(rows); std::copy(cbegin(x), cend(x), _x.begin()); std::copy(cbegin(y), cend(y), _y.begin()); if (sigma.has_value()) std::copy(cbegin(*sigma), cend(*sigma), _s.begin()); else _s = prak::vector(rows, static_cast(*sigma_fixed)); prak::pvalue ret; prak::least_squares_prop(_x, _y, _s, ret); return ret; } // calculate an average of the column dtype col_avg(const std::string &column) const { dtype accum = dtype{}; for (auto it = cbegin(column); it != cend(column); ++it) accum += *it; return accum / rows; } dtype col_max(const std::string &column) const { dtype max = dtype{}; for (auto it = cbegin(column); it != cend(column); ++it) max = max < *it ? *it : max; return max; } dtype col_min(const std::string &column) const { dtype min = dtype{}; for (auto it = cbegin(column); it != cend(column); ++it) min = min > *it ? *it : min; return min; } // calculate standard deviation of the column dtype col_stddev(const std::string &column) const { dtype accum = dtype{}; dtype avg = col_avg(column); for (auto it = cbegin(column); it != cend(column); ++it) accum += (*it - avg)*(*it - avg); return std::sqrt(accum / rows); } // takes columns [columns], calculates average and standard deviation for each row, puts them into `avg` and `stddev` and deleted original columns // if create_columns is true, creates columns avg and stddev // This is common thing to do, so might as well write a function for that table& into_avgstddev(const std::vector &columns, const std::string &avg_out, const std::string &stddev_out, bool create_columns = false) { if (create_columns) { add_column(avg_out, std::vector(rows, dtype{})); add_column(stddev_out, std::vector(rows, dtype{})); } apply(avg, columns, avg_out); apply(stddev, columns, stddev_out); delete_cols(columns); return *this; } // applies a function `func` to arguments in columns `args`, stores the result in column `result` and standard error in column `result_sigma`. // `sigmas` must be in a 1-to-1 correspondance with `args` table& apply_with_err(function_t func, const stringvec &args, const stringvec &sigmas, const std::string &result, const std::string result_sigma) { if (args.size() != sigmas.size()) throw dimension_error("Args and Sigmas did not have the same dimentinons"); size_t result_index = index(result), sigma_index = index(result_sigma); for (size_t i = 0; i < rows; ++i) { std::vector __args(args.size()), __sgms(args.size()); for (size_t j = 0; j < args.size(); ++j) { __args[j] = SUBSCR_OPRTR(args[j], i); __sgms[j] = SUBSCR_OPRTR(sigmas[j], i); } data[columns * i + result_index] = func(__args); data[columns * i + sigma_index] = prak::sigma(func, __args, __sgms); } return *this; } size_t find_index(const std::string &column, const dtype &val) { size_t col_idx = index(column); for (size_t i = 0; i < rows; ++i) { if (data[col_idx + i * columns] == val) return i; } return col_idx; } // Serialize data in format `data[args[0]][i] data[args[1]][i] data[args[2]][i]...` void print_plot(const stringvec &args, std::ostream &out = std::cout) const { std::vector offsets(args.size()); for (size_t i = 0; i < args.size(); ++i) { offsets[i] = index(args[i]); } for (size_t row = 0; row < rows; ++row) { size_t row_offset = columns * row; for (const auto column_offset : offsets) std::cout << data[row_offset + column_offset] << ' '; std::cout << std::endl; } } // Serialize data in format `data[xs][i] data[ys][i] [yss][i]>`, readable by gnuplot with xyerrorbars table &write_plot_4(const std::string &file, const std::string &xs, const std::string &ys, const std::string &xss, const std::string &yss) { std::ofstream out(file); size_t xi = index(xs), yi = index(ys), xsi = index(xss), ysi = index(yss); for (size_t row = 0; row < rows; ++row) { size_t offset = columns * row; out << data[offset + xi] << ' ' << data[offset + yi] << ' ' << data[offset + xsi] << ' ' << data[offset + ysi] << std::endl; } return *this; } // Serialize data in format `data[xs][i] data[ys][i] `, readable by gnuplot with yerrorbars table &write_plot(const std::string &xs, const std::string &ys, std::optional yss = std::nullopt, std::ostream &out = std::cout) { size_t nosigma = std::numeric_limits::max(); size_t xsi = index(xs), ysi = index(ys), ssi = nosigma; if (yss.has_value()) ssi = index(*yss); for (size_t row = 0; row < rows; ++row) { size_t offset = columns * row; out << data.at(offset + xsi) << ' ' << data.at(offset + ysi); if (ssi != nosigma) out << ' ' << data[offset+ssi]; out << std::endl; } return *this; } // Serialize data into a file `file`. For details, refer to documentation for overload with std::ifstream as an argument table &write_plot(const std::string &file, const std::string &xs, const std::string &ys, std::optional yss = std::nullopt) { std::ofstream out(file); return write_plot(xs, ys, yss, out); } void plot_png( const std::string output_filename, const std::string &x, const std::string &y, const std::optional &xlabel = std::nullopt, const std::optional &ylabel = std::nullopt, const std::optional &sigma = std::nullopt) { // TODO: Finish } struct plot { std::string x; std::string y; std::optional sigma; bool plot_points = true; std::optional label; }; void plot_png(const std::string output, const std::vector &plots) { // TODO: Finish later /* FILE *gnuplot = open_gnuplot(); */ /* fprintf(gnuplot, */ /* "set terminal pngcairo enhanced size 800,600 dpi 300\n" */ /* "set output '%s'\n" */ /* , output.data() */ /* ); */ } }; } // namespace prak