1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
|
#pragma once
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <limits>
#include <optional>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include "prakcommon.hpp"
#include "prakmath.hpp"
namespace prak {
/// Returns `str` cut down to at most its first `n` characters.
/// Strings that are already shorter than `n` come back unchanged.
inline std::string truncate_or(const std::string &str, size_t n) {
    // substr clamps the requested length to the characters available,
    // so no explicit size comparison is required.
    return str.substr(0, n);
}
/// A value of type `T` paired with a one-byte tag.
/// The tag is either `t::val` (the stored value is meaningful) or one of the
/// four direction flags `up`, `left`, `down`, `right`.
/// Unused for now.
/// TODO: Remove
template <typename T>
struct opt_value {
    /// Tag values; the directions are distinct single bits.
    enum struct t: unsigned char {
        val   = 0,
        up    = 1 << 0,
        left  = 1 << 1,
        down  = 1 << 2,
        right = 1 << 3,
    };

    t tag;
    T val;

    /// Stores `v` and tags the object as holding a value.
    explicit opt_value(const T &v): tag{t::val}, val{v} {}

    /// Stores only the tag; `val` is left default-initialized.
    explicit opt_value(const opt_value::t t): tag{t} {}

    /// Assigning a `T` stores it and resets the tag to `t::val`.
    opt_value &operator=(const T &v) {
        tag = t::val;
        val = v;
        return *this;
    }

    /// Assigning a tag replaces the tag only; `val` is not touched.
    opt_value &operator=(const opt_value::t t) {
        tag = t;
        return *this;
    }
};
/// Class that can store, print, and apply operations to a table used by lab works
template <typename dtype>
class table {
std::vector<dtype> data;
using function_type = function_t<dtype>;
using stringvec = std::vector<std::string>;
/// Returns the position of the column called `str` inside `names`.
/// When no column has that name the result equals `names.size()`.
size_t index(const std::string &str) const {
    const auto first = names.cbegin();
    const auto hit = std::find(first, names.cend(), str);
    return std::distance(first, hit);
}
/// Starts a `gnuplot` process through popen() with a pipe opened for writing.
/// Only available when `_POSIX_VERSION` or `__unix` is defined; on every other
/// platform a compile-time warning is emitted and a null pointer is returned.
/// NOTE(review): the caller is presumably expected to pclose() the result - confirm.
FILE* open_gnuplot() {
#if defined(_POSIX_VERSION) || defined(__unix)
    FILE *pipe = popen("gnuplot", "w");
    return pipe;
#else
#warning Not implemented for non-unix operating systems
    return nullptr;
#endif
}
public:
size_t rows = 0, columns = 0;
friend class iterator;
struct iterator {
table *parent;
size_t columns;
size_t col_index = 0, data_index = 0;
using iterator_category = std::forward_iterator_tag;
using value_type = dtype;
using difference_type = ptrdiff_t;
using pointer = dtype *;
using reference = dtype &;
iterator(table *new_parent, const std::string &column, size_t row_idx = 0)
: parent{new_parent}, columns(new_parent->columns)
{
col_index = parent->index(column);
data_index = col_index + row_idx * new_parent->columns;
}
iterator &operator++() { data_index += columns; return *this; }
iterator operator++(int) { iterator ret = *this; ++(*this); return ret; }
bool operator==(iterator other) { return data_index == other.data_index && parent == other.parent && col_index == other.col_index; }
bool operator!=(iterator other) { return data_index != other.data_index || parent != other.parent || col_index != other.col_index; }
value_type &operator*() { return parent->data[data_index]; };
};
/// Optional rownames: names of rows
std::vector<std::string> opt_rownames;
/// Mandatory columnnames: names of columns
std::vector<std::string> names;
/// width used for printing, defaults to 12
size_t column_width = 12;
/// default constructor
table() = default;
/// Creates a table with the given column names and `rows` rows,
/// every cell initialised to `deflt`.
explicit table(const std::vector<std::string> &columns, size_t rows, const dtype &deflt)
    // Inside the initializer expressions `rows` and `columns` name the parameters.
    : data(rows * columns.size(), deflt),
      rows{rows},
      columns{columns.size()},
      names(columns)
{}
/// Data: array of ararys, not freed automatically.
explicit table(const std::vector<std::string> &columns, dtype **data, size_t rows /*size_t columns = strings.size()*/)
: rows{rows}, columns{columns.size()}
{
names = columns;
data = std::vector<dtype>(rows * columns.size());
for (size_t i = 0; i < rows; ++i)
for (size_t j = 0; j < columns.size(); ++j)
data[i * columns.size() + j] = data[i][j];
}
/// Strings: names for columns
/// `new_data` format: { { entry1_a, entry2_a, ...} { entry1_b, entry2_b, ... }, ... }
/// where `a`, `b`, ... are columns
explicit table(std::vector<std::string> &&strings, std::vector<std::vector<dtype>> &&new_data)
: rows{new_data.size() ? new_data[0].size() : 0}, columns{strings.size()}
{
names = strings;
data = std::vector<dtype>(new_data.size() * rows);
auto dit = data.begin();
for (size_t j = 0; j < rows; ++j) {
for (size_t i = 0; i < new_data.size(); ++i) {
*dit = new_data[i][j];
++dit;
}
}
}
/// Constructs a table by loading it from the file at path `file`.
/// All work is delegated to `read(const std::string&)`.
explicit table(const std::string &file) {
read(file);
}
/// Iterator positioned on the first row of `column`.
iterator begin(const std::string &column) { return iterator(this, column); }
/// Past-the-end iterator for `column` (positioned on row index `rows`).
iterator end(const std::string &column) { return iterator(this, column, rows); }
/// Bounds-checked access to the cell in column `column`, row `row`.
/// (`SUBSCR_OPRTR` is a macro that is not defined in this file.)
/// @throws std::out_of_range if the column does not exist or the resulting
///         offset lies outside the stored data
dtype & SUBSCR_OPRTR (const std::string &column, size_t row) noexcept(false) {
    // Resolve the name once; index() reports a missing column as names.size().
    const size_t col = index(column);
    if (col == names.size()) throw std::out_of_range("Column " + column + " does not exist");
    return data.at(names.size() * row + col);
}
/// Bounds-checked access to the cell at zero-based column index `column`, row `row`.
/// (`SUBSCR_OPRTR` is a macro that is not defined in this file.)
/// @throws std::out_of_range if `column` is not a valid column index or the
///         resulting offset lies outside the stored data
dtype & SUBSCR_OPRTR (size_t column, size_t row) {
    // A too-large column index would otherwise silently address a cell that
    // belongs to a following row.
    if (column >= names.size())
        throw std::out_of_range("Column index " + std::to_string(column) + " is out of range");
    return data.at(names.size() * row + column);
}
/// Prints the table as an ASCII grid. Defaults to std::cout, but any std::ostream
/// can be passed.
/// The first line reports the column and row counts. Each cell is padded to
/// `column_width`; column names are cut to `column_width - 1` characters and row
/// names to `column_width`. A leading row-name column is printed only when row
/// names exist and there is exactly one per row. A table without columns prints
/// the summary line only.
void print(std::ostream &stream = std::cout) const {
    stream << "columns: " << columns << ", rows: " << rows << std::endl;
    // Nothing to lay out without columns; this also keeps the divisions and the
    // separator loop below well defined.
    if (columns == 0) return;

    const bool print_names = !opt_rownames.empty() && opt_rownames.size() == data.size() / columns;
    const size_t cells = columns + (print_names ? 1 : 0);
    const size_t topchars = (column_width + 1) * cells + 1;

    // Separator placed under every row: one run of '_' per cell, joined by '|'.
    std::string rowsep(column_width, '_');
    for (size_t i = 1; i < cells; ++i) {
        rowsep.append(1, '|');
        rowsep.append(column_width, '_');
    }

    stream << std::left << std::string(topchars, '_') << std::endl;
    if (print_names) stream << '|' << std::setw(column_width) << ' ';
    for (const auto &s : names)
        stream << '|' << std::setw(column_width) << truncate_or(s, column_width-1);

    for (size_t i = 0; i < data.size(); ++i) {
        if (i % columns == 0) {
            // First cell of a row: close the previous line and draw the separator.
            stream << '|' << std::endl;
            stream << '|' << rowsep << '|' << std::endl;
            if (print_names)
                stream << '|' << std::setw(column_width) << truncate_or(opt_rownames[i / columns], column_width);
        }
        stream << '|' << std::setw(column_width) << data[i];
    }
    stream << '|' << std::endl << '|' << rowsep << '|' << std::endl;
}
/// Returns whether the amount of names is correct
/// If it is incorrect, names won't be displayed during printing
bool set_rownames(std::vector<std::string> &&names) {
opt_rownames = names;
return opt_rownames.size() == data.size() / names.size();
}
/// Applies a function to several columns and stores the result in another column.
/// For every row the values of the columns listed in `args` are collected, in
/// that order, into a std::vector that is passed to `function`.
/// @param function callable that accepts a std::vector of arguments
/// @param args     names of the columns supplying the arguments
/// @param result   column that receives the return value; when empty the
///                 function is still called for every row and its result dropped
/// @return reference to this table
/// @throws std::out_of_range if `result` or (for a table with rows) any column
///         in `args` does not exist
table &apply(function_type function, stringvec args, std::optional<std::string> result) {
    size_t result_index = 0;
    if (result.has_value()) {
        result_index = index(*result);
        // index() reports a missing column as names.size(); writing there would
        // corrupt a neighbouring row or run past the buffer.
        if (result_index == names.size())
            throw std::out_of_range("Column " + *result + " does not exist");
    }
    // One argument buffer reused for all rows; every slot is overwritten per row.
    std::vector<dtype> v(args.size());
    for (size_t i = 0; i < rows; ++i) {
        for (size_t j = 0; j < args.size(); ++j)
            v[j] = SUBSCR_OPRTR(args[j], i);
        if (result.has_value()) data[columns * i + result_index] = function(v);
        else (void)function(v);
    }
    return *this;
}
/// adds a column with name `name` and data `column_data`
void add_column(std::string name, std::vector<dtype> column_data) {
if (column_data.size() == 0) column_data = std::vector<dtype>(rows, dtype{});
std::vector<dtype> data_new(rows * (++columns));
for (size_t row = 0; row < rows; ++row) {
for (size_t column = 0; column < names.size(); ++column) // columns variable is incremented already
data_new[row * columns + column] = data[row * names.size() + column];
data_new[(row + 1) * columns - 1] = column_data[row];
}
data = std::move(data_new);
names.push_back(name);
}
/// Appends a column to the table. if name is set, appends it to `opt_rownames`
void add_row(std::vector<dtype> values, std::optional<std::string> name = std::nullopt) {
if (values.size() == 0) values = std::vector<dtype>(columns, dtype{});
data.resize(columns * (++rows));
std::copy_n(values.cbegin(), columns, data.end() - columns);
if (name.has_value()) opt_rownames.push_back(*name);
}
friend std::ostream& operator<<(std::ostream &os, table<dtype> &t) {
t.print(os);
return os;
}
/// Reads a table from a file in a format:
/// ```
/// col1 col2 col3 ...
/// val1 val2 val3 ...
/// val4 val5 val6 ...
/// ...
/// ```
/// Note tha `val` may either be a real number or a question mark, denoting that the value is unknown
/// `col` may be any string without whitespaeces.
/// if the first column is named "__name__" (as in python), first val in each row is a string used as
/// a row name.
void read(std::ifstream& f) {
std::string header;
std::getline(f >> std::ws, header);
std::istringstream h_stream(header);
std::string buffer;
bool read_names = false;
h_stream >> buffer;
if (buffer == "__name__") read_names = true;
else names.push_back(buffer);
for (size_t i = read_names ? 0 : 1; h_stream >> buffer; ++i)
names.push_back(buffer);
std::vector<dtype> tmp_row(names.size());
int __i = 0;
while (!(f >> std::ws).eof()) {
++__i;
if (read_names) {
f >> buffer;
opt_rownames.push_back(buffer);
}
for (auto it = tmp_row.begin(); it != tmp_row.end(); ++it) {
if ((f >> std::ws).peek() == '?') {
*it = NAN;
f >> buffer;
}
else f >> *it;
}
data.resize(data.size() + names.size());
std::copy_n(tmp_row.cbegin(), names.size(), data.end() - names.size());
++rows;
}
columns = names.size();
}
/// Reads a table from a file specified by `path`.
/// For details, refer to documentation of `void read(std::ifstream&)` overload
/// @throws std::runtime_error if the file cannot be opened
void read(const std::string &path) {
    std::ifstream f(path);
    // Reading from a stream that failed to open would never reach end-of-file.
    if (!f.is_open()) throw std::runtime_error("Cannot open file " + path);
    read(f);
}
/// Fills a specified column with the same value `v`
void fill_column(const std::string &column, dtype v) {
apply([&v](const std::vector<dtype>& _) -> dtype { return v; }, {}, column);
}
/// returns an std::pair with coefficients A and B in that order
std::pair<prak::pvalue<dtype>, prak::pvalue<dtype>>
least_squares_linear(std::string x, std::string y, std::optional<std::string> sigma, std::optional<dtype> sigma_fixed)
noexcept(false) {
if (sigma.has_value() == sigma_fixed.has_value())
throw std::invalid_argument("sigma and sigma_fixed can't both have (no) value");
prak::vector<dtype> _x(rows);
prak::vector<dtype> _y(rows);
prak::vector<dtype> _s(rows);
std::copy(begin(x), end(x), _x.begin());
std::copy(begin(y), end(y), _y.begin());
if (sigma.has_value()) std::copy(begin(*sigma), end(*sigma), _s.begin());
else _s = prak::vector<dtype>(rows, static_cast<dtype>(*sigma_fixed));
std::pair<prak::pvalue<dtype>, prak::pvalue<dtype>> ret;
prak::least_squares_linear<dtype>(_x, _y, _s, ret.first, ret.second);
return ret;
}
/// Arithmetic mean of a column: the sum of its entries divided by `rows`.
dtype col_avg(const std::string &column) {
    dtype total = dtype{};
    iterator cursor = begin(column);
    const iterator stop = end(column);
    while (cursor != stop) {
        total += *cursor;
        ++cursor;
    }
    return total / rows;
}
/// Calculate standard deviation of the column.
/// Not implemented yet: debug builds abort on the assertion, builds with NDEBUG
/// throw instead of running off the end of a value-returning function.
/// @throws std::logic_error always (when assertions are disabled)
dtype col_stddev(const std::string &column) {
    (void)column;
    assert(0);
    throw std::logic_error("table::col_stddev is not implemented");
}
/// Serialize data in format `data[args[0]][i] data[args[1]][i] data[args[2]][i]...`
void print_plot(const stringvec &args, std::ostream &out = std::cout) const {
std::vector<size_t> offsets(args.size());
for (size_t i = 0; i < args.size(); ++i) {
offsets[i] = index(args[i]);
}
for (size_t row = 0; row < rows; ++row) {
size_t row_offset = columns * row;
for (const auto column_offset : offsets)
std::cout << data[row_offset + column_offset] << ' ';
std::cout << std::endl;
}
}
/// Serialize data in format `data[xs][i] data[ys][i] <data[ss][i]>`, readable by gnuplot with yerrorbars
void write_plot(const std::string &xs, const std::string &ys, std::optional<std::string> yss = std::nullopt, std::ostream &out = std::cout) const {
size_t nosigma = std::numeric_limits<size_t>::max();
size_t xsi = index(xs), ysi = index(ys), ssi = nosigma;
if (yss.has_value()) ssi = index(*yss);
for (size_t row = 0; row < rows; ++row) {
size_t offset = columns * row;
out << data.at(offset + xsi) << ' ' << data.at(offset + ysi);
if (ssi != nosigma) out << ' ' << data[offset+ssi];
out << std::endl;
}
}
/// Serialize data into a file `file`, replacing its previous contents.
/// For details, refer to documentation for the overload with std::ostream as an argument.
/// @throws std::runtime_error if the file cannot be opened for writing
void write_plot(const std::string &file, const std::string &xs, const std::string &ys, std::optional<std::string> yss = std::nullopt) const {
    std::ofstream out(file);
    // Without this check a failed open would silently discard all output.
    if (!out.is_open()) throw std::runtime_error("Cannot open file " + file + " for writing");
    write_plot(xs, ys, yss, out);
}
/// Placeholder for rendering a single plot into a PNG file.
/// The body is empty: calling it currently has no effect and none of the
/// parameters are used.
/// NOTE(review): `x`, `y` and `sigma` are presumably column names and
/// `xlabel`/`ylabel` axis captions - confirm once implemented.
void plot_png(
const std::string output_filename,
const std::string &x,
const std::string &y,
const std::optional<std::string> &xlabel = std::nullopt,
const std::optional<std::string> &ylabel = std::nullopt,
const std::optional<std::string> &sigma = std::nullopt) {
// TODO: Finish
}
/// Description of one plot, passed in a list to the multi-plot `plot_png` overload.
/// NOTE(review): nothing in this file reads these fields yet; the meanings
/// below are inferred from the names and should be confirmed.
struct plot {
// presumably the name of the column with x values
std::string x;
// presumably the name of the column with y values
std::string y;
// presumably the name of the column with y uncertainties, if any
std::optional<std::string> sigma;
// defaults to true
bool plot_points = true;
// optional label for this plot
std::optional<std::string> label;
};
/// Placeholder for rendering several plots into one PNG file.
/// The body contains only commented-out code: calling it currently has no
/// effect and neither `output` nor `plots` is used.
void plot_png(const std::string output, const std::vector<struct plot> &plots) {
// TODO: Finish later
/* FILE *gnuplot = open_gnuplot(); */
/* fprintf(gnuplot, */
/* "set terminal pngcairo enhanced size 800,600 dpi 300\n" */
/* "set output '%s'\n" */
/* , output.data() */
/* ); */
}
};
} // namespace prak
|