ztsdb
array.hpp
1 // (C) 2016,2017 Leonardo Silvestri
2 //
3 // This file is part of ztsdb.
4 //
5 // ztsdb is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // ztsdb is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with ztsdb. If not, see <http://www.gnu.org/licenses/>.
17 
18 
19 #ifndef ARRAY_H
20 #define ARRAY_H
21 
22 #include <vector>
23 #include <sstream>
24 #include <functional>
25 #include <algorithm>
26 #include <numeric>
27 #include <exception>
28 #include <iostream>
29 #include <set>
30 #include <utility>
31 #include <memory>
32 #include <sys/types.h>
33 #include "juice/variant.hpp"
34 #include "allocator_factory.hpp"
35 #include "vector.hpp"
36 #include "string.hpp"
37 #include "misc.hpp"
38 #include "globals.hpp"
39 #include "base_types.hpp"
40 #include "dname.hpp"
41 
42 
43 //#define DEBUG_COPY
44 
45 
46 using namespace std;
47 using namespace Juice;
48 using namespace std::string_literals;
49 
50 
52 namespace arr {
53 
54  // to disambiguate sequence constructors:
55  // call them _tag as in the STL LLL
56  struct seq_to_t { };
57  struct seq_n_t { };
58  constexpr seq_to_t seq_to { };
59  constexpr seq_n_t seq_n { };
60 
61  struct convert_cons_t { };
62  constexpr convert_cons_t convert_cons {};
63 
64  typedef uint64_t idx_type;
65 
66  // a few miscellaneous helper functions:
67 
70  template <typename V>
71  inline bool checksize(const V& v) { return true; }
72  template <typename V1, typename V2>
73  inline bool checksize(const V1& v1, const V2& v2) { return v1.size() == v2.size(); }
74  template <typename V1, typename V2, typename... U>
75  inline bool checksize(const V1& v1, const V2& v2, const U&... u) {
76  return checksize<V1,V2>(v1, v2) && checksize<V1, U...>(v1, u...);
77  }
78 
83  template <typename V>
84  inline bool checkdims(const V& v) { return true; }
85  template <typename V1, typename V2>
86  inline bool checkdims(const V1& v1, const V2& v2) {
87  return v1.ncols() == v2.ncols() && v1.size() == v2.size(); }
88  template <typename V1, typename V2, typename... U>
89  inline bool checkdims(const V1& v1, const V2& v2, const U&... u) {
90  return checkdims<V1,V2>(v1, v2) && checkdims<V1, U...>(v1, u...);
91  }
92 
94  void createDir(const zstring& dirname); // throws
95 
99  void buildNames(vector<unique_ptr<Dname>>& names,
100  const Vector<idx_type> dim,
101  const AllocFactory& allocf,
102  const vector<Vector<zstring>> names_p={Vector<zstring>()}); // throws
103 
104 
105  // --------------------------------------------------------------
108  template<typename T, typename O=std::less<T>>
109  struct Array {
110 
111  typedef T value_type;
112  typedef O comparator;
113  typedef Vector<T,O> vector_type;
114 
115  // constructors -----------------------------------------------
116 
118  template<typename U>
119  Array(seq_to_t, T t1, T t2, U by,
120  std::unique_ptr<AllocFactory>&& allocf_p=std::make_unique<MemAllocFactory>())
121  : allocf(std::move(allocf_p))
122  {
123  if (by <= getInitValue<U>()) {
124  throw std::out_of_range("'by' parameter must be >= 0");
125  }
126  auto n = static_cast<idx_type>((t1 < t2 ? t2 - t1 : t1 - t2) / by) + 1;
127  dim = Vector<idx_type>({n}, allocf->get("dim"));
128  v.emplace_back(make_unique<Vector<T,O>>(noinit_tag, n, allocf->get(0)));
129 
130  auto val = t1;
131  if (t2 < t1) {
132  for (idx_type i=0; i<n; ++i) {
133  setv_nocheck(*v[0], i, val);
134  val = val - by;
135  }
136  } else {
137  for (idx_type i=0; i<n; ++i) {
138  setv_nocheck(*v[0], i, val);
139  val = val + by;
140  }
141  }
142  v[0]->setOrdered(n <= 1 || O()(t1, t2));
143  buildNames(names, dim, *allocf);
144  }
145 
147  template<typename U>
148  Array(seq_n_t, T t1, U by, idx_type n,
149  std::unique_ptr<AllocFactory>&& allocf_p=std::make_unique<MemAllocFactory>())
150  : allocf(std::move(allocf_p))
151  {
152  dim = Vector<idx_type>({n}, allocf->get("dim"));
153  v.emplace_back(make_unique<Vector<T,O>>(rsv, n, allocf->get(0)));
154  v[0]->push_back(t1);
155  for (idx_type i=1; i<n; ++i) {
156  v[0]->push_back((*v[0])[i-1] + by);
157  }
158  buildNames(names, dim, *allocf);
159  }
160 
163  template<typename INIT_TYPE_TAG>
164  Array(INIT_TYPE_TAG init,
165  const Vector<idx_type> dim_p,
166  const vector<Vector<zstring>> names_p=vector<Vector<zstring>>(),
167  std::unique_ptr<AllocFactory>&& allocf_p=std::make_unique<MemAllocFactory>())
168  : dim(dim_p), allocf(std::move(allocf_p))
169  {
170  dim = Vector<idx_type>(dim_p, allocf->get("dim"));
171  // if (dim_p.size() == 0) {
172  // throw range_error("'dims' cannot be of length 0");
173  // }
174  if (dim.size()) { // only initialize if this is not the null array
175  auto ncols = accumulate(dim.begin()+1, dim.end(), 1.0, std::multiplies<idx_type>());
176  // check each name subvector x; it has to be either 0 or of the corresponding dim[x]
177  v.resize(ncols);
178  for (idx_type n=0; n<ncols; ++n) {
179  v[n] = make_unique<Vector<T,O>>(init, dim[0], allocf->get(n));
180  }
181  buildNames(names, dim_p, *allocf, names_p);
182  }
183  }
184 
185 
188  Array(std::unique_ptr<AllocFactory>&& allocf_p)
189  : allocf(std::move(allocf_p))
190  {
191  // grab the dim vector:
192  dim = Vector<idx_type>(allocf->get("dim"));
193  // grab the data vectors
194  auto ncols = dim.size() ?
195  accumulate(dim.begin()+1, dim.end(), 1.0, std::multiplies<idx_type>()) : 0;
196  for (idx_type j=0; j<ncols; ++j) {
197  v.emplace_back(make_unique<Vector<T,O>>(allocf->get(j)));
198  }
199  // grab the name vector:
200  for (idx_type j=0; j<dim.size(); ++j) {
201  names.emplace_back
202  (make_unique<Dname>(dim[j], Vector<zstring>(allocf->get("names" + std::to_string(j)))));
203  }
204  }
205 
206 
207  Array(const Vector<idx_type> dim_p,
208  const Array<T>& v_p,
209  const vector<Vector<zstring>> names_p=vector<Vector<zstring>>(),
210  std::unique_ptr<AllocFactory>&& allocf_p=std::make_unique<MemAllocFactory>())
211  : allocf(std::move(allocf_p))
212  {
213  dim = Vector<idx_type>(dim_p, allocf->get("dim"));
214  auto ncols = dim.size() ?
215  accumulate(dim.begin()+1, dim.end(), 1.0, std::multiplies<idx_type>()) : 0;
216  // data initialization from v_p:
217  if (v_p.size() == 0) {
218  for (idx_type n=0; n<ncols; ++n) {
219  v.emplace_back(make_unique<Vector<T,O>>(rsv, dim[0], allocf->get(n)));
220  }
221  }
222  else if (v_p.size() == 1) {
223  for (idx_type n=0; n<ncols; ++n) {
224  v.emplace_back(make_unique<Vector<T,O>>(dim[0], v_p[0], allocf->get(n)));
225  }
226  } else {
227  if (v_p.getdim(0) == dim[0] && ncols == v_p.v.size()) {
228  // same dimensions, we can take advantage of that:
229  for (idx_type n=0; n<ncols; ++n) {
230  v.emplace_back(make_unique<Vector<T,O>>(*v_p.v[n], allocf->get(n)));
231  }
232  }
233  else {
234  if (v_p.size() != ncols * dim[0]) {
235  throw range_error("data does not fit array dimensions");
236  }
237  // different dimensions, copy elements one by one:
238  idx_type idx=0;
239  for (idx_type n=0; n<ncols; ++n) {
240  v.emplace_back(make_unique<Vector<T,O>>(rsv, dim[0], allocf->get(n)));
241  for (idx_type i=0; i<dim[0]; ++i) {
242  v.back()->push_back(v_p[idx++]);
243  }
244  }
245  }
246  }
247  buildNames(names, dim_p, *allocf, names_p);
248  }
249 
254  Array(const Vector<idx_type> dim_p,
255  const Vector<T,O>& v_p,
256  const vector<Vector<zstring>> names_p=vector<Vector<zstring>>(),
257  std::unique_ptr<AllocFactory>&& allocf_p=std::make_unique<MemAllocFactory>())
258  : allocf(std::move(allocf_p))
259  {
260  // if (dim_p.size() == 0) {
261  // throw range_error("'dims' cannot be of length 0");
262  // }
263  dim = Vector<idx_type>(dim_p, allocf->get("dim"));
264  auto ncols = dim.size() ?
265  accumulate(dim.begin()+1, dim.end(), 1.0, std::multiplies<idx_type>()) : 0;
266  // check each name subvector x; it has to be either 0 or of the corresponding dim[x]
267  if (v_p.size() == 0) {
268  // in R:
269  // array(vector(mode="numeric"), c(2,2))
270  // [,1] [,2]
271  // [1,] NA NA
272  // [2,] NA NA
273  // only if dim=0 is v_p.size() allowed to be 0
274  // but I think we want to do away with vectors, so this case might never arise...
275  for (idx_type n=0; n<ncols; ++n) {
276  v.emplace_back(make_unique<Vector<T,O>>(dim[0], getInitValue<T>(), allocf->get(n)));
277  }
278  }
279  else if (v_p.size() == 1) {
280  for (idx_type n=0; n<ncols; ++n) {
281  v.emplace_back(make_unique<Vector<T,O>>(dim[0], v_p[0], allocf->get(n)));
282  }
283  } else {
284  if (v_p.size() != ncols * dim[0]) {
285  throw range_error("data does not fit array dimensions");
286  }
287  idx_type offset = 0;
288  for (idx_type n=0; n<ncols; ++n) {
289  v.emplace_back(make_unique<Vector<T,O>>(v_p.begin() + offset,
290  v_p.begin() + offset + dim[0],
291  allocf->get(n)));
292  offset += dim[0];
293  }
294  }
295  buildNames(names, dim_p, *allocf, names_p);
296  }
297 
300  Array(const Vector<idx_type> dim_p,
301  const vector<unique_ptr<Vector<T,O>>>& v_p,
302  const vector<unique_ptr<Dname>>& names_p,
303  std::unique_ptr<AllocFactory>&& allocf_p=std::make_unique<MemAllocFactory>())
304  : dim(dim_p), allocf(std::move(allocf_p))
305  {
306 #ifdef DEBUG_COPY
307  if (dim_p.size() == 1) {
308  cout << "nearly copying array " << dim_p[0] << endl;
309  }
310  else {
311  cout << "nearly copying array " << dim_p[0] << "x" << dim_p[1] << endl;
312  }
313 #endif
314  for (auto& e : v_p) {
315  v.emplace_back(make_unique<Vector<T,O>>(*e));
316  }
317  for (auto& e : names_p) {
318  names.emplace_back(make_unique<Dname>(*e));
319  }
320  }
321 
322  // copy constructors:
323  Array(const Array& u,
324  std::unique_ptr<AllocFactory>&& allocf_p=std::make_unique<MemAllocFactory>())
325  : allocf(std::move(allocf_p)) {
326 #ifdef DEBUG_COPY
327  if (u.dim.size() == 1) {
328  cout << "copying array " << u.dim[0] << endl;
329  }
330  else {
331  cout << "copying array " << u.dim[0] << "x" << u.dim[1] << endl;
332  }
333 #endif
334  dim = Vector<idx_type>(u.dim, allocf->get("dim"));
335  // for each vector, make a unique_ptr point to a copy:
336  v.reserve(u.v.size());
337  for (idx_type i=0; i<u.v.size(); ++i) {
338  v.emplace_back(make_unique<Vector<T,O>>(Vector<T,O>(*u.v[i]), allocf->get(i)));
339  }
340  for (idx_type i=0; i<u.names.size(); ++i) {
341  names.emplace_back
342  (make_unique<Dname>(dim[i],
343  Vector<zstring>(u.names[i]->names,
344  allocf->get("names" + std::to_string(i)))));
345  }
346  }
347 
349  template<typename U, typename OU=std::less<U>>
351  std::unique_ptr<AllocFactory>&& allocf_p=std::make_unique<MemAllocFactory>())
352  : dim(u.dim), allocf(std::move(allocf_p))
353  {
354  v.reserve(u.v.size());
355  for (idx_type n=0; n<u.v.size(); ++n) {
356  v.emplace_back(make_unique<Vector<T,O>>(rsv, dim[0]));
357  for (idx_type r=0; r<dim[0]; ++r) {
358  v[n]->push_back(convert<T,U>((*u.v[n])[r]));
359  }
360  }
361  for (auto& e : u.names) {
362  names.emplace_back(make_unique<Dname>(Dname(*e)));
363  }
364  }
365 
366  Array(Array&& u) {
367 #ifdef DEBUG_COPY
368  if (u.dim.size() == 1) {
369  cout << "swapping array on universal reference " << u.dim[0] << endl;
370  }
371  else {
372  cout << "swapping array on universal reference " << u.dim[0] << "x" << u.dim[1] << endl;
373  }
374 #endif
375  swap(u);
376  }
377 
378  // template<typename U, class F>
379  // Array<T>(const Array<U>& u, F f) : dim(u.dim)
380  // {
381  // v.reserve(u.v.size());
382  // for (idx_type n=0; n<u.v.size(); ++n) {
383  // v.emplace_back(make_unique<Vector<T,O>>(dim[0]));
384  // for (idx_type r=0; r<dim[0]; ++r) {
385  // (*v[n])[r] = f((*u.v[n])[r]);
386  // }
387  // }
388  // for (auto& e : u.names) {
389  // names.emplace_back(make_unique<Dname>(Dname(*e)));
390  // }
391  // }
392 
393  template<typename U, class G>
394  Array(const Array<U>& u, G g, bool abba,
395  std::unique_ptr<AllocFactory>&& allocf_p=std::make_unique<MemAllocFactory>())
396  : dim(u.dim) // LLL use a type to discriminate, not a dummy arg
397  {
398  v.reserve(u.v.size());
399  for (idx_type n=0; n<u.v.size(); ++n) {
400  v.emplace_back(make_unique<Vector<T,O>>());
401  *v[n] = g(*u.v[n]);
402  }
403  for (auto& e : u.names) {
404  names.emplace_back(make_unique<Dname>(Dname(*e)));
405  }
406  }
407 
408  Array(Vector<T,O>&& vv) : dim({vv.size()}) {
409  v.emplace_back(make_unique<Vector<T,O>>(vv));
410  names.emplace_back(make_unique<Dname>(vv.size()));
411  }
412 
413  virtual ~Array() { }
414 
415  // assignment operator this won't work with the allocf LLL
416  Array& operator=(Array u) {
417 #ifdef DEBUG_COPY
418  if (u.dim.size() == 1) {
419  cout << "array assignment " << u.dim[0] << endl;
420  }
421  else {
422  cout << "array assignment " << u.dim[0] << "x" << u.dim[1] << endl;
423  }
424 #endif
425  return swap(u);
426  }
427 
428  Array& swap(Array& u) {
429  std::swap(v, u.v);
430  std::swap(dim, u.dim);
431  std::swap(names, u.names);
432  std::swap(allocf, u.allocf);
433  return *this;
434  }
435 
438  bool operator==(const Array<T>& u) const {
439  if (dim == u.dim) {
440  for (idx_type j=0; j<u.names.size(); ++j) {
441  if (*names[j] != *u.names[j]) {
442  return false;
443  }
444  }
445  for (idx_type j=0; j<u.v.size(); ++j) {
446  if (*v[j] != *u.v[j]) {
447  return false;
448  }
449  }
450  return true;
451  }
452  else {
453  return false;
454  }
455  }
456 
457  // address the Array as if it were a column vector
458  inline T& operator[](idx_type i) {
459  if (dim[0] == 0) {
460  throw range_error("subscript out of bounds");
461  }
462  idx_type col = i / dim[0];
463  idx_type off = i % dim[0];
464  if (col >= v.size() || off >= dim[0]) {
465  throw range_error("subscript out of bounds");
466  }
467  return (*v[col].get())[off];
468  }
469 
470  // address the Array as if it were a column vector
471  inline const T& operator[](idx_type i) const {
472  if (size() == 0) {
473  throw range_error("subscript out of bounds");
474  }
475  idx_type col = i / dim[0];
476  idx_type off = i % dim[0];
477 
478  if (col >= v.size() || off >= dim[0]) {
479  throw range_error("subscript out of bounds");
480  }
481  return (*v[col])[off];
482  }
483 
484 
485  // to buffer ------------------
486 
489 
490  size_t getBufferSize() const {
491  return dim.getBufferSize() + v.size() * v[0]->getBufferSize();
492  }
493 
494  size_t to_buffer(char* buf) const {
495  // what if we have null vectors... LLL
496  // figure out the size we need to allocate:
497  size_t offset = 0;
498  offset += dim.to_buffer(buf);
499  for (auto& colptr : v) {
500  offset += colptr->to_buffer(buf + offset);
501  }
502  return offset;
503  }
504 
505  // Indexing operations (subsetting, subassign) ------------------
506 
508 
514  template<typename INDEX>
515  Array<T> operator()(const INDEX& i) const {
516  Array<T> r(rsv, {0});
517 
518  idx_type ii = 0;
519  idx_type iv = 0;
520  idx_type pos = 0;
521  for (idx_type k=0; k<v.size(); ++k) {
522  i.subset(*r.v[0], *v[k], ii, iv, pos);
523  pos += dim[0];
524  }
525 
526  arr::setv(r.dim, 0, r.v[0]->size());
527  r.names[0]->resize(r.v[0]->size());
528  return r;
529  }
530 
531  template<typename INDEX>
532  Array operator()(const vector<INDEX>& i, bool drop=true, idx_type dropfirst=0) const {
533  if (i.size() == 1 && dim.size() > 1) {
534  return (*this)(i[0]);
535  }
536  if (i.size() != dim.size()) {
537  throw std::range_error("incorrect number of dimensions");
538  }
539 
540  // figure out the end size of the array
541  Vector<idx_type> rdim(i.size());
542  auto transfop = [](const INDEX& i) { return i.size(); };
543  std::transform(i.begin(), i.end(), rdim.begin(), transfop);
544  auto rdimUndropped = rdim;
545  if (drop && dropfirst<rdim.size()) { // remove all dimensions of size 1:
546  rdim.erase(std::remove_if(rdim.begin()+dropfirst,
547  rdim.end(),
548  [](idx_type i) { return i == 1; }),
549  rdim.end());
550  if (!rdim.size()) {
551  rdim.push_back(1);
552  }
553  }
554 
555  idx_type rncols = rdim.size() == 1 ? 1 :
556  accumulate(rdim.begin()+1, rdim.end(), 1.0, std::multiplies<idx_type>());
557  auto pi = vector<idx_type>(dim.size(), 0); // keep track of the current index
558  auto val = vector<idx_type>(dim.size(), 0); // keep track of the value of the index
559  auto rv = vector<unique_ptr<Vector<T,O>>>();
560  rv.reserve(rncols);
561  for (idx_type j=0; j<rncols; ++j) {
562  rv.emplace_back(make_unique<Vector<T,O>>());
563  }
564 
565  // We have two cases here; the special case which is when we
566  // drop the first dim. In that case we have one unique index
567  // 'ui' in dimension 0 and the end result is composed of
568  // extracting for all columns at index 'ui' and with these
569  // forming the resulting subsetted array:
570  if (drop && dropfirst==0 && rdimUndropped[0] == 1) {
571  idx_type col = 0;
572  idx_type uidx, pidx;
573  auto r = i[0].getfirst(uidx, pidx);
574  if (!r) {
575  // might just have a 0 dimension here, so treat like false false LLL
576  throw std::range_error("failed getfirst on dim[0] = 1");
577  }
578  if (uidx >= dim[0]) {
579  throw range_error("subscript out of bounds");
580  }
581  bool res = INDEX::getfirstcol(col, val, pi, i, dim);
582  idx_type rcol = 0;
583  while (res) {
584  if (col >= v.size()) {
585  throw range_error("subscript out of bounds");
586  }
587  rv[rcol]->push_back((*v[col])[uidx]);
588  if (rv[rcol]->size() == rdim[0]) {
589  ++rcol;
590  }
591  res = INDEX::getnextcol(col, val, pi, i, dim);
592  }
593  }
594  // and here is the usual case where the first dimension is not
595  // dropped and we just subset each column; the result of the
596  // subset is of course stored in rv (our result vector of
597  // vector):
598  else {
599  idx_type col = 0;
600  bool res = INDEX::getfirstcol(col, val, pi, i, dim);
601  idx_type rcol = 0;
602  while (res) {
603  i[0].subset(*rv[rcol], *v[col]);
604  res = INDEX::getnextcol(col, val, pi, i, dim);
605  ++rcol;
606  }
607 
608  // for indices that do not know the correct end size, we check
609  // again to either resize or reorganize in case of a drop LLL
610  if (rv.size() && rv[0]->size() != rdim[0]) {
611  if (drop && dropfirst==0 && rv[0]->size() == 1 && rv.size()!=1) {
612  throw range_error("resize first dim to 1 not implemented");
613  }
614  else {
615  arr::setv(rdim, 0, rv[0]->size());
616  }
617  }
618  }
619 
620  // handle the copying of names:
621  auto rnames = vector<unique_ptr<Dname>>();
622  idx_type rnameIdx = 0;
623  for (idx_type j=0; j<rdimUndropped.size(); ++j) {
624  bool scalarResult = rdim.size()==1 && rdim[0]==1;
625  // test if the dimension under question is being dropped; conditions for dropping:
626  // 1: first iteration (j==0), not a vector, drop, dropfirst==0, dim 0 is 1, not scalar
627  bool cond1 = j==0 && drop && dropfirst==0 && rdimUndropped[0]==1 && !scalarResult;
628  // 2: other iterations (j>0), drop, dropfirst <= j, dim under question is 1
629  bool cond2 = j>0 && drop && dropfirst <= j && rdimUndropped[j]==1;
630  // if none of the 2 conditions for dropping is fullfilled, then don't copy dim:
631  if (!cond1 && !cond2) {
632  rnames.emplace_back(make_unique<Dname>(0));
633  if (names[j]->names.size() > 0) {
634  i[j].selectNames(*rnames[rnameIdx], *names[j]);
635  }
636  else {
637  *rnames[rnameIdx] = Dname(rdim[rnameIdx]);
638  }
639  ++rnameIdx;
640  }
641  }
642 
643  return Array<T>(rdim, rv, rnames);
644  }
645 
646 
647  // array elt/subset assignment -----
648 
650 
651  template<typename INDEX, typename U>
652  Array& operator()(const vector<INDEX>& i, const Array<U>& u) {
653  if (i.size() != dim.size()) {
654  throw std::range_error("incorrect number of dimensions");
655  }
656  // check u has same number of elements as i. This will be
657  // inefficient for indices that have complex calculations to
658  // figure out their size, but we can't really let an incorrect
659  // subassign mess up an array:
660  idx_type nelts = i[0].trueSize();
661  for (idx_type d = 1; d < i.size(); ++d) {
662  nelts *= i[d].trueSize();
663  }
664  if (nelts != u.size()) {
665  throw range_error("number of items to replace is not equal to the replacement length");
666  }
667 
668  // assign a according to i
669  auto pi = vector<idx_type>(dim.size(), 0); // keep track of the current index
670  auto val = vector<idx_type>(dim.size(), 0); // keep track of the value of the index
671  idx_type col = 0;
672  bool res = INDEX::getfirstcol(col, val, pi, i, dim);
673  idx_type uj = 0;
674  while (res) {
675  if (col >= v.size()) {
676  throw range_error("subscript out of bounds");
677  }
678  i[0].subassign(*v[col], u, uj);
679  res = INDEX::getnextcol(col, val, pi, i, dim);
680  }
681  return *this;
682  }
683 
685  template<typename INDEX, typename U>
686  Array& operator()(const vector<INDEX>& i, U u) {
687  if (i.size() != dim.size()) {
688  throw std::range_error("incorrect number of dimensions");
689  }
690 
691  // assign a according to i
692  auto pi = vector<idx_type>(dim.size(), 0); // keep track of the current index
693  auto val = vector<idx_type>(dim.size(), 0); // keep track of the value of the index
694  idx_type col = 0;
695  bool res = INDEX::getfirstcol(col, val, pi, i, dim);
696  // idx_type ucol = 0;
697  // can use the following (should be faster) if we know i.dim is the same a u.dim:
698  // while (res) {
699  // docopyIndexResult(v[col], u.v[ucol++], i[0]);
700  // res = getnextcol(col, val, pi, i, dim);
701  // }
702  while (res) {
703  if (col >= v.size()) {
704  throw range_error("subscript out of bounds");
705  }
706  i[0].subassignScalar(*v[col], u);
707  res = INDEX::getnextcol(col, val, pi, i, dim);
708  }
709  return *this;
710  }
711 
712 
713  T operator[](const vector<idx_type>& i) const {
714  if (i.size() != dim.size()) {
715  throw range_error("index dimension mismatch");
716  }
717  if (i[0] >= dim[0]) {
718  throw range_error("subscript out of bounds");
719  }
720  idx_type col = 0;
721  idx_type mul = 1;
722  for (idx_type k=1; k<dim.size(); ++k) {
723  if (i[k] >= dim[k]) {
724  throw range_error("subscript out of bounds");
725  }
726  col += i[k]*mul;
727  mul *= dim[k];
728  }
729  return (*v[col])[i[0]];
730  }
731 
732 
734  Array subsetRows(idx_type n, idx_type from=0, bool addrownums=false) const {
735  if (from + n > dim[0]) {
736  throw range_error("range out of bounds");
737  }
738  auto aDim = dim;
739  arr::setv(aDim, 0, n);
740  Array a(arr::rsv, aDim);
741  for (idx_type i=0; i<ncols(); ++i) {
742  for (idx_type j=from; j<from+n; ++j) {
743  a.v[i]->push_back((*v[i])[j]);
744  }
745  }
746  // copy dnames:
747  // first dimension is the only tricky one:
748  if (hasNames(0) || addrownums) {
749  for (idx_type j=from; j<from+n; ++j) {
750  (*a.names[0]).assign(j-from, hasNames(0) ? (*names[0])[j] :
751  "[" +std::to_string(j+1)+std::string(dim.size()-1, ',')+"]");
752  }
753  }
754  // copy the other dimension's dnames as is:
755  for (idx_type j=1; j<dim.size(); ++j) {
756  *a.names[j] = *names[j];
757  }
758 
759  return a;
760  }
761 
762  inline bool hasNames() const {
763  for (arr::idx_type d=0; d<dim.size(); ++d) {
764  if (hasNames(d)) {
765  return true;
766  }
767  }
768  return false;
769  }
770 
771  inline bool hasNames(idx_type d) const {
772  if (d >= names.size()) {
773  throw out_of_range("name index out of bound");
774  }
775  return names[d]->hasNames();
776  }
777 
778  inline const Dname& getNames(idx_type d) const {
779  if (d >= names.size()) {
780  throw out_of_range("name index out of bound");
781  }
782  return *names[d];
783  }
784 
785  inline const Vector<zstring>& getNamesVector(idx_type d) const {
786  if (d >= names.size()) {
787  throw out_of_range("name index out of bound");
788  }
789  return names[d]->names;
790  }
791 
792  inline bool isVector() const {
793  return ncols() == 1;
794  }
795 
796  inline bool isScalar() const {
797  return dim.size() == 1 && dim[0] == 1;
798  }
799 
800  inline bool isOrdered() const {
801  return std::all_of(v.begin(), v.end(), [](const auto& e){ return e->isOrdered(); });
802  }
803 
804  inline bool isOrdered(idx_type i) const {
805  return v[i]->isOrdered();
806  }
807 
808  inline const AllocFactory& getAllocFactory() const { return *allocf.get(); }
809 
810  Array& addprefix(const string& prefix, idx_type d) {
811  if (prefix.length() > 0) {
812  if (d < dim.size()) {
813  names[d]->addprefix(prefix);
814  }
815  else if (d >= dim.size()) {
816  // so a vector of 3 might become 3x1 or 3x1x1, etc.
817  names.reserve(d+1);
818  for (idx_type j=dim.size(); j<d+1; ++j) {
819  names.emplace_back(make_unique<Dname>(1));
820  }
821  dim.resize(d+1, 1);
822  names[d]->addprefix(prefix);
823  }
824  else {
825  throw range_error("addprefix: dimension out of range");
826  }
827  }
828  return *this;
829  }
830 
831 
833  inline idx_type size() const { return v.size() * nrows(); }
834  inline idx_type ncols() const { return v.size(); }
835  inline idx_type nrows() const { return dim.size() ? dim[0] : 0; }
836 
837  // Get a reference to the underlying column:
838  inline const Vector<T,O>& getcol(idx_type i) const {
839  if (i >= v.size()) {
840  throw std::out_of_range("getcol: column out of range");
841  }
842  return *(v[i]);
843  }
844  inline Vector<T,O>& getcol(idx_type i) {
845  if (i >= v.size()) {
846  throw std::out_of_range("getcol: column out of range");
847  }
848  return *(v[i].get());
849  }
850 
852  inline const Dname& getnames(idx_type d) const {
853  if (d >= names.size()) {
854  throw std::out_of_range("getnames: dimension out of range");
855  }
856  return *names[d];
857  }
858  inline const Vector<idx_type>& getdim() const { return dim; }
859  inline const idx_type getdim(idx_type d) const {
860  if (d >= dim.size()) {
861  throw std::out_of_range("getdim: dimension out of range");
862  }
863  return dim[d];
864  }
865 
866  inline fsys::path getAllocfDirname() const { return allocf->getDirname(); }
867  inline bool isPersistent() const { return allocf->isPersistent(); }
868 
869  void msync(bool async) const {
870  dim.getAllocator()->msync(async);
871  for (auto& col : v) {
872  col->getAllocator()->msync(async);
873  }
874  for (auto& nm : names) {
875  nm->names.getAllocator()->msync(async);
876  }
877  }
878 
879  // concat and bind operations -------------------------------------------
880 
883  static inline bool checkdims(const Vector<idx_type>& v, const Vector<idx_type>& u, idx_type i) {
884  // a 0 dimentional array can be bound with any other array:
885  if (!v.size() || !u.size()) {
886  return true;
887  }
888  idx_type up = min(v.size(), u.size());
889  for (idx_type j=0; j<up; ++j) {
890  if (j == i) continue;
891  if (v[j] != u[j]) {
892  return false;
893  }
894  }
895  // if different number of dims, then they have to be all 1
896  for (idx_type j=up; j<v.size(); ++j) {
897  if (j == i) continue;
898  if (v[j] != 1) return false;
899  }
900  for (idx_type j=up; j<u.size(); ++j) {
901  if (j == i) continue;
902  if (u[j] != 1) return false;
903  }
904 
905  return true;
906  }
907 
908 
909  template<typename U>
910  Array& concat(const Array<U>& u, const string& prefix="") {
911  if (dim.size() > 1 || u.dim.size() > 1) {
912  throw range_error("concat can only handle dim==0 and vectors");
913  }
914  for (auto e: *u.v[0]) {
915  v[0]->push_back(convert<T,U>(e));
916  }
917  auto newnames = *u.names[0];
918  if (prefix.length()) {
919  newnames.addprefix(prefix);
920  }
921  names[0]->addafter(newnames);
922  setv(dim, 0, dim[0] + u.dim[0]);
923  return *this;
924  }
925 
926 
929  template<typename U>
930  Array& concat(const U& u, const string& name="") {
931  if (dim.size() == 0) {
932  dim.push_back(0);
933  names.push_back(make_unique<Dname>(0));
934  }
935  else if (dim.size() > 1) {
936  throw range_error("concat can only handle dim==0 and vectors");
937  }
938  if (v.size() == 0) {
939  v.push_back(make_unique<Vector<T,O>>(dim[0]));
940  }
941 
942  v[0]->push_back(convert<T,U>(u));
943  names[0]->addafter(name);
944  arr::setv(dim, 0, dim[0] + 1);
945  return *this;
946  }
947 
948 
956  template<typename U>
957  Array& abind(const Array<U>& u, idx_type d, const string& prefix="") {
958  if (reinterpret_cast<const Array<U>*>(this) == &u) {
959  // prevent binding to self!
960  throw range_error("cannot bind to self.");
961  }
962 
963  // For the purpose of rbind, column vectors are always considered
964  // to be row vectors. It seems weird, especially when one
965  // considers that t(1:3) gives a row vector... But we follow R's
966  // convention (albeit with a rather inefficient implementation):
967  if (d==0) {
968  if (dim.size() == 1) {
969  *this = transpose(*this);
970  return abind<U>(u.dim.size() == 1 ? transpose(u) : u, d, prefix);
971  }
972  else if (u.dim.size() == 1) {
973  return abind<U>(transpose(u), d, prefix);
974  }
975  }
976  if (!checkdims(dim, u.dim, d)) {
977  throw range_error("incorrect dimensions for abind");
978  }
979  // handle the copying of dim when dim.size()==0, i.e. when we
980  // are binding with the null matrix; the dim of u is copied:
981  if (dim.size() == 0) {
982  for (idx_type j=0; j<u.dim.size(); ++j) {
983  dim.push_back(j == d ? 0 : u.dim[j]); // the last part of this function sets dim d
984  names.emplace_back(
985  make_unique<Dname>(j == d ? 0 : u.dim[j],
986  Vector<zstring>(0, getInitValue<zstring>(),
987  allocf->get("names" + std::to_string(j)))));
988  }
989  }
990  // potentially need to increase dimensions:
991  if (d >= dim.size()) {
992  // we add dimension of size 1 (i.e. a vector of 3 is
993  // considered a 3x1 for a cbind) except for the null array,
994  // which needs size 0 in dimension d to avoid spurious column
995  // creation:
996  for (idx_type j=dim.size(); j<d+1; ++j) {
997  names.emplace_back(
998  make_unique<Dname>(v.size()==0 && j==d ? 0 : 1,
999  Vector<zstring>(0, getInitValue<zstring>(),
1000  allocf->get("names" + std::to_string(j)))));
1001  dim.push_back(v.size()==0 && j==d ? 0 : 1);
1002  }
1003  }
1004  if (d==0) {
1005  // not touching columns, just making the rows longer;
1006  for (idx_type j=0; j<u.v.size(); ++j) {
1007  if (j >= v.size()) {
1008  // if the column doesn't exist, create it:
1009  v.emplace_back(make_unique<Vector<T,O>>(rsv, u.v[j]->size(),
1010  allocf->get(std::to_string(j))));
1011  }
1012  for (auto e: *u.v[j]) {
1013  v[j]->push_back(convert<T,U>(e));
1014  }
1015  }
1016  } else {
1017  // adding u.v.size() columns:
1018  auto origSize = v.size();
1019  v.reserve(origSize + u.v.size());
1020  for (idx_type j=0; j<u.v.size(); ++j) {
1021  // the following is potentially mmapped if 'dirname' is not "":
1022  v.emplace_back(make_unique<Vector<T,O>>(rsv, dim[0],
1023  allocf->get(std::to_string(origSize+j))));
1024  }
1025  idx_type ucols = d < u.dim.size() ?
1026  accumulate(u.dim.begin()+1, u.dim.begin()+d+1, 1, multiplies<idx_type>()) :
1027  accumulate(u.dim.begin()+1, u.dim.end(), 1, multiplies<idx_type>());
1028  idx_type cols = d < dim.size() ?
1029  accumulate(dim.begin()+1, dim.begin()+d+1, 1, multiplies<idx_type>()) :
1030  accumulate(dim.begin()+1, dim.end(), 1, multiplies<idx_type>());
1031  // how many times we need to do this:
1032  idx_type n = d < dim.size() ?
1033  accumulate(dim.begin()+d+1, dim.end(), 1, multiplies<idx_type>()) : 1;
1034  idx_type destoff = v.size() - 1; // to where we need to copy the cols in r
1035  idx_type srcoff = origSize - 1; // from where we need to swap the cols in r
1036  idx_type uoff = u.v.size() - 1; // index in array u
1037  for (idx_type j=n; j>0; --j) {
1038  // copy the slice of cols from t to r:
1039  bool needSwap = true;
1040  for (idx_type k=0; k < ucols; ++k) {
1041  for (idx_type l=0; l<u.v[uoff]->size(); ++l) {
1042  v[destoff]->push_back(convert<T,U>((*u.v[uoff])[l]));
1043  }
1044  --destoff;
1045  if (uoff > 0) {
1046  --uoff;
1047  } else {
1048  needSwap = false;
1049  }
1050  }
1051  if (needSwap) {
1052  // move the columns:
1053  for (idx_type k=0; k < cols; ++k) {
1054  std::swap(v[srcoff--], v[destoff--]);
1055  }
1056  }
1057  }
1058  }
1059  // add new names in dimension d if present:
1060  auto newnames = d < u.dim.size() ? *u.names[d] : Dname(1);
1061  newnames.addprefix(prefix);
1062  names[d]->addafter(newnames);
1063  // add new names in for dimensions other than 'd' if it was missing them:
1064  for (idx_type i=0; i<names.size(); ++i) {
1065  if (i==d) continue;
1066  if (!hasNames(i) && i < u.names.size() && u.hasNames(i)) {
1067  *names[i] = *u.names[i];
1068  }
1069  }
1070  setv(dim, d, dim[d] + (d < u.dim.size() ? u.dim[d] : 1));
1071  return *this;
1072  }
1073 
1074  template<typename U>
1075  Array& rbind(const Array<U>& u) {
1076  return abind<U>(u, 0);
1077  }
1078 
1079  template<typename U>
1080  Array& cbind(const Array<U>& u) {
1081  return abind<U>(u, 1);
1082  }
1083 
1084 
1085  // that's not an offset, it's the nb bytes read LLL
1086  Array& append(const char* buf, size_t buflen, size_t& offset) {
1087  auto adim = Vector<idx_type>(const_cast<char*>(buf), buflen);
1088 
1089  if (dim.size() == 0) {
1090  // figure out if we want to support that...
1092  throw out_of_range("append on null array not implemented");
1093  }
1094  if (!checkdims(dim, adim, 0)) {
1095  throw out_of_range("incorrect dimensions for append");
1096  }
1097 
1098  offset = sizeof(RawVector<idx_type>) + sizeof(idx_type)*adim.size();
1099  try {
1100  for (size_t i=0; i<v.size(); ++i) {
1101  offset += v[i]->append(buf + offset, buflen - offset);
1102  }
1103  }
1104  catch (...) {
1105  for (size_t i=0; i<v.size(); ++i) {
1106  v[i]->resize(dim[0]); // stay in a coherent state!
1107  }
1108  throw;
1109  }
1110 
1111  // it's important to do the dimension update at the end, as it
1112  // allows mmapped arrays to recover in the event of a crash in
1113  // the middle of an append:
1114  names[0]->addafter(Dname(adim[0]));
1115  setv(dim, 0, dim[0] + adim[0]);
1116  return *this;
1117  }
1118 
1119 
1120  Array& appendVector(const char* buf, size_t buflen) {
1121  auto data = Vector<T,O>(const_cast<char*>(buf), buflen);
1122  // the data has to be a multiple of the number of columns:
1123  if (v.size() == 0) {
1124  // figure out if we want to support that...
1126  throw out_of_range("appendVector on null array not implemented");
1127  }
1128  if (data.size() % v.size()) {
1129  throw out_of_range("appendVector: incorrect vector size");
1130  }
1131  idx_type nrows = data.size() / v.size();
1132  idx_type i = 0, j=0;
1133  for (auto e : data) {
1134  v[i]->push_back(e);
1135  if (++j == nrows) { j = 0; ++i; }
1136  }
1137 
1138  names[0]->addafter(Dname(nrows));
1139  setv(dim, 0, dim[0] + nrows);
1140  return *this;
1141  }
1142 
1143 
1144  Array& resize(idx_type d, idx_type sz, idx_type from=0) {
1145  if (d !=0) {
1146  throw logic_error("resize only implemented for dim 0");
1147  }
1148  if (dim.size() == 0) {
1149  dim.push_back(0);
1150  names.push_back(make_unique<Dname>(0));
1151  }
1152  if (v.size() == 0) {
1153  v.push_back(make_unique<Vector<T,O>>(dim[0]));
1154  }
1155 
1156  for (idx_type j=0; j<v.size(); ++j) {
1157  v[j]->resize(sz, from);
1158  }
1159  setv(dim, d, sz);
1160  names[0]->resize(sz, from);
1161  return *this;
1162  }
1163 
1164  // apply operations -------------------------------------------
1165 
1167  template<class F>
1168  Array& applyf(F f) {
1169  for (idx_type n=0; n<v.size(); ++n) {
1170  for (idx_type i=0; i<v[n]->size(); ++i) {
1171  setv_checkbefore(*v[n], i, f((*v[n])[i]));
1172  }
1173  }
1174  return *this;
1175  }
1176 
1181  template<typename F, typename ...U>
1182  Array& apply(const U&... u) {
1183  if (!checksize(*this, u...)) throw std::out_of_range("size mismatch");
1184  for (idx_type n=0; n<v.size(); ++n) {
1185  v[n]->template apply<F, typename U::vector_type...>(u.getcol(n)...);
1186  }
1187  return *this;
1188  }
1189 
1190  template<class F, typename U>
1191  Array& apply_scalar_post(U u) {
1192  for (idx_type n=0; n<v.size(); ++n) {
1193  v[n]->template apply_scalar_post<F, U>(u);
1194  }
1195  return *this;
1196  }
1197 
1198  template<typename F, typename U>
1199  Array& apply(const U& u) {
1200  if (u.size() == 1) {
1201  return apply_scalar_post<F, typename U::value_type>(u[0]);
1202  }
1203  for (idx_type n=0; n<v.size(); ++n) {
1204  v[n]->template apply<F, typename U::vector_type>(u.getcol(n));
1205  }
1206  return *this;
1207  }
1208 
1209  template<class F, typename U, typename A>
1210  Array& apply_attrib(const Array<U>& u, const A& a) {
1211  if (u.size() == 1) {
1212  return apply_attrib_scalar_post<F,U>(u[0], a);
1213  }
1214  // checkdim will check all the dimensions:
1215  if (!checkdims(dim, u.dim, std::max(dim.size(), u.size()) + 1)) {
1216  throw(std::out_of_range("in apply, array dimension mismatch"));
1217  }
1218  for (idx_type n=0; n<v.size(); ++n) {
1219  for (idx_type i=0; i<v[n]->size(); ++i) {
1220  setv_checkbefore(*v[n], i, F()((*v[n])[i], (*u.v[n])[i], a));
1221  }
1222  }
1223  return *this;
1224  }
1225 
1226  template<class F, typename U, typename A>
1227  Array& apply_attrib_scalar_post(U u, const A& a) {
1228  for (idx_type n=0; n<v.size(); ++n) {
1229  for (idx_type i=0; i<v[n]->size(); ++i) {
1230  setv_checkbefore(*v[n], i, F()((*v[n])[i], u, a));
1231  }
1232  }
1233  return *this;
1234  }
1235 
1236 
1237  template <typename AO=O>
1238  Array& sort() {
1239  std::for_each(v.begin(), v.end(), [](const auto& e){ e->template sort<AO>(); });
1240  return *this;
1241  }
1242 
1243  template <typename AO=O>
1244  Array& sort(idx_type col) {
1245  if (col >= v.size()) {
1246  throw std::range_error("sort column out of bounds");
1247  }
1248  v[col]->template sort<AO>();
1249  return *this;
1250  }
1251 
1252  template<class UnaryFunction>
1253  UnaryFunction for_each(UnaryFunction f) const {
1254  for (auto& vi : v) {
1255  f = std::for_each(vi->begin(), vi->end(), f);
1256  }
1257  return f;
1258  }
1259 
1260  // We would want to enforce that U has to be numeric and
1261  // convertible to an idx_type.
1262  template<typename U, typename AO=O>
1263  Array<U> sort_idx(idx_type base) const {
1264  vector<unique_ptr<Vector<U>>> idxv;
1265  for (idx_type j=0; j<v.size(); ++j) {
1266  idxv.emplace_back(make_unique<Vector<U>>(v[j]->template sort_idx<U, AO>(base)));
1267  }
1268  return Array<U>(dim, idxv, names); // this makes a copy of idxv, so not very good...
1269  }
1270 
1271  // ----------------------- what should really be a private section:
1272  Vector<idx_type> dim;
1273  // we use unique_ptr below to avoid the copying of 'Vector' or
1274  // 'Dname' that would otherwise occur on 'vector' resize:
1275  vector<unique_ptr<Vector<T,O>>> v;
1276  vector<unique_ptr<Dname>> names;
1277 
1278  std::unique_ptr<AllocFactory> allocf;
1279  };
1280  // ---------------------- end struct Array --------------------
1281 
1282 
1284  template<typename R, typename T>
1285  std::vector<R> stdvector(const Array<T> u) {
1286  std::vector<R> r;
1287  for (auto& e: u.v) {
1288  r.insert(r.end(), e->begin(), e->end());
1289  }
1290  return r;
1291  }
1292 
1294  template<typename R, typename T>
1296  Vector<R> r;
1297  for (auto& e: u.v) {
1298  r.insert(r.end(), e->begin(), e->end());
1299  }
1300  return r;
1301  }
1302 
1304 
1305 
1307  template<typename T>
1309  if (u.isVector()) {
1310  return u;
1311  }
1312  idx_type n = u.v.size() * u.dim[0];
1313  Array<T> r(rsv, Vector<idx_type>{n});
1314  for (auto& e: u.v) {
1315  r.v[0]->insert(r.v[0]->end(), e->begin(), e->end());
1316  }
1317  return r;
1318  }
1319 
1320 
1321  template<>
1322  inline Array<arr::zstring> vectorize(const Array<arr::zstring>& u) {
1323  idx_type n = u.v.size() * u.dim[0];
1324  Array<zstring> r(rsv, {n});
1325  for (auto& e: u.v) {
1326  for (auto iter=e->begin(); iter!=e->end(); ++iter) {
1327  r.v[0]->push_back(*iter);
1328  }
1329  }
1330  return r;
1331  }
1332 
1333 
1334  template<typename T, typename O=std::less<T>>
1335  Array<T,O> transpose(const Array<T,O>& t) {
1336  if (t.dim.size() == 1) {
1337  return Array<T,O>({1, t.dim[0]}, static_cast<Vector<T,O>>(*t.v[0]), {{}, t.names[0]->names});
1338  }
1339  else if (t.dim.size() == 2) {
1340  Array<T,O> a(noinit_tag,
1341  {t.getdim(1), t.getdim(0)}, {{t.getnames(1).names}, t.getnames(0).names});
1342  for (idx_type j=0; j<a.getdim(1); ++j)
1343  for (idx_type k=0; k<a.getdim(0); ++k)
1344  setv_checkbefore(a.getcol(j), k, t.getcol(k)[j]);
1345  return a;
1346  }
1347  else {
1348  throw range_error("argument is not a matrix");
1349  }
1350  }
1351 
1352 
1353  // swap dimensions:
1354 
1355  // resize? LLL
1356 
1357 
1358  // array ops -----------------------------------------------
1359 
1360  template<typename T, typename O=std::less<T>>
1361  Array<T,O>& drop(Array<T,O>& a) {
1362  auto di = a.dim.end()-1;
1363  auto ni = a.names.end()-1;
1364  for (; di > a.dim.begin(); --di, --ni) {
1365  if (*di == 1) {
1366  a.dim.erase(di);
1367  a.names.erase(ni);
1368  }
1369  }
1370  return a;
1371  }
1372 
1373  template<typename T, typename R, typename OT=std::less<T>, typename OR=std::less<R>>
1374  Array<R,OR> applyf(const Array<T,OT>& t, function<R(T)> f) {
1375  auto ret = Array<R,OR>(rsv, t.dim);
1376  for (idx_type j=0; j<t.names.size(); ++j) {
1377  ret.names[j] = make_unique<Dname>(*t.names[j]);
1378  }
1379  for (idx_type n=0; n<t.v.size(); ++n) {
1380  for (idx_type i=0; i<t.v[n]->size(); ++i) {
1381  ret.v[n]->push_back(f((*t.v[n])[i]));
1382  }
1383  }
1384  return ret;
1385  }
1386 
1387 
1388  template<typename T, typename U, typename R, class F,
1389  typename OU=std::less<U>, typename OR=std::less<R>>
1390  Array<R,OR> apply_scalar(T t, const Array<U,OU>& u) {
1391  Array<R,OR> ret(rsv, u.dim);
1392  for (idx_type j=0; j<u.names.size(); ++j) {
1393  ret.names[j] = make_unique<Dname>(*u.names[j]);
1394  }
1395  for (idx_type n=0; n<u.v.size(); ++n) {
1396  for (idx_type i=0; i<u.v[n]->size(); ++i) {
1397  ret.v[n]->push_back(F()(t, (*u.v[n])[i]));
1398  }
1399  }
1400  return ret;
1401  }
1402 
1403  template<typename U, typename T, typename R, class F,
1404  typename OU=std::less<U>, typename OR=std::less<R>>
1405  Array<R,OR> apply_scalar(const Array<U,OU>& u, T t) {
1406  Array<R,OR> ret(rsv, u.dim);
1407  for (idx_type j=0; j<u.names.size(); ++j) {
1408  ret.names[j] = make_unique<Dname>(*u.names[j]);
1409  }
1410  for (idx_type n=0; n<u.v.size(); ++n) {
1411  for (idx_type i=0; i<u.v[n]->size(); ++i) {
1412  ret.v[n]->push_back(F()((*u.v[n])[i], t));
1413  }
1414  }
1415  return ret;
1416  }
1417 
1418 
1419  template <typename T, typename O>
1420  void setv_checkbefore(Array<T,O>& a, idx_type i, T t) {
1421  if (!a.dim.size()) throw range_error("subscript out of bounds");
1422  idx_type col = i / a.dim[0];
1423  idx_type off = i % a.dim[0];
1424  if (col >= a.ncols()) throw range_error("subscript out of bounds");
1425  setv_checkbefore(*a.v[col], off, t);
1426  }
1427 
1428  template <typename T, typename O>
1429  void setv(Array<T,O>& a, idx_type i, T t) {
1430  if (!a.dim.size()) throw range_error("subscript out of bounds");
1431  idx_type col = i / a.dim[0];
1432  idx_type off = i % a.dim[0];
1433  if (col >= a.ncols()) throw range_error("subscript out of bounds");
1434  setv(*a.v[col], off, t);
1435  }
1436 
1437 
1441  template<typename F, typename R, typename O, typename U1, typename ...U>
1442  Array<R,O> apply(const U1 u1, const U&... u) {
1443  if (!checksize(u1, u...)) throw std::out_of_range("size mismatch");
1444 
1445  auto ret = Array<R,O>(rsv, u1.dim);
1446  idx_type ii = 0;
1447  for (idx_type j=0; j<u1.names.size(); ++j) {
1448  ret.names[j] = make_unique<Dname>(*u1.names[j]);
1449  }
1450  for (idx_type n=0; n<u1.v.size(); ++n) {
1451  for (idx_type j=0; j<u1.v[n]->size(); ++j) {
1452  ret.v[n]->push_back(F()((*u1.v[n])[j], u[ii]...));
1453  ++ii;
1454  }
1455  }
1456  return ret;
1457  }
1458 
1459  template<typename F, typename R, typename O, typename U1, typename ...U>
1460  Array<R,O> apply_samesize(const U1 u1, const U&... u) {
1461  if (!checkdims(u1, u...)) throw std::out_of_range("size mismatch");
1462  auto ret = Array<R,O>(rsv, u1.dim);
1463  for (idx_type j=0; j<u1.names.size(); ++j) {
1464  ret.names[j] = make_unique<Dname>(*u1.names[j]);
1465  }
1466  for (idx_type n=0; n<u1.v.size(); ++n) {
1467  for (idx_type i=0; i<u1.v[n]->size(); ++i) {
1468  ret.v[n]->push_back(F()((u1->getcol[n])[i], (u->getcol[n])[i])...);
1469  }
1470  }
1471  return ret;
1472  }
1473 
1474  template<typename T, typename U, typename R, class F,
1475  typename OT=std::less<T>, typename OU=std::less<U>, typename OR=std::less<R>>
1476  Array<R,OR> apply(const Array<T,OT>& t, const Array<U,OU>& u) {
1477  if (t.size() == 1) {
1478  return apply_scalar<T,U,R,F,OU,OR>(t[0], u);
1479  }
1480  else if (u.size() == 1) {
1481  return apply_scalar<T,U,R,F,OT,OR>(t, u[0]);
1482  }
1483  // if we are not in the case where one or the other array sizes is
1484  // 1, then they have to be of the same dimensions; we could allow
1485  // same size at the detriment of efficiency...
1486  if (t.dim != u.dim) { // LLL allow differences with trailing 1's
1487  throw std::range_error("incompatible array sizes");
1488  }
1489  auto ret = Array<R>(rsv, u.dim);
1490  // create variadic find names function LLL
1491  for (idx_type j=0; j<u.names.size(); ++j) {
1492  ret.names[j] = make_unique<Dname>(t.hasNames(j) ? *t.names[j] : *u.names[j]);
1493  }
1494  for (idx_type n=0; n<u.v.size(); ++n) {
1495  for (idx_type i=0; i<u.v[n]->size(); ++i) {
1496  ret.v[n]->push_back(F()((*t.v[n])[i], (*u.v[n])[i]));
1497  }
1498  }
1499  return ret;
1500  }
1501 
1502  template<typename T, typename U, typename R, class F, typename A,
1503  typename OU=std::less<U>, typename OR=std::less<R>>
1504  Array<R,OR> apply_attrib_scalar_post(T t, const Array<U,OU>& u, const A& a) {
1505  Array<R,OR> ret(rsv, u.dim);
1506  for (idx_type j=0; j<u.names.size(); ++j) {
1507  ret.names[j] = make_unique<Dname>(*u.names[j]);
1508  }
1509  for (idx_type n=0; n<u.v.size(); ++n) {
1510  for (idx_type i=0; i<u.v[n]->size(); ++i) {
1511  ret.v[n]->push_back(F()(t, (*u.v[n])[i], a));
1512  }
1513  }
1514  return ret;
1515  }
1516 
1517  template<typename U, typename T, typename R, class F, typename A,
1518  typename OU=std::less<U>, typename OR=std::less<R>>
1519  Array<R,OR> apply_attrib_scalar_pre(const Array<U,OU>& u, T t, const A& a) {
1520  Array<R,OR> ret(rsv, u.dim);
1521  for (idx_type j=0; j<u.names.size(); ++j) {
1522  ret.names[j] = make_unique<Dname>(*u.names[j]);
1523  }
1524  for (idx_type n=0; n<u.v.size(); ++n) {
1525  for (idx_type i=0; i<u.v[n]->size(); ++i) {
1526  ret.v[n]->push_back(F()((*u.v[n])[i], t, a));
1527  }
1528  }
1529  return ret;
1530  }
1531 
1532 
1533  template<typename T, typename U, typename R, class F, typename A,
1534  typename OT=std::less<T>, typename OR=std::less<R>, typename OU=std::less<U>>
1535  Array<R,OR> apply_attrib(const Array<T,OT>& t, const Array<U,OU>& u, const A& a) {
1536  if (t.size() == 1) {
1537  return apply_attrib_scalar_post<T,U,R,F,A,OU,OR>(t[0], u, a);
1538  }
1539  else if (u.size() == 1) {
1540  return apply_attrib_scalar_pre<T,U,R,F,A,OT,OR>(t, u[0], a);
1541  }
1542  // if we are not in the case where one or the other array sizes is
1543  // 1, then they have to be of the same dimensions; we could allow
1544  // same size at the detriment of efficiency...
1545  if (t.dim != u.dim) { // LLL allow differences with trailing 1's
1546  throw std::range_error("incompatible array sizes");
1547  }
1548  auto ret = Array<R,OR>(rsv, u.dim);
1549  for (idx_type j=0; j<u.names.size(); ++j) {
1550  ret.names[j] = make_unique<Dname>(t.hasNames(j) ? *t.names[j] : *u.names[j]);
1551  }
1552  for (idx_type n=0; n<u.v.size(); ++n) {
1553  for (idx_type i=0; i<u.v[n]->size(); ++i) {
1554  ret.v[n]->push_back(F()((*t.v[n])[i], (*u.v[n])[i], a));
1555  }
1556  }
1557  return ret;
1558  }
1559 
1560  template<typename T, typename O=std::less<T>>
1561  Array<T,O> maxcol(const Array<T,O>& t) {
1562  // take care of usual too small matrices etc. LLL
1563  Vector<idx_type> dim{1};
1564  dim.insert(dim.end(), t.dim.begin()+1, t.dim.end());
1565  Array<T,O> ret(rsv, dim);
1566  for (idx_type n=0; n<t.v.size(); ++n) {
1567  T maxelt = (*t.v[n])[0];
1568  for (auto elt : *t.v[n]) {
1569  if (elt > maxelt) maxelt = elt;
1570  }
1571  ret.v[n]->push_back(maxelt);
1572  }
1573  return ret;
1574  }
1575 
1576  template<typename T, typename R, template <class> class F, typename O=std::less<T>>
1577  R cumul(const Array<T,O>& t, const R& init) {
1578  auto res = init;
1579  for (idx_type n=0; n<t.v.size(); ++n) {
1580  for (idx_type i=0; i<t.v[n]->size(); ++i) {
1581  res = F<R>()(res, (*t.v[n])[i]);
1582  }
1583  }
1584  return res;
1585  }
1586 
1588  // template<typename T, typename R>
1589  template<typename T, typename R, template <class> class F, typename O=std::less<T>>
1590  R cumul_until(const Array<T,O>& t, const R& init, const R& until) {
1591  auto res = init;
1592  for (idx_type n=0; n<t.v.size(); ++n) {
1593  for (idx_type i=0; i<t.v[n]->size(); ++i) {
1594  res = F<R>()(res, (*t.v[n])[i]);
1595  if (res == until) {
1596  return res;
1597  }
1598  }
1599  }
1600  return res;
1601  }
1602 
1603  // helper function to convert unidimentional arrays to standard vectors
1604  template<typename T, typename O=std::less<T>>
1605  inline vector<T> toStdVector(const Array<T,O>& a) {
1606  if (!a.isVector()) {
1607  throw out_of_range("multidimentional array cannot be converted to 'vector'");
1608  }
1609  return a.v[0];
1610  }
1611 
1613  template<typename T, typename O=std::less<T>>
1615  {
1616  if (!data.size()) {
1617  throw std::out_of_range("array_from_vector: null array creation not supported");
1618  }
1619  Array<T> a(rsv, {data.size()});
1620  a.v[0]->swap(data);
1621  return a;
1622  }
1623 
1624 
1625 }
1626 
1627 #endif
arr::Array::operator==
bool operator==(const Array< T > &u) const
Definition: array.hpp:438
arr::Array::size
idx_type size() const
Get the total number of elements in the array.
Definition: array.hpp:833
arr::Array::Array
Array(seq_n_t, T t1, U by, idx_type n, std::unique_ptr< AllocFactory > &&allocf_p=std::make_unique< MemAllocFactory >())
construct a sequence in place, from 't1', with 'n' elements.
Definition: array.hpp:148
arr::Array::append
Array & append(const char *buf, size_t buflen, size_t &offset)
Definition: array.hpp:1086
arr::Vector
Definition: vector_base.hpp:103
arr::createDir
void createDir(const zstring &dirname)
Create a directory if dirname is not "".
arr::convert_cons_t
Definition: array.hpp:61
val
Contains the types that constitute the output of an evaluation by the interpreter.
Definition: display.hpp:31
arr::Array::Array
Array(seq_to_t, T t1, T t2, U by, std::unique_ptr< AllocFactory > &&allocf_p=std::make_unique< MemAllocFactory >())
construct a sequence in place, from 't1' to 't2'.
Definition: array.hpp:119
arr::Array::applyf
Array & applyf(F f)
Apply a function on every element of an array.
Definition: array.hpp:1168
arr::buildNames
void buildNames(vector< unique_ptr< Dname >> &names, const Vector< idx_type > dim, const AllocFactory &allocf, const vector< Vector< zstring >> names_p={Vector< zstring >()})
Definition: array.cpp:23
arr::apply
Array< R, O > apply(const U1 u1, const U &... u)
Definition: array.hpp:1442
arr::Array::names
vector< unique_ptr< Dname > > names
v is a set of cols, v[0], v[1] are the cols
Definition: array.hpp:1276
arr::Array::apply
Array & apply(const U &... u)
Definition: array.hpp:1182
arr::Array::getnames
const Dname & getnames(idx_type d) const
Get the vector of names in dimention d.
Definition: array.hpp:852
arr::Array::appendVector
Array & appendVector(const char *buf, size_t buflen)
Definition: array.hpp:1120
arr::Array::abind
Array & abind(const Array< U > &u, idx_type d, const string &prefix="")
Definition: array.hpp:957
arr::Array::Array
Array(const Vector< idx_type > dim_p, const Vector< T, O > &v_p, const vector< Vector< zstring >> names_p=vector< Vector< zstring >>(), std::unique_ptr< AllocFactory > &&allocf_p=std::make_unique< MemAllocFactory >())
Definition: array.hpp:254
arr::Array::operator()
Array< T > operator()(const INDEX &i) const
Complex subsetting.
Definition: array.hpp:515
arr::vectorize
Array< T > vectorize(const Array< T > &u)
provide a const version of the above that works only with arrays that have 1 col LLL
Definition: array.hpp:1308
arr::Array::checkdims
static bool checkdims(const Vector< idx_type > &v, const Vector< idx_type > &u, idx_type i)
Definition: array.hpp:883
arr::Array::Array
Array(INIT_TYPE_TAG init, const Vector< idx_type > dim_p, const vector< Vector< zstring >> names_p=vector< Vector< zstring >>(), std::unique_ptr< AllocFactory > &&allocf_p=std::make_unique< MemAllocFactory >())
Definition: array.hpp:164
arr::Dname
Definition: dname.hpp:32
arr::seq_n_t
Definition: array.hpp:57
arr
Contains the classes and functions that implement a multidimentional array type.
Definition: allocator.hpp:29
arr::array_from_vector
Array< T, O > array_from_vector(Vector< T, O > &&data)
Construct an array moving a vector.
Definition: array.hpp:1614
arr::checksize
bool checksize(const V &v)
Definition: array.hpp:71
arr::Array::concat
Array & concat(const U &u, const string &name="")
Definition: array.hpp:930
arr::Array::subsetRows
Array subsetRows(idx_type n, idx_type from=0, bool addrownums=false) const
Subset the specified row range:
Definition: array.hpp:734
arr::RawVector< idx_type >
arr::stdvector
std::vector< R > stdvector(const Array< T > u)
Vectorize to a std::vector of type R.
Definition: array.hpp:1285
arr::Array::getBufferSize
size_t getBufferSize() const
Definition: array.hpp:490
arr::arrvector
Vector< R > arrvector(const Array< T > u)
Vectorize to a arr::Vector of type R.
Definition: array.hpp:1295
arr::Array::Array
Array(convert_cons_t, const Array< U, OU > &u, std::unique_ptr< AllocFactory > &&allocf_p=std::make_unique< MemAllocFactory >())
Convert to another array type.
Definition: array.hpp:350
arr::Array::Array
Array(const Vector< idx_type > dim_p, const vector< unique_ptr< Vector< T, O >>> &v_p, const vector< unique_ptr< Dname >> &names_p, std::unique_ptr< AllocFactory > &&allocf_p=std::make_unique< MemAllocFactory >())
Definition: array.hpp:300
arr::Array::Array
Array(std::unique_ptr< AllocFactory > &&allocf_p)
Definition: array.hpp:188
arr::Array
Definition: array.hpp:109
arr::Array::operator()
Array & operator()(const vector< INDEX > &i, U u)
subassign a scalar.
Definition: array.hpp:686
arr::seq_to_t
Definition: array.hpp:56
arr::Array::operator()
Array & operator()(const vector< INDEX > &i, const Array< U > &u)
subassign an array.
Definition: array.hpp:652
arr::checkdims
bool checkdims(const V &v)
Definition: array.hpp:84
arr::cumul_until
R cumul_until(const Array< T, O > &t, const R &init, const R &until)
among other things, this allows to implement an 'all' and 'any' function.
Definition: array.hpp:1590