Browse Source

Simple ZARR format reader

lintest
Michael Uleysky 10 months ago
parent
commit
8a4fbea40a
  1. 822
      include/nczarrcommon.h
  2. 174
      include/zarr.h
  3. 5
      src/CMakeLists.txt
  4. 236
      src/zarr.cpp

822
include/nczarrcommon.h

@ -0,0 +1,822 @@
#pragma once
#include "merrors.h"
#include <utility>
#include <variant>
using michlib::Error;
using michlib::int1;
using michlib::int2;
using michlib::int4;
using michlib::int8;
using michlib::int_cast;
using michlib::MString;
using michlib::RetVal;
using michlib::uint1;
using michlib::uint8;
class NcZarrTypes
{
protected:
using AttVT = std::variant<std::monostate, int8, uint8, double, MString, bool>;
class ArrCounter
{
using VT = std::vector<size_t>;
const VT count;
VT ind;
bool end;
public:
static size_t Index(const VT& i, const VT& c)
{
size_t out = 0;
size_t mul = 1;
for(size_t ii = i.size(); ii != 0; ii--)
{
out += mul * i[ii - 1];
mul *= c[ii - 1];
}
return out;
}
static VT Index(size_t lind, const VT& c)
{
VT out(c.size());
size_t j = lind;
for(auto i = c.size(); i > 0; i--)
{
out[i - 1] = j % c[i - 1];
j = j / c[i - 1];
}
return out;
}
ArrCounter() = delete;
ArrCounter(const VT& cnt): count(cnt), ind(cnt.size(), 0), end(false) {}
size_t operator[](size_t i) const { return ind[i]; }
ArrCounter& operator++()
{
size_t curind = count.size();
while(curind != 0)
{
ind[curind - 1]++;
if(ind[curind - 1] >= count[curind - 1])
{
ind[curind - 1] = 0;
curind--;
}
else
return *this;
}
ind = count;
end = true;
return *this;
}
explicit operator bool() const { return !end; }
size_t Index() const { return Index(ind, count); }
size_t Index(const VT& i) const { return Index(i, count); }
VT Index(size_t lind) const { return Index(lind, count); }
size_t Count(size_t i) const { return count[i]; }
const VT& VIndex() const { return ind; }
VT VIndex(const VT& start) const
{
VT out(ind.size());
for(size_t i = 0; i < ind.size(); i++) out[i] = ind[i] + start[i];
return out;
}
const auto& Count() const { return count; }
size_t N() const
{
size_t out = 1;
for(size_t i = 0; i < count.size(); i++) out *= count[i];
return out;
}
};
public:
enum class AttType
{
UNDEF,
INT,
UINT,
REAL,
STRING,
BOOL
};
enum class VarType
{
UNDEF,
FLOAT,
DOUBLE,
INT1,
INT2,
INT4,
INT8,
UINT1
};
protected:
template<VarType VT, class Dummy = void> struct VarType2Type;
template<class Dummy> struct VarType2Type<VarType::FLOAT, Dummy>
{
using type = float;
};
template<class Dummy> struct VarType2Type<VarType::DOUBLE, Dummy>
{
using type = double;
};
template<class Dummy> struct VarType2Type<VarType::INT1, Dummy>
{
using type = int1;
};
template<class Dummy> struct VarType2Type<VarType::INT2, Dummy>
{
using type = int2;
};
template<class Dummy> struct VarType2Type<VarType::INT4, Dummy>
{
using type = int4;
};
template<class Dummy> struct VarType2Type<VarType::INT8, Dummy>
{
using type = int8;
};
template<class Dummy> struct VarType2Type<VarType::UINT1, Dummy>
{
using type = uint1;
};
template<VarType VT> using Type = VarType2Type<VT>::type;
static constexpr size_t SizeOf(VarType vt)
{
switch(vt)
{
case(VarType::UNDEF): return 0;
case(VarType::FLOAT): return sizeof(Type<VarType::FLOAT>);
case(VarType::DOUBLE): return sizeof(Type<VarType::DOUBLE>);
case(VarType::INT1): return sizeof(Type<VarType::INT1>);
case(VarType::INT2): return sizeof(Type<VarType::INT2>);
case(VarType::INT4): return sizeof(Type<VarType::INT4>);
case(VarType::INT8): return sizeof(Type<VarType::INT8>);
case(VarType::UINT1): return sizeof(Type<VarType::UINT1>);
}
return 0;
}
template<class T> static size_t FindInd(const MString& name, const std::vector<T>& arr)
{
for(size_t i = 0; i < arr.size(); i++)
if(arr[i].Name() == name) return i;
return arr.size();
}
class Attribute: public AttVT
{
MString name;
public:
Attribute(const MString& n, AttVT&& v): AttVT(std::move(v)), name(n) {}
Attribute(const std::string& n, AttVT&& v): AttVT(std::move(v)), name(n.c_str(), n.size()) {}
const MString& Name() const { return name; }
AttType Type() const
{
if(std::holds_alternative<int8>(*this))
return AttType::INT;
else if(std::holds_alternative<uint8>(*this))
return AttType::UINT;
else if(std::holds_alternative<double>(*this))
return AttType::REAL;
else if(std::holds_alternative<MString>(*this))
return AttType::STRING;
else if(std::holds_alternative<bool>(*this))
return AttType::BOOL;
return AttType::UNDEF;
}
int8 I() const
{
if(std::holds_alternative<int8>(*this))
return std::get<int8>(*this);
else if(std::holds_alternative<uint8>(*this))
return int_cast<int8>(std::get<uint8>(*this));
else if(std::holds_alternative<double>(*this))
return static_cast<int8>(std::get<double>(*this));
else if(std::holds_alternative<MString>(*this))
return std::get<MString>(*this).ToInteger<int8>();
else if(std::holds_alternative<bool>(*this))
return std::get<bool>(*this) ? 1 : 0;
return 0;
}
uint8 U() const
{
if(std::holds_alternative<int8>(*this))
return int_cast<uint8>(std::get<int8>(*this));
else if(std::holds_alternative<uint8>(*this))
return std::get<uint8>(*this);
else if(std::holds_alternative<double>(*this))
return static_cast<uint8>(std::get<double>(*this));
else if(std::holds_alternative<MString>(*this))
return std::get<MString>(*this).ToInteger<uint8>();
else if(std::holds_alternative<bool>(*this))
return std::get<bool>(*this) ? 1 : 0;
return 0;
}
double D() const
{
if(std::holds_alternative<int8>(*this))
return std::get<int8>(*this);
else if(std::holds_alternative<uint8>(*this))
return std::get<uint8>(*this);
else if(std::holds_alternative<double>(*this))
return std::get<double>(*this);
else if(std::holds_alternative<MString>(*this))
return michlib_internal::RealType<sizeof(double)>::String2Real(std::get<MString>(*this).Buf());
else if(std::holds_alternative<bool>(*this))
return std::get<bool>(*this) ? 1 : 0;
return 0;
}
MString S() const
{
if(std::holds_alternative<int8>(*this))
return MString().FromInt(std::get<int8>(*this));
else if(std::holds_alternative<uint8>(*this))
return MString().FromUInt(std::get<uint8>(*this));
else if(std::holds_alternative<double>(*this))
return MString().FromReal(std::get<double>(*this));
else if(std::holds_alternative<MString>(*this))
return std::get<MString>(*this);
else if(std::holds_alternative<bool>(*this))
return MString().FromBool(std::get<bool>(*this));
return "";
}
bool B() const
{
if(std::holds_alternative<int8>(*this))
return std::get<int8>(*this) != 0;
else if(std::holds_alternative<uint8>(*this))
return std::get<uint8>(*this) != 0;
else if(std::holds_alternative<double>(*this))
return std::get<double>(*this) != 0.0;
else if(std::holds_alternative<MString>(*this))
return std::get<MString>(*this).ToBool();
else if(std::holds_alternative<bool>(*this))
return std::get<bool>(*this);
return false;
}
};
class Dimension
{
MString name;
size_t size;
public:
Dimension(const MString& str, size_t num): name(str), size(num) {}
const MString& Name() const { return name; }
size_t Size() const { return size; }
};
class Variable
{
public:
using FillType = std::variant<std::monostate, int8, uint8, double>;
private:
MString name;
VarType type = VarType::UNDEF;
std::vector<size_t> dims;
std::vector<Attribute> atts;
FillType fill;
public:
Variable(const MString& name_, VarType type_, std::vector<size_t>&& dims_, std::vector<Attribute>&& atts_, FillType fill_ = 0):
name(name_), type(type_), dims(std::move(dims_)), atts(std::move(atts_)), fill(fill_)
{
}
explicit operator bool() const { return type != VarType::UNDEF; }
const auto& Dims() const { return dims; }
size_t NDim() const { return dims.size(); }
size_t NAtt() const { return atts.size(); }
auto AttNames() const
{
std::vector<MString> out;
std::transform(atts.cbegin(), atts.cend(), std::back_inserter(out), [](const Attribute& a) { return a.Name(); });
return out;
}
AttType AttT(const MString& name) const
{
size_t ind = FindInd(name, atts);
return ind < atts.size() ? atts[ind].Type() : AttType::UNDEF;
}
int8 AttInt(const MString& name) const
{
size_t ind = FindInd(name, atts);
return ind < atts.size() ? atts[ind].I() : 0;
}
uint8 AttUInt(const MString& name) const
{
size_t ind = FindInd(name, atts);
return ind < atts.size() ? atts[ind].U() : 0;
}
double AttReal(const MString& name) const
{
size_t ind = FindInd(name, atts);
return ind < atts.size() ? atts[ind].D() : 0.0;
}
MString AttString(const MString& name) const
{
size_t ind = FindInd(name, atts);
return ind < atts.size() ? atts[ind].S() : MString();
}
bool AttBool(const MString& name) const
{
size_t ind = FindInd(name, atts);
return ind < atts.size() ? atts[ind].B() : false;
}
const MString& Name() const { return name; }
auto Type() const { return type; }
const auto& Fill() const { return fill; }
};
protected:
std::vector<Attribute> gats;
std::vector<Dimension> dims;
std::vector<Variable> vars;
public:
operator bool() const { return !vars.empty(); }
size_t NDim() const { return dims.size(); }
size_t NDim(const MString& var) const
{
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].NDim() : 0;
}
size_t NAtt() const { return gats.size(); }
auto AttNames() const
{
std::vector<MString> out;
std::transform(gats.cbegin(), gats.cend(), std::back_inserter(out), [](const Attribute& a) { return a.Name(); });
return out;
}
size_t NAtt(const MString& var) const
{
if(!var.Exist()) return NAtt();
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].NAtt() : 0;
}
auto AttNames(const MString& var) const
{
if(!var.Exist()) return AttNames();
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].AttNames() : decltype(AttNames())();
}
auto VarNames() const
{
std::vector<MString> out;
std::transform(vars.cbegin(), vars.cend(), std::back_inserter(out), [](const Variable& v) { return v.Name(); });
return out;
}
VarType VarT(const MString& var) const
{
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].Type() : VarType::UNDEF;
}
auto VarFill(const MString& var) const
{
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].Fill() : Variable::FillType();
}
auto DimNames() const
{
std::vector<MString> out;
std::transform(dims.cbegin(), dims.cend(), std::back_inserter(out), [](const Dimension& d) { return d.Name(); });
return out;
}
auto DimNames(const MString& var) const
{
size_t ind = FindInd(var, vars);
std::vector<MString> out;
if(ind >= vars.size()) return out;
auto vdims = vars[ind].Dims();
std::transform(vdims.cbegin(), vdims.cend(), std::back_inserter(out), [&dims = std::as_const(dims)](const size_t& i) { return dims[i].Name(); });
return out;
}
size_t DimSize(const MString& dim) const
{
size_t ind = FindInd(dim, dims);
return ind < dims.size() ? dims[ind].Size() : 0;
}
AttType AttT(const MString& var, const MString& name) const
{
if(!var.Exist())
{
size_t ind = FindInd(name, gats);
return ind < gats.size() ? gats[ind].Type() : AttType::UNDEF;
}
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].AttT(name) : AttType::UNDEF;
}
int8 AttInt(const MString& var, const MString& name) const
{
if(!var.Exist())
{
size_t ind = FindInd(name, gats);
return ind < gats.size() ? gats[ind].I() : 0;
}
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].AttInt(name) : 0;
}
uint8 AttUInt(const MString& var, const MString& name) const
{
if(!var.Exist())
{
size_t ind = FindInd(name, gats);
return ind < gats.size() ? gats[ind].U() : 0;
}
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].AttUInt(name) : 0;
}
double AttReal(const MString& var, const MString& name) const
{
if(!var.Exist())
{
size_t ind = FindInd(name, gats);
return ind < gats.size() ? gats[ind].D() : 0.0;
}
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].AttReal(name) : 0.0;
}
MString AttString(const MString& var, const MString& name) const
{
if(!var.Exist())
{
size_t ind = FindInd(name, gats);
return ind < gats.size() ? gats[ind].S() : MString();
}
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].AttString(name) : MString();
}
bool AttBool(const MString& var, const MString& name) const
{
if(!var.Exist())
{
size_t ind = FindInd(name, gats);
return ind < gats.size() ? gats[ind].B() : false;
}
size_t ind = FindInd(var, vars);
return ind < vars.size() ? vars[ind].AttBool(name) : false;
}
auto AttT(const MString& name) const { return AttT("", name); }
auto AttInt(const MString& name) const { return AttInt("", name); }
auto AttUInt(const MString& name) const { return AttUInt("", name); }
auto AttReal(const MString& name) const { return AttReal("", name); }
auto AttString(const MString& name) const { return AttString("", name); }
auto AttBool(const MString& name) const { return AttBool("", name); }
bool HasDim(const MString& name) const { return FindInd(name, dims) < dims.size(); }
bool HasVar(const MString& name) const { return FindInd(name, vars) < vars.size(); }
bool HasAtt(const MString& vname, const MString& aname) const { return AttT(vname, aname) != AttType::UNDEF; }
bool HasAtt(const MString& aname) const { return AttT(aname) != AttType::UNDEF; }
};
class DimReqDef
{
protected:
struct DimReq
{
static const auto fill = std::numeric_limits<size_t>::max();
MString name;
size_t beg, count;
DimReq(): name(MString()), beg(fill), count(fill) {}
DimReq(const char* n): name(n), beg(fill), count(fill) {}
DimReq(const MString& n): name(n), beg(fill), count(fill) {}
DimReq(MString&& n): name(std::move(n)), beg(fill), count(fill) {}
DimReq(const char* n, size_t s): name(n), beg(s), count(fill) {}
DimReq(const MString& n, size_t s): name(n), beg(s), count(fill) {}
DimReq(MString&& n, size_t s): name(std::move(n)), beg(s), count(fill) {}
DimReq(const char* n, size_t s, size_t c): name(n), beg(s), count(c) {}
DimReq(const MString& n, size_t s, size_t c): name(n), beg(s), count(c) {}
DimReq(MString&& n, size_t s, size_t c): name(std::move(n)), beg(s), count(c) {}
const MString& Name() const { return name; }
};
};
template<class C> class NcZarrRead: public C, public DimReqDef
{
template<class Data> static constexpr size_t Dimensionity()
{
if constexpr(requires(Data& d) { d(0, 0, 0, 0); }) return 4;
if constexpr(requires(Data& d) { d(0, 0, 0); }) return 3;
if constexpr(requires(Data& d) { d(0, 0); }) return 2;
if constexpr(requires(Data& d) { d(0); }) return 1;
return 0;
}
template<class Data, size_t D, class Dummy = void> struct DataTypeExtractorS;
template<class Data, class Dummy> struct DataTypeExtractorS<Data, 1, Dummy>
{
using type = std::decay_t<decltype(std::declval<Data>()(0))>;
};
template<class Data, class Dummy> struct DataTypeExtractorS<Data, 2, Dummy>
{
using type = std::decay_t<decltype(std::declval<Data>()(0, 0))>;
};
template<class Data, class Dummy> struct DataTypeExtractorS<Data, 3, Dummy>
{
using type = std::decay_t<decltype(std::declval<Data>()(0, 0, 0))>;
};
template<class Data, class Dummy> struct DataTypeExtractorS<Data, 4, Dummy>
{
using type = std::decay_t<decltype(std::declval<Data>()(0, 0, 0, 0))>;
};
template<class Data> using DataTypeExtractor = DataTypeExtractorS<Data, Dimensionity<Data>()>::type;
template<class VType, class Data, class Transform>
Error Read(const MString& vname, const std::vector<size_t>& transindex, Data& data, Transform transform, std::vector<DimReq> reqs) const
{
size_t nval = 1;
for(const auto& r: reqs) nval *= r.count;
const size_t indim = reqs.size();
constexpr size_t outdim = Dimensionity<Data>();
std::vector<size_t> start;
std::vector<size_t> count;
start.resize(indim);
count.resize(indim);
for(size_t i = 0; i < indim; i++)
{
start[i] = reqs[i].beg;
count[i] = reqs[i].count;
}
using DataType = DataTypeExtractor<Data>;
DataType fillout;
bool havefill = C::VarFill(vname).index() > 0;
VType fillin = std::visit(
[](auto v)
{
if constexpr(std::is_convertible_v<decltype(v), VType>)
return static_cast<VType>(v);
else
return std::numeric_limits<VType>::max();
},
C::VarFill(vname));
if constexpr(requires(Data& d) { // Data have own fillvalue
{
d.Fillval()
} -> std::convertible_to<DataType>;
})
fillout = data.Fillval();
else // Data does'nt have own fillvalue, using variable fillvalue
fillout = static_cast<DataType>(fillin);
auto ret = C::template Read<VType>(vname, start.data(), count.data());
if(!ret) return ret;
const auto& rawdata = ret.Value();
std::vector<size_t> mul(indim, 1);
for(size_t i = indim - 1; i > 0; i--) mul[i - 1] = mul[i] * count[i];
size_t inind = 0;
for(typename C::ArrCounter i(count); i; ++i)
{
// TODO: Remove this testing block
size_t cind = 0;
for(size_t j = 0; j < indim; j++) cind += i[j] * mul[j];
if(cind != inind) return {"NcZarrRead::Read", "Internal error"};
if(i.Index() != inind) return {"NcZarrRead::Read", "Internal error"};
if(inind != i.Index(i.Index(inind, count), count)) return {"NcZarrRead::Read", "Internal error"};
DataType out;
const VType& in = rawdata(inind);
if(havefill && in == fillin)
out = fillout;
else
out = transform(in);
if constexpr(outdim == 1)
data(i[transindex[0]]) = out;
else if constexpr(outdim == 2)
data(i[transindex[0]], i[transindex[1]]) = out;
else if constexpr(outdim == 3)
data(i[transindex[0]], i[transindex[1]], i[transindex[2]]) = out;
else if constexpr(outdim == 4)
data(i[transindex[0]], i[transindex[1]], i[transindex[2]], i[transindex[3]]) = out;
inind++;
}
michlib::message("Variable " + vname + ", request size " + nval);
for(const auto& r: reqs) michlib::message(r.name + " from " + r.beg + ", count " + r.count);
return Error();
}
public:
// Request is string
template<class Data, class Transform> Error Read(const MString& vname, Data& data, Transform transform, const char* request) const
{
return Read(vname, data, transform, MString(request));
}
// Request by one dimension
template<class Data, class Transform> Error Read(const MString& vname, Data& data, Transform transform, DimReq&& req1) const
{
return Read(vname, data, transform, std::vector<DimReq>{std::move(req1)});
}
// Request by two dimension
template<class Data, class Transform> Error Read(const MString& vname, Data& data, Transform transform, DimReq&& req1, DimReq&& req2) const
{
return Read(vname, data, transform, std::vector<DimReq>{std::move(req1), std::move(req2)});
}
// Request by three dimension
template<class Data, class Transform> Error Read(const MString& vname, Data& data, Transform transform, DimReq&& req1, DimReq&& req2, DimReq&& req3) const
{
return Read(vname, data, transform, std::vector<DimReq>{std::move(req1), std::move(req2), std::move(req3)});
}
// Request by four dimension
template<class Data, class Transform> Error Read(const MString& vname, Data& data, Transform transform, DimReq&& req1, DimReq&& req2, DimReq&& req3, DimReq&& req4) const
{
return Read(vname, data, transform, std::vector<DimReq>{std::move(req1), std::move(req2), std::move(req3), std::move(req4)});
}
// Request full variable
template<class Data, class Transform> Error Read(const MString& vname, Data& data, Transform transform) const
{
static const MString pref = "NcZarrRead::Read";
if(!C::HasVar(vname)) return {pref, "Variable " + vname + " not found"};
std::vector<struct DimReq> pdims;
const auto vdims = C::DimNames(vname);
std::transform(
vdims.cbegin(), vdims.cend(), std::back_inserter(pdims), [this](const MString& n) -> struct DimReq {
return {n, 0, C::DimSize(n)};
});
return Read(vname, data, transform, pdims);
}
// Base function for all Read's
template<class Data, class Transform> Error Read(const MString& vname, Data& data, Transform transform, std::vector<DimReq> reqs) const
{
static const MString pref = "NcZarrRead::Read";
if(!C::HasVar(vname)) return {pref, "Variable " + vname + " not found"};
std::vector<struct DimReq> pdims;
{
const auto vdims = C::DimNames(vname);
std::transform(
vdims.cbegin(), vdims.cend(), std::back_inserter(pdims), [](const MString& n) -> struct DimReq {
return {n, 0, 1};
});
}
std::vector<size_t> transindex;
// Parse request
if(reqs.size() == 0) return {pref, "Empty request"};
for(const auto& req: reqs)
{
size_t ind = C::FindInd(req.name, pdims);
if(ind >= pdims.size()) return {pref, "Variable " + vname + " has no dimension " + req.name};
for(size_t i = 0; i < transindex.size(); i++)
if(transindex[i] == ind) return {pref, "Parameters for dimension " + req.name + " already defined"};
transindex.push_back(ind);
size_t dlen = C::DimSize(pdims[ind].name);
if(req.beg == req.fill && req.count == req.fill) // Only name, so, we request full length
{
pdims[ind].beg = 0;
pdims[ind].count = dlen;
}
else if(req.count == req.fill) // Name and first index
{
pdims[ind].beg = req.beg;
pdims[ind].count = 1;
}
else // Name, first index, count
{
pdims[ind].beg = req.beg;
pdims[ind].count = req.count;
}
// Sanity checks
if(pdims[ind].count <= 0) return {pref, "Error parsing request: count must be greter then zero"};
if(pdims[ind].beg >= dlen) return {pref, MString("Error parsing request: start index ") + pdims[ind].beg + " must be lesser then " + pdims[ind].name + " size " + dlen};
if(pdims[ind].beg + pdims[ind].count > dlen)
return {pref, MString("Error parsing request: start index ") + pdims[ind].beg + " with count " + pdims[ind].count + " exceeds " + pdims[ind].name + " size " + dlen};
}
if(transindex.size() != Dimensionity<Data>()) return {pref, "Output data dimensions not correspondind request dimensions"};
switch(C::VarT(vname))
{
case(C::VarType::UNDEF): return {pref, "No variable with name " + vname + " (impossible)"};
case(C::VarType::FLOAT): return Read<typename C::template Type<C::VarType::FLOAT>>(vname, transindex, data, transform, pdims);
case(C::VarType::DOUBLE): return Read<typename C::template Type<C::VarType::DOUBLE>>(vname, transindex, data, transform, pdims);
case(C::VarType::INT1): return Read<typename C::template Type<C::VarType::INT1>>(vname, transindex, data, transform, pdims);
case(C::VarType::INT2): return Read<typename C::template Type<C::VarType::INT2>>(vname, transindex, data, transform, pdims);
case(C::VarType::INT4): return Read<typename C::template Type<C::VarType::INT4>>(vname, transindex, data, transform, pdims);
case(C::VarType::INT8): return Read<typename C::template Type<C::VarType::INT8>>(vname, transindex, data, transform, pdims);
case(C::VarType::UINT1): return Read<typename C::template Type<C::VarType::UINT1>>(vname, transindex, data, transform, pdims);
}
return {pref, "Internal error (impossible)"};
}
// Request by string argument
template<class Data, class Transform> Error Read(const MString& vname, Data& data, Transform transform, const MString& request) const
{
static const MString pref = "NcZarrRead::Read";
std::vector<struct DimReq> pdims;
// Parse request
const auto dimdesc = request.Split(";, \t");
if(dimdesc.size() == 0) return {pref, "Empty request"};
for(const auto& dd: dimdesc)
{
const auto dimpar = dd.Split(":", true);
if(dimpar.size() == 1) // Only name, so, we request full length
pdims.emplace_back(dimpar[0]);
else if(dimpar.size() == 2) // Name and first index
pdims.emplace_back(dimpar[0], dimpar[1].ToInteger<size_t>());
else if(dimpar.size() == 3) // Name, first index, count
pdims.emplace_back(dimpar[0], dimpar[1].ToInteger<size_t>(), dimpar[2].ToInteger<size_t>());
else
return {pref, "Can't parse expression " + dd};
}
return Read(vname, data, transform, pdims);
}
};

174
include/zarr.h

@ -0,0 +1,174 @@
#pragma once
#include "GPL.h"
#include "cache.h"
#include "curlfuncs.h"
#include "nczarrcommon.h"
#include <json/json.h>
#include <variant>
class ZarrTypes: public NcZarrTypes
{
protected:
template<class VType> class ReadedData
{
//public:
using Vec = std::vector<size_t>;
private:
Vec start, chunkstart;
ArrCounter mainind, chunkind, inchunkind;
std::vector<std::unique_ptr<VType[]>> data;
public:
ReadedData():mainind(Vec()),chunkind(Vec()),inchunkind(Vec()){}
ReadedData(size_t N, const size_t* start, const size_t* count, const size_t* csize, std::vector<std::unique_ptr<VType[]>>&& d):
start(start, start + N),
chunkstart(
[](size_t N, const size_t* st, const size_t* cs)
{
Vec out(N);
for(size_t i = 0; i < N; i++) out[i] = st[i] / cs[i];
return out;
}(N, start, csize)),
mainind(Vec(count, count + N)),
chunkind(
[](size_t N, const size_t* st, const size_t* cn, const size_t* cs)
{
Vec out(N);
for(size_t i = 0; i < N; i++) out[i] = (st[i] + cn[i]) / cs[i] - st[i] / cs[i] + 1;
return out;
}(N, start, count, csize)),
inchunkind(Vec(csize, csize + N)),
data(std::move(d))
{
}
VType operator()(size_t lini) const
{
Vec ind = mainind.Index(lini, mainind.Count());
Vec cind(ind.size()), inind(ind.size());
for(size_t i = 0; i < ind.size(); i++)
{
cind[i] = (ind[i] + start[i]) / inchunkind.Count(i) - chunkstart[i]; // indes of chunk
inind[i] = (ind[i] + start[i]) % inchunkind.Count(i); // index inside chunk
}
size_t chunk = chunkind.Index(cind);
size_t inside = inchunkind.Index(inind);
return data[chunk][inside];
}
};
private:
// Create attribute from json value
static AttVT CreateAtt(const Json::Value& val)
{
if(val.type() == Json::intValue) return AttVT{std::in_place_type<int8>, val.asInt64()};
if(val.type() == Json::uintValue) return AttVT(std::in_place_type<uint8>, val.asUInt64());
if(val.type() == Json::realValue) return AttVT(std::in_place_type<double>, val.asDouble());
if(val.type() == Json::stringValue)
{
auto str = val.asString();
return AttVT(std::in_place_type<MString>, MString(str.c_str(), str.size()));
}
if(val.type() == Json::booleanValue) return AttVT(std::in_place_type<bool>, val.asBool());
return AttVT();
}
public:
// Read attributes from .zattrs
static auto ReadAtts(const Json::Value& obj)
{
std::vector<Attribute> out;
if(obj.type() != Json::objectValue) return out;
const auto keys = obj.getMemberNames();
for(const auto& key: keys)
if(key != "_ARRAY_DIMENSIONS") out.emplace_back(key, CreateAtt(obj[key]));
return out;
}
};
class ZarrFunctions: public ZarrTypes
{
std::unique_ptr<GenericCache> cache;
CURLRAII chandle;
MString url;
std::vector<std::vector<size_t>> chunks;
// Find variable names in metadata
static std::vector<MString> ReadVarNames(const Json::Value& meta);
Error AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray);
protected:
ZarrFunctions()
{
auto oldprefix = michlib::GPL.UsePrefix("ZARR");
cache.reset(CreateCache(michlib::GPL.ParameterSValue("Cache", "")));
michlib::GPL.UsePrefix(oldprefix);
if(!cache)
{
michlib::errmessage("Can't init data cache");
cache.reset(new FakeCache);
}
}
template<class VType> RetVal<ReadedData<VType>> Read(const MString& var, const size_t* start, const size_t* count) const
{
using Vec = std::vector<size_t>;
size_t ind = FindInd(var, vars);
const size_t N = vars[ind].NDim();
const auto& csize = chunks[ind];
Vec chunkstart(
[](size_t N, const size_t* st, const size_t* cs)
{
Vec out(N);
for(size_t i = 0; i < N; i++) out[i] = st[i] / cs[i];
return out;
}(N, start, csize.data()));
ArrCounter chunkind(
[](size_t N, const size_t* st, const size_t* cn, const size_t* cs)
{
Vec out(N);
for(size_t i = 0; i < N; i++) out[i] = (st[i] + cn[i]) / cs[i] - st[i] / cs[i] + 1;
return out;
}(N, start, count, csize.data()));
bool havefill = vars[ind].Fill().index() > 0;
VType fill = std::visit(
[](auto v)
{
if constexpr(std::is_convertible_v<decltype(v), VType>)
return static_cast<VType>(v);
else
return std::numeric_limits<VType>::max();
},
vars[ind].Fill());
std::vector<std::unique_ptr<VType[]>> cdata;
size_t chunksize = 1;
for(const auto c: csize) chunksize *= c;
cdata.resize(chunkind.N());
for(; chunkind; ++chunkind)
{
cdata[chunkind.Index()].reset(new VType[chunksize]);
auto res = GetChunk(var, chunkind.VIndex(chunkstart), chunksize, sizeof(VType), cdata[chunkind.Index()].get(),havefill?&fill:nullptr);
if(!res) return res;
}
return ReadedData<VType>(N, start, count, csize.data(), std::move(cdata));
}
Error GetChunk(const MString& var, const std::vector<size_t>& chunkind, size_t chunksize, size_t elsize, void* data, const void* fill) const;
public:
Error Open(const MString& product, const MString& dataset, bool time = true);
};
using Zarr = NcZarrRead<ZarrFunctions>;

5
src/CMakeLists.txt

@ -9,16 +9,17 @@ find_package(CURL REQUIRED)
find_package(LibXml2 REQUIRED)
find_package(SQLite3 REQUIRED)
pkg_check_modules(JSONCPP REQUIRED jsoncpp)
pkg_check_modules(BLOSC REQUIRED blosc)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
include_directories(${JSONCPP_INCLUDE_DIRS} ${LIBXML2_INCLUDE_DIRS} ${SQLite3_INCLUDE_DIRS})
include_directories(${JSONCPP_INCLUDE_DIRS} ${BLOSC_INCLUDE_DIRS} ${LIBXML2_INCLUDE_DIRS} ${SQLite3_INCLUDE_DIRS})
file(GLOB srcs CONFIGURE_DEPENDS *.cpp)
add_executable(${EXENAME} ${srcs} ${ACTIONLISTINC} ${SOURCELISTINC})
target_include_directories(${EXENAME} PRIVATE ../michlib/michlib ${CMAKE_CURRENT_BINARY_DIR}/../include)
target_link_libraries(${EXENAME} ${linker_options} ${netcdf} OpenMP::OpenMP_CXX CURL::libcurl ${JSONCPP_LINK_LIBRARIES} LibXml2::LibXml2 SQLite::SQLite3 teos)
target_link_libraries(${EXENAME} ${linker_options} ${netcdf} OpenMP::OpenMP_CXX CURL::libcurl ${JSONCPP_LINK_LIBRARIES} ${BLOSC_LINK_LIBRARIES} LibXml2::LibXml2 SQLite::SQLite3 teos)
set_target_properties(${EXENAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS ${EXENAME})

236
src/zarr.cpp

@ -0,0 +1,236 @@
#define MICHLIB_NOSOURCE
#include "zarr.h"
#include "copcat.h"
#include <blosc.h>
std::vector<MString> ZarrFunctions::ReadVarNames(const Json::Value& meta)
{
std::vector<MString> out;
if(meta.type() != Json::objectValue) return out;
const auto keys = meta.getMemberNames();
for(const auto& key: keys)
{
if(!key.ends_with("/.zarray")) continue;
const auto vname = key.substr(0, key.size() - 8);
const auto& zattr = meta[vname + "/.zattrs"];
if(!(zattr && zattr.type() == Json::objectValue)) continue;
MString name(vname.c_str(), vname.size());
bool found = false;
for(size_t id = 0; id < out.size(); id++)
if(out[id] == name)
{
found = true;
break;
}
if(!found) out.emplace_back(std::move(name));
}
return out;
}
Error ZarrFunctions::AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray)
{
static const MString pref = "Zarr::AddVar";
VarType newtype;
Variable::FillType fill;
// Checks for parameters in zarray
{
const auto& cid = zarray["compressor"]["id"];
if(!cid || cid.type() != Json::stringValue || cid.asString() != "blosc") return {pref, "Unsupported compressor: " + MString(cid.asString().c_str())};
}
{
const auto& zf = zarray["zarr_format"];
if(!zf || (zf.type() != Json::uintValue && zf.type() != Json::intValue) || zf.asUInt() != 2) return {pref, "Unsupported format version: " + MString(zf.asUInt())};
}
{
const auto& ord = zarray["order"];
if(!ord || ord.type() != Json::stringValue || ord.asString() != "C") return {pref, "Order in not C"};
}
{
const auto& f = zarray["filters"];
if(f.type() != Json::nullValue) return {pref, "Filters is not null"};
}
// Read dtype
{
const auto& dtype = zarray["dtype"];
if(!dtype || dtype.type() != Json::stringValue) return {pref, "No datatype"};
const auto str = dtype.asString();
if(str == "<f4")
newtype = VarType::FLOAT;
else if(str == "<f8")
newtype = VarType::DOUBLE;
else if(str == "|i1")
newtype = VarType::INT1;
else if(str == "|u1")
newtype = VarType::UINT1;
else if(str == "<i2")
newtype = VarType::INT2;
else if(str == "<i4")
newtype = VarType::INT4;
else if(str == "<i8")
newtype = VarType::INT8;
else
return {pref, "Unsupported datatype: " + MString(str.c_str())};
}
// Read fill_value
{
const auto& fillval = zarray["fill_value"];
if(!fillval) return {pref, "No fillval"};
if(fillval.type() == Json::uintValue)
fill = fillval.asUInt64();
else if(fillval.type() == Json::intValue)
fill = fillval.asInt64();
else if(fillval.type() == Json::realValue)
fill = fillval.asDouble();
else if(fillval.type() == Json::stringValue && fillval.asString() == "NaN")
fill = NAN;
}
// Read attributes
auto atts = ReadAtts(zattrs);
std::vector<MString> dnames;
std::vector<size_t> dsizes;
std::vector<size_t> csizes;
std::vector<size_t> dids;
// Read dimensions names
{
const auto& arrdim = zattrs["_ARRAY_DIMENSIONS"];
if(!(arrdim && arrdim.type() == Json::arrayValue)) return {pref, "_ARRAY_DIMENSIONS not found"};
for(Json::ArrayIndex i = 0; i < arrdim.size(); i++)
if(const auto& dim = arrdim[i]; dim.type() == Json::stringValue)
{
const auto val = dim.asString();
dnames.emplace_back(val.c_str(), val.size());
}
}
// Read dimensions sizes
{
const auto& shape = zarray["shape"];
if(!(shape && shape.type() == Json::arrayValue)) return {pref, "shape not found"};
for(Json::ArrayIndex i = 0; i < shape.size(); i++)
if(const auto& s = shape[i]; s.type() == Json::uintValue || s.type() == Json::intValue) dsizes.push_back(s.asUInt());
}
// Read chunk sizes
{
const auto& chunk = zarray["chunks"];
if(!(chunk && chunk.type() == Json::arrayValue)) return {pref, "chunks not found"};
for(Json::ArrayIndex i = 0; i < chunk.size(); i++)
if(const auto& c = chunk[i]; c.type() == Json::uintValue || c.type() == Json::intValue) csizes.push_back(c.asUInt());
}
if(dnames.size() != dsizes.size() || dnames.size() != csizes.size()) return {pref, "shape and chunks are in contradiction"};
dids.resize(dnames.size());
// Check dimensions names and sizes
for(size_t i = 0; i < dnames.size(); i++)
{
bool found = false;
for(size_t id = 0; id < dims.size(); id++)
if(dims[id].Name() == dnames[i])
{
found = true;
if(dims[id].Size() != dsizes[i])
return {pref, "According to previous data, the dimension " + dnames[i] + " has a size of " + dims[id].Size() + ", but here it is defined as " + dsizes[i]};
dids[i] = id;
break;
}
if(!found)
{
dids[i] = dims.size();
dims.emplace_back(dnames[i], dsizes[i]);
}
}
vars.emplace_back(name, newtype, std::move(dids), std::move(atts), fill);
chunks.push_back(std::move(csizes));
return Error();
}
Error ZarrFunctions::GetChunk(const MString& var, const std::vector<size_t>& chunkind, size_t chunksize, size_t elsize, void* data, const void* fill) const
{
static const MString pref = "Zarr::GetChunk";
MString str = url + "/" + var + "/";
for(size_t i = 0; i < chunkind.size(); i++) str += (i == 0 ? "" : ".") + MString(chunkind[i]);
auto [content, suc] = cache->Get(str);
if(!suc)
{
michlib::message(str + " not found in cache, downloading");
auto [out, res] = GetUrl(chandle, str);
if(res != CURLE_OK) return Error(pref, MString("can't download chunk: ") + chandle.Err());
long respcode;
curl_easy_getinfo(chandle, CURLINFO_RESPONSE_CODE, &respcode);
michlib::message("Response: ", respcode);
if(respcode == 403) out = ""; // Failed chunk download mean that this chunk contains only fill
cache->Put(str, out, 3600);
content = std::move(out);
}
if(content.Exist())
{
size_t nb, cb, bs;
blosc_cbuffer_sizes(content.Buf(), &nb, &cb, &bs);
if(cb != content.Len()) return Error(pref, MString("bytes download: ") + content.Len() + ", but compressed bytes " + cb);
if(nb != chunksize * elsize) return Error(pref, MString("decompressed bytes: ") + nb + ", but buffer size " + chunksize * elsize);
auto res = blosc_decompress_ctx(content.Buf(), data, chunksize * elsize, 1);
if(int_cast<size_t>(res) != chunksize * elsize) return Error(pref, MString("decompress only ") + res + " bytes of " + chunksize * elsize);
}
else
{
if(fill == nullptr) return Error(pref, MString("can't download chunk: ") + chandle.Err());
for(size_t i = 0; i < chunksize; i++) memcpy(michlib::P1(data) + i * elsize, fill, elsize);
}
return Error();
}
Error ZarrFunctions::Open(const MString& product, const MString& dataset, bool time)
{
static const MString pref = "Zarr::Open";
gats.clear();
dims.clear();
vars.clear();
CopernicusCatalog cat;
Json::Value json;
{
auto urlret = time ? cat.DatasetTimeURL(product, dataset) : cat.DatasetGeoURL(product, dataset);
if(!urlret) return urlret.Add(pref, "Can't get url for the dataset " + dataset + " of product " + product);
url = urlret.Value();
auto ret = cat.GetJSON(url + "/.zmetadata");
if(ret)
json = ret.Value();
else
return ret.Add(pref, "can't download .zmetadata");
}
const auto& meta = json["metadata"];
if(!meta) return {pref, "No \"metadata\" key in JSON data"};
if(meta[".zattrs"]) gats = ReadAtts(meta[".zattrs"]);
auto vnames = ReadVarNames(meta);
for(size_t i = 0; i < vnames.size(); i++)
{
auto err = AddVar(vnames[i], meta[(vnames[i] + "/.zattrs").Buf()], meta[(vnames[i] + "/.zarray").Buf()]);
if(!err) return err.Add(pref, "Can't init variable " + vnames[i]);
}
return Error();
}
Loading…
Cancel
Save