diff --git a/include/nczarrcommon.h b/include/nczarrcommon.h new file mode 100644 index 0000000..a1a6807 --- /dev/null +++ b/include/nczarrcommon.h @@ -0,0 +1,822 @@ +#pragma once +#include "merrors.h" +#include +#include + +using michlib::Error; +using michlib::int1; +using michlib::int2; +using michlib::int4; +using michlib::int8; +using michlib::int_cast; +using michlib::MString; +using michlib::RetVal; +using michlib::uint1; +using michlib::uint8; + +class NcZarrTypes +{ + protected: + using AttVT = std::variant; + + class ArrCounter + { + using VT = std::vector; + const VT count; + VT ind; + bool end; + + public: + static size_t Index(const VT& i, const VT& c) + { + size_t out = 0; + size_t mul = 1; + for(size_t ii = i.size(); ii != 0; ii--) + { + out += mul * i[ii - 1]; + mul *= c[ii - 1]; + } + return out; + } + + static VT Index(size_t lind, const VT& c) + { + VT out(c.size()); + + size_t j = lind; + + for(auto i = c.size(); i > 0; i--) + { + out[i - 1] = j % c[i - 1]; + j = j / c[i - 1]; + } + + return out; + } + + ArrCounter() = delete; + ArrCounter(const VT& cnt): count(cnt), ind(cnt.size(), 0), end(false) {} + + size_t operator[](size_t i) const { return ind[i]; } + + ArrCounter& operator++() + { + size_t curind = count.size(); + while(curind != 0) + { + ind[curind - 1]++; + if(ind[curind - 1] >= count[curind - 1]) + { + ind[curind - 1] = 0; + curind--; + } + else + return *this; + } + ind = count; + end = true; + return *this; + } + + explicit operator bool() const { return !end; } + + size_t Index() const { return Index(ind, count); } + size_t Index(const VT& i) const { return Index(i, count); } + VT Index(size_t lind) const { return Index(lind, count); } + + size_t Count(size_t i) const { return count[i]; } + + const VT& VIndex() const { return ind; } + VT VIndex(const VT& start) const + { + VT out(ind.size()); + for(size_t i = 0; i < ind.size(); i++) out[i] = ind[i] + start[i]; + return out; + } + + const auto& Count() const { return count; } + + 
size_t N() const + { + size_t out = 1; + for(size_t i = 0; i < count.size(); i++) out *= count[i]; + return out; + } + }; + + public: + enum class AttType + { + UNDEF, + INT, + UINT, + REAL, + STRING, + BOOL + }; + enum class VarType + { + UNDEF, + FLOAT, + DOUBLE, + INT1, + INT2, + INT4, + INT8, + UINT1 + }; + + protected: + template struct VarType2Type; + + template struct VarType2Type + { + using type = float; + }; + template struct VarType2Type + { + using type = double; + }; + template struct VarType2Type + { + using type = int1; + }; + template struct VarType2Type + { + using type = int2; + }; + template struct VarType2Type + { + using type = int4; + }; + template struct VarType2Type + { + using type = int8; + }; + template struct VarType2Type + { + using type = uint1; + }; + + template using Type = VarType2Type::type; + + static constexpr size_t SizeOf(VarType vt) + { + switch(vt) + { + case(VarType::UNDEF): return 0; + case(VarType::FLOAT): return sizeof(Type); + case(VarType::DOUBLE): return sizeof(Type); + case(VarType::INT1): return sizeof(Type); + case(VarType::INT2): return sizeof(Type); + case(VarType::INT4): return sizeof(Type); + case(VarType::INT8): return sizeof(Type); + case(VarType::UINT1): return sizeof(Type); + } + return 0; + } + + template static size_t FindInd(const MString& name, const std::vector& arr) + { + for(size_t i = 0; i < arr.size(); i++) + if(arr[i].Name() == name) return i; + return arr.size(); + } + + class Attribute: public AttVT + { + MString name; + + public: + Attribute(const MString& n, AttVT&& v): AttVT(std::move(v)), name(n) {} + Attribute(const std::string& n, AttVT&& v): AttVT(std::move(v)), name(n.c_str(), n.size()) {} + + const MString& Name() const { return name; } + + AttType Type() const + { + if(std::holds_alternative(*this)) + return AttType::INT; + else if(std::holds_alternative(*this)) + return AttType::UINT; + else if(std::holds_alternative(*this)) + return AttType::REAL; + else 
if(std::holds_alternative(*this)) + return AttType::STRING; + else if(std::holds_alternative(*this)) + return AttType::BOOL; + + return AttType::UNDEF; + } + + int8 I() const + { + if(std::holds_alternative(*this)) + return std::get(*this); + else if(std::holds_alternative(*this)) + return int_cast(std::get(*this)); + else if(std::holds_alternative(*this)) + return static_cast(std::get(*this)); + else if(std::holds_alternative(*this)) + return std::get(*this).ToInteger(); + else if(std::holds_alternative(*this)) + return std::get(*this) ? 1 : 0; + return 0; + } + + uint8 U() const + { + if(std::holds_alternative(*this)) + return int_cast(std::get(*this)); + else if(std::holds_alternative(*this)) + return std::get(*this); + else if(std::holds_alternative(*this)) + return static_cast(std::get(*this)); + else if(std::holds_alternative(*this)) + return std::get(*this).ToInteger(); + else if(std::holds_alternative(*this)) + return std::get(*this) ? 1 : 0; + return 0; + } + + double D() const + { + if(std::holds_alternative(*this)) + return std::get(*this); + else if(std::holds_alternative(*this)) + return std::get(*this); + else if(std::holds_alternative(*this)) + return std::get(*this); + else if(std::holds_alternative(*this)) + return michlib_internal::RealType::String2Real(std::get(*this).Buf()); + else if(std::holds_alternative(*this)) + return std::get(*this) ? 
1 : 0; + return 0; + } + + MString S() const + { + if(std::holds_alternative(*this)) + return MString().FromInt(std::get(*this)); + else if(std::holds_alternative(*this)) + return MString().FromUInt(std::get(*this)); + else if(std::holds_alternative(*this)) + return MString().FromReal(std::get(*this)); + else if(std::holds_alternative(*this)) + return std::get(*this); + else if(std::holds_alternative(*this)) + return MString().FromBool(std::get(*this)); + return ""; + } + + bool B() const + { + if(std::holds_alternative(*this)) + return std::get(*this) != 0; + else if(std::holds_alternative(*this)) + return std::get(*this) != 0; + else if(std::holds_alternative(*this)) + return std::get(*this) != 0.0; + else if(std::holds_alternative(*this)) + return std::get(*this).ToBool(); + else if(std::holds_alternative(*this)) + return std::get(*this); + return false; + } + }; + + class Dimension + { + MString name; + size_t size; + + public: + Dimension(const MString& str, size_t num): name(str), size(num) {} + const MString& Name() const { return name; } + + size_t Size() const { return size; } + }; + + class Variable + { + public: + using FillType = std::variant; + + private: + MString name; + VarType type = VarType::UNDEF; + std::vector dims; + std::vector atts; + FillType fill; + + public: + Variable(const MString& name_, VarType type_, std::vector&& dims_, std::vector&& atts_, FillType fill_ = 0): + name(name_), type(type_), dims(std::move(dims_)), atts(std::move(atts_)), fill(fill_) + { + } + + explicit operator bool() const { return type != VarType::UNDEF; } + + const auto& Dims() const { return dims; } + + size_t NDim() const { return dims.size(); } + + size_t NAtt() const { return atts.size(); } + + auto AttNames() const + { + std::vector out; + std::transform(atts.cbegin(), atts.cend(), std::back_inserter(out), [](const Attribute& a) { return a.Name(); }); + return out; + } + + AttType AttT(const MString& name) const + { + size_t ind = FindInd(name, atts); + return 
ind < atts.size() ? atts[ind].Type() : AttType::UNDEF; + } + + int8 AttInt(const MString& name) const + { + size_t ind = FindInd(name, atts); + return ind < atts.size() ? atts[ind].I() : 0; + } + + uint8 AttUInt(const MString& name) const + { + size_t ind = FindInd(name, atts); + return ind < atts.size() ? atts[ind].U() : 0; + } + + double AttReal(const MString& name) const + { + size_t ind = FindInd(name, atts); + return ind < atts.size() ? atts[ind].D() : 0.0; + } + + MString AttString(const MString& name) const + { + size_t ind = FindInd(name, atts); + return ind < atts.size() ? atts[ind].S() : MString(); + } + + bool AttBool(const MString& name) const + { + size_t ind = FindInd(name, atts); + return ind < atts.size() ? atts[ind].B() : false; + } + + const MString& Name() const { return name; } + + auto Type() const { return type; } + + const auto& Fill() const { return fill; } + }; + + protected: + std::vector gats; + std::vector dims; + std::vector vars; + + public: + operator bool() const { return !vars.empty(); } + + size_t NDim() const { return dims.size(); } + + size_t NDim(const MString& var) const + { + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].NDim() : 0; + } + + size_t NAtt() const { return gats.size(); } + + auto AttNames() const + { + std::vector out; + std::transform(gats.cbegin(), gats.cend(), std::back_inserter(out), [](const Attribute& a) { return a.Name(); }); + return out; + } + + size_t NAtt(const MString& var) const + { + if(!var.Exist()) return NAtt(); + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].NAtt() : 0; + } + + auto AttNames(const MString& var) const + { + if(!var.Exist()) return AttNames(); + size_t ind = FindInd(var, vars); + return ind < vars.size() ? 
vars[ind].AttNames() : decltype(AttNames())(); + } + + auto VarNames() const + { + std::vector out; + std::transform(vars.cbegin(), vars.cend(), std::back_inserter(out), [](const Variable& v) { return v.Name(); }); + return out; + } + + VarType VarT(const MString& var) const + { + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].Type() : VarType::UNDEF; + } + + auto VarFill(const MString& var) const + { + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].Fill() : Variable::FillType(); + } + + auto DimNames() const + { + std::vector out; + std::transform(dims.cbegin(), dims.cend(), std::back_inserter(out), [](const Dimension& d) { return d.Name(); }); + return out; + } + + auto DimNames(const MString& var) const + { + size_t ind = FindInd(var, vars); + + std::vector out; + if(ind >= vars.size()) return out; + + auto vdims = vars[ind].Dims(); + std::transform(vdims.cbegin(), vdims.cend(), std::back_inserter(out), [&dims = std::as_const(dims)](const size_t& i) { return dims[i].Name(); }); + return out; + } + + size_t DimSize(const MString& dim) const + { + size_t ind = FindInd(dim, dims); + return ind < dims.size() ? dims[ind].Size() : 0; + } + + AttType AttT(const MString& var, const MString& name) const + { + if(!var.Exist()) + { + size_t ind = FindInd(name, gats); + return ind < gats.size() ? gats[ind].Type() : AttType::UNDEF; + } + + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].AttT(name) : AttType::UNDEF; + } + + int8 AttInt(const MString& var, const MString& name) const + { + if(!var.Exist()) + { + size_t ind = FindInd(name, gats); + return ind < gats.size() ? gats[ind].I() : 0; + } + + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].AttInt(name) : 0; + } + + uint8 AttUInt(const MString& var, const MString& name) const + { + if(!var.Exist()) + { + size_t ind = FindInd(name, gats); + return ind < gats.size() ? 
gats[ind].U() : 0; + } + + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].AttUInt(name) : 0; + } + + double AttReal(const MString& var, const MString& name) const + { + if(!var.Exist()) + { + size_t ind = FindInd(name, gats); + return ind < gats.size() ? gats[ind].D() : 0.0; + } + + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].AttReal(name) : 0.0; + } + + MString AttString(const MString& var, const MString& name) const + { + if(!var.Exist()) + { + size_t ind = FindInd(name, gats); + return ind < gats.size() ? gats[ind].S() : MString(); + } + + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].AttString(name) : MString(); + } + + bool AttBool(const MString& var, const MString& name) const + { + if(!var.Exist()) + { + size_t ind = FindInd(name, gats); + return ind < gats.size() ? gats[ind].B() : false; + } + + size_t ind = FindInd(var, vars); + return ind < vars.size() ? vars[ind].AttBool(name) : false; + } + + auto AttT(const MString& name) const { return AttT("", name); } + auto AttInt(const MString& name) const { return AttInt("", name); } + auto AttUInt(const MString& name) const { return AttUInt("", name); } + auto AttReal(const MString& name) const { return AttReal("", name); } + auto AttString(const MString& name) const { return AttString("", name); } + auto AttBool(const MString& name) const { return AttBool("", name); } + + bool HasDim(const MString& name) const { return FindInd(name, dims) < dims.size(); } + bool HasVar(const MString& name) const { return FindInd(name, vars) < vars.size(); } + bool HasAtt(const MString& vname, const MString& aname) const { return AttT(vname, aname) != AttType::UNDEF; } + bool HasAtt(const MString& aname) const { return AttT(aname) != AttType::UNDEF; } +}; + +class DimReqDef +{ + protected: + struct DimReq + { + static const auto fill = std::numeric_limits::max(); + MString name; + size_t beg, count; + + DimReq(): name(MString()), beg(fill), 
count(fill) {} + DimReq(const char* n): name(n), beg(fill), count(fill) {} + DimReq(const MString& n): name(n), beg(fill), count(fill) {} + DimReq(MString&& n): name(std::move(n)), beg(fill), count(fill) {} + DimReq(const char* n, size_t s): name(n), beg(s), count(fill) {} + DimReq(const MString& n, size_t s): name(n), beg(s), count(fill) {} + DimReq(MString&& n, size_t s): name(std::move(n)), beg(s), count(fill) {} + DimReq(const char* n, size_t s, size_t c): name(n), beg(s), count(c) {} + DimReq(const MString& n, size_t s, size_t c): name(n), beg(s), count(c) {} + DimReq(MString&& n, size_t s, size_t c): name(std::move(n)), beg(s), count(c) {} + + const MString& Name() const { return name; } + }; +}; + +template class NcZarrRead: public C, public DimReqDef +{ + template static constexpr size_t Dimensionity() + { + if constexpr(requires(Data& d) { d(0, 0, 0, 0); }) return 4; + if constexpr(requires(Data& d) { d(0, 0, 0); }) return 3; + if constexpr(requires(Data& d) { d(0, 0); }) return 2; + if constexpr(requires(Data& d) { d(0); }) return 1; + return 0; + } + + template struct DataTypeExtractorS; + + template struct DataTypeExtractorS + { + using type = std::decay_t()(0))>; + }; + template struct DataTypeExtractorS + { + using type = std::decay_t()(0, 0))>; + }; + template struct DataTypeExtractorS + { + using type = std::decay_t()(0, 0, 0))>; + }; + template struct DataTypeExtractorS + { + using type = std::decay_t()(0, 0, 0, 0))>; + }; + + template using DataTypeExtractor = DataTypeExtractorS()>::type; + + template + Error Read(const MString& vname, const std::vector& transindex, Data& data, Transform transform, std::vector reqs) const + { + size_t nval = 1; + for(const auto& r: reqs) nval *= r.count; + const size_t indim = reqs.size(); + constexpr size_t outdim = Dimensionity(); + + std::vector start; + std::vector count; + + start.resize(indim); + count.resize(indim); + for(size_t i = 0; i < indim; i++) + { + start[i] = reqs[i].beg; + count[i] = 
reqs[i].count; + } + + using DataType = DataTypeExtractor; + DataType fillout; + bool havefill = C::VarFill(vname).index() > 0; + VType fillin = std::visit( + [](auto v) + { + if constexpr(std::is_convertible_v) + return static_cast(v); + else + return std::numeric_limits::max(); + }, + C::VarFill(vname)); + + if constexpr(requires(Data& d) { // Data have own fillvalue + { + d.Fillval() + } -> std::convertible_to; + }) + + fillout = data.Fillval(); + else // Data does'nt have own fillvalue, using variable fillvalue + fillout = static_cast(fillin); + + auto ret = C::template Read(vname, start.data(), count.data()); + if(!ret) return ret; + const auto& rawdata = ret.Value(); + + std::vector mul(indim, 1); + for(size_t i = indim - 1; i > 0; i--) mul[i - 1] = mul[i] * count[i]; + + size_t inind = 0; + for(typename C::ArrCounter i(count); i; ++i) + { + // TODO: Remove this testing block + size_t cind = 0; + for(size_t j = 0; j < indim; j++) cind += i[j] * mul[j]; + if(cind != inind) return {"NcZarrRead::Read", "Internal error"}; + if(i.Index() != inind) return {"NcZarrRead::Read", "Internal error"}; + if(inind != i.Index(i.Index(inind, count), count)) return {"NcZarrRead::Read", "Internal error"}; + + DataType out; + const VType& in = rawdata(inind); + if(havefill && in == fillin) + out = fillout; + else + out = transform(in); + + if constexpr(outdim == 1) + data(i[transindex[0]]) = out; + else if constexpr(outdim == 2) + data(i[transindex[0]], i[transindex[1]]) = out; + else if constexpr(outdim == 3) + data(i[transindex[0]], i[transindex[1]], i[transindex[2]]) = out; + else if constexpr(outdim == 4) + data(i[transindex[0]], i[transindex[1]], i[transindex[2]], i[transindex[3]]) = out; + + inind++; + } + + michlib::message("Variable " + vname + ", request size " + nval); + for(const auto& r: reqs) michlib::message(r.name + " from " + r.beg + ", count " + r.count); + return Error(); + } + + public: + // Request is string + template Error Read(const MString& vname, Data& 
data, Transform transform, const char* request) const + { + return Read(vname, data, transform, MString(request)); + } + + // Request by one dimension + template Error Read(const MString& vname, Data& data, Transform transform, DimReq&& req1) const + { + return Read(vname, data, transform, std::vector{std::move(req1)}); + } + // Request by two dimension + template Error Read(const MString& vname, Data& data, Transform transform, DimReq&& req1, DimReq&& req2) const + { + return Read(vname, data, transform, std::vector{std::move(req1), std::move(req2)}); + } + // Request by three dimension + template Error Read(const MString& vname, Data& data, Transform transform, DimReq&& req1, DimReq&& req2, DimReq&& req3) const + { + return Read(vname, data, transform, std::vector{std::move(req1), std::move(req2), std::move(req3)}); + } + // Request by four dimension + template Error Read(const MString& vname, Data& data, Transform transform, DimReq&& req1, DimReq&& req2, DimReq&& req3, DimReq&& req4) const + { + return Read(vname, data, transform, std::vector{std::move(req1), std::move(req2), std::move(req3), std::move(req4)}); + } + + // Request full variable + template Error Read(const MString& vname, Data& data, Transform transform) const + { + static const MString pref = "NcZarrRead::Read"; + if(!C::HasVar(vname)) return {pref, "Variable " + vname + " not found"}; + + std::vector pdims; + + const auto vdims = C::DimNames(vname); + std::transform( + vdims.cbegin(), vdims.cend(), std::back_inserter(pdims), [this](const MString& n) -> struct DimReq { + return {n, 0, C::DimSize(n)}; + }); + + return Read(vname, data, transform, pdims); + } + + // Base function for all Read's + template Error Read(const MString& vname, Data& data, Transform transform, std::vector reqs) const + { + static const MString pref = "NcZarrRead::Read"; + + if(!C::HasVar(vname)) return {pref, "Variable " + vname + " not found"}; + + std::vector pdims; + { + const auto vdims = C::DimNames(vname); + 
std::transform( + vdims.cbegin(), vdims.cend(), std::back_inserter(pdims), [](const MString& n) -> struct DimReq { + return {n, 0, 1}; + }); + } + + std::vector transindex; + + // Parse request + if(reqs.size() == 0) return {pref, "Empty request"}; + for(const auto& req: reqs) + { + size_t ind = C::FindInd(req.name, pdims); + if(ind >= pdims.size()) return {pref, "Variable " + vname + " has no dimension " + req.name}; + + for(size_t i = 0; i < transindex.size(); i++) + if(transindex[i] == ind) return {pref, "Parameters for dimension " + req.name + " already defined"}; + transindex.push_back(ind); + + size_t dlen = C::DimSize(pdims[ind].name); + if(req.beg == req.fill && req.count == req.fill) // Only name, so, we request full length + { + pdims[ind].beg = 0; + pdims[ind].count = dlen; + } + else if(req.count == req.fill) // Name and first index + { + pdims[ind].beg = req.beg; + pdims[ind].count = 1; + } + else // Name, first index, count + { + pdims[ind].beg = req.beg; + pdims[ind].count = req.count; + } + // Sanity checks + if(pdims[ind].count <= 0) return {pref, "Error parsing request: count must be greter then zero"}; + if(pdims[ind].beg >= dlen) return {pref, MString("Error parsing request: start index ") + pdims[ind].beg + " must be lesser then " + pdims[ind].name + " size " + dlen}; + if(pdims[ind].beg + pdims[ind].count > dlen) + return {pref, MString("Error parsing request: start index ") + pdims[ind].beg + " with count " + pdims[ind].count + " exceeds " + pdims[ind].name + " size " + dlen}; + } + + if(transindex.size() != Dimensionity()) return {pref, "Output data dimensions not correspondind request dimensions"}; + switch(C::VarT(vname)) + { + case(C::VarType::UNDEF): return {pref, "No variable with name " + vname + " (impossible)"}; + case(C::VarType::FLOAT): return Read>(vname, transindex, data, transform, pdims); + case(C::VarType::DOUBLE): return Read>(vname, transindex, data, transform, pdims); + case(C::VarType::INT1): return Read>(vname, 
transindex, data, transform, pdims); + case(C::VarType::INT2): return Read>(vname, transindex, data, transform, pdims); + case(C::VarType::INT4): return Read>(vname, transindex, data, transform, pdims); + case(C::VarType::INT8): return Read>(vname, transindex, data, transform, pdims); + case(C::VarType::UINT1): return Read>(vname, transindex, data, transform, pdims); + } + + return {pref, "Internal error (impossible)"}; + } + + // Request by string argument + template Error Read(const MString& vname, Data& data, Transform transform, const MString& request) const + { + static const MString pref = "NcZarrRead::Read"; + + std::vector pdims; + + // Parse request + const auto dimdesc = request.Split(";, \t"); + if(dimdesc.size() == 0) return {pref, "Empty request"}; + for(const auto& dd: dimdesc) + { + const auto dimpar = dd.Split(":", true); + + if(dimpar.size() == 1) // Only name, so, we request full length + pdims.emplace_back(dimpar[0]); + else if(dimpar.size() == 2) // Name and first index + pdims.emplace_back(dimpar[0], dimpar[1].ToInteger()); + else if(dimpar.size() == 3) // Name, first index, count + pdims.emplace_back(dimpar[0], dimpar[1].ToInteger(), dimpar[2].ToInteger()); + else + return {pref, "Can't parse expression " + dd}; + } + + return Read(vname, data, transform, pdims); + } +}; diff --git a/include/zarr.h b/include/zarr.h new file mode 100644 index 0000000..0f692ec --- /dev/null +++ b/include/zarr.h @@ -0,0 +1,174 @@ +#pragma once +#include "GPL.h" +#include "cache.h" +#include "curlfuncs.h" +#include "nczarrcommon.h" +#include +#include + +class ZarrTypes: public NcZarrTypes +{ + protected: + template class ReadedData + { + //public: + using Vec = std::vector; + + private: + Vec start, chunkstart; + ArrCounter mainind, chunkind, inchunkind; + std::vector> data; + + public: + ReadedData():mainind(Vec()),chunkind(Vec()),inchunkind(Vec()){} + + ReadedData(size_t N, const size_t* start, const size_t* count, const size_t* csize, std::vector>&& d): + 
start(start, start + N), + chunkstart( + [](size_t N, const size_t* st, const size_t* cs) + { + Vec out(N); + for(size_t i = 0; i < N; i++) out[i] = st[i] / cs[i]; + return out; + }(N, start, csize)), + mainind(Vec(count, count + N)), + chunkind( + [](size_t N, const size_t* st, const size_t* cn, const size_t* cs) + { + Vec out(N); + /* Number of chunks touched along each dimension: the last requested element is st + cn - 1, so the last chunk is (st + cn - 1) / cs. Must stay identical to the formula in ZarrFunctions::Read, which sizes the chunk storage indexed here. A zero count keeps the historical result of one chunk. */ + for(size_t i = 0; i < N; i++) out[i] = (st[i] + (cn[i] > 0 ? cn[i] - 1 : 0)) / cs[i] - st[i] / cs[i] + 1; + return out; + }(N, start, count, csize)), + inchunkind(Vec(csize, csize + N)), + data(std::move(d)) + { + } + + VType operator()(size_t lini) const + { + Vec ind = mainind.Index(lini, mainind.Count()); + Vec cind(ind.size()), inind(ind.size()); + + for(size_t i = 0; i < ind.size(); i++) + { + cind[i] = (ind[i] + start[i]) / inchunkind.Count(i) - chunkstart[i]; // index of chunk + inind[i] = (ind[i] + start[i]) % inchunkind.Count(i); // index inside chunk + } + size_t chunk = chunkind.Index(cind); + size_t inside = inchunkind.Index(inind); + return data[chunk][inside]; + } + }; + + private: + // Create attribute from json value + static AttVT CreateAtt(const Json::Value& val) + { + if(val.type() == Json::intValue) return AttVT{std::in_place_type, val.asInt64()}; + if(val.type() == Json::uintValue) return AttVT(std::in_place_type, val.asUInt64()); + if(val.type() == Json::realValue) return AttVT(std::in_place_type, val.asDouble()); + if(val.type() == Json::stringValue) + { + auto str = val.asString(); + return AttVT(std::in_place_type, MString(str.c_str(), str.size())); + } + if(val.type() == Json::booleanValue) return AttVT(std::in_place_type, val.asBool()); + return AttVT(); + } + + public: + // Read attributes from .zattrs + static auto ReadAtts(const Json::Value& obj) + { + std::vector out; + if(obj.type() != Json::objectValue) return out; + const auto keys = obj.getMemberNames(); + for(const auto& key: keys) + if(key != "_ARRAY_DIMENSIONS") out.emplace_back(key, CreateAtt(obj[key])); + return out; + } +}; + +class ZarrFunctions: public ZarrTypes +{ 
+ std::unique_ptr cache; + CURLRAII chandle; + MString url; + + std::vector> chunks; + + // Find variable names in metadata + static std::vector ReadVarNames(const Json::Value& meta); + + Error AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray); + + protected: + ZarrFunctions() + { + auto oldprefix = michlib::GPL.UsePrefix("ZARR"); + cache.reset(CreateCache(michlib::GPL.ParameterSValue("Cache", ""))); + michlib::GPL.UsePrefix(oldprefix); + if(!cache) + { + michlib::errmessage("Can't init data cache"); + cache.reset(new FakeCache); + } + } + + /* Download and decompress every chunk intersecting the hyperslab [start, start + count) of variable var and wrap them in a ReadedData accessor. start and count must hold one entry per dimension of the variable. Returns an Error if the variable is unknown or any chunk cannot be obtained. */ + template RetVal> Read(const MString& var, const size_t* start, const size_t* count) const + { + static const MString pref = "Zarr::Read"; + using Vec = std::vector; + + size_t ind = FindInd(var, vars); + /* FindInd returns vars.size() when the name is absent; indexing vars/chunks with it would read past the end */ + if(ind >= vars.size()) return Error(pref, "Variable " + var + " not found"); + const size_t N = vars[ind].NDim(); + const auto& csize = chunks[ind]; + Vec chunkstart( + [](size_t N, const size_t* st, const size_t* cs) + { + Vec out(N); + for(size_t i = 0; i < N; i++) out[i] = st[i] / cs[i]; + return out; + }(N, start, csize.data())); + ArrCounter chunkind( + [](size_t N, const size_t* st, const size_t* cn, const size_t* cs) + { + Vec out(N); + /* Chunks touched per dimension; the last requested element is st + cn - 1. Using (st + cn) / cs fetched one superfluous chunk whenever the request ended on a chunk boundary. Must match the formula in the ReadedData constructor. */ + for(size_t i = 0; i < N; i++) out[i] = (st[i] + (cn[i] > 0 ? cn[i] - 1 : 0)) / cs[i] - st[i] / cs[i] + 1; + return out; + }(N, start, count, csize.data())); + + bool havefill = vars[ind].Fill().index() > 0; + VType fill = std::visit( + [](auto v) + { + if constexpr(std::is_convertible_v) + return static_cast(v); + else + return std::numeric_limits::max(); + }, + vars[ind].Fill()); + + std::vector> cdata; + + size_t chunksize = 1; + for(const auto c: csize) chunksize *= c; + + cdata.resize(chunkind.N()); + + for(; chunkind; ++chunkind) + { + cdata[chunkind.Index()].reset(new VType[chunksize]); + auto res = GetChunk(var, chunkind.VIndex(chunkstart), chunksize, sizeof(VType), cdata[chunkind.Index()].get(),havefill?&fill:nullptr); + if(!res) return res; + } + + return ReadedData(N, start, count, csize.data(), std::move(cdata)); + } + + Error GetChunk(const MString& var, const std::vector& chunkind, 
size_t chunksize, size_t elsize, void* data, const void* fill) const; + + public: + Error Open(const MString& product, const MString& dataset, bool time = true); +}; + +using Zarr = NcZarrRead; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c631727..a866fb7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,16 +9,17 @@ find_package(CURL REQUIRED) find_package(LibXml2 REQUIRED) find_package(SQLite3 REQUIRED) pkg_check_modules(JSONCPP REQUIRED jsoncpp) +pkg_check_modules(BLOSC REQUIRED blosc) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -include_directories(${JSONCPP_INCLUDE_DIRS} ${LIBXML2_INCLUDE_DIRS} ${SQLite3_INCLUDE_DIRS}) +include_directories(${JSONCPP_INCLUDE_DIRS} ${BLOSC_INCLUDE_DIRS} ${LIBXML2_INCLUDE_DIRS} ${SQLite3_INCLUDE_DIRS}) file(GLOB srcs CONFIGURE_DEPENDS *.cpp) add_executable(${EXENAME} ${srcs} ${ACTIONLISTINC} ${SOURCELISTINC}) target_include_directories(${EXENAME} PRIVATE ../michlib/michlib ${CMAKE_CURRENT_BINARY_DIR}/../include) -target_link_libraries(${EXENAME} ${linker_options} ${netcdf} OpenMP::OpenMP_CXX CURL::libcurl ${JSONCPP_LINK_LIBRARIES} LibXml2::LibXml2 SQLite::SQLite3 teos) +target_link_libraries(${EXENAME} ${linker_options} ${netcdf} OpenMP::OpenMP_CXX CURL::libcurl ${JSONCPP_LINK_LIBRARIES} ${BLOSC_LINK_LIBRARIES} LibXml2::LibXml2 SQLite::SQLite3 teos) set_target_properties(${EXENAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) install(TARGETS ${EXENAME}) diff --git a/src/zarr.cpp b/src/zarr.cpp new file mode 100644 index 0000000..0ae4d06 --- /dev/null +++ b/src/zarr.cpp @@ -0,0 +1,236 @@ +#define MICHLIB_NOSOURCE +#include "zarr.h" +#include "copcat.h" +#include + +std::vector ZarrFunctions::ReadVarNames(const Json::Value& meta) +{ + std::vector out; + if(meta.type() != Json::objectValue) return out; + const auto keys = meta.getMemberNames(); + for(const auto& key: keys) + { + if(!key.ends_with("/.zarray")) continue; + const auto vname = key.substr(0, key.size() - 8); + const auto& zattr = 
meta[vname + "/.zattrs"]; + if(!(zattr && zattr.type() == Json::objectValue)) continue; + + MString name(vname.c_str(), vname.size()); + bool found = false; + for(size_t id = 0; id < out.size(); id++) + if(out[id] == name) + { + found = true; + break; + } + if(!found) out.emplace_back(std::move(name)); + } + return out; +} + +Error ZarrFunctions::AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray) +{ + static const MString pref = "Zarr::AddVar"; + + VarType newtype; + + Variable::FillType fill; + + // Checks for parameters in zarray + { + const auto& cid = zarray["compressor"]["id"]; + if(!cid || cid.type() != Json::stringValue || cid.asString() != "blosc") return {pref, "Unsupported compressor: " + MString(cid.asString().c_str())}; + } + { + const auto& zf = zarray["zarr_format"]; + if(!zf || (zf.type() != Json::uintValue && zf.type() != Json::intValue) || zf.asUInt() != 2) return {pref, "Unsupported format version: " + MString(zf.asUInt())}; + } + { + const auto& ord = zarray["order"]; + if(!ord || ord.type() != Json::stringValue || ord.asString() != "C") return {pref, "Order in not C"}; + } + { + const auto& f = zarray["filters"]; + if(f.type() != Json::nullValue) return {pref, "Filters is not null"}; + } + + // Read dtype + { + const auto& dtype = zarray["dtype"]; + if(!dtype || dtype.type() != Json::stringValue) return {pref, "No datatype"}; + const auto str = dtype.asString(); + if(str == " dnames; + std::vector dsizes; + std::vector csizes; + std::vector dids; + + // Read dimensions names + { + const auto& arrdim = zattrs["_ARRAY_DIMENSIONS"]; + if(!(arrdim && arrdim.type() == Json::arrayValue)) return {pref, "_ARRAY_DIMENSIONS not found"}; + for(Json::ArrayIndex i = 0; i < arrdim.size(); i++) + if(const auto& dim = arrdim[i]; dim.type() == Json::stringValue) + { + const auto val = dim.asString(); + dnames.emplace_back(val.c_str(), val.size()); + } + } + + // Read dimensions sizes + { + const auto& shape = zarray["shape"]; + 
if(!(shape && shape.type() == Json::arrayValue)) return {pref, "shape not found"}; + for(Json::ArrayIndex i = 0; i < shape.size(); i++) + if(const auto& s = shape[i]; s.type() == Json::uintValue || s.type() == Json::intValue) dsizes.push_back(s.asUInt()); + } + + // Read chunk sizes + { + const auto& chunk = zarray["chunks"]; + if(!(chunk && chunk.type() == Json::arrayValue)) return {pref, "chunks not found"}; + for(Json::ArrayIndex i = 0; i < chunk.size(); i++) + if(const auto& c = chunk[i]; c.type() == Json::uintValue || c.type() == Json::intValue) csizes.push_back(c.asUInt()); + } + + if(dnames.size() != dsizes.size() || dnames.size() != csizes.size()) return {pref, "shape and chunks are in contradiction"}; + + dids.resize(dnames.size()); + + // Check dimensions names and sizes + for(size_t i = 0; i < dnames.size(); i++) + { + bool found = false; + for(size_t id = 0; id < dims.size(); id++) + if(dims[id].Name() == dnames[i]) + { + found = true; + if(dims[id].Size() != dsizes[i]) + return {pref, "According to previous data, the dimension " + dnames[i] + " has a size of " + dims[id].Size() + ", but here it is defined as " + dsizes[i]}; + dids[i] = id; + break; + } + if(!found) + { + dids[i] = dims.size(); + dims.emplace_back(dnames[i], dsizes[i]); + } + } + + vars.emplace_back(name, newtype, std::move(dids), std::move(atts), fill); + chunks.push_back(std::move(csizes)); + + return Error(); +} + +Error ZarrFunctions::GetChunk(const MString& var, const std::vector& chunkind, size_t chunksize, size_t elsize, void* data, const void* fill) const +{ + static const MString pref = "Zarr::GetChunk"; + + MString str = url + "/" + var + "/"; + for(size_t i = 0; i < chunkind.size(); i++) str += (i == 0 ? 
"" : ".") + MString(chunkind[i]); + + auto [content, suc] = cache->Get(str); + + if(!suc) + { + michlib::message(str + " not found in cache, downloading"); + auto [out, res] = GetUrl(chandle, str); + if(res != CURLE_OK) return Error(pref, MString("can't download chunk: ") + chandle.Err()); + long respcode; + curl_easy_getinfo(chandle, CURLINFO_RESPONSE_CODE, &respcode); + michlib::message("Response: ", respcode); + if(respcode == 403) out = ""; // Failed chunk download mean that this chunk contains only fill + cache->Put(str, out, 3600); + content = std::move(out); + } + + if(content.Exist()) + { + size_t nb, cb, bs; + blosc_cbuffer_sizes(content.Buf(), &nb, &cb, &bs); + if(cb != content.Len()) return Error(pref, MString("bytes download: ") + content.Len() + ", but compressed bytes " + cb); + if(nb != chunksize * elsize) return Error(pref, MString("decompressed bytes: ") + nb + ", but buffer size " + chunksize * elsize); + auto res = blosc_decompress_ctx(content.Buf(), data, chunksize * elsize, 1); + if(int_cast(res) != chunksize * elsize) return Error(pref, MString("decompress only ") + res + " bytes of " + chunksize * elsize); + } + else + { + if(fill == nullptr) return Error(pref, MString("can't download chunk: ") + chandle.Err()); + for(size_t i = 0; i < chunksize; i++) memcpy(michlib::P1(data) + i * elsize, fill, elsize); + } + + return Error(); +} + +Error ZarrFunctions::Open(const MString& product, const MString& dataset, bool time) +{ + static const MString pref = "Zarr::Open"; + + gats.clear(); + dims.clear(); + vars.clear(); + + CopernicusCatalog cat; + Json::Value json; + + { + auto urlret = time ? 
cat.DatasetTimeURL(product, dataset) : cat.DatasetGeoURL(product, dataset); + if(!urlret) return urlret.Add(pref, "Can't get url for the dataset " + dataset + " of product " + product); + url = urlret.Value(); + + auto ret = cat.GetJSON(url + "/.zmetadata"); + if(ret) + json = ret.Value(); + else + return ret.Add(pref, "can't download .zmetadata"); + } + + const auto& meta = json["metadata"]; + if(!meta) return {pref, "No \"metadata\" key in JSON data"}; + + if(meta[".zattrs"]) gats = ReadAtts(meta[".zattrs"]); + auto vnames = ReadVarNames(meta); + + for(size_t i = 0; i < vnames.size(); i++) + { + auto err = AddVar(vnames[i], meta[(vnames[i] + "/.zattrs").Buf()], meta[(vnames[i] + "/.zarray").Buf()]); + if(!err) return err.Add(pref, "Can't init variable " + vnames[i]); + } + + return Error(); +}