// Zarr access helpers: ZarrFunctions::ReadVarNames, AddVar, GetChunk and Open.
//
// NOTE(review): this text looks damaged. The whole translation unit is collapsed onto four
// physical lines, and everything that stood between '<' and '>' appears to have been removed
// (a bare `#include`, `std::vector` without an element type, `int_cast(res)`, `michlib::P1(data)`,
// and AddVar jumps from `if(str == "` straight to ` dnames;`). As written, the first physical line
// is one `#define` whose replacement list runs up to the first `//`. Restore the file from version
// control before changing any code. The summary below covers only what is still readable.
//
// ReadVarNames(meta)
//   Returns an empty list unless meta is a JSON object. Otherwise returns, without duplicates,
//   every prefix X such that the key "X/.zarray" exists and meta["X/.zattrs"] is a JSON object.
//
// AddVar(name, zattrs, zarray)
//   Rejects the variable unless zarray has compressor id "blosc", integer zarr_format equal to 2,
//   order "C" and null filters, and unless dtype is a string. The dtype decoding is in the lost
//   span; presumably newtype, fill and atts are set there - TODO confirm against the original.
//   Then reads dimension names from zattrs["_ARRAY_DIMENSIONS"] (only string elements are kept),
//   sizes from zarray["shape"] and chunk sizes from zarray["chunks"] (only integer elements are
//   kept), and fails if the three lists differ in length. Each dimension name is matched against
//   dims: an existing entry must have the same size, otherwise an error is returned; an unknown
//   name is appended to dims. Finally one entry is appended to vars and one to chunks.
//   NOTE(review): the "Unsupported compressor" / "Unsupported format version" messages call
//   asString() / asUInt() on values that have just failed the type check - verify that JsonCpp
//   cannot throw there.
//
// GetChunk(var, chunkind, chunksize, elsize, data, fill)
//   Builds the key url + "/" + var + "/" followed by the chunk indices joined with ".", and asks
//   cache for it. On a miss the key is fetched with GetUrl(chandle, ...); a result other than
//   CURLE_OK is an error, a 403 response replaces the body with an empty string, and the body is
//   stored with cache->Put(key, body, 3600) (presumably a lifetime in seconds - TODO confirm).
//   If the content exists, its blosc header must report a compressed size equal to content.Len()
//   and an uncompressed size equal to chunksize * elsize; it is then decompressed into data and
//   the returned byte count is checked. If the content does not exist, fill must be non-null and
//   its elsize bytes are copied into each of the chunksize elements of data.
//   NOTE(review): blosc_cbuffer_sizes is called without first checking that the content is at
//   least as long as a blosc header - confirm that shorter bodies cannot reach this point.
//
// Open(product, dataset, time)
//   Clears gats, dims and vars, takes the dataset URL from CopernicusCatalog (DatasetTimeURL when
//   time is true, DatasetGeoURL otherwise), stores it in url and downloads url + "/.zmetadata".
//   The JSON must contain a "metadata" key. Its ".zattrs" entry, if present, is passed to ReadAtts
//   and the result stored in gats. AddVar is then called for every name from ReadVarNames; the
//   first result for which `!err` holds is returned with added context.
//   NOTE(review): chunks is not cleared here although AddVar appends to it alongside vars -
//   confirm whether calling Open twice on the same object is supported.
#define MICHLIB_NOSOURCE #include "zarr.h" #include "copcat.h" #include std::vector ZarrFunctions::ReadVarNames(const Json::Value& meta) { std::vector out; if(meta.type() != Json::objectValue) return out; const auto keys = meta.getMemberNames(); for(const auto& key: keys) { if(!key.ends_with("/.zarray")) continue; const auto vname = key.substr(0, key.size() - 8); const auto& zattr = meta[vname + "/.zattrs"]; if(!(zattr && zattr.type() == Json::objectValue)) continue; MString name(vname.c_str(), vname.size()); bool found = false; for(size_t id = 0; id < out.size(); id++) if(out[id] == name) { found = true; break; } if(!found) out.emplace_back(std::move(name)); } return out; } Error ZarrFunctions::AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray) { static const MString pref = "Zarr::AddVar"; VarType newtype; Variable::FillType fill; // Checks for parameters in zarray { const auto& cid = zarray["compressor"]["id"]; if(!cid || cid.type() != Json::stringValue || cid.asString() != "blosc") return {pref, "Unsupported compressor: " + MString(cid.asString().c_str())}; } { const auto& zf = zarray["zarr_format"]; if(!zf || (zf.type() != Json::uintValue && zf.type() != Json::intValue) || zf.asUInt() != 2) return {pref, "Unsupported format version: " + MString(zf.asUInt())}; } { const auto& ord = zarray["order"]; if(!ord || ord.type() != Json::stringValue || ord.asString() != "C") return {pref, "Order in not C"}; } { const auto& f = zarray["filters"]; if(f.type() != Json::nullValue) return {pref, "Filters is not null"}; } // Read dtype { const auto& dtype = zarray["dtype"]; if(!dtype || dtype.type() != Json::stringValue) return {pref, "No datatype"}; const auto str = dtype.asString(); if(str == " dnames; std::vector dsizes; std::vector csizes; std::vector dids; // Read dimensions names { const auto& arrdim = zattrs["_ARRAY_DIMENSIONS"]; if(!(arrdim && arrdim.type() == Json::arrayValue)) return {pref, "_ARRAY_DIMENSIONS not found"}; 
for(Json::ArrayIndex i = 0; i < arrdim.size(); i++) if(const auto& dim = arrdim[i]; dim.type() == Json::stringValue) { const auto val = dim.asString(); dnames.emplace_back(val.c_str(), val.size()); } } // Read dimensions sizes { const auto& shape = zarray["shape"]; if(!(shape && shape.type() == Json::arrayValue)) return {pref, "shape not found"}; for(Json::ArrayIndex i = 0; i < shape.size(); i++) if(const auto& s = shape[i]; s.type() == Json::uintValue || s.type() == Json::intValue) dsizes.push_back(s.asUInt()); } // Read chunk sizes { const auto& chunk = zarray["chunks"]; if(!(chunk && chunk.type() == Json::arrayValue)) return {pref, "chunks not found"}; for(Json::ArrayIndex i = 0; i < chunk.size(); i++) if(const auto& c = chunk[i]; c.type() == Json::uintValue || c.type() == Json::intValue) csizes.push_back(c.asUInt()); } if(dnames.size() != dsizes.size() || dnames.size() != csizes.size()) return {pref, "shape and chunks are in contradiction"}; dids.resize(dnames.size()); // Check dimensions names and sizes for(size_t i = 0; i < dnames.size(); i++) { bool found = false; for(size_t id = 0; id < dims.size(); id++) if(dims[id].Name() == dnames[i]) { found = true; if(dims[id].Size() != dsizes[i]) return {pref, "According to previous data, the dimension " + dnames[i] + " has a size of " + dims[id].Size() + ", but here it is defined as " + dsizes[i]}; dids[i] = id; break; } if(!found) { dids[i] = dims.size(); dims.emplace_back(dnames[i], dsizes[i]); } } vars.emplace_back(name, newtype, std::move(dids), std::move(atts), fill); chunks.push_back(std::move(csizes)); return Error(); } Error ZarrFunctions::GetChunk(const MString& var, const std::vector& chunkind, size_t chunksize, size_t elsize, void* data, const void* fill) const { static const MString pref = "Zarr::GetChunk"; MString str = url + "/" + var + "/"; for(size_t i = 0; i < chunkind.size(); i++) str += (i == 0 ? 
"" : ".") + MString(chunkind[i]); auto [content, suc] = cache->Get(str); if(!suc) { michlib::message(str + " not found in cache, downloading"); auto [out, res] = GetUrl(chandle, str); if(res != CURLE_OK) return Error(pref, MString("can't download chunk: ") + chandle.Err()); long respcode; curl_easy_getinfo(chandle, CURLINFO_RESPONSE_CODE, &respcode); michlib::message("Response: ", respcode); if(respcode == 403) out = ""; // Failed chunk download mean that this chunk contains only fill cache->Put(str, out, 3600); content = std::move(out); } if(content.Exist()) { size_t nb, cb, bs; blosc_cbuffer_sizes(content.Buf(), &nb, &cb, &bs); if(cb != content.Len()) return Error(pref, MString("bytes download: ") + content.Len() + ", but compressed bytes " + cb); if(nb != chunksize * elsize) return Error(pref, MString("decompressed bytes: ") + nb + ", but buffer size " + chunksize * elsize); auto res = blosc_decompress_ctx(content.Buf(), data, chunksize * elsize, 1); if(int_cast(res) != chunksize * elsize) return Error(pref, MString("decompress only ") + res + " bytes of " + chunksize * elsize); } else { if(fill == nullptr) return Error(pref, MString("can't download chunk: ") + chandle.Err()); for(size_t i = 0; i < chunksize; i++) memcpy(michlib::P1(data) + i * elsize, fill, elsize); } return Error(); } Error ZarrFunctions::Open(const MString& product, const MString& dataset, bool time) { static const MString pref = "Zarr::Open"; gats.clear(); dims.clear(); vars.clear(); CopernicusCatalog cat; Json::Value json; { auto urlret = time ? 
cat.DatasetTimeURL(product, dataset) : cat.DatasetGeoURL(product, dataset); if(!urlret) return urlret.Add(pref, "Can't get url for the dataset " + dataset + " of product " + product); url = urlret.Value(); auto ret = cat.GetJSON(url + "/.zmetadata"); if(ret) json = ret.Value(); else return ret.Add(pref, "can't download .zmetadata"); } const auto& meta = json["metadata"]; if(!meta) return {pref, "No \"metadata\" key in JSON data"}; if(meta[".zattrs"]) gats = ReadAtts(meta[".zattrs"]); auto vnames = ReadVarNames(meta); for(size_t i = 0; i < vnames.size(); i++) { auto err = AddVar(vnames[i], meta[(vnames[i] + "/.zattrs").Buf()], meta[(vnames[i] + "/.zarray").Buf()]); if(!err) return err.Add(pref, "Can't init variable " + vnames[i]); } return Error(); }