You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
246 lines
7.6 KiB
246 lines
7.6 KiB
#define MICHLIB_NOSOURCE |
|
#include "zarr.h" |
|
#include "copcat.h" |
|
#include <blosc.h> |
|
|
|
// Collects the names of the variables described by consolidated Zarr metadata.
//
// A member "<name>/.zarray" contributes <name> only when "<name>/.zattrs" is present as a JSON object.
// Returns an empty list when meta itself is not a JSON object.
std::vector<MString> ZarrFunctions::ReadVarNames(const Json::Value& meta)
{
  std::vector<MString> names;

  if(meta.type() == Json::objectValue)
  {
    const std::string arrsuffix = "/.zarray";

    for(const auto& member: meta.getMemberNames())
    {
      if(!member.ends_with("/.zarray")) continue;

      // Strip the suffix to obtain the variable name
      const std::string base(member, 0, member.size() - arrsuffix.size());

      // Variables without an attribute object are ignored
      const auto& attrs = meta[base + "/.zattrs"];
      if(attrs.type() != Json::objectValue) continue;

      MString candidate(base.c_str(), base.size());

      // Append the name only if it has not been collected yet
      bool known = false;
      for(const auto& have: names)
      {
        if(have == candidate)
        {
          known = true;
          break;
        }
      }
      if(!known) names.emplace_back(std::move(candidate));
    }
  }

  return names;
}
|
|
|
Error ZarrFunctions::AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray) |
|
{ |
|
static const MString pref = "Zarr::AddVar"; |
|
|
|
VarType newtype; |
|
|
|
Variable::FillType fill; |
|
|
|
// Checks for parameters in zarray |
|
{ |
|
const auto& cid = zarray["compressor"]["id"]; |
|
if(!cid || cid.type() != Json::stringValue || cid.asString() != "blosc") return {pref, "Unsupported compressor: " + MString(cid.asString().c_str())}; |
|
} |
|
{ |
|
const auto& zf = zarray["zarr_format"]; |
|
if(!zf || (zf.type() != Json::uintValue && zf.type() != Json::intValue) || zf.asUInt() != 2) return {pref, "Unsupported format version: " + MString(zf.asUInt())}; |
|
} |
|
{ |
|
const auto& ord = zarray["order"]; |
|
if(!ord || ord.type() != Json::stringValue || ord.asString() != "C") return {pref, "Order in not C"}; |
|
} |
|
{ |
|
const auto& f = zarray["filters"]; |
|
if(f.type() != Json::nullValue) return {pref, "Filters is not null"}; |
|
} |
|
|
|
// Read dtype |
|
{ |
|
const auto& dtype = zarray["dtype"]; |
|
if(!dtype || dtype.type() != Json::stringValue) return {pref, "No datatype"}; |
|
const auto str = dtype.asString(); |
|
if(str == "<f4") |
|
newtype = VarType::FLOAT; |
|
else if(str == "<f8") |
|
newtype = VarType::DOUBLE; |
|
else if(str == "|i1") |
|
newtype = VarType::INT1; |
|
else if(str == "|u1") |
|
newtype = VarType::UINT1; |
|
else if(str == "<i2") |
|
newtype = VarType::INT2; |
|
else if(str == "<i4") |
|
newtype = VarType::INT4; |
|
else if(str == "<i8") |
|
newtype = VarType::INT8; |
|
else |
|
return {pref, "Unsupported datatype: " + MString(str.c_str())}; |
|
} |
|
|
|
// Read fill_value |
|
{ |
|
const auto& fillval = zarray["fill_value"]; |
|
if(!fillval) return {pref, "No fillval"}; |
|
if(fillval.type() == Json::uintValue) |
|
fill = fillval.asUInt64(); |
|
else if(fillval.type() == Json::intValue) |
|
fill = fillval.asInt64(); |
|
else if(fillval.type() == Json::realValue) |
|
fill = fillval.asDouble(); |
|
else if(fillval.type() == Json::stringValue && fillval.asString() == "NaN") |
|
fill = NAN; |
|
} |
|
|
|
// Read attributes |
|
auto atts = ReadAtts(zattrs); |
|
|
|
std::vector<MString> dnames; |
|
std::vector<size_t> dsizes; |
|
std::vector<size_t> csizes; |
|
std::vector<size_t> dids; |
|
|
|
// Read dimensions names |
|
{ |
|
const auto& arrdim = zattrs["_ARRAY_DIMENSIONS"]; |
|
if(!(arrdim && arrdim.type() == Json::arrayValue)) return {pref, "_ARRAY_DIMENSIONS not found"}; |
|
for(Json::ArrayIndex i = 0; i < arrdim.size(); i++) |
|
if(const auto& dim = arrdim[i]; dim.type() == Json::stringValue) |
|
{ |
|
const auto val = dim.asString(); |
|
dnames.emplace_back(val.c_str(), val.size()); |
|
} |
|
} |
|
|
|
// Read dimensions sizes |
|
{ |
|
const auto& shape = zarray["shape"]; |
|
if(!(shape && shape.type() == Json::arrayValue)) return {pref, "shape not found"}; |
|
for(Json::ArrayIndex i = 0; i < shape.size(); i++) |
|
if(const auto& s = shape[i]; s.type() == Json::uintValue || s.type() == Json::intValue) dsizes.push_back(s.asUInt()); |
|
} |
|
|
|
// Read chunk sizes |
|
{ |
|
const auto& chunk = zarray["chunks"]; |
|
if(!(chunk && chunk.type() == Json::arrayValue)) return {pref, "chunks not found"}; |
|
for(Json::ArrayIndex i = 0; i < chunk.size(); i++) |
|
if(const auto& c = chunk[i]; c.type() == Json::uintValue || c.type() == Json::intValue) csizes.push_back(c.asUInt()); |
|
} |
|
|
|
if(dnames.size() != dsizes.size() || dnames.size() != csizes.size()) return {pref, "shape and chunks are in contradiction"}; |
|
|
|
dids.resize(dnames.size()); |
|
|
|
// Check dimensions names and sizes |
|
for(size_t i = 0; i < dnames.size(); i++) |
|
{ |
|
bool found = false; |
|
for(size_t id = 0; id < dims.size(); id++) |
|
if(dims[id].Name() == dnames[i]) |
|
{ |
|
found = true; |
|
if(dims[id].Size() != dsizes[i]) |
|
return {pref, "According to previous data, the dimension " + dnames[i] + " has a size of " + dims[id].Size() + ", but here it is defined as " + dsizes[i]}; |
|
dids[i] = id; |
|
break; |
|
} |
|
if(!found) |
|
{ |
|
dids[i] = dims.size(); |
|
dims.emplace_back(dnames[i], dsizes[i]); |
|
} |
|
} |
|
|
|
vars.emplace_back(name, newtype, std::move(dids), std::move(atts), fill); |
|
chunks.push_back(std::move(csizes)); |
|
|
|
return Error(); |
|
} |
|
|
|
Error ZarrFunctions::GetChunk(const MString& var, const std::vector<size_t>& chunkind, size_t chunksize, size_t elsize, void* data, const void* fill) const |
|
{ |
|
static const MString pref = "Zarr::GetChunk"; |
|
|
|
MString str = url + "/" + var + "/"; |
|
for(size_t i = 0; i < chunkind.size(); i++) str += (i == 0 ? "" : ".") + MString(chunkind[i]); |
|
|
|
auto [content, suc] = cache->Get(str); |
|
|
|
if(!suc) |
|
{ |
|
michlib::message(str + " not found in cache, downloading"); |
|
auto [out, res] = GetUrl(chandle, str); |
|
if(res != CURLE_OK) return Error(pref, MString("can't download chunk: ") + chandle.Err()); |
|
long respcode; |
|
curl_easy_getinfo(chandle, CURLINFO_RESPONSE_CODE, &respcode); |
|
michlib::message("Response: ", respcode); |
|
if(respcode == 403) out = ""; // Failed chunk download mean that this chunk contains only fill |
|
cache->Put(str, out, 3600); |
|
content = std::move(out); |
|
} |
|
|
|
if(content.Exist()) |
|
{ |
|
size_t nb, cb, bs; |
|
blosc_cbuffer_sizes(content.Buf(), &nb, &cb, &bs); |
|
if(cb != content.Len()) return Error(pref, MString("bytes download: ") + content.Len() + ", but compressed bytes " + cb); |
|
if(nb != chunksize * elsize) return Error(pref, MString("decompressed bytes: ") + nb + ", but buffer size " + chunksize * elsize); |
|
auto res = blosc_decompress_ctx(content.Buf(), data, chunksize * elsize, 1); |
|
if(int_cast<size_t>(res) != chunksize * elsize) return Error(pref, MString("decompress only ") + res + " bytes of " + chunksize * elsize); |
|
} |
|
else |
|
{ |
|
if(fill == nullptr) return Error(pref, MString("can't download chunk: ") + chandle.Err()); |
|
for(size_t i = 0; i < chunksize; i++) memcpy(michlib::P1(data) + i * elsize, fill, elsize); |
|
} |
|
|
|
return Error(); |
|
} |
|
|
|
Error ZarrFunctions::Open(const MString& product, const MString& dataset, bool time) |
|
{ |
|
static const MString pref = "Zarr::Open"; |
|
|
|
gats.clear(); |
|
dims.clear(); |
|
vars.clear(); |
|
|
|
CopernicusCatalog cat; |
|
Json::Value json; |
|
|
|
MString realdataset; |
|
if(!dataset.Exist()) |
|
{ |
|
auto dsets = cat.DatasetList(product); |
|
if(!dsets) return dsets.Add(pref, "Can't get default dataset of product " + product); |
|
realdataset = dsets.Value()[0]; |
|
} |
|
else |
|
realdataset = dataset; |
|
|
|
{ |
|
auto urlret = time ? cat.DatasetTimeURL(product, realdataset) : cat.DatasetGeoURL(product, realdataset); |
|
if(!urlret) return urlret.Add(pref, "Can't get url for the dataset " + realdataset + " of product " + product); |
|
url = urlret.Value(); |
|
|
|
auto ret = cat.GetJSON(url + "/.zmetadata"); |
|
if(ret) |
|
json = ret.Value(); |
|
else |
|
return ret.Add(pref, "can't download .zmetadata"); |
|
} |
|
|
|
const auto& meta = json["metadata"]; |
|
if(!meta) return {pref, "No \"metadata\" key in JSON data"}; |
|
|
|
if(meta[".zattrs"]) gats = ReadAtts(meta[".zattrs"]); |
|
auto vnames = ReadVarNames(meta); |
|
|
|
for(size_t i = 0; i < vnames.size(); i++) |
|
{ |
|
auto err = AddVar(vnames[i], meta[(vnames[i] + "/.zattrs").Buf()], meta[(vnames[i] + "/.zarray").Buf()]); |
|
if(!err) return err.Add(pref, "Can't init variable " + vnames[i]); |
|
} |
|
|
|
return Error(); |
|
}
|
|
|