#define MICHLIB_NOSOURCE
#include "zarr.h"

#include "copcat.h"

#include <blosc.h>

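// Scan the consolidated metadata for variable names: every "<name>/.zarray" key that also has a
// matching "<name>/.zattrs" object contributes one (unique) variable name. An illustrative sketch
// of a consolidated ".zmetadata" layout (variable names here are hypothetical):
//   { "metadata": { ".zattrs": {...}, "thetao/.zarray": {...}, "thetao/.zattrs": {...}, ... } }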
std::vector<MString> ZarrFunctions::ReadVarNames(const Json::Value& meta)
{
   std::vector<MString> out;

   if(meta.type() != Json::objectValue) return out;
   const auto keys = meta.getMemberNames();
   for(const auto& key: keys)
   {
      if(!key.ends_with("/.zarray")) continue;
      const auto vname = key.substr(0, key.size() - 8); // strip the "/.zarray" suffix
      const auto& zattr = meta[vname + "/.zattrs"];
      if(!(zattr && zattr.type() == Json::objectValue)) continue;

      MString name(vname.c_str(), vname.size());
      bool found = false;
      for(size_t id = 0; id < out.size(); id++)
         if(out[id] == name)
         {
            found = true;
            break;
         }
      if(!found) out.emplace_back(std::move(name));
   }
   return out;
}

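// Validate one variable description (its ".zarray" and ".zattrs" objects), register any new
// dimensions it introduces, and append the variable with its data type, fill value, attributes
// and chunk sizes. Only blosc-compressed, C-ordered, unfiltered Zarr v2 arrays are accepted.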
Error ZarrFunctions::AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray)
{
   static const MString pref = "Zarr::AddVar";

   VarType newtype;
   Variable::FillType fill;

   // Check the zarray parameters: only blosc-compressed, C-ordered, unfiltered Zarr v2 arrays are supported
   {
      const auto& cid = zarray["compressor"]["id"];
      if(!cid || cid.type() != Json::stringValue || cid.asString() != "blosc") return {pref, "Unsupported compressor: " + MString(cid.asString().c_str())};
   }
   {
      const auto& zf = zarray["zarr_format"];
      if(!zf || (zf.type() != Json::uintValue && zf.type() != Json::intValue) || zf.asUInt() != 2) return {pref, "Unsupported format version: " + MString(zf.asUInt())};
   }
   {
      const auto& ord = zarray["order"];
      if(!ord || ord.type() != Json::stringValue || ord.asString() != "C") return {pref, "Order is not C"};
   }
   {
      const auto& f = zarray["filters"];
      if(f.type() != Json::nullValue) return {pref, "Filters are not null"};
   }

   // Read dtype
   {
      const auto& dtype = zarray["dtype"];
      if(!dtype || dtype.type() != Json::stringValue) return {pref, "No datatype"};
      const auto str = dtype.asString();
      if(str == "<f4")
         newtype = VarType::FLOAT;
      else if(str == "<f8")
         newtype = VarType::DOUBLE;
      else if(str == "|i1")
         newtype = VarType::INT1;
      else if(str == "|u1")
         newtype = VarType::UINT1;
      else if(str == "<i2")
         newtype = VarType::INT2;
      else if(str == "<i4")
         newtype = VarType::INT4;
      else if(str == "<i8")
         newtype = VarType::INT8;
      else
         return {pref, "Unsupported datatype: " + MString(str.c_str())};
   }

   // Read fill_value
   {
      const auto& fillval = zarray["fill_value"];
      if(!fillval) return {pref, "No fill_value"};
      if(fillval.type() == Json::uintValue)
         fill = fillval.asUInt64();
      else if(fillval.type() == Json::intValue)
         fill = fillval.asInt64();
      else if(fillval.type() == Json::realValue)
         fill = fillval.asDouble();
      else if(fillval.type() == Json::stringValue && fillval.asString() == "NaN")
         fill = NAN;
   }

   // Read attributes
   auto atts = ReadAtts(zattrs);

   std::vector<MString> dnames;
   std::vector<size_t> dsizes;
   std::vector<size_t> csizes;
   std::vector<size_t> dids;

   // Read dimension names
   {
      const auto& arrdim = zattrs["_ARRAY_DIMENSIONS"];
      if(!(arrdim && arrdim.type() == Json::arrayValue)) return {pref, "_ARRAY_DIMENSIONS not found"};
      for(Json::ArrayIndex i = 0; i < arrdim.size(); i++)
         if(const auto& dim = arrdim[i]; dim.type() == Json::stringValue)
         {
            const auto val = dim.asString();
            dnames.emplace_back(val.c_str(), val.size());
         }
   }

   // Read dimension sizes
   {
      const auto& shape = zarray["shape"];
      if(!(shape && shape.type() == Json::arrayValue)) return {pref, "shape not found"};
      for(Json::ArrayIndex i = 0; i < shape.size(); i++)
         if(const auto& s = shape[i]; s.type() == Json::uintValue || s.type() == Json::intValue) dsizes.push_back(s.asUInt());
   }

   // Read chunk sizes
   {
      const auto& chunk = zarray["chunks"];
      if(!(chunk && chunk.type() == Json::arrayValue)) return {pref, "chunks not found"};
      for(Json::ArrayIndex i = 0; i < chunk.size(); i++)
         if(const auto& c = chunk[i]; c.type() == Json::uintValue || c.type() == Json::intValue) csizes.push_back(c.asUInt());
   }

   if(dnames.size() != dsizes.size() || dnames.size() != csizes.size()) return {pref, "_ARRAY_DIMENSIONS, shape and chunks have different lengths"};

   dids.resize(dnames.size());

   // Check dimension names and sizes against already registered dimensions
   for(size_t i = 0; i < dnames.size(); i++)
   {
      bool found = false;
      for(size_t id = 0; id < dims.size(); id++)
         if(dims[id].Name() == dnames[i])
         {
            found = true;
            if(dims[id].Size() != dsizes[i])
               return {pref, "According to previous data, the dimension " + dnames[i] + " has a size of " + dims[id].Size() + ", but here it is defined as " + dsizes[i]};
            dids[i] = id;
            break;
         }
      if(!found)
      {
         dids[i] = dims.size();
         dims.emplace_back(dnames[i], dsizes[i]);
      }
   }

   vars.emplace_back(name, newtype, std::move(dids), std::move(atts), fill);
   chunks.push_back(std::move(csizes));

   return Error();
}

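// Download (or fetch from the cache) one blosc-compressed chunk of the variable "var", decompress
// it into "data" (chunksize elements of elsize bytes each), and fall back to replicating "fill"
// when the chunk does not exist on the server.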
Error ZarrFunctions::GetChunk(const MString& var, const std::vector<size_t>& chunkind, size_t chunksize, size_t elsize, void* data, const void* fill) const
{
   static const MString pref = "Zarr::GetChunk";

   // Zarr v2 chunk key: the chunk indices joined with dots, e.g. "0.3.1"
   MString str = url + "/" + var + "/";
   for(size_t i = 0; i < chunkind.size(); i++) str += (i == 0 ? "" : ".") + MString(chunkind[i]);

   auto [content, suc] = cache->Get(str);

   if(!suc)
   {
      michlib::message(str + " not found in cache, downloading");
      auto [out, res] = GetUrl(chandle, str);
      if(res != CURLE_OK) return Error(pref, MString("can't download chunk: ") + chandle.Err());
      long respcode;
      curl_easy_getinfo(chandle, CURLINFO_RESPONSE_CODE, &respcode);
      michlib::message("Response: ", respcode);
      if(respcode == 403) out = ""; // A failed chunk download means that this chunk contains only fill values
      cache->Put(str, out, 3600);
      content = std::move(out);
   }

   if(content.Exist())
   {
      size_t nb, cb, bs;
      blosc_cbuffer_sizes(content.Buf(), &nb, &cb, &bs);
      if(cb != content.Len()) return Error(pref, MString("bytes downloaded: ") + content.Len() + ", but compressed bytes: " + cb);
      if(nb != chunksize * elsize) return Error(pref, MString("decompressed bytes: ") + nb + ", but buffer size: " + chunksize * elsize);
      auto res = blosc_decompress_ctx(content.Buf(), data, chunksize * elsize, 1);
      if(int_cast<size_t>(res) != chunksize * elsize) return Error(pref, MString("decompressed only ") + res + " bytes of " + chunksize * elsize);
   }
   else
   {
      if(fill == nullptr) return Error(pref, MString("can't download chunk: ") + chandle.Err());
      for(size_t i = 0; i < chunksize; i++) memcpy(michlib::P1(data) + i * elsize, fill, elsize);
   }

   return Error();
}

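// Open a Copernicus dataset: resolve the dataset name (the first catalog entry if none is given),
// pick its time- or geo-chunked Zarr URL, download the consolidated ".zmetadata" and initialize
// the global attributes, dimensions and variables from it.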
Error ZarrFunctions::Open(const MString& product, const MString& dataset, bool time)
{
   static const MString pref = "Zarr::Open";

   // Reset any previously opened dataset
   gats.clear();
   dims.clear();
   vars.clear();

   CopernicusCatalog cat;
   Json::Value json;

   MString realdataset;
   if(!dataset.Exist())
   {
      auto dsets = cat.DatasetList(product);
      if(!dsets) return dsets.Add(pref, "Can't get default dataset of product " + product);
      realdataset = dsets.Value()[0];
   }
   else
      realdataset = dataset;

   {
      auto urlret = time ? cat.DatasetTimeURL(product, realdataset) : cat.DatasetGeoURL(product, realdataset);
      if(!urlret) return urlret.Add(pref, "Can't get URL for the dataset " + realdataset + " of product " + product);
      url = urlret.Value();

      auto ret = cat.GetJSON(url + "/.zmetadata");
      if(ret)
         json = ret.Value();
      else
         return ret.Add(pref, "Can't download .zmetadata");
   }

   const auto& meta = json["metadata"];
   if(!meta) return {pref, "No \"metadata\" key in JSON data"};

   if(meta[".zattrs"]) gats = ReadAtts(meta[".zattrs"]);
   auto vnames = ReadVarNames(meta);

   for(size_t i = 0; i < vnames.size(); i++)
   {
      auto err = AddVar(vnames[i], meta[(vnames[i] + "/.zattrs").Buf()], meta[(vnames[i] + "/.zarray").Buf()]);
      if(!err) return err.Add(pref, "Can't init variable " + vnames[i]);
   }

   return Error();
}