You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

237 lines
7.3 KiB

#define MICHLIB_NOSOURCE
#include "zarr.h"
#include "copcat.h"
#include <blosc.h>
// Collects the names of the variables described in consolidated Zarr metadata.
//
// A key of the form "<name>/.zarray" contributes <name> only when the companion
// entry "<name>/.zattrs" is present and is a JSON object. Each name is returned
// once, in the order the keys are reported by getMemberNames().
// Returns an empty vector when meta is not a JSON object.
std::vector<MString> ZarrFunctions::ReadVarNames(const Json::Value& meta)
{
    static constexpr char suffix[] = "/.zarray";
    static constexpr size_t suffixlen = sizeof(suffix) - 1;

    std::vector<MString> names;
    if(meta.type() != Json::objectValue) return names;

    for(const auto& member: meta.getMemberNames())
    {
        if(!member.ends_with(suffix)) continue;

        // Strip the "/.zarray" tail to get the variable name
        const auto varname = member.substr(0, member.size() - suffixlen);

        // Variables without an attribute object are ignored
        const auto& attrs = meta[varname + "/.zattrs"];
        if(!attrs || attrs.type() != Json::objectValue) continue;

        MString candidate(varname.c_str(), varname.size());

        bool known = false;
        for(const auto& existing: names)
        {
            if(existing == candidate)
            {
                known = true;
                break;
            }
        }
        if(!known) names.emplace_back(std::move(candidate));
    }
    return names;
}
// Registers one variable described by Zarr (format 2) metadata.
//
// name   - variable name.
// zattrs - contents of "<name>/.zattrs"; must hold the "_ARRAY_DIMENSIONS" array.
// zarray - contents of "<name>/.zarray"; compressor, zarr_format, order, filters,
//          dtype, fill_value, shape and chunks are read from it.
//
// Only blosc-compressed, C-ordered, filter-less arrays of the listed dtypes are accepted.
// On success the variable is appended to vars, its chunk shape to chunks, and
// dimensions not seen before are appended to dims. On failure an Error describing
// the first problem found is returned.
// NOTE(review): dimensions added to dims before a later dimension of the same variable
// fails the size check are not rolled back.
Error ZarrFunctions::AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray)
{
    static const MString pref = "Zarr::AddVar";

    VarType            newtype;
    Variable::FillType fill;

    // JsonCpp only allows member lookup on objects (and null); reject anything else up front
    // so that malformed metadata is reported as an Error instead of a JsonCpp failure.
    if(zarray.type() != Json::objectValue) return {pref, ".zarray is not an object"};
    if(zattrs.type() != Json::objectValue) return {pref, ".zattrs is not an object"};

    // Checks for parameters in zarray
    {
        const auto& comp = zarray["compressor"];
        if(comp.type() != Json::objectValue) return {pref, "Unsupported compressor: compressor is not an object"};

        const auto& cid = comp["id"];
        if(cid.type() != Json::stringValue || cid.asString() != "blosc")
        {
            // asString() is not defined for arrays/objects, so echo the id only when it is convertible
            const std::string shown = cid.isConvertibleTo(Json::stringValue) ? cid.asString() : std::string("<not a string>");
            return {pref, "Unsupported compressor: " + MString(shown.c_str())};
        }
    }
    {
        const auto& zf    = zarray["zarr_format"];
        const bool  isint = zf.type() == Json::uintValue || zf.type() == Json::intValue;
        // isUInt() guards asUInt(), which is not defined for negative or too large values
        if(!isint || !zf.isUInt()) return {pref, "Unsupported format version: zarr_format is missing or not an unsigned integer"};
        if(zf.asUInt() != 2) return {pref, "Unsupported format version: " + MString(zf.asUInt())};
    }
    {
        const auto& ord = zarray["order"];
        if(ord.type() != Json::stringValue || ord.asString() != "C") return {pref, "Order is not C"};
    }
    {
        const auto& f = zarray["filters"];
        if(f.type() != Json::nullValue) return {pref, "Filters is not null"};
    }

    // Read dtype
    {
        const auto& dtype = zarray["dtype"];
        if(dtype.type() != Json::stringValue) return {pref, "No datatype"};
        const auto str = dtype.asString();
        if(str == "<f4")
            newtype = VarType::FLOAT;
        else if(str == "<f8")
            newtype = VarType::DOUBLE;
        else if(str == "|i1")
            newtype = VarType::INT1;
        else if(str == "|u1")
            newtype = VarType::UINT1;
        else if(str == "<i2")
            newtype = VarType::INT2;
        else if(str == "<i4")
            newtype = VarType::INT4;
        else if(str == "<i8")
            newtype = VarType::INT8;
        else
            return {pref, "Unsupported datatype: " + MString(str.c_str())};
    }

    // Read fill_value
    // NOTE(review): any other kind of fill_value (e.g. a string different from "NaN")
    // leaves fill default-constructed, as before.
    {
        const auto& fillval = zarray["fill_value"];
        if(!fillval) return {pref, "No fillval"};
        if(fillval.type() == Json::uintValue)
            fill = fillval.asUInt64();
        else if(fillval.type() == Json::intValue)
            fill = fillval.asInt64();
        else if(fillval.type() == Json::realValue)
            fill = fillval.asDouble();
        else if(fillval.type() == Json::stringValue && fillval.asString() == "NaN")
            fill = NAN;
    }

    // Read attributes
    auto atts = ReadAtts(zattrs);

    std::vector<MString> dnames;
    std::vector<size_t>  dsizes;
    std::vector<size_t>  csizes;
    std::vector<size_t>  dids;

    // Copies an array of non-negative integers into out. Returns false on the first entry
    // that is not such an integer, so names and sizes can never get out of step.
    const auto readsizes = [](const Json::Value& arr, std::vector<size_t>& out) -> bool
    {
        out.reserve(arr.size());
        for(Json::ArrayIndex i = 0; i < arr.size(); i++)
        {
            const auto& v     = arr[i];
            const bool  isint = v.type() == Json::uintValue || v.type() == Json::intValue;
            if(!isint || !v.isUInt64()) return false;
            out.push_back(static_cast<size_t>(v.asUInt64()));
        }
        return true;
    };

    // Read dimensions names
    {
        const auto& arrdim = zattrs["_ARRAY_DIMENSIONS"];
        if(arrdim.type() != Json::arrayValue) return {pref, "_ARRAY_DIMENSIONS not found"};
        dnames.reserve(arrdim.size());
        for(Json::ArrayIndex i = 0; i < arrdim.size(); i++)
        {
            const auto& dim = arrdim[i];
            if(dim.type() != Json::stringValue) return {pref, "_ARRAY_DIMENSIONS contains a non-string entry"};
            const auto val = dim.asString();
            dnames.emplace_back(val.c_str(), val.size());
        }
    }

    // Read dimensions sizes
    {
        const auto& shape = zarray["shape"];
        if(shape.type() != Json::arrayValue) return {pref, "shape not found"};
        if(!readsizes(shape, dsizes)) return {pref, "shape contains an entry that is not a non-negative integer"};
    }

    // Read chunk sizes
    {
        const auto& chunk = zarray["chunks"];
        if(chunk.type() != Json::arrayValue) return {pref, "chunks not found"};
        if(!readsizes(chunk, csizes)) return {pref, "chunks contains an entry that is not a non-negative integer"};
    }

    if(dnames.size() != dsizes.size() || dnames.size() != csizes.size()) return {pref, "shape and chunks are in contradiction"};

    dids.resize(dnames.size());

    // Check dimensions names and sizes: reuse a known dimension (its size must agree)
    // or register a new one.
    for(size_t i = 0; i < dnames.size(); i++)
    {
        bool found = false;
        for(size_t id = 0; id < dims.size(); id++)
            if(dims[id].Name() == dnames[i])
            {
                found = true;
                if(dims[id].Size() != dsizes[i])
                    return {pref, "According to previous data, the dimension " + dnames[i] + " has a size of " + dims[id].Size() + ", but here it is defined as " + dsizes[i]};
                dids[i] = id;
                break;
            }
        if(!found)
        {
            dids[i] = dims.size();
            dims.emplace_back(dnames[i], dsizes[i]);
        }
    }

    // vars and chunks are parallel containers: entry k of chunks is the chunk shape of vars[k]
    vars.emplace_back(name, newtype, std::move(dids), std::move(atts), fill);
    chunks.push_back(std::move(csizes));
    return Error();
}
// Fetches one chunk of a variable and writes chunksize elements of elsize bytes to data.
//
// var       - variable name; used as a path component of the chunk URL.
// chunkind  - chunk index along every dimension; joined with '.' to form the chunk key.
// chunksize - number of elements in the chunk.
// elsize    - size of one element in bytes.
// data      - destination buffer of at least chunksize * elsize bytes.
// fill      - pointer to one element used to fill the buffer when the chunk has no
//             stored content; may be nullptr, in which case an absent chunk is an error.
//
// The chunk body is taken from cache when available; otherwise it is downloaded and
// stored in cache. A non-empty body is decompressed with blosc after checking that
// its header agrees with the downloaded size and with the destination buffer size.
Error ZarrFunctions::GetChunk(const MString& var, const std::vector<size_t>& chunkind, size_t chunksize, size_t elsize, void* data, const void* fill) const
{
    static const MString pref = "Zarr::GetChunk";

    const size_t nbytes = chunksize * elsize;

    MString str = url + "/" + var + "/";
    for(size_t i = 0; i < chunkind.size(); i++) str += (i == 0 ? "" : ".") + MString(chunkind[i]);

    auto [content, suc] = cache->Get(str);
    if(!suc)
    {
        michlib::message(str + " not found in cache, downloading");
        auto [out, res] = GetUrl(chandle, str);
        if(res != CURLE_OK) return Error(pref, MString("can't download chunk: ") + chandle.Err());

        // Initialised so that a failing curl_easy_getinfo() cannot leave an indeterminate value
        long respcode = 0;
        curl_easy_getinfo(chandle, CURLINFO_RESPONSE_CODE, &respcode);
        michlib::message("Response: ", respcode);
        if(respcode == 403) out = ""; // Failed chunk download mean that this chunk contains only fill
        cache->Put(str, out, 3600);
        content = std::move(out);
    }

    if(content.Exist())
    {
        // blosc_cbuffer_sizes() reads the fixed-size blosc header, so the buffer
        // must be at least that long before it may be inspected.
        if(content.Len() < static_cast<size_t>(BLOSC_MIN_HEADER_LENGTH)) return Error(pref, MString("bytes download: ") + content.Len() + ", less than blosc header size");

        size_t nb, cb, bs;
        blosc_cbuffer_sizes(content.Buf(), &nb, &cb, &bs);
        if(cb != content.Len()) return Error(pref, MString("bytes download: ") + content.Len() + ", but compressed bytes " + cb);
        if(nb != nbytes) return Error(pref, MString("decompressed bytes: ") + nb + ", but buffer size " + nbytes);

        auto res = blosc_decompress_ctx(content.Buf(), data, nbytes, 1);
        // Negative results are blosc error codes; test them before converting to an unsigned type
        if(res < 0 || int_cast<size_t>(res) != nbytes) return Error(pref, MString("decompress only ") + res + " bytes of " + nbytes);
    }
    else
    {
        // No stored content: replicate the fill element over the whole chunk
        if(fill == nullptr) return Error(pref, MString("can't download chunk: ") + chandle.Err());
        for(size_t i = 0; i < chunksize; i++) memcpy(michlib::P1(data) + i * elsize, fill, elsize);
    }
    return Error();
}
// Opens a dataset: resolves its URL through the Copernicus catalog, downloads the
// consolidated ".zmetadata" document and rebuilds the list of global attributes,
// dimensions, variables and chunk shapes from it.
//
// product - product identifier passed to the catalog.
// dataset - dataset identifier passed to the catalog.
// time    - selects DatasetTimeURL() when true and DatasetGeoURL() when false.
//
// Returns a default-constructed Error on success; otherwise an Error describing which step
// failed (URL lookup, metadata download, missing "metadata" key, or a variable
// that AddVar() rejected).
Error ZarrFunctions::Open(const MString& product, const MString& dataset, bool time)
{
    static const MString pref = "Zarr::Open";

    // Drop everything left from a previous Open(). chunks is filled by AddVar() in
    // lock-step with vars, so it has to be reset together with it.
    gats.clear();
    dims.clear();
    vars.clear();
    chunks.clear();

    CopernicusCatalog cat;
    Json::Value       json;

    {
        auto urlret = time ? cat.DatasetTimeURL(product, dataset) : cat.DatasetGeoURL(product, dataset);
        if(!urlret) return urlret.Add(pref, "Can't get url for the dataset " + dataset + " of product " + product);
        url = urlret.Value();

        auto ret = cat.GetJSON(url + "/.zmetadata");
        if(ret)
            json = ret.Value();
        else
            return ret.Add(pref, "can't download .zmetadata");
    }

    // JsonCpp only allows member lookup on objects (and null); check the types first so
    // that a malformed document is reported as an Error instead of a JsonCpp failure.
    if(json.type() != Json::objectValue) return {pref, "No \"metadata\" key in JSON data"};
    const auto& meta = json["metadata"];
    if(meta.type() != Json::objectValue) return {pref, "No \"metadata\" key in JSON data"};

    // Global attributes are optional
    if(meta[".zattrs"]) gats = ReadAtts(meta[".zattrs"]);

    auto vnames = ReadVarNames(meta);
    for(size_t i = 0; i < vnames.size(); i++)
    {
        auto err = AddVar(vnames[i], meta[(vnames[i] + "/.zattrs").Buf()], meta[(vnames[i] + "/.zarray").Buf()]);
        if(!err) return err.Add(pref, "Can't init variable " + vnames[i]);
    }
    return Error();
}