You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
178 lines
5.4 KiB
178 lines
5.4 KiB
#pragma once |
|
#include "GPL.h" |
|
#include "cache.h" |
|
#include "curlfuncs.h" |
|
#include "nczarrcommon.h" |
|
#include <json/json.h> |
|
#include <variant> |
|
|
|
class ZarrTypes: public NcZarrTypes |
|
{ |
|
protected: |
|
template<class VType> class ReadedData |
|
{ |
|
//public: |
|
using Vec = std::vector<size_t>; |
|
|
|
private: |
|
Vec start, chunkstart; |
|
ArrCounter mainind, chunkind, inchunkind; |
|
std::vector<std::unique_ptr<VType[]>> data; |
|
|
|
public: |
|
ReadedData(): mainind(Vec()), chunkind(Vec()), inchunkind(Vec()) {} |
|
|
|
ReadedData(size_t N, const size_t* start, const size_t* count, const size_t* csize, std::vector<std::unique_ptr<VType[]>>&& d): |
|
start(start, start + N), |
|
chunkstart( |
|
[](size_t N, const size_t* st, const size_t* cs) |
|
{ |
|
Vec out(N); |
|
for(size_t i = 0; i < N; i++) out[i] = st[i] / cs[i]; |
|
return out; |
|
}(N, start, csize)), |
|
mainind(Vec(count, count + N)), |
|
chunkind( |
|
[](size_t N, const size_t* st, const size_t* cn, const size_t* cs) |
|
{ |
|
Vec out(N); |
|
for(size_t i = 0; i < N; i++) out[i] = (st[i] + cn[i]) / cs[i] - st[i] / cs[i] + 1; |
|
return out; |
|
}(N, start, count, csize)), |
|
inchunkind(Vec(csize, csize + N)), |
|
data(std::move(d)) |
|
{ |
|
} |
|
|
|
VType operator()(size_t lini) const |
|
{ |
|
Vec ind = mainind.Index(lini, mainind.Count()); |
|
Vec cind(ind.size()), inind(ind.size()); |
|
|
|
for(size_t i = 0; i < ind.size(); i++) |
|
{ |
|
cind[i] = (ind[i] + start[i]) / inchunkind.Count(i) - chunkstart[i]; // indes of chunk |
|
inind[i] = (ind[i] + start[i]) % inchunkind.Count(i); // index inside chunk |
|
} |
|
size_t chunk = chunkind.Index(cind); |
|
size_t inside = inchunkind.Index(inind); |
|
return data[chunk][inside]; |
|
} |
|
}; |
|
|
|
private: |
|
// Create attribute from json value |
|
static AttVT CreateAtt(const Json::Value& val) |
|
{ |
|
if(val.type() == Json::intValue) return AttVT{std::in_place_type<int8>, val.asInt64()}; |
|
if(val.type() == Json::uintValue) return AttVT(std::in_place_type<uint8>, val.asUInt64()); |
|
if(val.type() == Json::realValue) return AttVT(std::in_place_type<double>, val.asDouble()); |
|
if(val.type() == Json::stringValue) |
|
{ |
|
auto str = val.asString(); |
|
return AttVT(std::in_place_type<MString>, MString(str.c_str(), str.size())); |
|
} |
|
if(val.type() == Json::booleanValue) return AttVT(std::in_place_type<bool>, val.asBool()); |
|
return AttVT(); |
|
} |
|
|
|
public: |
|
// Read attributes from .zattrs |
|
static auto ReadAtts(const Json::Value& obj) |
|
{ |
|
std::vector<Attribute> out; |
|
if(obj.type() != Json::objectValue) return out; |
|
const auto keys = obj.getMemberNames(); |
|
for(const auto& key: keys) |
|
if(key != "_ARRAY_DIMENSIONS") out.emplace_back(key, CreateAtt(obj[key])); |
|
return out; |
|
} |
|
}; |
|
|
|
class ZarrFunctions: public ZarrTypes |
|
{ |
|
std::unique_ptr<GenericCache> cache; |
|
CURLRAII chandle; |
|
MString url; |
|
MString proxyurl; |
|
|
|
std::vector<std::vector<size_t>> chunks; |
|
|
|
// Find variable names in metadata |
|
static std::vector<MString> ReadVarNames(const Json::Value& meta); |
|
|
|
Error AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray); |
|
|
|
protected: |
|
ZarrFunctions() |
|
{ |
|
auto oldprefix = michlib::GPL.UsePrefix("ZARR"); |
|
cache.reset(CreateCache(michlib::GPL.ParameterSValue("Cache", ""))); |
|
proxyurl = michlib::GPL.ParameterSValue("Proxy", ""); |
|
if(proxyurl.Exist()) curl_easy_setopt(chandle, CURLOPT_PROXY, proxyurl.Buf()); |
|
michlib::GPL.UsePrefix(oldprefix); |
|
if(!cache) |
|
{ |
|
michlib::errmessage("Can't init data cache"); |
|
cache.reset(new FakeCache); |
|
} |
|
} |
|
|
|
template<class VType> RetVal<ReadedData<VType>> Read(const MString& var, const size_t* start, const size_t* count) const |
|
{ |
|
using Vec = std::vector<size_t>; |
|
|
|
size_t ind = FindInd(var, vars); |
|
const size_t N = vars[ind].NDim(); |
|
const auto& csize = chunks[ind]; |
|
|
|
Vec chunkstart( |
|
[](size_t N, const size_t* st, const size_t* cs) |
|
{ |
|
Vec out(N); |
|
for(size_t i = 0; i < N; i++) out[i] = st[i] / cs[i]; |
|
return out; |
|
}(N, start, csize.data())); |
|
ArrCounter chunkind( |
|
[](size_t N, const size_t* st, const size_t* cn, const size_t* cs) |
|
{ |
|
Vec out(N); |
|
for(size_t i = 0; i < N; i++) out[i] = (st[i] + cn[i] - 1) / cs[i] - st[i] / cs[i] + 1; |
|
return out; |
|
}(N, start, count, csize.data())); |
|
|
|
bool havefill = vars[ind].Fill().index() > 0; |
|
VType fill = std::visit( |
|
[](auto v) |
|
{ |
|
if constexpr(std::is_convertible_v<decltype(v), VType>) |
|
return static_cast<VType>(v); |
|
else |
|
return std::numeric_limits<VType>::max(); |
|
}, |
|
vars[ind].Fill()); |
|
|
|
std::vector<std::unique_ptr<VType[]>> cdata; |
|
|
|
size_t chunksize = 1; |
|
for(const auto c: csize) chunksize *= c; |
|
|
|
cdata.resize(chunkind.N()); |
|
|
|
for(; chunkind; ++chunkind) |
|
{ |
|
cdata[chunkind.Index()].reset(new VType[chunksize]); |
|
auto res = GetChunk(var, chunkind.VIndex(chunkstart), chunksize, sizeof(VType), cdata[chunkind.Index()].get(), havefill ? &fill : nullptr); |
|
if(!res) return res; |
|
} |
|
|
|
return ReadedData<VType>(N, start, count, csize.data(), std::move(cdata)); |
|
} |
|
|
|
Error GetChunk(const MString& var, const std::vector<size_t>& chunkind, size_t chunksize, size_t elsize, void* data, const void* fill) const; |
|
|
|
public: |
|
Error Open(const MString& product, const MString& dataset, bool time = true); |
|
}; |
|
|
|
// Zarr is the NcZarrRead template instantiated with the ZarrFunctions backend.
using Zarr = NcZarrRead<ZarrFunctions>;
|
|
|