You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

179 lines
5.4 KiB

#pragma once
#include "GPL.h"
#include "cache.h"
#include "curlfuncs.h"
#include "nczarrcommon.h"
#include <json/json.h>
#include <variant>
// Helper types for the Zarr reader: an accessor over data that was fetched as a set of
// whole chunks, and converters from JSON metadata (.zattrs) to attributes.
class ZarrTypes: public NcZarrTypes
{
protected:
  // Read-only accessor over a requested region (start[], count[]) whose values are stored
  // in per-chunk buffers. operator() takes a linear index inside the requested region and
  // returns the corresponding value from the proper chunk buffer.
  // NOTE(review): ArrCounter is declared elsewhere; it is used here as a converter between
  // linear and multidimensional indices for a given shape - confirm its index ordering.
  template<class VType> class ReadedData
  {
    using Vec = std::vector<size_t>;

    // Declaration order below is also the initialisation order used by the constructors
    Vec start;                                  // First requested element along each dimension
    Vec chunkstart;                             // start[i] / csize[i]: chunk holding the first element
    ArrCounter mainind;                         // Built from count[]: shape of the requested region
    ArrCounter chunkind;                        // Built from the number of chunks covered along each dimension
    ArrCounter inchunkind;                      // Built from csize[]: shape of a single chunk
    std::vector<std::unique_ptr<VType[]>> data; // One buffer per covered chunk, addressed by chunkind.Index()

    // Index of the chunk which contains element st[i], for every dimension
    static Vec FirstChunk(size_t N, const size_t* st, const size_t* cs)
    {
      Vec out(N);
      for(size_t i = 0; i < N; i++) out[i] = st[i] / cs[i];
      return out;
    }

    // Number of chunks covered by the elements st[i] ... st[i] + cn[i] - 1, for every dimension.
    // The last requested element is st[i] + cn[i] - 1, hence the "- 1": without it a region
    // ending exactly on a chunk boundary would be counted as one chunk too many, and the chunk
    // grid would no longer match the buffers allocated by ZarrFunctions::Read, which uses
    // this same formula. Assumes cn[i] >= 1.
    static Vec ChunkCount(size_t N, const size_t* st, const size_t* cn, const size_t* cs)
    {
      Vec out(N);
      for(size_t i = 0; i < N; i++) out[i] = (st[i] + cn[i] - 1) / cs[i] - st[i] / cs[i] + 1;
      return out;
    }

  public:
    // Empty accessor: no dimensions and no data
    ReadedData(): mainind(Vec()), chunkind(Vec()), inchunkind(Vec()) {}

    // N     - number of dimensions
    // start - first requested element along each dimension (N values)
    // count - number of requested elements along each dimension (N values)
    // csize - chunk size along each dimension (N values)
    // d     - chunk buffers, one per covered chunk; ownership is taken over
    ReadedData(size_t N, const size_t* start, const size_t* count, const size_t* csize, std::vector<std::unique_ptr<VType[]>>&& d):
        start(start, start + N),
        chunkstart(FirstChunk(N, start, csize)),
        mainind(Vec(count, count + N)),
        chunkind(ChunkCount(N, start, count, csize)),
        inchunkind(Vec(csize, csize + N)),
        data(std::move(d))
    {
    }

    // Value at linear index lini inside the requested region
    VType operator()(size_t lini) const
    {
      Vec ind = mainind.Index(lini, mainind.Count());
      Vec cind(ind.size()), inind(ind.size());
      for(size_t i = 0; i < ind.size(); i++)
      {
        cind[i]  = (ind[i] + start[i]) / inchunkind.Count(i) - chunkstart[i]; // index of chunk, relative to the first covered chunk
        inind[i] = (ind[i] + start[i]) % inchunkind.Count(i);                 // index inside chunk
      }
      size_t chunk  = chunkind.Index(cind);
      size_t inside = inchunkind.Index(inind);
      return data[chunk][inside];
    }
  };

private:
  // Create attribute from json value.
  // Integer, unsigned integer, real, string and boolean values are converted; any other
  // JSON type (null, array, object) gives a default-constructed AttVT.
  // NOTE(review): int8/uint8 are project typedefs - presumably 8-byte integers, confirm.
  static AttVT CreateAtt(const Json::Value& val)
  {
    switch(val.type())
    {
      case Json::intValue: return AttVT(std::in_place_type<int8>, val.asInt64());
      case Json::uintValue: return AttVT(std::in_place_type<uint8>, val.asUInt64());
      case Json::realValue: return AttVT(std::in_place_type<double>, val.asDouble());
      case Json::stringValue:
      {
        auto str = val.asString();
        return AttVT(std::in_place_type<MString>, MString(str.c_str(), str.size()));
      }
      case Json::booleanValue: return AttVT(std::in_place_type<bool>, val.asBool());
      default: return AttVT();
    }
  }

public:
  // Read attributes from .zattrs
  // Returns one Attribute per member of obj, except the "_ARRAY_DIMENSIONS" member which
  // is skipped. If obj is not a JSON object the result is empty.
  static auto ReadAtts(const Json::Value& obj)
  {
    std::vector<Attribute> out;
    if(obj.type() != Json::objectValue) return out;
    const auto keys = obj.getMemberNames();
    for(const auto& key: keys)
      if(key != "_ARRAY_DIMENSIONS") out.emplace_back(key, CreateAtt(obj[key]));
    return out;
  }
};
// Zarr backend used by NcZarrRead: keeps the cache, the curl handle and the chunk shape of
// every variable, and reads a requested region as a set of whole chunks.
class ZarrFunctions: public ZarrTypes
{
  std::unique_ptr<GenericCache> cache;     // Data cache; never null once the constructor has finished
  CURLRAII chandle;                        // Curl handle wrapper
  MString url;                             // NOTE(review): not used in this header - presumably the dataset url, confirm
  MString proxyurl;                        // Value of the "Proxy" parameter; handed to curl when set
  std::vector<std::vector<size_t>> chunks; // Chunk size along each dimension, indexed like vars

  // Find variable names in metadata
  static std::vector<MString> ReadVarNames(const Json::Value& meta);

  Error AddVar(const MString& name, const Json::Value& zattrs, const Json::Value& zarray);

protected:
  // Takes the "Cache" and "Proxy" parameters from the "ZARR" parameter prefix. When no cache
  // can be created an error message is printed and a FakeCache is used instead.
  ZarrFunctions()
  {
    // Switch to the ZARR prefix only while the parameters are being read
    auto savedprefix = michlib::GPL.UsePrefix("ZARR");
    cache.reset(CreateCache(michlib::GPL.ParameterSValue("Cache", "")));
    proxyurl = michlib::GPL.ParameterSValue("Proxy", "");
    if(proxyurl.Exist()) curl_easy_setopt(chandle, CURLOPT_PROXY, proxyurl.Buf());
    michlib::GPL.UsePrefix(savedprefix);

    if(cache) return;

    michlib::errmessage("Can't init data cache");
    cache.reset(new FakeCache);
  }

  // Read the region of variable var given by start[] and count[] (one value per dimension
  // of the variable). Every chunk covered by the region is fetched with GetChunk into its
  // own buffer; the buffers are returned wrapped in a ReadedData accessor. The first
  // failing GetChunk aborts the read and its error is returned.
  template<class VType> RetVal<ReadedData<VType>> Read(const MString& var, const size_t* start, const size_t* count) const
  {
    using Vec = std::vector<size_t>;

    size_t        varid  = FindInd(var, vars);
    const size_t  ndims  = vars[varid].NDim();
    const auto&   cshape = chunks[varid];
    const size_t* cs     = cshape.data();

    // Chunk which holds the first requested element, per dimension
    Vec firstchunk(ndims);
    for(size_t d = 0; d < ndims; d++) firstchunk[d] = start[d] / cs[d];

    // Number of chunks covered by the region, per dimension
    Vec covered(ndims);
    for(size_t d = 0; d < ndims; d++) covered[d] = (start[d] + count[d] - 1) / cs[d] - start[d] / cs[d] + 1;
    ArrCounter chunkit(std::move(covered));

    // Fill value is passed on only when the variable's Fill() holds something other than
    // the first alternative. If the stored value is not convertible to VType the maximal
    // VType value is taken.
    const bool havefill = vars[varid].Fill().index() != 0;
    VType      fill     = std::visit(
        [](auto v) -> VType
        {
          if constexpr(std::is_convertible_v<decltype(v), VType>)
            return static_cast<VType>(v);
          else
            return std::numeric_limits<VType>::max();
        },
        vars[varid].Fill());

    // Number of elements in one chunk
    size_t chunksize = 1;
    for(const auto extent: cshape) chunksize *= extent;

    std::vector<std::unique_ptr<VType[]>> cdata(chunkit.N());
    while(chunkit)
    {
      const auto slot = chunkit.Index();
      cdata[slot].reset(new VType[chunksize]);
      auto res = GetChunk(var, chunkit.VIndex(firstchunk), chunksize, sizeof(VType), cdata[slot].get(), havefill ? &fill : nullptr);
      if(!res) return res;
      ++chunkit;
    }

    return ReadedData<VType>(ndims, start, count, cs, std::move(cdata));
  }

  // Get one chunk of variable var. Read passes: the chunk index, the number of elements in
  // the chunk, the size of one element, the destination buffer, and a pointer to the fill
  // value (nullptr when there is none).
  Error GetChunk(const MString& var, const std::vector<size_t>& chunkind, size_t chunksize, size_t elsize, void* data, const void* fill) const;

public:
  Error Open(const MString& product, const MString& dataset, bool time = true);
};
// Zarr reader type: the NcZarrRead template (declared elsewhere) instantiated with the
// ZarrFunctions backend defined above.
using Zarr = NcZarrRead<ZarrFunctions>;