Browse Source

Rewrite NEMO source for using new zarr data

lintest
Michael Uleysky 5 months ago
parent
commit
5ca27193f7
  1. 207
      include/layereddataz.h
  2. 10
      include/ncfuncs.h
  3. 6
      sources/NEMO.h
  4. 298
      src/layereddataz.cpp
  5. 104
      src/ncfuncs.cpp

207
include/layereddataz.h

@ -0,0 +1,207 @@
#pragma once
#include "gsw.h"
#include "ncfuncs.h"
#include "simple2ddata.h"
#include "zarr.h"
#include <memory>
using michlib::Ceil;
using michlib::DetGeoDomain;
using michlib::Floor;
using michlib::GPL;
using michlib::int2;
// Data source for layered (multi-depth) fields on a longitude/latitude grid stored in zarr datasets.
// Keeps one NC object per opened dataset, the merged list of time moments, the depth levels and
// the description of the horizontal grid (start, end and step for both axes).
class LayeredDataZ: public NCFuncs
{
  public:
  using Data = Simple2DData;

  private:
  // A single opened dataset together with the time moments decoded from its time variable.
  class NC: public Zarr
  {
    std::vector<MDateTime> times;

    public:
    // Reads the integer values of variable tname and converts them to dates. The reference date
    // and the step (in seconds) are parsed by Refdate() from the "units" attribute of tname.
    // Returns a default-constructed Error on success and a filled Error otherwise.
    Error ReadTimes(const MString& tname)
    {
      static const MString pref = "LayeredDataZ::NC::ReadTimes";
      if(!*this) return Error(pref, "Dataset not open");
      std::vector<int8> time;
      {
        auto ret = Read(tname, time);
        if(!ret) return ret.Add(pref, "Can't read time");
      }
      MDateTime refdate;
      time_t step = 0;
      {
        auto units = AttString(tname, "units");
        if(!units.Exist()) return Error(pref, "Can't read refdate");
        auto [rd, st, suc] = Refdate(units);
        if(!suc) return Error(pref, "Can't parse " + units + " to refdate");
        if(st == 0) return Error(pref, "Can't get timestep from string " + units);
        refdate = rd;
        step = st;
      }
      times.resize(time.size());
      for(size_t i = 0; i < times.size(); i++) times[i] = refdate + static_cast<time_t>(time[i]) * step;
      return Error();
    }

    // First and last time moments. The list of times must be non-empty.
    MDateTime Begin() const { return times.front(); }
    MDateTime End() const { return times.back(); }

    const std::vector<MDateTime>& Times() const { return times; }

    // Returns the 1-based position of tm in the list of times, or 0 if tm is not present.
    // The bisection below assumes that the times are stored in ascending order.
    size_t Index(MDateTime tm) const
    {
      // Begin()/End() are not defined for an empty list, and nothing can be found there anyway
      if(times.empty()) return 0;
      if(tm < Begin() || tm > End()) return 0;
      size_t b = 0, e = times.size() - 1;
      if(tm == times[b]) return b + 1;
      if(tm == times[e]) return e + 1;
      while(e - b > 1)
      {
        size_t c = (e + b) / 2;
        if(tm == times[c]) return c + 1;
        if(tm > times[c])
          b = c;
        else
          e = c;
      }
      return 0;
    }
  };

  std::vector<NC> nc;
  std::vector<real> depths;
  bool depthinv = false;
  std::vector<MDateTime> times;
  struct CoordNames dname;
  real lonb, latb, lone, late;
  real lonstep, latstep;
  MString title;

  // Temporarily overrides an environment variable. The previous state (value or absence)
  // is remembered by Activate() and put back by Deactivate() or by the destructor.
  class EnvVar
  {
    MString name, oldvalue;
    bool activated, saved;

    public:
    EnvVar(): activated(false), saved(false) {}
    ~EnvVar() { Deactivate(); }

    // Sets environment variable var to val, remembering what was there before.
    void Activate(const MString& var, const MString& val)
    {
      if(activated) Deactivate();
      name = var;
      char* curval = getenv(name.Buf());
      if(nullptr == curval)
        saved = false;
      else
      {
        oldvalue = curval;
        saved = true;
      }
      setenv(name.Buf(), val.Buf(), 1);
      // Without this flag Deactivate() returns immediately and the old value is never restored
      activated = true;
    }

    // Restores the state of the variable as it was before Activate(). Does nothing if not active.
    void Deactivate()
    {
      if(!activated) return;
      if(saved)
        setenv(name.Buf(), oldvalue.Buf(), 1);
      else
        unsetenv(name.Buf());
      activated = false;
    }
  };

  EnvVar proxy;

  protected:
  // Selected sub-region: index ranges along longitude (xb..xe) and latitude (yb..ye) and the depth layer.
  struct Parameters: public BaseParameters
  {
    size_t xb, yb, xe, ye, layer;
    virtual ~Parameters() override = default;
  };

  // TODO: RetVal
  // Returns an empty string on success or an error message.
  MString Open(const MString& dataset);

  void SetTitle(const MString& newtitle) { title = newtitle; }

  public:
  MString Info() const;

  std::pair<const BaseParameters*, MString> Parameters(michlib_internal::ParameterListEx& pars, const CLArgs& args, const struct Region& reg) const;

  bool Read(const MString& vname, std::map<MString, Data>& cache, const BaseParameters* ip, size_t i) const;

  bool isOk() const { return nc.size() > 0; }

  explicit operator bool() const { return nc.size() > 0; }

  // Depth of level l; -1000.0 if the source is not open or l is out of range.
  real Depth(size_t l) const { return (isOk() && l < depths.size()) ? depths[l] : -1000.0; }

  // Depth of the layer stored in ip; -1000.0 if ip is not a Parameters object of this class.
  real Depth(const BaseParameters* ip) const
  {
    auto p = dynamic_cast<const struct Parameters*>(ip);
    return (p == nullptr) ? -1000.0 : Depth(p->layer);
  }

  // Longitude/latitude of a grid node computed from the start value and the step; -1000.0 if the source is not open.
  real Lon(size_t ix) const { return isOk() ? (lonb + ix * lonstep) : -1000.0; }

  real Lat(size_t iy) const { return isOk() ? (latb + iy * latstep) : -1000.0; }

  size_t NDepths() const { return depths.size(); }

  size_t NTimes() const { return times.size(); }

  // Time moment number i; a default-constructed MDateTime if the source is not open or i is out of range.
  MDateTime Time(size_t i) const
  {
    if(!isOk() || i >= times.size()) return MDateTime();
    return times[i];
  }

  // Difference between the first two time moments in seconds; 0 if it can not be computed.
  time_t Timestep() const { return (isOk() && times.size() > 1) ? (times[1] - times[0]).Seconds() : 0; }

  MString Title() const { return title; }

  // Human-readable description of the selected region and layer.
  MString Dump(const struct Parameters* ppar) const
  {
    // clang-format off
    return
      "Current settings:\n" + MString() +
      " Longitudes: from " + Lon(ppar->xb) + " (" + ppar->xb + ") to "+ Lon(ppar->xe) + " (" + ppar->xe + ")\n" +
      " Latitudes: from " + Lat(ppar->yb) + " (" + ppar->yb + ") to "+ Lat(ppar->ye) + " (" + ppar->ye + ")\n" +
      " Depth: layer " + ppar->layer + ", depth " + Depth(ppar->layer) + " m\n";
    // clang-format on
  }

  VarPresence CheckVar(const MString& vname) const
  {
    return NCFuncs::CheckVar(vname, [this](const MString& vn) { return HaveVar(vn); });
  }

  private:
  template<class DataType> Data ReadVarRaw(const NC& f, const MString& name, size_t i, bool nodepth, const struct Parameters* p) const;

  // True if at least one of the opened datasets provides variable vname.
  bool HaveVar(const MString& vname) const
  {
    for(size_t i = 0; i < nc.size(); i++)
      if(NCFuncs::HaveVar(nc[i], vname)) return true;
    return false;
  }

  // Looks for a dataset that contains time moment tm and a variable whose "standard_name"
  // attribute maps (via StName2Name) to vname.
  // Returns {name of the variable in the dataset, index of the dataset, 0-based time index},
  // or {"", 0, 0} if nothing was found.
  std::tuple<MString, size_t, size_t> VarNameLoc(const MString& vname, MDateTime tm) const
  {
    for(size_t i = 0; i < nc.size(); i++)
    {
      auto tind = nc[i].Index(tm);
      if(tind == 0) continue; // This dataset has no such time moment
      for(const auto& v: nc[i].Vars())
      {
        auto stname = nc[i].AttString(v.Name(), "standard_name");
        if(!stname.Exist()) continue;
        if(StName2Name(stname) == vname) return {v.Name(), i, tind - 1};
      }
    }
    return {"", 0, 0};
  }
};

10
include/ncfuncs.h

@ -2,6 +2,8 @@
#include "DataAdapters/ncfilealt.h"
#include "basedata.h"
#include "mdatetime.h"
#include "ncsimple.h"
#include "zarr.h"
#include <map>
#include <set>
#include <tuple>
@ -27,7 +29,13 @@ class NCFuncs
static CoordNames GetCNames(const NCFileA& nc);
static CoordNames GetDNames(const NCFileA& nc);
static bool HaveVar(const NCFileA& nc, const MString& vname);
template<class HV> static VarPresence CheckVar(const MString& vname, HV hv)
template<class NcZarrFunctions> static void GetVars(const NcZarrRead<NcZarrFunctions>& nc, std::set<MString>& vars);
template<class NcZarrFunctions> static CoordNames GetCNames(const NcZarrRead<NcZarrFunctions>& nc);
template<class NcZarrFunctions> static CoordNames GetDNames(const NcZarrRead<NcZarrFunctions>& nc);
template<class NcZarrFunctions> static bool HaveVar(const NcZarrRead<NcZarrFunctions>& nc, const MString& vname);
template<class HV> static VarPresence CheckVar(const MString& vname, HV hv)
{
if(!hv(vname))
{

6
sources/NEMO.h

@ -1,7 +1,7 @@
#pragma once
#include "layereddata.h"
#include "layereddataz.h"
class NEMOData: public LayeredData
class NEMOData: public LayeredDataZ
{
enum Type
{
@ -101,6 +101,6 @@ class NEMOData: public LayeredData
return "Unknown dataset: " + dataset;
SetTitle(DataTitle());
return LayeredData::Open(dataset);
return LayeredDataZ::Open(dataset);
}
};

298
src/layereddataz.cpp

@ -0,0 +1,298 @@
#define MICHLIB_NOSOURCE
#include "layereddataz.h"
// Builds a multi-line text description of the opened source: title, time range and step,
// region, grid, list of depth levels and list of supported variables.
// Returns an empty string if the source is not open.
MString LayeredDataZ::Info() const
{
  if(!isOk()) return "";

  // Names of variables available from any of the opened datasets
  std::set<MString> varnames;
  for(const auto& file: nc) GetVars(file, varnames);

  // Comma-separated list of the names
  MString varlist;
  for(auto it = varnames.begin(); it != varnames.end(); ++it)
  {
    if(it == varnames.begin())
      varlist += MString("") + *it;
    else
      varlist += MString(", ") + *it;
  }

  // One "(index depth)" entry per level
  MString levels;
  const size_t nlevels = NDepths();
  for(size_t l = 0; l < nlevels; l++) levels += MString(" ") + "(" + l + " " + Depth(l) + ")";

  MString out;
  out += MString("Dataset: ") + Title() + "\n";
  out += MString(" Begin date: ") + Time(0).ToString() + "\n";
  out += MString(" End date: ") + Time(NTimes() - 1).ToString() + "\n";
  out += MString(" Time step: ") + Timestep() + " seconds\n";
  out += MString(" Time moments: ") + NTimes() + "\n";
  out += MString(" Region: (") + lonb + " : " + lone + ") x (" + latb + " : " + late + ")\n";
  out += MString(" Grid: ") + dname.nx + "x" + dname.ny + " (" + lonstep + " x " + latstep + ")\n";
  out += MString(" Depths:") + levels + "\n";
  out += MString(" Supported variables: ") + varlist;
  return out;
}
// Opens all datasets configured for the given dataset name and reads the time moments,
// the depth levels and the description of the horizontal grid.
// The datasets are taken from config parameters <dataset>_URL1, <dataset>_URL2, ... until the first
// missing one; each value has the form "product" or "product:dataset".
// If config parameter USEPROXY is set, its value is placed into environment variable all_proxy.
// Returns an empty string on success or an error message. On failure the object is left closed.
MString LayeredDataZ::Open(const MString& dataset)
{
  // Forget everything read so far. Used both before opening (so that a repeated Open() does not
  // accumulate time moments of the previous one) and on every failure.
  auto reset = [this]()
  {
    nc.clear();
    times.clear();
    depths.clear();
  };
  reset();

  MString proxyurl = GPL.ParameterSValue("USEPROXY", "");
  if(proxyurl.Exist()) proxy.Activate("all_proxy", proxyurl);

  for(size_t i = 1;; i++)
  {
    MString url = GPL.ParameterSValue(dataset + "_URL" + i, "");
    if(!url.Exist()) break;

    // Split url on product and dataset
    auto words = url.Split(":");
    if(words.size() == 0 || words.size() > 2)
    {
      reset();
      return "Invalid url " + url;
    }
    MString product = words[0];
    MString dsname = words.size() == 2 ? words[1] : "";

    nc.emplace_back();
    auto ret = nc.back().Open(product, dsname);
    if(!ret)
    {
      reset();
      return "Can't open " + dsname + " of " + product;
    }
  }
  if(nc.size() == 0) return "No urls for dataset " + dataset + " specified in config";

  // Names and sizes of dimensions are taken from the first dataset
  dname = GetDNames(nc[0]);
  if(!(dname.lonname.Exist() && dname.latname.Exist()))
  {
    reset();
    return "Can't find longitude/latitude";
  }
  if(!dname.timename.Exist())
  {
    reset();
    return "Can't find time";
  }

  // Names of coordinate variables are taken from the first dataset too
  auto cn = GetCNames(nc[0]);

  // Read times
  for(auto& f: nc)
  {
    auto ret = f.ReadTimes(cn.timename);
    if(!ret)
    {
      reset();
      return "Can't read times";
    }
    times.insert(times.end(), f.Times().begin(), f.Times().end());
  }
  // Merge the moments of all datasets into one ascending list without duplicates
  std::sort(times.begin(), times.end());
  auto last = std::unique(times.begin(), times.end());
  times.erase(last, times.end());

  depthinv = false;
  if(cn.depthname.Exist())
  {
    auto ret = nc[0].Read(cn.depthname, depths);
    if(!ret || depths.empty())
    {
      reset();
      return "Can't read depths";
    }
    // Depths are kept as non-negative values ...
    if(depths.back() <= 0 && depths.front() <= 0) std::ranges::transform(depths, depths.begin(), std::negate{});
    // ... in ascending order; depthinv remembers that the order in the dataset is the opposite one
    if(depths.back() < depths.front() && depths.size() > 1)
    {
      depthinv = true;
      std::reverse(depths.begin(), depths.end());
    }
  }
  else // Surface only data
  {
    depths.resize(1);
    depths[0] = 0;
  }

  std::vector<double> lons, lats;
  {
    auto ret = nc[0].Read(cn.lonname, lons);
    if(!ret || lons.empty())
    {
      reset();
      return "Can't get longitudes";
    }
  }
  {
    auto ret = nc[0].Read(cn.latname, lats);
    if(!ret || lats.empty())
    {
      reset();
      return "Can't get latitudes";
    }
  }

  // The grid is described by its first and last nodes and a constant step between them
  lonb = lons[0];
  latb = lats[0];
  lone = lons.back();
  late = lats.back();
  lonstep = (lone - lonb) / (dname.nx - 1);
  latstep = (late - latb) / (dname.ny - 1);
  return "";
}
// Converts the requested region and the command line arguments "layer"/"depth" into grid index
// ranges and a layer number.
// On success returns {pointer to a newly allocated Parameters object, ""}; the caller owns the object.
// On failure returns {nullptr, error message}.
// The values actually selected are also stored into pars (depth, layer, dataset, lonb, latb, lone, late).
std::pair<const BaseParameters*, MString> LayeredDataZ::Parameters(michlib_internal::ParameterListEx& pars, const CLArgs& args, const struct Region& reg) const
{
  auto ppar = std::make_unique<struct Parameters>();

  ppar->layer = args.contains("layer") ? args.at("layer").ToInteger<size_t>() : 0;
  if(!args.contains("depth") && ppar->layer >= NDepths()) return {nullptr, MString("Layer ") + ppar->layer + " is too deep!"};
  real depth = args.contains("depth") ? args.at("depth").ToReal() : Depth(ppar->layer);

  // Conversion of a negative floating-point value to size_t is undefined behaviour,
  // so everything below the first node is mapped to index 0.
  auto toindex = [](real v) -> size_t { return (v > 0.0) ? static_cast<size_t>(v) : 0; };

  {
    auto dom = DetGeoDomain(lonb, lone);
    real lon1 = ToGeoDomain(reg.lonb, dom);
    real lon2 = ToGeoDomain(reg.lone, dom);
    real lat1 = reg.latb;
    real lat2 = reg.late;
    bool global = lone - lonb + 1.5 * lonstep > 360.0;

    // Special case when the longitude lies in a small sector between the end and the start
    if(global)
    {
      if(lon1 < lonb) lon1 = lone;
      if(lon2 > lone) lon2 = lonb;
    }
    else
    {
      if(lon1 < lonb) lon1 = lonb;
      if(lon2 > lone) lon2 = lone;
    }

    ppar->xb = toindex(Floor((lon1 - lonb) / lonstep));
    ppar->xe = toindex(Ceil((lon2 - lonb) / lonstep));
    if(ppar->xb == ppar->xe) return {nullptr, "Lonb must be not equal lone"};
    // For a global grid xb > xe is allowed: the region then wraps over the end of the longitude axis
    if(!global && ppar->xb > ppar->xe) return {nullptr, "Lonb must be lesser then lone"};

    ppar->yb = toindex(Floor((lat1 - latb) / latstep));
    ppar->ye = toindex(Ceil((lat2 - latb) / latstep));
    if(ppar->ye > dname.ny - 1) ppar->ye = dname.ny - 1;
    if(ppar->yb >= ppar->ye) return {nullptr, "Latb must be lesser then late"};

    // Select the level nearest to the requested depth. Here depths is in ascending order.
    if(depth < 0.0 || depth <= depths.front())
      ppar->layer = 0; // Above (or at) the first level, including depths between the surface and the first level
    else if(depth > depths.back())
      ppar->layer = depths.size() - 1;
    else
      for(size_t i = 0; i < depths.size() - 1; i++)
      {
        if(depth >= depths[i] && depth <= depths[i + 1])
        {
          ppar->layer = (depth - depths[i] <= depths[i + 1] - depth) ? i : (i + 1);
          break;
        }
      }
    // The dataset stores the levels in the opposite order, so convert the index
    if(depthinv) ppar->layer = depths.size() - ppar->layer - 1;
  }

  // NOTE(review): when depthinv is set, layer is an index in dataset order while Depth() indexes
  // the ascending list, so the reported depth may belong to a different level - confirm.
  pars.SetParameter("depth", Depth(ppar->layer));
  pars.SetParameter("layer", ppar->layer);
  pars.SetParameter("dataset", Title());
  pars.SetParameter("lonb", Lon(ppar->xb));
  pars.SetParameter("latb", Lat(ppar->yb));
  pars.SetParameter("lone", Lon(ppar->xe));
  pars.SetParameter("late", Lat(ppar->ye));
  return {ppar.release(), ""};
}
// Reads variable vname for time moment number i and the region described by ip, and stores it in cache.
// If no dataset provides the variable directly, the request is passed to TransformationRead().
// Returns true if the variable is in the cache after the call (including the case when it already was there).
bool LayeredDataZ::Read(const MString& vname, std::map<MString, LayeredDataZ::Data>& cache, const BaseParameters* ip, size_t i) const
{
  if(cache.contains(vname)) return true;
  if(!isOk()) return false;
  if(i >= times.size()) return false; // No such time moment

  auto [name, id, tid] = VarNameLoc(vname, times[i]);
  if(!name.Exist()) // Conversion read
    return TransformationRead(this, vname, cache, ip, i);

  // Direct read
  auto p = dynamic_cast<const struct Parameters*>(ip);
  if(p == nullptr) return false; // ReadVarRaw dereferences the parameters unconditionally

  for(const auto& v: nc[id].Vars())
  {
    if(!(v.Name() == name)) continue;

    // A variable with three dimensions is read without a depth request
    const bool nodepth = (v.NDim() == 3);

    Data data;
    if(v.Type() == NcZarrTypes::VarType::INT2)
      data = ReadVarRaw<int2>(nc[id], name, tid, nodepth, p);
    else if(v.Type() == NcZarrTypes::VarType::INT4)
      data = ReadVarRaw<int>(nc[id], name, tid, nodepth, p);
    else if(v.Type() == NcZarrTypes::VarType::FLOAT)
      data = ReadVarRaw<float>(nc[id], name, tid, nodepth, p);
    else if(v.Type() == NcZarrTypes::VarType::DOUBLE)
      data = ReadVarRaw<double>(nc[id], name, tid, nodepth, p);

    if(data)
    {
      cache[vname] = std::move(data);
      return true;
    }
  }
  return false;
}
// Reads variable name from dataset f for time index i and the region/layer described by p.
// Raw values are converted as (raw * scale_factor + add_offset); velocities given in "m s-1" or "m/s"
// are additionally converted to cm/s. If nodepth is true, no depth request is made.
// When p->xb >= p->xe the region wraps over the end of the longitude axis and is read in two parts.
// Returns a default-constructed Data on failure. p must not be null.
template<class DataType> LayeredDataZ::Data LayeredDataZ::ReadVarRaw(const NC& f, const MString& name, size_t i, bool nodepth, const struct LayeredDataZ::Parameters* p) const
{
  real unitmul = 1.0;
  real offset = 0.0, scale = 1.0;

  if(f.HasAtt(name, "add_offset")) offset = f.AttReal(name, "add_offset");
  if(f.HasAtt(name, "scale_factor")) scale = f.AttReal(name, "scale_factor");

  MString unit;
  if(f.HasAtt(name, "units")) unit = f.AttString(name, "units");
  if(unit == "m s-1" || unit == "m/s")
  {
    unitmul = 100.0;
    unit = "cm/s";
  }

  const size_t nrows = p->ye - p->yb + 1;

  Data data((p->xb < p->xe) ? (p->xe - p->xb + 1) : (dname.nx + p->xe - p->xb + 1), nrows, Lon(p->xb), Lat(p->yb), lonstep, latstep, std::move(unit));

  // NOTE(review): the result is converted to DataType, which for integer DataType drops the
  // fractional part of the scaled value - confirm that Read() relies on this return type.
  auto trans = [scale, offset, unitmul](auto raw) -> DataType { return (raw * scale + offset) * unitmul; };

  if(p->xb < p->xe)
  {
    const size_t ncols = p->xe - p->xb + 1;
    auto ret = nodepth ? f.Read(name, data, trans, {dname.lonname, p->xb, ncols}, {dname.latname, p->yb, nrows}, {dname.timename, i, 1})
                       : f.Read(name, data, trans, {dname.lonname, p->xb, ncols}, {dname.latname, p->yb, nrows}, {dname.timename, i, 1},
                                {dname.depthname, p->layer, 1});
    if(!ret) return Data();
  }
  else
  {
    // Columns xb .. nx-1: exactly nx - xb of them. Together with the xe + 1 columns of the second
    // part this gives the width nx + xe - xb + 1 the output was created with.
    const size_t headcols = dname.nx - p->xb;
    {
      auto ret = nodepth ? f.Read(name, data, trans, {dname.lonname, p->xb, headcols}, {dname.latname, p->yb, nrows}, {dname.timename, i, 1})
                         : f.Read(name, data, trans, {dname.lonname, p->xb, headcols}, {dname.latname, p->yb, nrows}, {dname.timename, i, 1},
                                  {dname.depthname, p->layer, 1});
      if(!ret) return Data();
    }
    {
      // Columns 0 .. xe are placed right after the first part
      const size_t shift = headcols;
      auto shifteddata = [&data, shift](size_t ix, size_t iy) -> real& { return data(ix + shift, iy); };
      auto ret = nodepth ? f.Read(name, shifteddata, trans, {dname.lonname, 0, p->xe + 1}, {dname.latname, p->yb, nrows}, {dname.timename, i, 1})
                         : f.Read(name, shifteddata, trans, {dname.lonname, 0, p->xe + 1}, {dname.latname, p->yb, nrows}, {dname.timename, i, 1},
                                  {dname.depthname, p->layer, 1});
      if(!ret) return Data();
    }
  }
  return data;
}

104
src/ncfuncs.cpp

@ -31,6 +31,37 @@ NCFuncs::CoordNames NCFuncs::GetDNames(const NCFileA& nc)
return out;
}
// Finds the longitude, latitude, depth and time dimensions of nc by their conventional names
// and returns the names together with the sizes of the dimensions.
// Fields of dimensions that were not found keep the values given by the CoordNames constructor.
template<class NcZarrFunctions> NCFuncs::CoordNames NCFuncs::GetDNames(const NcZarrRead<NcZarrFunctions>& nc)
{
  CoordNames res;
  for(const auto& d: nc.Dims())
  {
    auto dn = d.Name();
    // The name sets are disjoint, so at most one branch fires for a dimension
    if(dn == "lon" || dn == "longitude")
    {
      res.lonname = d.Name();
      res.nx = d.Size();
    }
    else if(dn == "lat" || dn == "latitude")
    {
      res.latname = d.Name();
      res.ny = d.Size();
    }
    else if(dn == "depth" || dn == "elevation")
    {
      res.depthname = d.Name();
      res.nz = d.Size();
    }
    else if(dn == "time")
    {
      res.timename = d.Name();
      res.nt = d.Size();
    }
  }
  return res;
}
template NCFuncs::CoordNames NCFuncs::GetDNames<ZarrFunctions>(const NcZarrRead<ZarrFunctions>&);
NCFuncs::CoordNames NCFuncs::GetCNames(const NCFileA& nc)
{
CoordNames out;
@ -64,6 +95,46 @@ NCFuncs::CoordNames NCFuncs::GetCNames(const NCFileA& nc)
return out;
}
// Finds the coordinate variables (longitude, latitude, depth, time) of nc.
// A variable is recognised by its "standard_name" attribute or, while no variable has been
// found for that coordinate yet, by its "axis" attribute (X, Y, Z, T).
// If no time variable is recognised this way, a variable named "time" is used when it exists.
template<class NcZarrFunctions> NCFuncs::CoordNames NCFuncs::GetCNames(const NcZarrRead<NcZarrFunctions>& nc)
{
  CoordNames found;
  for(const auto& var: nc.Vars())
  {
    // Variables with neither attribute can not be classified
    auto withstname = nc.HasAtt(var.Name(), "standard_name");
    auto withaxis = nc.HasAtt(var.Name(), "axis");
    if(!withstname && !withaxis) continue;

    auto stname = nc.AttString(var.Name(), "standard_name");
    auto axis = nc.AttString(var.Name(), "axis");

    // All four decisions are taken before the result is modified
    bool islon = (stname == "longitude") || (!found.lonname.Exist() && axis == "X");
    bool islat = (stname == "latitude") || (!found.latname.Exist() && axis == "Y");
    bool isdepth = (stname == "depth") || (!found.depthname.Exist() && axis == "Z");
    bool istime = (stname == "time") || (!found.timename.Exist() && axis == "T");

    // NOTE(review): nx/ny/nz/nt receive the size of the variable's Dims() container;
    // whether that is the intended quantity can not be told from here - confirm.
    if(islon)
    {
      found.lonname = var.Name();
      found.nx = var.Dims().size();
    }
    if(islat)
    {
      found.latname = var.Name();
      found.ny = var.Dims().size();
    }
    if(isdepth)
    {
      found.depthname = var.Name();
      found.nz = var.Dims().size();
    }
    if(istime)
    {
      found.timename = var.Name();
      found.nt = var.Dims().size();
    }
  }

  // If time not found just check variable "time"
  if(!found.timename.Exist() && nc.HasVar("time")) found.timename = "time";

  return found;
}
template NCFuncs::CoordNames NCFuncs::GetCNames<ZarrFunctions>(const NcZarrRead<ZarrFunctions>&);
void NCFuncs::GetVars(const NCFileA& nc, std::set<MString>& vars)
{
auto head = nc.Header();
@ -83,6 +154,26 @@ void NCFuncs::GetVars(const NCFileA& nc, std::set<MString>& vars)
if(vars.contains("ssh")) vars.emplace("vgeo");
}
// Adds to vars the short names of all variables of nc whose "standard_name" attribute is known
// to StName2Name(), and then the names of variables that can be derived from the ones present in vars.
template<class NcZarrFunctions> void NCFuncs::GetVars(const NcZarrRead<NcZarrFunctions>& nc, std::set<MString>& vars)
{
  // Variables stored in the dataset
  for(const auto& var: nc.Vars())
  {
    if(nc.HasAtt(var.Name(), "standard_name"))
    {
      auto shortname = StName2Name(nc.AttString(var.Name(), "standard_name"));
      if(shortname.Exist()) vars.emplace(shortname);
    }
  }

  // Derived variables. The checks are made before anything is inserted; none of the inserted
  // names makes a further one available, so the final set does not depend on the order.
  const bool hasptemp = vars.contains("ptemp");
  const bool hastemp = vars.contains("temp");
  const bool hassal = vars.contains("sal");
  const bool hasuv = vars.contains("u") && vars.contains("v");
  const bool hasssh = vars.contains("ssh");

  if(hassal)
  {
    if(hasptemp || hastemp) vars.emplace("pdens");
    if(hasptemp) vars.emplace("temp");
    if(hastemp) vars.emplace("ptemp");
  }
  if(hasuv)
  {
    vars.emplace("U");
    vars.emplace("U2");
  }
  if(hasssh)
  {
    vars.emplace("ugeo");
    vars.emplace("vgeo");
  }
}
template void NCFuncs::GetVars<ZarrFunctions>(const NcZarrRead<ZarrFunctions>&, std::set<MString>&);
std::tuple<MDateTime, time_t, bool> NCFuncs::Refdate(const MString& refdate)
{
MDateTime out;
@ -182,3 +273,16 @@ bool NCFuncs::HaveVar(const NCFileA& nc, const MString& vname)
}
return false;
}
// Returns true if nc has a variable whose "standard_name" attribute is mapped by StName2Name() to vname.
template<class NcZarrFunctions> bool NCFuncs::HaveVar(const NcZarrRead<NcZarrFunctions>& nc, const MString& vname)
{
  for(const auto& var: nc.Vars())
  {
    // Variables without the attribute are never a match
    if(nc.HasAtt(var.Name(), "standard_name") && StName2Name(nc.AttString(var.Name(), "standard_name")) == vname) return true;
  }
  return false;
}
template bool NCFuncs::HaveVar<ZarrFunctions>(const NcZarrRead<ZarrFunctions>&, const MString&);

Loading…
Cancel
Save