Compare commits

...

13 Commits

  1. 2
      CMakeLists.txt
  2. 379
      include/cache.h
  3. 4
      include/mirrorfuncs.h
  4. 70
      include/ncfilew.h
  5. 3
      include/zarr.h
  6. 27
      sources/COPERNICUS.cpp
  7. 4
      sources/NEMO.h
  8. 3
      src/CMakeLists.txt
  9. 254
      src/cache.cpp
  10. 32
      src/copcat.cpp
  11. 6
      src/mirrorfuncs.cpp
  12. 1
      src/zarr.cpp

2
CMakeLists.txt

@ -58,7 +58,7 @@ endif()
add_compile_options(-Wno-deprecated-declarations)
CHECK_CXX_COMPILER_FLAG(-Wno-class-memaccess COMPILER_SUPPORTS_CLASSMEMACCESS)
if(COMPILER_SUPPORTS_CLASSMEMACCESS)
add_compile_options(-Wno-class-memaccess)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-Wno-class-memaccess>)
endif()
add_library(teos STATIC GSW-C/gsw_oceanographic_toolbox.c GSW-C/gsw_saar.c)

379
include/cache.h

@ -1,14 +1,153 @@
#pragma once
#include "merrors.h"
#include "GPL.h"
#include "mirrorfuncs.h"
#include <functional>
#include <libpq-fe.h>
#include <optional>
#include <sqlite3.h>
#include <time.h>
#include <variant>
using michlib::GPL;
using michlib::int1;
using michlib::int4;
using michlib::int_cast;
using michlib::MString;
using michlib::pointer_cast;
class SQLiteConnection
{
public:
using DBType = sqlite3*;
using FuncType = std::function<void(DBType)>;
private:
static DBType db;
static size_t count;
static std::vector<FuncType> destructs;
public:
SQLiteConnection()
{
count++;
if(db == nullptr)
{
MString oldprefix = GPL.UsePrefix("SQLITE");
MString name = GPL.ParameterSValue("db", "");
GPL.UsePrefix(oldprefix);
auto ret = sqlite3_open(name.Buf(), &db);
if(ret != SQLITE_OK)
{
sqlite3_close(db);
db = nullptr;
}
}
}
SQLiteConnection([[maybe_unused]] const SQLiteConnection& sq): SQLiteConnection() {}
SQLiteConnection(SQLiteConnection&&) = default;
SQLiteConnection& operator=([[maybe_unused]] const SQLiteConnection& sq)
{
*this = {};
return *this;
}
SQLiteConnection& operator=(SQLiteConnection&&) = default;
~SQLiteConnection()
{
if(count == 0) michlib::errmessage("Destructor of SQLiteConnection called on count==0");
if(count > 1)
count--;
else
{
count = 0;
if(db != nullptr)
{
for(const auto& f: destructs) f(db);
sqlite3_close(db);
}
db = nullptr;
}
}
static void AddDestructor(FuncType&& f) { destructs.emplace_back(std::move(f)); }
operator DBType() const { return db; }
static DBType GetDB() { return db; }
explicit operator bool() const { return db != nullptr; }
};
class PostgreSQLConnection
{
public:
using DBType = PGconn*;
using FuncType = std::function<void(DBType)>;
private:
static DBType conn;
static size_t count;
static std::vector<FuncType> destructs;
public:
PostgreSQLConnection()
{
count++;
if(conn == nullptr)
{
MString oldprefix = GPL.UsePrefix("POSTGRES");
MString name = GPL.ParameterSValue("connection", "");
GPL.UsePrefix(oldprefix);
conn = PQconnectdb(name.Buf());
if(PQstatus(conn) != CONNECTION_OK)
{
michlib::errmessage(PQerrorMessage(conn));
PQfinish(conn);
conn = nullptr;
}
}
}
PostgreSQLConnection([[maybe_unused]] const PostgreSQLConnection& pq): PostgreSQLConnection() {}
PostgreSQLConnection(PostgreSQLConnection&&) = default;
PostgreSQLConnection& operator=([[maybe_unused]] const PostgreSQLConnection& pq)
{
*this = {};
return *this;
}
PostgreSQLConnection& operator=(PostgreSQLConnection&&) = default;
~PostgreSQLConnection()
{
if(count == 0) michlib::errmessage("Destructor of PostgreSQLConnection called on count==0");
if(count > 1)
count--;
else
{
count = 0;
if(conn != nullptr)
{
for(const auto& f: destructs) f(conn);
PQfinish(conn);
}
conn = nullptr;
}
}
static void AddDestructor(FuncType&& f) { destructs.emplace_back(std::move(f)); }
operator DBType() const { return conn; }
static DBType GetDB() { return conn; }
explicit operator bool() const { return conn != nullptr; }
};
class GenericCache
{
public:
@ -27,44 +166,50 @@ class FakeCache: public GenericCache
class SQLiteCache: public GenericCache
{
sqlite3* db = nullptr;
static bool regdest;
SQLiteConnection db;
public:
bool Init(const MString& name)
bool Init()
{
Close();
auto ret = sqlite3_open(name.Buf(), &db);
if(ret != SQLITE_OK)
{
Close();
return false;
}
// Create table
sqlite3_stmt* sqst;
int i;
i = sqlite3_prepare_v2(db,
"CREATE TABLE IF NOT EXISTS `cache`('key' TEXT PRIMARY KEY ON CONFLICT REPLACE NOT NULL ON CONFLICT FAIL, 'value' BLOB NOT NULL ON CONFLICT FAIL, "
"'exptime' INTEGER NOT NULL ON CONFLICT FAIL) WITHOUT ROWID, STRICT;",
-1, &sqst, 0);
i = sqlite3_step(sqst);
if(i != SQLITE_DONE)
if(!db) return false;
if(!regdest)
{
// Create table
sqlite3_stmt* sqst;
int i;
i = sqlite3_prepare_v2(db,
"CREATE TABLE IF NOT EXISTS `cache`('key' TEXT PRIMARY KEY ON CONFLICT REPLACE NOT NULL ON CONFLICT FAIL, 'value' BLOB NOT NULL ON CONFLICT FAIL, "
"'exptime' INTEGER NOT NULL ON CONFLICT FAIL) WITHOUT ROWID, STRICT;",
-1, &sqst, 0);
i = sqlite3_step(sqst);
if(i != SQLITE_DONE)
{
sqlite3_finalize(sqst);
return false;
}
sqlite3_finalize(sqst);
Close();
return false;
sqlite3_busy_timeout(db, 1000);
db.AddDestructor(
[](SQLiteConnection::DBType db)
{
sqlite3_stmt* sqst = nullptr;
int i = SQLITE_OK;
if(i == SQLITE_OK) i = sqlite3_prepare_v2(db, "DELETE from `cache` WHERE exptime<?1;", -1, &sqst, 0);
if(i == SQLITE_OK) i = sqlite3_bind_int64(sqst, 1, time(nullptr));
if(i == SQLITE_OK) i = sqlite3_step(sqst);
sqlite3_finalize(sqst);
});
regdest = true;
}
sqlite3_finalize(sqst);
sqlite3_busy_timeout(db, 1000);
return true;
}
void Close()
{
if(db != nullptr) sqlite3_close(db);
db = nullptr;
}
virtual bool Put(const MString& key, const MString& value, size_t ttl) const override
{
if(!*this) return false;
@ -107,26 +252,39 @@ class SQLiteCache: public GenericCache
return {"", false};
}
virtual ~SQLiteCache() override
{
if(!*this) return;
sqlite3_stmt* sqst = nullptr;
int i = SQLITE_OK;
if(i == SQLITE_OK) i = sqlite3_prepare_v2(db, "DELETE from `cache` WHERE exptime<?1;", -1, &sqst, 0);
if(i == SQLITE_OK) i = sqlite3_bind_int64(sqst, 1, time(nullptr));
if(i == SQLITE_OK) i = sqlite3_step(sqst);
sqlite3_finalize(sqst);
sqlite3_close_v2(db);
}
virtual ~SQLiteCache() override = default;
explicit operator bool() const { return db != nullptr; }
};
class PostgreSQLCache: public GenericCache
class PostgreSQLHelpers
{
PGconn* conn = nullptr;
static constexpr time_t postgresepoch = 946648800;
class PGResultRAIIDT
{
public:
// TODO: make static
void operator()(PGresult* res) { PQclear(res); }
};
protected:
PostgreSQLConnection conn;
// Convert Postgres binary representation of timestamp to Unix epoch seconds. Microseconds ignored
static time_t raw2epoch(time_t raw) { return Invert(raw) / 1000000 + postgresepoch; }
// Convert Unix epoch time to Postres binary representation
static time_t epoch2raw(time_t epoch) { return Invert((epoch - postgresepoch) * 1000000); }
class PGresultRAII: public std::unique_ptr<PGresult, PGResultRAIIDT>
{
public:
PGresultRAII() = default;
PGresultRAII(PGresult* res): std::unique_ptr<PGresult, PGResultRAIIDT>(res) {}
operator PGresult*() const { return get(); }
};
bool CheckCon() const
{
@ -139,50 +297,54 @@ class PostgreSQLCache: public GenericCache
template<class D> static D Invert(D d)
{
using michlib::int1;
if(sizeof(D) <= 1) return d;
D out;
int1* pout = pointer_cast<int1*>(&out);
int1* pin = pointer_cast<int1*>(&d);
for(size_t i = 0; i < sizeof(D); i++) pout[sizeof(D) - i - 1] = pin[i];
D out;
Invert(&d, &out, sizeof(D));
return out;
}
static void Invert(const void* src, void* dst, size_t sz)
{
if(sz == 0) return;
const int1* pin = pointer_cast<const int1*>(src);
int1* pout = pointer_cast<int1*>(dst);
for(size_t i = 0; i < sz; i++) pout[sz - i - 1] = pin[i];
}
public:
explicit operator bool() const { return conn != nullptr; }
};
class PostgreSQLCache: public GenericCache, public PostgreSQLHelpers
{
static bool regdest;
public:
bool Init(const MString& name)
bool Init()
{
Close();
conn = PQconnectdb(name.Buf());
if(!conn) return false;
if(PQstatus(conn) != CONNECTION_OK)
if(!regdest)
{
michlib::errmessage(PQerrorMessage(conn));
Close();
return false;
}
PGresultRAII res;
// Create table
if(false)
{
auto* res = PQexec(conn, "CREATE TABLE IF NOT EXISTS cache(key TEXT PRIMARY KEY NOT NULL, value BYTEA, exptime TIMESTAMP(0) NOT NULL);");
// Create table
res = PQexec(conn, "SET client_min_messages=WARNING;");
res = PQexec(conn, "CREATE TABLE IF NOT EXISTS cache(key TEXT PRIMARY KEY NOT NULL, value BYTEA, exptime TIMESTAMP(0) NOT NULL);");
if(PQresultStatus(res) != PGRES_COMMAND_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
PQclear(res);
Close();
}
else
PQclear(res);
res = PQexec(conn, "SET client_min_messages=NOTICE;");
conn.AddDestructor([](PostgreSQLConnection::DBType conn) { PGresultRAII res = PQexec(conn, "DELETE FROM cache WHERE exptime<localtimestamp;"); });
regdest = true;
}
return true;
}
void Close()
{
if(conn != nullptr) PQfinish(conn);
conn = nullptr;
}
virtual bool Put(const MString& key, const MString& value, size_t ttl) const override
{
if(!CheckCon()) return false;
@ -190,21 +352,19 @@ class PostgreSQLCache: public GenericCache
auto interval = michlib::int_cast<michlib::int8>(ttl);
michlib::int8 rinterval = Invert(interval);
const char* params[] = {key.Buf(), value.Buf(), pointer_cast<const char*>(&rinterval)};
int plens[] = {int_cast<int>(key.Len()), int_cast<int>(value.Len()), 8};
int plens[] = {int_cast<int>(key.Len()), int_cast<int>(value.Len()), sizeof(rinterval)};
int pfor[] = {0, 1, 1};
auto* res = PQexecParams(conn,
"INSERT INTO cache(key,value,exptime) VALUES($1,$2,localtimestamp + ($3::bigint ||' seconds')::interval)"
"ON CONFLICT(key) DO UPDATE SET value=EXCLUDED.value, exptime=EXCLUDED.exptime;",
3, nullptr, params, plens, pfor, 1);
PGresultRAII res = PQexecParams(conn,
"INSERT INTO cache(key,value,exptime) VALUES($1,$2,localtimestamp + ($3::bigint ||' seconds')::interval)"
"ON CONFLICT(key) DO UPDATE SET value=EXCLUDED.value, exptime=EXCLUDED.exptime;",
3, nullptr, params, plens, pfor, 1);
if(PQresultStatus(res) != PGRES_COMMAND_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
PQclear(res);
return false;
}
PQclear(res);
return true;
}
@ -216,63 +376,72 @@ class PostgreSQLCache: public GenericCache
int plens[] = {int_cast<int>(key.Len())};
int pfor[] = {0};
auto* res = PQexecParams(conn, "SELECT value from cache WHERE key=$1::text AND exptime>localtimestamp;", 1, nullptr, params, plens, pfor, 1);
PGresultRAII res = PQexecParams(conn, "SELECT value from cache WHERE key=$1::text AND exptime>localtimestamp;", 1, nullptr, params, plens, pfor, 1);
if(PQresultStatus(res) != PGRES_TUPLES_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
PQclear(res);
return {"", false};
}
else if(PQntuples(res) == 0)
{
PQclear(res);
return {"", false};
}
MString val(PQgetvalue(res, 0, 0), PQgetlength(res, 0, 0));
PQclear(res);
return {std::move(val), true};
}
virtual ~PostgreSQLCache() override
{
if(!CheckCon()) return;
auto* res = PQexec(conn, "DELETE FROM cache WHERE exptime<localtimestamp;");
PQclear(res);
Close();
}
explicit operator bool() const { return conn != nullptr; }
virtual ~PostgreSQLCache() override = default;
};
inline GenericCache* CreateCache(const MString& cachedesc)
{
auto i = cachedesc.GetPos(':');
if(i == 0)
{
if(cachedesc == "no") return new FakeCache;
return nullptr;
}
auto i = cachedesc.GetPos(':');
auto name = i == 0 ? cachedesc : cachedesc.SubStr(1, i - 1);
auto par = i == 0 ? "" : cachedesc.SubStr(i + 1, cachedesc.Len() - i);
auto name = cachedesc.SubStr(1, i - 1);
auto par = cachedesc.SubStr(i + 1, cachedesc.Len() - i);
if(name == "no") return new FakeCache;
if(name == "sqlite")
{
auto ret = new SQLiteCache;
ret->Init(par);
ret->Init();
if(*ret) return ret;
delete ret;
}
if(name == "postgre" || name == "postgres" || name == "postgresql")
{
auto ret = new PostgreSQLCache;
ret->Init(par);
ret->Init();
if(*ret) return ret;
delete ret;
}
return nullptr;
}
class FileInfoCache: public PostgreSQLHelpers
{
public:
using DataType = std::optional<MString>;
using CallbackType = std::function<DataType(const MString&)>;
private:
static bool regdest;
CallbackType readfunc;
MString dir;
int4 dirid;
FileInfoCache() = delete;
CallbackType::result_type GetData(const MString& fname) const { return readfunc(dir + "/" + fname); }
void GetDirId();
public:
FileInfoCache(CallbackType&& readfunc_, const MString& dir_);
Error UpdateCache(bool force = false) const;
DataType GetInfo(const MString& name) const;
};

4
include/mirrorfuncs.h

@ -7,9 +7,9 @@
#include <sys/types.h>
#include <vector>
using michlib::Error;
using michlib::MDateTime;
using michlib::RetVal;
using michlib::Error;
class DIRRAIIDT
{
@ -52,7 +52,7 @@ inline MString FileName(const MString& name)
bool MakePath(const MString& dname);
// Get local file list
RetVal<std::vector<struct FileInfo>> ReadLocalFileList(const MString& dir, const MString& path = "");
RetVal<std::vector<struct FileInfo>> ReadLocalFileList(const MString& dir, const bool nofollow = true, const MString& path = "");
// Download file to the local mirror
Error DownloadFile(const CURLRAII& chandle, const struct FileInfo& rinfo, const MString& root);

70
include/ncfilew.h

@ -280,9 +280,7 @@ class NCFileWBase
template<class T>
requires requires(int nc, int vid, const T* d) {
{
NCTypeD<T, void>::put_var(nc, vid, d)
} -> std::same_as<int>;
{ NCTypeD<T, void>::put_var(nc, vid, d) } -> std::same_as<int>;
}
void WriteVar(const MString& vname, const T* data)
{
@ -297,9 +295,7 @@ class NCFileWBase
template<class T>
requires requires(int nc, int vid, const size_t* start, const size_t* count, const T* d) {
{
NCTypeD<T, void>::put_vara(nc, vid, start, count, d)
} -> std::same_as<int>;
{ NCTypeD<T, void>::put_vara(nc, vid, start, count, d) } -> std::same_as<int>;
}
void WriteVar(const MString& vname, size_t ind, const T* data)
{
@ -421,24 +417,24 @@ class NCFileW: public NCFileWBase
if constexpr(Is1DType(dtype))
{
const size_t c = data.N();
float buf[c];
const size_t c = data.N();
auto buf = std::make_unique<float[]>(c);
for(size_t ix = 0; ix < c; ix++) buf[ix] = data.IsFill(ix) ? fill : op(ix);
if(tdep)
WriteVar(name, tind, buf);
WriteVar(name, tind, buf.get());
else
WriteVar(name, buf);
WriteVar(name, buf.get());
}
else
{
const size_t c[2] = {data.Ny(), data.Nx()};
float buf[c[0] * c[1]];
auto buf = std::make_unique<float[]>(c[0] * c[1]);
for(size_t iy = 0; iy < c[0]; iy++)
for(size_t ix = 0; ix < c[1]; ix++) buf[iy * c[1] + ix] = data.IsFill(ix, iy) ? fill : op(ix, iy);
if(tdep)
WriteVar(name, tind, buf);
WriteVar(name, tind, buf.get());
else
WriteVar(name, buf);
WriteVar(name, buf.get());
}
if(!*this) return "Can't write variable " + name + ": " + ErrMessage();
@ -449,11 +445,9 @@ class NCFileW: public NCFileWBase
template<class D> MString WriteVariable(const D& data, const MString& name, size_t tind)
{
if constexpr(Is1DType(DetType<D>()))
return WriteVariable(
data, name, [&data = std::as_const(data)](size_t i) { return data(i); }, tind);
return WriteVariable(data, name, [&data = std::as_const(data)](size_t i) { return data(i); }, tind);
else
return WriteVariable(
data, name, [&data = std::as_const(data)](size_t i, size_t j) { return data(i, j); }, tind);
return WriteVariable(data, name, [&data = std::as_const(data)](size_t i, size_t j) { return data(i, j); }, tind);
}
template<class D, class Op>
@ -489,64 +483,64 @@ class NCFileW: public NCFileWBase
}
else if constexpr(dtype == GPSET)
{
const size_t c = data.N();
float bufx[c];
float bufy[c];
const size_t c = data.N();
auto bufx = std::make_unique<float[]>(c);
auto bufy = std::make_unique<float[]>(c);
for(size_t ix = 0; ix < c; ix++)
{
bufx[ix] = data.Lon(ix);
bufy[ix] = data.Lat(ix);
}
WriteVar("longitude", bufx);
WriteVar("latitude", bufy);
WriteVar("longitude", bufx.get());
WriteVar("latitude", bufy.get());
}
else if constexpr(dtype == RGRID)
{
const size_t cx = data.Nx(), cy = data.Ny();
float bufx[cx];
float bufy[cy];
auto bufx = std::make_unique<float[]>(cx);
auto bufy = std::make_unique<float[]>(cy);
for(size_t ix = 0; ix < cx; ix++) bufx[ix] = data.Ix2X(ix);
for(size_t iy = 0; iy < cy; iy++) bufy[iy] = data.Iy2Y(iy);
WriteVar("x", bufx);
WriteVar("y", bufy);
WriteVar("x", bufx.get());
WriteVar("y", bufy.get());
}
else if constexpr(dtype == GRGRID)
{
const size_t cx = data.Nx(), cy = data.Ny();
float bufx[cx];
float bufy[cy];
auto bufx = std::make_unique<float[]>(cx);
auto bufy = std::make_unique<float[]>(cy);
for(size_t ix = 0; ix < cx; ix++) bufx[ix] = data.Ix2Lon(ix);
for(size_t iy = 0; iy < cy; iy++) bufy[iy] = data.Iy2Lat(iy);
WriteVar("longitude", bufx);
WriteVar("latitude", bufy);
WriteVar("longitude", bufx.get());
WriteVar("latitude", bufy.get());
}
else if constexpr(dtype == GRID)
{
const size_t c[2] = {data.Ny(), data.Nx()};
float bufx[c[0] * c[1]];
float bufy[c[0] * c[1]];
auto bufx = std::make_unique<float[]>(c[0] * c[1]);
auto bufy = std::make_unique<float[]>(c[0] * c[1]);
for(size_t iy = 0; iy < c[0]; iy++)
for(size_t ix = 0; ix < c[1]; ix++)
{
bufx[iy * c[1] + ix] = data.X(ix, iy);
bufy[iy * c[1] + ix] = data.Y(ix, iy);
}
WriteVar("x", bufx);
WriteVar("y", bufy);
WriteVar("x", bufx.get());
WriteVar("y", bufy.get());
}
else if constexpr(dtype == GGRID)
{
const size_t c[2] = {data.Ny(), data.Nx()};
float bufx[c[0] * c[1]];
float bufy[c[0] * c[1]];
auto bufx = std::make_unique<float[]>(c[0] * c[1]);
auto bufy = std::make_unique<float[]>(c[0] * c[1]);
for(size_t iy = 0; iy < c[0]; iy++)
for(size_t ix = 0; ix < c[1]; ix++)
{
bufx[iy * c[1] + ix] = data.Lon(ix, iy);
bufy[iy * c[1] + ix] = data.Lat(ix, iy);
}
WriteVar("longitude", bufx);
WriteVar("latitude", bufy);
WriteVar("longitude", bufx.get());
WriteVar("latitude", bufy.get());
}
else
return "Unknown data type";

3
include/zarr.h

@ -94,6 +94,7 @@ class ZarrFunctions: public ZarrTypes
std::unique_ptr<GenericCache> cache;
CURLRAII chandle;
MString url;
MString proxyurl;
std::vector<std::vector<size_t>> chunks;
@ -107,6 +108,8 @@ class ZarrFunctions: public ZarrTypes
{
auto oldprefix = michlib::GPL.UsePrefix("ZARR");
cache.reset(CreateCache(michlib::GPL.ParameterSValue("Cache", "")));
proxyurl = michlib::GPL.ParameterSValue("Proxy", "");
if(proxyurl.Exist()) curl_easy_setopt(chandle, CURLOPT_PROXY, proxyurl.Buf());
michlib::GPL.UsePrefix(oldprefix);
if(!cache)
{

27
sources/COPERNICUS.cpp

@ -129,6 +129,9 @@ Error COPERNICUSData::Mirror(const CLArgs& args) const
dsets = dlist.Value();
}
michlib::RegExpSimple filter((args.contains("filter") ? args.at("filter") : ".*").Buf());
if(filter.Compile() != 0) return Error(pref, MString("Can't compile regular expression ") + filter.RegStr());
CURLRAII chandle;
for(const auto& dset: dsets)
{
@ -154,20 +157,34 @@ Error COPERNICUSData::Mirror(const CLArgs& args) const
while(rpos != rfiles.size() || lpos != lfiles.size())
{
if(rpos == rfiles.size())
while(lpos != lfiles.size()) rem.push_back(lpos++);
while(lpos != lfiles.size())
{
if(filter.Match(lfiles[lpos].name.Buf())) rem.push_back(lpos);
lpos++;
}
if(lpos == lfiles.size())
while(rpos != rfiles.size()) down.push_back(rpos++);
while(rpos != rfiles.size())
{
if(filter.Match(rfiles[rpos].name.Buf())) down.push_back(rpos);
rpos++;
}
if(rpos == rfiles.size() || lpos == lfiles.size()) continue;
if(rfiles[rpos].name < lfiles[lpos].name)
down.push_back(rpos++);
{
if(filter.Match(rfiles[rpos].name.Buf())) down.push_back(rpos);
rpos++;
}
else if(lfiles[lpos].name < rfiles[rpos].name)
rem.push_back(lpos++);
{
if(filter.Match(lfiles[lpos].name.Buf())) rem.push_back(lpos);
lpos++;
}
else
{
auto delta = rfiles[rpos].mtime.Epoch() - lfiles[lpos].mtime.Epoch();
if(delta < 0) delta = -delta;
if(delta > 0 || rfiles[rpos].size != lfiles[lpos].size) upd.emplace_back(rpos, lpos);
if((delta > 0 || rfiles[rpos].size != lfiles[lpos].size) && filter.Match(lfiles[lpos].name.Buf())) upd.emplace_back(rpos, lpos);
lpos++;
rpos++;
}

4
sources/NEMO.h

@ -7,6 +7,7 @@ class NEMOData: public LayeredDataZ
{
TYPE_UNKNOWN,
TYPE_DT,
TYPE_DT1,
TYPE_NRT,
TYPE_NRT6,
TYPE_BALTICDT,
@ -34,6 +35,7 @@ class NEMOData: public LayeredDataZ
switch(type)
{
case(TYPE_DT): return "NEMO Delayed time, daily mean (DT)";
case(TYPE_DT1): return "NEMO Delayed time, daily mean, part 2 (DT1)";
case(TYPE_NRT): return "NEMO Near-real time, daily mean (NRT)";
case(TYPE_NRT6): return "NEMO Near-real time, 6h resolution (NRT6)";
case(TYPE_BALTICDT): return "NEMO Delayed time, Baltic region, daily mean (BALTICDT)";
@ -59,6 +61,8 @@ class NEMOData: public LayeredDataZ
GPL.UsePrefix("NEMO");
if(dataset == "DT")
type = TYPE_DT;
else if(dataset == "DT1")
type = TYPE_DT1;
else if(dataset == "NRT")
type = TYPE_NRT;
else if(dataset == "NRT6")

3
src/CMakeLists.txt

@ -4,6 +4,7 @@ set(DATALISTINC ${CMAKE_CURRENT_BINARY_DIR}/../include/datalist.h) # Include dat
find_package(PkgConfig REQUIRED)
find_library(netcdf netcdf REQUIRED)
find_library(udunits udunits2 REQUIRED)
find_package(OpenMP REQUIRED)
find_package(CURL REQUIRED)
find_package(LibXml2 REQUIRED)
@ -20,7 +21,7 @@ file(GLOB srcs CONFIGURE_DEPENDS *.cpp)
add_executable(${EXENAME} ${srcs} ${ACTIONLISTINC} ${SOURCELISTINC})
target_include_directories(${EXENAME} PRIVATE ../michlib/michlib ${CMAKE_CURRENT_BINARY_DIR}/../include)
target_link_libraries(${EXENAME} ${linker_options} ${netcdf} OpenMP::OpenMP_CXX CURL::libcurl ${JSONCPP_LINK_LIBRARIES} ${BLOSC_LINK_LIBRARIES} ${LIBPQ_LINK_LIBRARIES} LibXml2::LibXml2 SQLite::SQLite3 teos)
target_link_libraries(${EXENAME} ${linker_options} ${netcdf} ${udunits} OpenMP::OpenMP_CXX CURL::libcurl ${JSONCPP_LINK_LIBRARIES} ${BLOSC_LINK_LIBRARIES} ${LIBPQ_LINK_LIBRARIES} LibXml2::LibXml2 SQLite::SQLite3 teos)
set_target_properties(${EXENAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS ${EXENAME})

254
src/cache.cpp

@ -0,0 +1,254 @@
#define MICHLIB_NOSOURCE
#include "cache.h"
SQLiteConnection::DBType SQLiteConnection::db = nullptr;
size_t SQLiteConnection::count = 0;
std::vector<SQLiteConnection::FuncType> SQLiteConnection::destructs = {};
PostgreSQLConnection::DBType PostgreSQLConnection::conn = nullptr;
size_t PostgreSQLConnection::count = 0;
std::vector<PostgreSQLConnection::FuncType> PostgreSQLConnection::destructs = {};
bool SQLiteCache::regdest = false;
bool PostgreSQLCache::regdest = false;
bool FileInfoCache::regdest = false;
void FileInfoCache::GetDirId()
{
if(dirid != 0) return;
const char* params[] = {dir.Buf()};
int plens[] = {int_cast<int>(dir.Len())};
int pfor[] = {0};
PGresultRAII res = PQexecParams(conn, "SELECT id FROM dirs WHERE name=$1::text;", 1, nullptr, params, plens, pfor, 1);
if(PQresultStatus(res) != PGRES_TUPLES_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
return;
}
else if(PQntuples(res) == 0)
{
res = PQexecParams(conn,
"INSERT INTO dirs(name,id) VALUES ($1, (SELECT min(num.numid) FROM (SELECT generate_series(1, (SELECT COALESCE((SELECT max(id) FROM dirs), 1)) + "
"1, 1) AS numid) num LEFT JOIN dirs ON dirs.id=num.numid WHERE id IS NULL)) RETURNING id;",
1, nullptr, params, plens, pfor, 1);
if(PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
}
if(PQntuples(res) == 0) return;
}
if(PQgetlength(res, 0, 0) == sizeof(dirid)) dirid = *pointer_cast<const decltype(dirid)*>(PQgetvalue(res, 0, 0));
michlib::message("Dirid: ", Invert(dirid));
}
FileInfoCache::FileInfoCache(FileInfoCache::CallbackType&& readfunc_, const MString& dir_): readfunc(std::move(readfunc_)), dir(dir_), dirid(0)
{
if(!conn) return;
if(!regdest)
{
// Create table
PGresultRAII res = PQexec(conn, "SET client_min_messages=WARNING;");
res = PQexec(conn, "BEGIN;"
"CREATE TABLE IF NOT EXISTS dirs(name TEXT PRIMARY KEY, id INTEGER UNIQUE NOT NULL CONSTRAINT id_is_positive CHECK(id>0));"
"CREATE TABLE IF NOT EXISTS files(name TEXT NOT NULL, size BIGINT NOT NULL CONSTRAINT size_is_positive CHECK(size>0), modtime TIMESTAMP(0) NOT NULL, "
"dirid INTEGER REFERENCES dirs(id) ON DELETE CASCADE, lastaccess TIMESTAMP(0) NOT NULL, data BYTEA NOT NULL, PRIMARY KEY(name,dirid));"
"COMMIT;");
if(PQresultStatus(res) != PGRES_COMMAND_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
}
res = PQexec(conn, "SET client_min_messages=NOTICE;");
conn.AddDestructor(
[](PostgreSQLConnection::DBType conn)
{
PGresultRAII res = PQexec(conn, "BEGIN;"
"DELETE FROM files WHERE lastaccess+'100 days'::interval<localtimestamp;"
"DELETE FROM dirs WHERE id NOT IN (SELECT dirid FROM files);"
"COMMIT;");
});
regdest = true;
}
GetDirId();
//UpdateCache();
}
Error FileInfoCache::UpdateCache(bool force) const
{
const static MString pref = "FileInfoCache::UpdateCache";
DIRRAII dhandle;
dhandle.reset(opendir(dir.Buf()));
if(!dhandle) return {pref, "Can't open directory " + dir};
int dfd = dirfd(dhandle);
errno = 0;
struct dirent* dent = readdir(dhandle);
if(errno != 0) return {pref, "Can't read directory " + dir};
struct stat st;
do {
if(dent->d_name[0] != '.')
{
int ret = fstatat(dfd, dent->d_name, &st, 0);
if(ret != 0) return {pref, "Can't stat " + dir + "/" + dent->d_name};
if(S_ISREG(st.st_mode)) // Regular file
{
const char* params[] = {dent->d_name, pointer_cast<const char*>(&dirid)};
int plens[] = {int_cast<int>(strlen(dent->d_name)), sizeof(dirid)};
int pfor[] = {0, 1};
bool querysucc = true;
time_t modtime;
size_t size;
PGresultRAII res = PQexecParams(conn, "SELECT size,modtime FROM files WHERE name=$1::text AND dirid=$2::integer;", 2, nullptr, params, plens, pfor, 1);
if(PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
querysucc = false;
}
else if(PQntuples(res) == 0 || PQntuples(res) > 1)
querysucc = false;
if(querysucc)
{
size = *pointer_cast<const decltype(size)*>(PQgetvalue(res, 0, 0));
modtime = raw2epoch(*pointer_cast<const time_t*>(PQgetvalue(res, 0, 1)));
}
else
{
size = int_cast<size_t>(st.st_size);
modtime = st.st_mtim.tv_sec;
}
if(!querysucc || force || size != int_cast<size_t>(st.st_size) || modtime != st.st_mtim.tv_sec)
{
auto ret = GetData(dent->d_name);
// Remove entry
if(!ret && querysucc)
{
PGresultRAII dres = PQexecParams(conn, "DELETE FROM files WHERE name=$1::text AND dirid=$2::integer;", 2, nullptr, params, plens, pfor, 1);
if(PQresultStatus(dres) != PGRES_COMMAND_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(dres)));
michlib::errmessage(PQerrorMessage(conn));
}
}
else // Update or insert
{
auto sizei = Invert(size);
auto modtimei = epoch2raw(modtime);
const char* params[] = {dent->d_name, pointer_cast<const char*>(&sizei), pointer_cast<const char*>(&modtimei), pointer_cast<const char*>(&dirid), ret.value().Buf()};
int plens[] = {int_cast<int>(strlen(dent->d_name)), sizeof(sizei), sizeof(modtimei), sizeof(dirid), int_cast<int>(ret.value().Len())};
int pfor[] = {0, 1, 1, 1, 1};
PGresultRAII res = PQexecParams(conn,
"INSERT INTO files (name,size,modtime,dirid,lastaccess,data) VALUES($1::text, $2::bigint, $3::timestamp, $4::integer, localtimestamp, $5) "
"ON CONFLICT ON CONSTRAINT files_pkey DO UPDATE SET "
"size=EXCLUDED.size, modtime=EXCLUDED.modtime, lastaccess=EXCLUDED.lastaccess, data=EXCLUDED.data;",
5, nullptr, params, plens, pfor, 1);
if(PQresultStatus(res) != PGRES_COMMAND_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
}
} // Insert or update branch
} // Need data update
} // Regular file
} // if(dent->d_name[0] != '.')
dent = readdir(dhandle);
} while(dent != nullptr || errno != 0);
return Error();
}
FileInfoCache::DataType FileInfoCache::GetInfo(const MString& name) const
{
if(!*this) return GetData(name);
bool querysucc = true;
MString data;
time_t modtime;
size_t size;
{
const char* params[] = {name.Buf(), pointer_cast<const char*>(&dirid)};
int plens[] = {int_cast<int>(name.Len()), sizeof(dirid)};
int pfor[] = {0, 1};
PGresultRAII res =
PQexecParams(conn, "UPDATE files SET lastaccess=localtimestamp WHERE name=$1::text AND dirid=$2::integer RETURNING data,size,modtime;", 2, nullptr, params, plens, pfor, 1);
if(PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
querysucc = false;
}
if(PQntuples(res) == 0 || PQntuples(res) > 1)
{
michlib::errmessage("Data for file ", dir + "/" + name, (PQntuples(res) == 0 ? " not found " : " duplicated "), "in cache");
querysucc = false;
}
if(querysucc)
{
data = MString(PQgetvalue(res, 0, 0), PQgetlength(res, 0, 0));
size = *pointer_cast<const decltype(size)*>(PQgetvalue(res, 0, 1));
modtime = raw2epoch(*pointer_cast<const time_t*>(PQgetvalue(res, 0, 2)));
}
}
{
struct stat st;
int ret = stat((dir + "/" + name).Buf(), &st);
if(ret != 0) return DataType();
if(querysucc && st.st_mtim.tv_sec == modtime && size == int_cast<size_t>(st.st_size)) return data;
modtime = st.st_mtim.tv_sec;
size = st.st_size;
}
auto ret = GetData(name);
if(ret)
{
auto sizei = Invert(size);
auto modtimei = epoch2raw(modtime);
const char* params[] = {name.Buf(), pointer_cast<const char*>(&sizei), pointer_cast<const char*>(&modtimei), pointer_cast<const char*>(&dirid), ret.value().Buf()};
int plens[] = {int_cast<int>(name.Len()), sizeof(sizei), sizeof(modtimei), sizeof(dirid), int_cast<int>(ret.value().Len())};
int pfor[] = {0, 1, 1, 1, 1};
PGresultRAII res = PQexecParams(conn,
"INSERT INTO files (name,size,modtime,dirid,lastaccess,data) VALUES($1::text, $2::bigint, $3::timestamp, $4::integer, localtimestamp, $5) "
"ON CONFLICT ON CONSTRAINT files_pkey DO UPDATE SET "
"size=EXCLUDED.size, modtime=EXCLUDED.modtime, lastaccess=EXCLUDED.lastaccess, data=EXCLUDED.data;",
5, nullptr, params, plens, pfor, 1);
if(PQresultStatus(res) != PGRES_COMMAND_OK)
{
michlib::errmessage(PQresStatus(PQresultStatus(res)));
michlib::errmessage(PQerrorMessage(conn));
}
}
return ret;
}

32
src/copcat.cpp

@ -7,16 +7,20 @@ const MString CopernicusCatalog::caturl = "https://stac.marine.copernicus.eu/met
CopernicusCatalog::CopernicusCatalog()
{
// Cache
auto oldprefix = michlib::GPL.UsePrefix("COPERNICUS");
// Cache
cache.reset(CreateCache(michlib::GPL.ParameterSValue("Cache", "")));
michlib::GPL.UsePrefix(oldprefix);
if(!cache)
{
michlib::errmessage("Can't init cache");
cache.reset(new FakeCache);
}
// Proxy
auto proxyurl = michlib::GPL.ParameterSValue("Proxy", "");
if(proxyurl.Exist()) curl_easy_setopt(chandle, CURLOPT_PROXY, proxyurl.Buf());
michlib::GPL.UsePrefix(oldprefix);
GetCatalog();
}
@ -44,9 +48,14 @@ RetVal<std::vector<MString>> CopernicusCatalog::ProductList() const
for(Json::ArrayIndex i = 0; i < links.size(); i++)
{
const auto& rel = links[i]["rel"];
const auto& titl = links[i]["title"];
const auto& href = links[i]["href"];
if(rel.type() == Json::stringValue && titl.type() == Json::stringValue && rel.asString() == "child") out.emplace_back(titl.asString().c_str());
if(rel.type() == Json::stringValue && href.type() == Json::stringValue && rel.asString() == "child")
{
auto str = href.asString();
str.erase(str.find('/'));
out.emplace_back(str.c_str());
}
}
return out;
}
@ -62,9 +71,8 @@ RetVal<MString> CopernicusCatalog::ProductURL(const MString& prod) const
for(Json::ArrayIndex i = 0; i < links.size(); i++)
{
const auto& titl = links[i]["title"];
const auto& href = links[i]["href"];
if(titl.type() == Json::stringValue && href.type() == Json::stringValue && titl.asString().c_str() == prod) return DirName(caturl) + "/" + MString(href.asString().c_str());
if(href.type() == Json::stringValue && href.asString() == (prod + "/product.stac.json").Buf()) return DirName(caturl) + "/" + MString(href.asString().c_str());
}
return {pref, "unknown product: " + prod};
}
@ -90,9 +98,14 @@ RetVal<std::vector<MString>> CopernicusCatalog::DatasetList(const MString& prod)
for(Json::ArrayIndex i = 0; i < links.size(); i++)
{
const auto& rel = links[i]["rel"];
const auto& titl = links[i]["title"];
const auto& href = links[i]["href"];
if(rel.type() == Json::stringValue && titl.type() == Json::stringValue && rel.asString() == "item") out.emplace_back(titl.asString().c_str());
if(rel.type() == Json::stringValue && href.type() == Json::stringValue && rel.asString() == "item")
{
auto str = href.asString();
str.erase(str.find('/'));
out.emplace_back(str.c_str());
}
}
return out;
}
@ -116,9 +129,8 @@ RetVal<MString> CopernicusCatalog::DatasetURL(const MString& prod, const MString
for(Json::ArrayIndex i = 0; i < links.size(); i++)
{
const auto& titl = links[i]["title"];
const auto& href = links[i]["href"];
if(titl.type() == Json::stringValue && href.type() == Json::stringValue && titl.asString().c_str() == dataset) return DirName(url) + "/" + MString(href.asString().c_str());
if(href.type() == Json::stringValue && href.asString() == (dataset + "/dataset.stac.json").Buf()) return DirName(url) + "/" + MString(href.asString().c_str());
}
return {pref, "unknown dataset: " + dataset};
}

6
src/mirrorfuncs.cpp

@ -27,7 +27,7 @@ bool MakePath(const MString& dname)
return true;
}
RetVal<std::vector<struct FileInfo>> ReadLocalFileList(const MString& dir, const MString& path)
RetVal<std::vector<struct FileInfo>> ReadLocalFileList(const MString& dir, const bool nofollow, const MString& path)
{
const static MString pref = "ReadLocalFileList";
@ -48,11 +48,11 @@ RetVal<std::vector<struct FileInfo>> ReadLocalFileList(const MString& dir, const
do {
if(dent->d_name[0] != '.')
{
int ret = fstatat(dfd, dent->d_name, &st, AT_SYMLINK_NOFOLLOW);
int ret = fstatat(dfd, dent->d_name, &st, nofollow ? AT_SYMLINK_NOFOLLOW : 0);
if(ret != 0) return {pref, "Can't stat " + path + "/" + dir + "/" + dent->d_name};
if(S_ISDIR(st.st_mode)) // Directory, recurse
{
auto list = ReadLocalFileList(dir + "/" + dent->d_name, path + (path.Exist() ? "/" : "") + dent->d_name);
auto list = ReadLocalFileList(dir + "/" + dent->d_name, nofollow, path + (path.Exist() ? "/" : "") + dent->d_name);
if(!list) return list;
out.insert(out.end(), list.Value().begin(), list.Value().end());
}

1
src/zarr.cpp

@ -171,6 +171,7 @@ Error ZarrFunctions::GetChunk(const MString& var, const std::vector<size_t>& chu
{
michlib::message(str + " not found in cache, downloading");
CURLRAII myhandle; // TODO: remove this workaround of unknown bug
if(proxyurl.Exist()) curl_easy_setopt(myhandle, CURLOPT_PROXY, proxyurl.Buf());
//auto [out, res] = GetUrl(chandle, str);
auto [out, res] = GetUrl(myhandle, str);
if(res != CURLE_OK) return Error(pref, MString("can't download chunk: ") + chandle.Err());

Loading…
Cancel
Save