Browse Source

Work with the Copernicus catalog is moved into a separate class.

The Error and RetVal classes are now used for return values.
lintest
Michael Uleysky 10 months ago
parent
commit
775a038263
  1. 7
      actions/actionmirror.h
  2. 55
      include/copcat.h
  3. 10
      include/mirrorfuncs.h
  4. 139
      sources/COPERNICUS.cpp
  5. 18
      sources/COPERNICUS.h
  6. 169
      src/copcat.cpp
  7. 56
      src/mirrorfuncs.cpp

7
actions/actionmirror.h

@ -3,21 +3,20 @@
#include "merrors.h"
using michlib::message;
using michlib::Error;
template<class T>
concept MirrorSupported = requires(T t, const CLArgs& args) {
{
t.Mirror(args)
} -> std::convertible_to<MString>;
} -> std::convertible_to<Error>;
};
ADD_ACTION(Mirror, mirror, MirrorSupported<Source>);
template<class D> MString ActionMirror::DoAction(const CLArgs& args, D& data)
{
//auto resop = data.Open(args);
//if(resop.Exist()) return "Can't open source: " + resop;
auto res = data.Mirror(args);
if(res.Exist()) return "Mirroring failed: " + res;
if(!res) return "Mirroring failed";
return "";
};

55
include/copcat.h

@ -0,0 +1,55 @@
#pragma once
#include "cache.h"
#include "curlfuncs.h"
#include "merrors.h"
#include <json/json.h>
using michlib::Error;
using michlib::RetVal;
// Access to the Copernicus catalog: lists of products and datasets and the URLs
// of products, datasets and dataset assets. JSON documents are obtained through
// GetJSON(), which consults the cache before downloading.
class CopernicusCatalog
{
  // URL of the root catalog document
  static const MString caturl;

  // Cache for downloaded JSON documents
  std::unique_ptr<GenericCache> cache;
  // curl handle used for all downloads
  CURLRAII chandle;
  // Root catalog document; stays a non-object value until the catalog is loaded
  Json::Value catalog;
  // Buffer registered with libcurl as CURLOPT_ERRORBUFFER by the constructor.
  // Zero-initialized so that an error message built from it never reads
  // indeterminate memory when libcurl leaves the buffer untouched.
  // NOTE(review): chandle stores a pointer to this member, so moving or copying
  // the object would presumably leave that pointer dangling - confirm that
  // instances are never moved.
  char curlerr[CURL_ERROR_SIZE] = {};

  // Download JSON from url (or take it from the cache)
  RetVal<Json::Value> GetJSON(const MString& url) const;

  // Download catalog
  Error GetCatalog();

  // Asset url from dataset: the "href" of the named asset in the dataset description
  RetVal<MString> AssetURL(const MString& prod, const MString& dataset, const MString& asset) const;

  public:
  CopernicusCatalog();

  // List of products
  RetVal<std::vector<MString>> ProductList() const;

  // List of datasets in product
  RetVal<std::vector<MString>> DatasetList(const MString& prod) const;

  // URL of product
  RetVal<MString> ProductURL(const MString& prod) const;

  // URL of dataset
  RetVal<MString> DatasetURL(const MString& prod, const MString& dataset) const;

  // URL of native data (files) in dataset
  RetVal<MString> DatasetNativeURL(const MString& prod, const MString& dataset) const { return AssetURL(prod, dataset, "native"); }

  // URL of time-chunked data (files) in dataset
  RetVal<MString> DatasetTimeURL(const MString& prod, const MString& dataset) const { return AssetURL(prod, dataset, "timeChunked"); }

  // URL of geo-chunked data (files) in dataset
  RetVal<MString> DatasetGeoURL(const MString& prod, const MString& dataset) const { return AssetURL(prod, dataset, "geoChunked"); }

  // True when the catalog document is a JSON object, i.e. it has been loaded
  bool Valid() const { return catalog.isObject(); }
  explicit operator bool() const { return Valid(); }
};

10
include/mirrorfuncs.h

@ -8,6 +8,8 @@
#include <vector>
using michlib::MDateTime;
using michlib::RetVal;
using michlib::Error;
class DIRRAIIDT
{
@ -50,13 +52,13 @@ inline MString FileName(const MString& name)
bool MakePath(const MString& dname);
// Get local file list
std::pair<std::vector<struct FileInfo>, MString> ReadLocalFileList(const MString& dir, const MString& path = "");
RetVal<std::vector<struct FileInfo>> ReadLocalFileList(const MString& dir, const MString& path = "");
// Download file to the local mirror
MString DownloadFile(const CURLRAII& chandle, const struct FileInfo& rinfo, const MString& root);
Error DownloadFile(const CURLRAII& chandle, const struct FileInfo& rinfo, const MString& root);
// Remove file from the local mirror
MString RemoveFile(const struct FileInfo& linfo);
Error RemoveFile(const struct FileInfo& linfo);
// Update file in the local mirror
MString UpdateFile(const CURLRAII& chandle, const struct FileInfo& rinfo, const struct FileInfo& linfo, const MString& root);
Error UpdateFile(const CURLRAII& chandle, const struct FileInfo& rinfo, const struct FileInfo& linfo, const MString& root);

139
sources/COPERNICUS.cpp

@ -6,50 +6,9 @@
using michlib::GPL;
const MString COPERNICUSData::caturl = "https://stac.marine.copernicus.eu/metadata/catalog.stac.json";
std::pair<Json::Value, MString> COPERNICUSData::GetJSON(const MString& url)
{
Json::Reader reader;
Json::Value obj;
MString content;
auto [val, suc] = cache->Get(url);
if(suc)
content = std::move(val);
else
{
michlib::message(url + " not found in cache, downloading");
auto [out, res] = GetUrl(chandle, url);
if(res != CURLE_OK) return {obj, MString("Can't download JSON: ") + curlerr};
cache->Put(url, out, 3600);
content = std::move(out);
}
reader.parse(content.Buf(), content.Buf() + content.Len(), obj, false);
return {obj, ""};
}
MString COPERNICUSData::ReadURL(const Json::Value& cat, const MString& prod)
{
const auto& links = cat["links"];
if(links.type() != Json::arrayValue) return "";
for(Json::ArrayIndex i = 0; i < links.size(); i++)
{
const auto& titl = links[i]["title"];
const auto& href = links[i]["href"];
if(titl.type() == Json::stringValue && href.type() == Json::stringValue)
{
MString str(titl.asString().c_str());
if(str == prod) return MString(href.asString().c_str());
}
}
return "";
}
std::pair<std::vector<struct FileInfo>, MString> COPERNICUSData::ReadRemoteFileList(const MString& url)
RetVal<std::vector<struct FileInfo>> COPERNICUSData::ReadRemoteFileList(const MString& url) const
{
const static MString pref = "COPERNICUSData::ReadRemoteFileList";
LIBXML_TEST_VERSION
std::vector<struct FileInfo> out;
@ -68,7 +27,7 @@ std::pair<std::vector<struct FileInfo>, MString> COPERNICUSData::ReadRemoteFileL
break;
}
}
if(pos == url.Len()) return {out, "Can't parse url: " + url};
if(pos == url.Len()) return {pref, "Can't parse url: " + url};
bucket = url.SubStr(1, pos);
prefix = url.SubStr(pos + 2, url.Len() - pos - 1);
@ -84,20 +43,20 @@ std::pair<std::vector<struct FileInfo>, MString> COPERNICUSData::ReadRemoteFileL
cont = "";
auto [data, res] = GetUrl(chandle, url);
if(res != CURLE_OK) return {out, MString("Can't download ") + url + ": " + curlerr};
if(res != CURLE_OK) return {pref, MString("Can't download ") + url + ": " + curlerr};
xmlDocPtr doc = xmlReadMemory(data.Buf(), data.Len(), "data.xml", nullptr, 0);
if(doc == nullptr) return {out, MString("Can't download ") + url + ": XML parse error"};
if(doc == nullptr) return {pref, MString("Can't download ") + url + ": XML parse error"};
auto cur = xmlDocGetRootElement(doc);
if(cur == nullptr)
{
xmlFreeDoc(doc);
return {out, MString("Can't download ") + url + ": empty XML"};
return {pref, MString("Can't download ") + url + ": empty XML"};
}
if(xmlStrEqual(cur->name, (const xmlChar*)"ListBucketResult") == 0)
{
xmlFreeDoc(doc);
return {out, MString("Can't download ") + url + ": unknown XML"};
return {pref, MString("Can't download ") + url + ": unknown XML"};
}
for(const auto* n = cur->children; n; n = n->next)
@ -142,87 +101,49 @@ std::pair<std::vector<struct FileInfo>, MString> COPERNICUSData::ReadRemoteFileL
}
std::sort(out.begin(), out.end(), [](const struct FileInfo& a, const struct FileInfo& b) { return a.name < b.name; });
return {out, ""};
return out;
}
MString COPERNICUSData::Mirror(const CLArgs& args)
Error COPERNICUSData::Mirror(const CLArgs& args) const
{
const static MString pref = "COPERNICUSData::Mirror";
GPL.UsePrefix("COPERNICUS");
// Local directory
MString mirrorroot = GPL.ParameterSValue("MirrorTo", "");
if(!mirrorroot.Exist()) return "Local mirror directory not specified";
// Cache
cache.reset(CreateCache(GPL.ParameterSValue("Cache", "")));
if(!cache)
{
michlib::errmessage("Can't init cache");
cache.reset(new FakeCache);
}
curl_easy_setopt(chandle, CURLOPT_ERRORBUFFER, curlerr);
if(!args.contains("product")) return "Copernicus product not specified";
MString prod = args.at("product");
Json::Value product;
MString produrl;
// Get catalog
{
auto [cat, err] = GetJSON(caturl);
if(err.Exist()) return "Can't download catalog: " + err;
if(cat["title"].type() != Json::stringValue || cat["title"].asString() != "Copernicus Marine Data Store") return "Can't parse catalog";
catalog = std::move(cat);
}
if(!mirrorroot.Exist()) return {pref, "Local mirror directory not specified"};
// Get product
{
auto url = ReadURL(catalog, prod);
if(!url.Exist()) return "Url for product " + prod + " not found in catalog";
produrl = DirName(caturl) + "/" + url;
auto [pr, err] = GetJSON(produrl);
if(err.Exist()) return "Can't download product information from " + produrl + ": " + err;
product = std::move(pr);
}
if(!args.contains("product")) return {pref, "Copernicus product not specified"};
MString prod = args.at("product");
CopernicusCatalog cat;
std::vector<MString> dsets;
if(args.contains("dataset"))
dsets.push_back(args.at("dataset"));
else
{
const auto& links = product["links"];
if(links.type() != Json::arrayValue) return "Can't find information about datasets";
for(Json::ArrayIndex i = 0; i < links.size(); i++)
{
const auto& rel = links[i]["rel"];
const auto& titl = links[i]["title"];
if(rel.type() == Json::stringValue && titl.type() == Json::stringValue && rel.asString() == "item") dsets.push_back(titl.asString().c_str());
}
auto dlist = cat.DatasetList(prod);
if(!dlist) return dlist.Add(pref, "Can't get list of datasets");
dsets = dlist.Value();
}
CURLRAII dhandle;
for(const auto& dset: dsets)
{
michlib::message("Mirroring " + dset);
auto url = ReadURL(product, dset);
if(!url.Exist()) return "Url for dataset " + dset + " not found in product description";
MString dseturl = DirName(produrl) + "/" + url;
auto [ds, err] = GetJSON(dseturl);
if(err.Exist()) return "Can't download dataset information from " + dseturl + ": " + err;
const auto& href = ds["assets"]["native"]["href"];
if(href.type() != Json::stringValue) return "Can't find data for dataset " + dset + " from product " + prod;
url = href.asString().c_str();
auto url = cat.DatasetNativeURL(prod, dset);
if(!url) return {pref, "Can't find data for dataset " + dset + " from product " + prod};
MString locroot = mirrorroot + "/" + prod + "/" + dset;
auto [lfiles, lerr] = ReadLocalFileList(locroot);
if(lerr.Exist()) return lerr;
auto lfilesret = ReadLocalFileList(locroot);
if(!lfilesret) return lfilesret.Add(pref, "Can't get local file list");
const auto& lfiles = lfilesret.Value();
auto [rfiles, rerr] = ReadRemoteFileList(url);
if(rerr.Exist()) return rerr;
auto rfilesret = ReadRemoteFileList(url.Value());
if(!rfilesret) return rfilesret.Add(pref, "Can't get remote file list");
const auto& rfiles = rfilesret.Value();
std::vector<size_t> down, rem;
std::vector<std::pair<size_t, size_t>> upd;
@ -260,14 +181,14 @@ MString COPERNICUSData::Mirror(const CLArgs& args)
{
size_t ri = down[i];
auto err = DownloadFile(dhandle, rfiles[ri], locroot);
if(err.Exist()) return err;
if(!err) return err.Add(pref, "Can't download file");
}
for(size_t i = 0; i < rem.size(); i++)
{
size_t li = rem[i];
auto err = RemoveFile(lfiles[li]);
if(err.Exist()) return err;
if(!err) return err.Add(pref, "Can't remove file");
}
for(size_t i = 0; i < upd.size(); i++)
@ -275,9 +196,9 @@ MString COPERNICUSData::Mirror(const CLArgs& args)
size_t ri = upd[i].first;
size_t li = upd[i].second;
auto err = UpdateFile(dhandle, rfiles[ri], lfiles[li], locroot);
if(err.Exist()) return err;
if(!err) return err.Add(pref, "Can't update file");
}
}
return "";
return Error();
}

18
sources/COPERNICUS.h

@ -1,30 +1,18 @@
#pragma once
#include "ParseArgs.h"
#include "cache.h"
#include "curlfuncs.h"
#include "copcat.h"
#include "mdatetime.h"
#include <json/json.h>
using michlib::MDateTime;
using michlib::MString;
class COPERNICUSData
{
static const MString caturl;
std::unique_ptr<GenericCache> cache;
CURLRAII chandle;
Json::Value catalog;
char curlerr[CURL_ERROR_SIZE];
// Get url for product or dataset from catalog
static MString ReadURL(const Json::Value& cat, const MString& prod);
// Download JSON from url
std::pair<Json::Value, MString> GetJSON(const MString& url);
// Get remote file list from url
std::pair<std::vector<struct FileInfo>,MString> ReadRemoteFileList(const MString& url);
RetVal<std::vector<struct FileInfo>> ReadRemoteFileList(const MString& url) const;
public:
static constexpr const char* name = "COPERNICUS";
@ -32,5 +20,5 @@ class COPERNICUSData
COPERNICUSData() = default;
// Main mirror function
MString Mirror(const CLArgs& args);
Error Mirror(const CLArgs& args) const;
};

169
src/copcat.cpp

@ -0,0 +1,169 @@
#define MICHLIB_NOSOURCE
#include "copcat.h"
#include "GPL.h"
#include "mirrorfuncs.h"
const MString CopernicusCatalog::caturl = "https://stac.marine.copernicus.eu/metadata/catalog.stac.json";
// Set up the cache and the curl error buffer, then try to load the catalog.
// The result of GetCatalog() is not inspected here; Valid() reports whether
// a catalog is available afterwards.
CopernicusCatalog::CopernicusCatalog()
{
  // Parameters are looked up with the COPERNICUS prefix
  auto& params = michlib::GPL;
  params.UsePrefix("COPERNICUS");

  // Cache
  cache.reset(CreateCache(params.ParameterSValue("Cache", "")));
  if(cache == nullptr)
  {
    // No usable cache: report it and continue with FakeCache instead
    michlib::errmessage("Can't init cache");
    cache.reset(new FakeCache);
  }

  // Let libcurl write its error descriptions to curlerr
  curl_easy_setopt(chandle, CURLOPT_ERRORBUFFER, curlerr);

  GetCatalog();
}
// Load the root catalog from caturl unless a valid catalog is already held.
// Returns a default-constructed Error on success; otherwise the error from
// GetJSON() extended with this function's context.
Error CopernicusCatalog::GetCatalog()
{
  // Already loaded, nothing to do
  if(Valid()) return Error();

  auto fetched = GetJSON(caturl);
  if(!fetched) return fetched.Add("CopernicusCatalog::GetCatalog", "can't download catalog");

  catalog = fetched.Value();
  return Error();
}
// Collect the titles of all catalog links whose "rel" is "child".
// Fails when no catalog is loaded or when it has no "links" array.
RetVal<std::vector<MString>> CopernicusCatalog::ProductList() const
{
  static const MString pref = "CopernicusCatalog::ProductList";

  if(!Valid()) return {pref, "no catalog"};

  const auto& links = catalog["links"];
  if(!links.isArray()) return {pref, "no \"links\" section in the catalog"};

  std::vector<MString> products;
  for(const auto& link: links)
  {
    const auto& rel   = link["rel"];
    const auto& title = link["title"];

    // Entries without string "rel" and "title" are skipped
    if(!rel.isString() || !title.isString()) continue;
    if(rel.asString() == "child") products.emplace_back(title.asString().c_str());
  }
  return products;
}
// Find the catalog link whose title equals prod and return its "href"
// appended to the directory part of caturl.
// Fails when no catalog is loaded, when it has no "links" array, or when
// no link carries that title.
RetVal<MString> CopernicusCatalog::ProductURL(const MString& prod) const
{
  static const MString pref = "CopernicusCatalog::ProductURL";

  if(!Valid()) return {pref, "no catalog"};

  const auto& links = catalog["links"];
  if(!links.isArray()) return {pref, "no \"links\" section in the catalog"};

  for(const auto& link: links)
  {
    const auto& title = link["title"];
    const auto& href  = link["href"];

    // Only entries with string "title" and "href" can match
    if(!title.isString() || !href.isString()) continue;
    if(title.asString().c_str() == prod) return DirName(caturl) + "/" + MString(href.asString().c_str());
  }

  return {pref, "unknown product: " + prod};
}
// Download the description of product prod and collect the titles of all
// its links whose "rel" is "item".
// Fails when the product URL cannot be resolved, the description cannot be
// obtained, or the description has no "links" array.
RetVal<std::vector<MString>> CopernicusCatalog::DatasetList(const MString& prod) const
{
  static const MString pref = "CopernicusCatalog::DatasetList";

  // Resolve the product URL from the catalog
  auto produrl = ProductURL(prod);
  if(!produrl) return produrl.Add(pref, "Can't get url for the product " + prod);
  const MString url = produrl.Value();

  // Fetch the product description
  auto product = GetJSON(url);
  if(!product) return product.Add(pref, "Can't download product " + prod);

  const auto& links = product.Value()["links"];
  if(!links.isArray()) return {pref, "no \"links\" section in the product " + prod + " description"};

  std::vector<MString> datasets;
  for(const auto& link: links)
  {
    const auto& rel   = link["rel"];
    const auto& title = link["title"];

    // Entries without string "rel" and "title" are skipped
    if(!rel.isString() || !title.isString()) continue;
    if(rel.asString() == "item") datasets.emplace_back(title.asString().c_str());
  }
  return datasets;
}
// Download the description of product prod, find the link whose title equals
// dataset and return its "href" appended to the directory part of the
// product URL.
// Fails when the product URL cannot be resolved, the description cannot be
// obtained, it has no "links" array, or no link carries that title.
RetVal<MString> CopernicusCatalog::DatasetURL(const MString& prod, const MString& dataset) const
{
  static const MString pref = "CopernicusCatalog::DatasetURL";

  // Resolve the product URL from the catalog
  auto produrl = ProductURL(prod);
  if(!produrl) return produrl.Add(pref, "Can't get url for the product " + prod);
  const MString url = produrl.Value();

  // Fetch the product description
  auto product = GetJSON(url);
  if(!product) return product.Add(pref, "Can't download product " + prod);

  const auto& links = product.Value()["links"];
  if(!links.isArray()) return {pref, "no \"links\" section in the product " + prod + " description"};

  for(const auto& link: links)
  {
    const auto& title = link["title"];
    const auto& href  = link["href"];

    // Only entries with string "title" and "href" can match
    if(!title.isString() || !href.isString()) continue;
    if(title.asString().c_str() == dataset) return DirName(url) + "/" + MString(href.asString().c_str());
  }

  return {pref, "unknown dataset: " + dataset};
}
// Download the description of the dataset and return the "href" string stored
// under assets/<asset>.
// Fails when the dataset URL cannot be resolved, the description cannot be
// obtained, or that "href" is missing or not a string.
RetVal<MString> CopernicusCatalog::AssetURL(const MString& prod, const MString& dataset, const MString& asset) const
{
  static const MString pref = "CopernicusCatalog::AssetURL";

  // Resolve the dataset URL from the product description
  auto dseturl = DatasetURL(prod, dataset);
  if(!dseturl) return dseturl.Add(pref, "Can't get url for the dataset " + dataset);
  const MString url = dseturl.Value();

  // Fetch the dataset description
  auto dset = GetJSON(url);
  if(!dset) return dset.Add(pref, "Can't download dataset " + dataset);

  const auto& href = dset.Value()["assets"][asset.Buf()]["href"];
  // A null value is not a string either, so one check covers both cases
  if(!href.isString()) return {pref, "href for the asset " + asset + " not found"};

  return MString(href.asString().c_str());
}
// Obtain the JSON document stored at url.
// The text is taken from the cache when present there, otherwise it is
// downloaded through chandle. It is then parsed; freshly downloaded text is
// put into the cache for 3600 (the value passed to Put) only after it parsed
// successfully, so a malformed response is never cached.
// Returns the parsed document, or an Error when the download or parsing fails.
RetVal<Json::Value> CopernicusCatalog::GetJSON(const MString& url) const
{
  const static MString pref = "CopernicusCatalog::GetJSON";

  Json::Reader reader;
  Json::Value  obj;
  MString      content;

  auto [val, suc] = cache->Get(url);
  if(suc)
    content = std::move(val);
  else
  {
    michlib::message(url + " not found in cache, downloading");
    auto [out, res] = GetUrl(chandle, url);
    if(res != CURLE_OK) return Error(pref, MString("can't download JSON: ") + curlerr);
    content = std::move(out);
  }

  // A document that does not parse is an error, not an empty result
  if(!reader.parse(content.Buf(), content.Buf() + content.Len(), obj, false)) return Error(pref, MString("can't parse JSON from ") + url);

  // Store only freshly downloaded text that is known to be valid JSON
  if(!suc) cache->Put(url, content, 3600);

  return obj;
}

56
src/mirrorfuncs.cpp

@ -27,32 +27,34 @@ bool MakePath(const MString& dname)
return true;
}
std::pair<std::vector<struct FileInfo>, MString> ReadLocalFileList(const MString& dir, const MString& path)
RetVal<std::vector<struct FileInfo>> ReadLocalFileList(const MString& dir, const MString& path)
{
const static MString pref = "ReadLocalFileList";
std::vector<struct FileInfo> out;
DIRRAII dhandle;
MakePath(dir);
dhandle.reset(opendir(dir.Buf()));
if(!dhandle) return {out, "Can't open directory " + path + (path.Exist() ? "/" : "") + dir};
if(!dhandle) return {pref, "Can't open directory " + path + (path.Exist() ? "/" : "") + dir};
int dfd = dirfd(dhandle);
errno = 0;
struct dirent* dent = readdir(dhandle);
if(errno != 0) return {out, "Can't read directory " + path + (path.Exist() ? "/" : "") + dir};
if(errno != 0) return {pref, "Can't read directory " + path + (path.Exist() ? "/" : "") + dir};
struct stat st;
do {
if(dent->d_name[0] != '.')
{
int ret = fstatat(dfd, dent->d_name, &st, AT_SYMLINK_NOFOLLOW);
if(ret != 0) return {out, "Can't stat " + path + "/" + dir + "/" + dent->d_name};
if(ret != 0) return {pref, "Can't stat " + path + "/" + dir + "/" + dent->d_name};
if(S_ISDIR(st.st_mode)) // Directory, recurse
{
auto [list, err] = ReadLocalFileList(dir + "/" + dent->d_name, path + (path.Exist() ? "/" : "") + dent->d_name);
if(err.Exist()) return {out, err};
out.insert(out.end(), list.begin(), list.end());
auto list = ReadLocalFileList(dir + "/" + dent->d_name, path + (path.Exist() ? "/" : "") + dent->d_name);
if(!list) return list;
out.insert(out.end(), list.Value().begin(), list.Value().end());
}
if(S_ISREG(st.st_mode)) // Regular file
{
@ -63,21 +65,23 @@ std::pair<std::vector<struct FileInfo>, MString> ReadLocalFileList(const MString
dent = readdir(dhandle);
} while(dent != nullptr || errno != 0);
if(errno != 0) return {out, "Can't read directory " + path + "/" + dir};
if(errno != 0) return {pref, "Can't read directory " + path + "/" + dir};
std::sort(out.begin(), out.end(), [](const struct FileInfo& a, const struct FileInfo& b) { return a.name < b.name; });
return {out, ""};
return out;
}
MString DownloadFile(const CURLRAII& chandle, const struct FileInfo& rinfo, const MString& root)
Error DownloadFile(const CURLRAII& chandle, const struct FileInfo& rinfo, const MString& root)
{
const static MString pref = "DownloadFile";
message("Downloading " + rinfo.url);
MString dname = DirName(rinfo.name), fname = FileName(rinfo.name);
FD fd;
if(!MakePath(root + "/" + dname)) return "Can't create directory " + root + "/" + dname;
if(!MakePath(root + "/" + dname)) return {pref, "Can't create directory " + root + "/" + dname};
fd.Reset(creat((root + "/" + rinfo.name).Buf(), 0644));
if(!fd) return "Can't create file " + root + "/" + rinfo.name;
if(!fd) return {pref, "Can't create file " + root + "/" + rinfo.name};
char errbuf[CURL_ERROR_SIZE];
int cfd = fd.Get();
@ -89,7 +93,7 @@ MString DownloadFile(const CURLRAII& chandle, const struct FileInfo& rinfo, cons
if(res != CURLE_OK)
{
unlink((root + "/" + rinfo.name).Buf());
return MString("Can't download file: ") + errbuf;
return {pref, MString("Can't download file: ") + errbuf};
}
{
@ -101,30 +105,32 @@ MString DownloadFile(const CURLRAII& chandle, const struct FileInfo& rinfo, cons
if(ret != 0)
{
unlink((root + "/" + rinfo.name).Buf());
return "Can't set mtime for file: " + root + "/" + rinfo.name;
return {pref, "Can't set mtime for file: " + root + "/" + rinfo.name};
}
}
return "";
return Error();
}
MString RemoveFile(const struct FileInfo& linfo)
Error RemoveFile(const struct FileInfo& linfo)
{
const static MString pref = "RemoveFile";
message("Remove " + linfo.url);
int ret = unlink(linfo.url.Buf());
if(ret != 0) return "Can't remove file " + linfo.url;
return "";
if(ret != 0) return {pref, "Can't remove file " + linfo.url};
return Error();
}
MString UpdateFile(const CURLRAII& chandle, const struct FileInfo& rinfo, const struct FileInfo& linfo, const MString& root)
Error UpdateFile(const CURLRAII& chandle, const struct FileInfo& rinfo, const struct FileInfo& linfo, const MString& root)
{
MString err;
const static MString pref = "UpdateFile";
message("Update " + linfo.url);
err = RemoveFile(linfo);
if(err.Exist()) return err;
err = DownloadFile(chandle, rinfo, root);
if(err.Exist()) return err;
auto rm = RemoveFile(linfo);
if(!rm) return rm.Add(pref, "Can't remove file");
auto df = DownloadFile(chandle, rinfo, root);
if(!df) return df.Add(pref, "Can't download file");
return "";
return Error();
}

Loading…
Cancel
Save