|
|
|
#define MICHLIB_NOSOURCE
|
|
|
|
#include "COPERNICUS.h"
|
|
|
|
#include "mirrorfuncs.h"
|
|
|
|
#include <libxml/parser.h>
|
|
|
|
#include <libxml/tree.h>
|
|
|
|
|
|
|
|
using michlib::GPL;
|
|
|
|
|
|
|
|
// Builds the list of remote objects stored under an S3-style listing URL.
//
// url is split at its fourth '/': everything before it is used as the bucket
// endpoint, everything after it as the key prefix (for "scheme://host/bucket/a/b"
// that gives "scheme://host/bucket" and "a/b"). The bucket is then queried with
// "?list-type=2&prefix=..." and the request is repeated with the returned
// NextContinuationToken until the answer carries no token.
//
// Every <Contents> element of the <ListBucketResult> answer yields one FileInfo
// constructed from (bucket + "/" + key, key relative to "prefix/", LastModified, Size).
//
// Returns the entries sorted by FileInfo::name, or an error when the url has fewer
// than four '/', a request fails, or an answer is not a ListBucketResult document.
RetVal<std::vector<struct FileInfo>> COPERNICUSData::ReadRemoteFileList(const MString& url) const
{
  const static MString pref = "COPERNICUSData::ReadRemoteFileList";

  LIBXML_TEST_VERSION

  // Owns a parsed document so that every way out of the loop body releases it.
  struct XmlDocGuard
  {
    xmlDocPtr doc;
    explicit XmlDocGuard(xmlDocPtr d): doc(d) {}
    XmlDocGuard(const XmlDocGuard&)            = delete;
    XmlDocGuard& operator=(const XmlDocGuard&) = delete;
    ~XmlDocGuard()
    {
      if(doc != nullptr) xmlFreeDoc(doc);
    }
  };

  // True when the node's element name equals tag.
  const auto named = [](const xmlNode* node, const char* tag) { return xmlStrEqual(node->name, reinterpret_cast<const xmlChar*>(tag)) == 1; };

  // Text content of a node as MString; an empty string when libxml2 returns no content.
  const auto nodetext = [](const xmlNode* node) -> MString
  {
    xmlChar* raw = xmlNodeGetContent(node);
    if(raw == nullptr) return MString();
    MString text(reinterpret_cast<const char*>(raw));
    xmlFree(raw);
    return text;
  };

  std::vector<struct FileInfo> out;
  MString                      bucket, prefix;

  // Split url on prefix and bucket
  {
    size_t pos = url.Len();
    for(size_t i = 0, slashes = 0; i < url.Len(); i++)
      if(url[i] == '/' && ++slashes == 4)
      {
        pos = i;
        break;
      }
    if(pos == url.Len()) return {pref, "Can't parse url: " + url};

    // SubStr is used here with a 1-based start and a length: the first pos
    // characters form the bucket, the rest after the '/' forms the prefix.
    bucket = url.SubStr(1, pos);
    prefix = url.SubStr(pos + 2, url.Len() - pos - 1);
  }

  MString  cont; // continuation token for the next page, empty when there is none
  CURLRAII chandle;
  do
  {
    // NOTE(review): prefix and token are appended verbatim, without URL-escaping;
    // presumably they only contain URL-safe characters - confirm.
    MString request = bucket + "?list-type=2&prefix=" + prefix;
    if(cont.Exist()) request += "&continuation-token=" + cont;
    cont = "";

    auto [data, res] = GetUrl(chandle, request);
    if(res != CURLE_OK) return {pref, MString("Can't download ") + request + ": " + chandle.Err()};

    const XmlDocGuard xml(xmlReadMemory(data.Buf(), data.Len(), "data.xml", nullptr, 0));
    if(xml.doc == nullptr) return {pref, MString("Can't download ") + request + ": XML parse error"};

    const xmlNode* root = xmlDocGetRootElement(xml.doc);
    if(root == nullptr) return {pref, MString("Can't download ") + request + ": empty XML"};
    if(!named(root, "ListBucketResult")) return {pref, MString("Can't download ") + request + ": unknown XML"};

    for(const xmlNode* n = root->children; n != nullptr; n = n->next)
    {
      if(named(n, "NextContinuationToken"))
        cont = nodetext(n);
      else if(named(n, "Contents"))
      {
        MString   key;
        MDateTime mtime;
        size_t    size = 0;

        for(const xmlNode* c = n->children; c != nullptr; c = c->next)
        {
          if(named(c, "Key"))
            key = nodetext(c);
          else if(named(c, "LastModified"))
          {
            xmlChar* raw = xmlNodeGetContent(c);
            if(raw != nullptr)
            {
              mtime.FromString(reinterpret_cast<char*>(raw));
              xmlFree(raw);
            }
          }
          else if(named(c, "Size"))
            size = nodetext(c).ToInteger<size_t>();
        }

        // The relative name is the key without the leading "prefix/". A key that
        // does not extend past it gets an empty name instead of letting the
        // unsigned length computation wrap around.
        const size_t skip = prefix.Len() + 1;
        MString      relname;
        if(key.Len() > skip) relname = key.SubStr(skip + 1, key.Len() - skip);

        out.emplace_back(bucket + "/" + key, relname, mtime, size);
      }
    }
  } while(cont.Exist());

  std::sort(out.begin(), out.end(), [](const struct FileInfo& a, const struct FileInfo& b) { return a.name < b.name; });
  return out;
}
|
|
|
|
|
|
|
|
// Synchronises local copies of the datasets of one Copernicus product with the remote storage.
//
// Inputs:
//  - parameter "MirrorTo" (read through GPL with the "COPERNICUS" prefix): root of the local mirror, required;
//  - args["product"]: product name, required;
//  - args["dataset"]: single dataset to mirror; when absent, all datasets listed by the catalog for the product;
//  - args["filter"]: regular expression applied to file names, ".*" when absent.
//
// For every dataset the local list (from <MirrorTo>/<product>/<dataset>) and the remote list are
// walked in parallel by name. Files only on the remote side are downloaded, files only on the local
// side are removed, files present on both sides with a different modification time or size are
// updated. Only files whose name matches the filter are touched.
// The walk relies on both lists being ordered by name - presumably ReadLocalFileList returns a sorted list; confirm.
//
// Returns an empty Error on success, otherwise the first error encountered.
Error COPERNICUSData::Mirror(const CLArgs& args) const
{
  const static MString pref = "COPERNICUSData::Mirror";

  GPL.UsePrefix("COPERNICUS");

  // Local directory
  MString mirrorroot = GPL.ParameterSValue("MirrorTo", "");
  if(!mirrorroot.Exist()) return {pref, "Local mirror directory not specified"};

  if(!args.contains("product")) return {pref, "Copernicus product not specified"};
  MString           prod = args.at("product");
  CopernicusCatalog cat;

  // Datasets to process: the requested one, or everything the catalog knows for the product
  std::vector<MString> dsets;
  if(!args.contains("dataset"))
  {
    auto dlist = cat.DatasetList(prod);
    if(!dlist) return dlist.Add(pref, "Can't get list of datasets");
    dsets = dlist.Value();
  }
  else
    dsets.push_back(args.at("dataset"));

  michlib::RegExpSimple filter((args.contains("filter") ? args.at("filter") : ".*").Buf());
  if(filter.Compile() != 0) return Error(pref, MString("Can't compile regular expression ") + filter.RegStr());

  // A file takes part in mirroring only when its name passes the filter
  const auto selected = [&filter](const auto& file) { return filter.Match(file.name.Buf()); };

  CURLRAII chandle;
  for(const auto& dset: dsets)
  {
    michlib::message("Mirroring " + dset);
    auto url = cat.DatasetNativeURL(prod, dset);
    if(!url) return {pref, "Can't find data for dataset " + dset + " from product " + prod};

    MString locroot = mirrorroot + "/" + prod + "/" + dset;

    auto lfilesret = ReadLocalFileList(locroot);
    if(!lfilesret) return lfilesret.Add(pref, "Can't get local file list");
    const auto& lfiles = lfilesret.Value();

    auto rfilesret = ReadRemoteFileList(url.Value());
    if(!rfilesret) return rfilesret.Add(pref, "Can't get remote file list");
    const auto& rfiles = rfilesret.Value();

    std::vector<size_t>                    down, rem; // indices into rfiles / lfiles
    std::vector<std::pair<size_t, size_t>> upd;       // (index into rfiles, index into lfiles)

    {
      size_t rpos = 0, lpos = 0;

      // Both lists still have entries: advance the side with the smaller name
      while(rpos < rfiles.size() && lpos < lfiles.size())
      {
        const auto& remote = rfiles[rpos];
        const auto& local  = lfiles[lpos];

        if(remote.name < local.name)
        {
          // Exists only remotely
          if(selected(remote)) down.push_back(rpos);
          rpos++;
        }
        else if(local.name < remote.name)
        {
          // Exists only locally
          if(selected(local)) rem.push_back(lpos);
          lpos++;
        }
        else
        {
          // Same name on both sides: any difference in time or size means update
          const auto delta   = remote.mtime.Epoch() - local.mtime.Epoch();
          const bool changed = (delta < 0 || delta > 0) || remote.size != local.size;
          if(changed && selected(local)) upd.emplace_back(rpos, lpos);
          lpos++;
          rpos++;
        }
      }

      // Remote list exhausted: everything left locally is obsolete
      for(; lpos < lfiles.size(); lpos++)
        if(selected(lfiles[lpos])) rem.push_back(lpos);

      // Local list exhausted: everything left remotely is new
      for(; rpos < rfiles.size(); rpos++)
        if(selected(rfiles[rpos])) down.push_back(rpos);
    }

    michlib::message(MString("New files: ") + down.size());
    michlib::message(MString("Obsolete files: ") + rem.size());
    michlib::message(MString("Modified files: ") + upd.size());

    for(const size_t ri: down)
    {
      auto err = DownloadFile(chandle, rfiles[ri], locroot);
      if(!err) return err.Add(pref, "Can't download file");
    }

    for(const size_t li: rem)
    {
      auto err = RemoveFile(lfiles[li]);
      if(!err) return err.Add(pref, "Can't remove file");
    }

    for(const auto& [ri, li]: upd)
    {
      auto err = UpdateFile(chandle, rfiles[ri], lfiles[li], locroot);
      if(!err) return err.Add(pref, "Can't update file");
    }
  }

  return Error();
}
|