//===========================================================================
// @(#) $DwmPath: dwm/libDwmRDAP/tags/libDwmRDAP-0.1.9/apps/getpfx2as/getpfx2as.cc 9604 $
// @(#) $Id: getpfx2as.cc 9604 2017-06-22 19:36:47Z dwm $
//===========================================================================
//  Copyright (c) Daniel W. McRobb 2017
//  All rights reserved.
//
//  Redistribution and use in source and binary forms, with or without
//  modification, are permitted provided that the following conditions
//  are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//  3. The names of the authors and copyright holders may not be used to
//     endorse or promote products derived from this software without
//     specific prior written permission.
//
//  IN NO EVENT SHALL DANIEL W. MCROBB BE LIABLE TO ANY PARTY FOR
//  DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
//  INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE,
//  EVEN IF DANIEL W. MCROBB HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
//  DAMAGE.
//
//  THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND
//  DANIEL W. MCROBB HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
//  UPDATES, ENHANCEMENTS, OR MODIFICATIONS. DANIEL W. MCROBB MAKES NO
//  REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER
//  IMPLIED OR EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
//  WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE,
//  OR THAT THE USE OF THIS SOFTWARE WILL NOT INFRINGE ANY PATENT,
//  TRADEMARK OR OTHER RIGHTS.
//===========================================================================

//---------------------------------------------------------------------------
//!  \file getpfx2as.cc
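//!  \brief Command-line tool that fetches CAIDA routeviews prefix-to-AS data
//!  files over HTTP, coalesces them, and optionally converts them to binary
//!  form.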
//---------------------------------------------------------------------------

extern "C" {
  #include <time.h>
  #include <unistd.h>
  #include <zlib.h>
}

#include <algorithm>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <regex>
#include <string>
#include <tuple>
#include <vector>

#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/gzip.hpp>

#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>

#include "DwmIpv4Routes.hh"
#include "DwmSvnTag.hh"

//  Subversion path/revision tag for this source file, held in a Dwm::SvnTag.
//  NOTE(review): presumably kept so the version string ends up in the built
//  binary -- confirm against DwmSvnTag.hh.
static const Dwm::SvnTag svntag("@(#) $DwmPath: dwm/libDwmRDAP/tags/libDwmRDAP-0.1.9/apps/getpfx2as/getpfx2as.cc 9604 $");

//  Names pulled into the global namespace for brevity in this file.
using namespace std;
using boost::iostreams::filtering_streambuf;
using boost::iostreams::gzip_decompressor;
using boost::iostreams::gzip_compressor;
using Poco::URI;
using Poco::Net::HTTPClientSession;
using Poco::Net::HTTPMessage;
using Poco::Net::HTTPRequest;
using Poco::Net::HTTPResponse;

//----------------------------------------------------------------------------
//!  One entry parsed from the remote pfx2as-creation.log index:
//!    <0>  leading integer field of the log line.  GetLatestFilename()
//!         treats the largest value as the latest file (presumably a
//!         sequence number -- TODO confirm against the log format).
//!    <1>  second integer field, used by main() as a time_t time stamp.
//!    <2>  path of the data file, relative to the base URL.
//----------------------------------------------------------------------------
typedef std::tuple<int,time_t,string>  DataFileTuple;

//----------------------------------------------------------------------------
//!  baseURL should be
//!  http://data.caida.org/datasets/routing/routeviews-prefix2as
//----------------------------------------------------------------------------
vector<DataFileTuple> GetAvailableFiles(const string & baseURL)
{
  vector<DataFileTuple>  dataFiles;
  
  Poco::URI          uri(baseURL + "/pfx2as-creation.log");
  HTTPClientSession  sess(uri.getHost());
  HTTPRequest        req(HTTPRequest::HTTP_GET, uri.getPathAndQuery(),
                         HTTPMessage::HTTP_1_1);
  try {
    std::regex   rgx("([0-9]+)[ \t]+([0-9]+)[ \t]+([^ \t]+)",
                     regex::ECMAScript|regex::optimize);
    std::smatch  sm;
    ostream & os = sess.sendRequest(req);
    if (os) {
      HTTPResponse  resp;
      istream & is = sess.receiveResponse(resp);
      string  s;
      while (getline(is, s)) {
        if (regex_search(s, sm, rgx)) {
          if (sm.size() == 4) {
            DataFileTuple  dft(stoi(sm[1].str()), stoull(sm[2].str()),
                               sm[3].str());
            dataFiles.push_back(dft);
          }
        }
      }
    }
  }
  catch (Poco::Exception &ex) {
    cerr << "Exception, URI "
         << uri.toString() << ", " << ex.className() << ", "
         << ex.name() << ", " << ex.displayText() << "\n";
  }

  return dataFiles;
}

//----------------------------------------------------------------------------
//!  Sorts \c dataFiles in place, in descending order of the first tuple
//!  field, and returns the file path (third field) of the entry that ends
//!  up at the front.  Returns an empty string if \c dataFiles is empty.
//----------------------------------------------------------------------------
string GetLatestFilename(vector<DataFileTuple> & dataFiles)
{
  if (dataFiles.empty()) {
    return string();
  }

  //  Entry with the larger first field sorts earlier.
  auto  largerFirst =
    [] (const DataFileTuple & lhs, const DataFileTuple & rhs)
    { return (std::get<0>(lhs) > std::get<0>(rhs)); };

  sort(dataFiles.begin(), dataFiles.end(), largerFirst);
  return std::get<2>(dataFiles.front());
}

//----------------------------------------------------------------------------
//!  
//----------------------------------------------------------------------------
bool GetDataFile(const string & baseURL, const string & fileRelPath,
                 string & outFileName)
{
  bool    rc = false;
  size_t  idx = fileRelPath.find_last_of('/');
  if (idx < (fileRelPath.size() - 1)) {
    outFileName = fileRelPath.substr(idx + 1);
    ofstream  ofs(outFileName.c_str());
    if (ofs) {
      Poco::URI          uri(baseURL + '/' + fileRelPath);
      HTTPClientSession  sess(uri.getHost());
      HTTPRequest        req(HTTPRequest::HTTP_GET, uri.getPathAndQuery(),
                             HTTPMessage::HTTP_1_1);
      try {
        ostream & os = sess.sendRequest(req);
        if (os) {
          HTTPResponse  resp;
          istream & is = sess.receiveResponse(resp);
          ofs << is.rdbuf();
          rc = true;
        }
      }
      catch (Poco::Exception &ex) {
        cerr << "Exception, URI "
             << uri.toString() << ", " << ex.className() << ", "
             << ex.name() << ", " << ex.displayText() << "\n";
      }
      ofs.close();
    }
  }
  return rc;
}

//----------------------------------------------------------------------------
//!  Returns the name of the coalesced output file for \c inFileName:
//!  "name.gz" becomes "name_coalesced.gz"; any other name gets
//!  "_coalesced" appended.
//----------------------------------------------------------------------------
static string CoalescedFileName(const string & inFileName)
{
  static const string  gzSuffix(".gz");
  const size_t         nameLen = inFileName.size();
  if ((nameLen >= gzSuffix.size())
      && (inFileName.compare(nameLen - gzSuffix.size(), gzSuffix.size(),
                             gzSuffix) == 0)) {
    return inFileName.substr(0, nameLen - gzSuffix.size())
      + "_coalesced.gz";
  }
  return inFileName + "_coalesced";
}

//----------------------------------------------------------------------------
//!  Reads the gzip-compressed text file \c inFileName, whose records are
//!  whitespace-separated "address masklength asnumber" triples, loads them
//!  into a Dwm::Ipv4Routes<string>, calls Coalesce() on it and writes the
//!  result, sorted by prefix, as gzip-compressed tab-separated text.
//!
//!  The output file name is derived from \c inFileName (see
//!  CoalescedFileName()) and stored in \c outFileName.
//!
//!  Returns true on success.  Returns false if a file can't be opened, if
//!  the input can't be decompressed/read, if a numeric field can't be
//!  parsed, or if writing the output stream failed.
//----------------------------------------------------------------------------
bool CoalesceDataFile(const string & inFileName, string & outFileName)
{
  bool  rc = false;
  outFileName = CoalescedFileName(inFileName);

  try {
    //  Binary mode: the input is gzip data.
    ifstream  is(inFileName.c_str(),
                 std::ios_base::in | std::ios_base::binary);
    if (! is) {
      return false;
    }

    Dwm::Ipv4Routes<string>  routes;
    {
      filtering_streambuf<boost::iostreams::input>  gzin;
      gzin.push(gzip_decompressor());
      gzin.push(is);
      istream  gzis(&gzin);

      string  addrstr, maskstr, asnumstr;
      while (gzis >> addrstr >> maskstr >> asnumstr) {
        Dwm::Ipv4Address  addr(addrstr);
        uint8_t  maskLen = stoul(maskstr) & 0xFF;
        Dwm::Ipv4Prefix  pfx(addr, maskLen);
        routes[pfx] = asnumstr;
      }
      //  Formatted extraction turns an exception thrown by the stream
      //  buffer (e.g. a gzip error) into badbit, so a corrupt input would
      //  otherwise look like a normal end of file.
      if (gzis.bad()) {
        std::cerr << "Failed to read/decompress " << inFileName << '\n';
        return false;
      }
    }
    is.close();

    ofstream  os(outFileName.c_str(),
                 std::ios_base::out | std::ios_base::binary);
    if (! os) {
      return false;
    }
    filtering_streambuf<boost::iostreams::output>  gzout;
    gzout.push(gzip_compressor(boost::iostreams::zlib::best_compression));
    gzout.push(os);
    ostream  gzos(&gzout);

    routes.Coalesce();

    vector<pair<Dwm::Ipv4Prefix,string>>  rtvec;
    routes.SortByKey(rtvec);
    for (auto & rve : rtvec) {
      //  Widen the mask length so it is printed as a number, not a char.
      gzos << rve.first.Network() << '\t'
           << static_cast<uint16_t>(rve.first.MaskLength()) << '\t'
           << rve.second << '\n';
    }
    rc = gzos.good();
    boost::iostreams::close(gzout);
    os.close();
  }
  catch (const std::exception & ex) {
    //  e.g. std::invalid_argument from stoul() on a malformed record.
    std::cerr << "Exception coalescing " << inFileName << ": "
              << ex.what() << '\n';
    rc = false;
  }
  return rc;
}

//----------------------------------------------------------------------------
//!  Reads the gzip-compressed text file \c coalescedInFile, whose records
//!  are whitespace-separated "address masklength asnumber" triples, into a
//!  Dwm::Ipv4Routes<uint32_t> and writes that object to \c outFile with its
//!  Write() member.
//!
//!  The AS field is converted with stoul(), which stops at the first
//!  non-digit character, so only the leading number of the field is kept.
//!
//!  Returns true on success.  Returns false if a file can't be opened, if
//!  the input can't be decompressed/read, if a numeric field can't be
//!  parsed, or if the output stream is in a failed state after writing.
//----------------------------------------------------------------------------
bool CreatePrefix2ASFile(const string & coalescedInFile,
                         const string & outFile)
{
  bool  rc = false;
  try {
    //  Binary mode: the input is gzip data.
    ifstream  is(coalescedInFile.c_str(),
                 std::ios_base::in | std::ios_base::binary);
    if (! is) {
      return false;
    }

    //  Read all the data into an Ipv4Routes<uint32_t> object.
    Dwm::Ipv4Routes<uint32_t>  routes;
    {
      //  input file must be a gzip file.  Set up gzip input filter.
      filtering_streambuf<boost::iostreams::input>  gzin;
      gzin.push(gzip_decompressor());
      gzin.push(is);
      istream  gzis(&gzin);

      string  addrstr, maskstr, asnumstr;
      while (gzis >> addrstr >> maskstr >> asnumstr) {
        Dwm::Ipv4Address  addr(addrstr);
        uint8_t  maskLen = stoul(maskstr) & 0xFF;
        Dwm::Ipv4Prefix  pfx(addr, maskLen);
        routes[pfx] = stoul(asnumstr);
      }
      //  Formatted extraction turns an exception thrown by the stream
      //  buffer (e.g. a gzip error) into badbit, so a corrupt input would
      //  otherwise look like a normal end of file.
      if (gzis.bad()) {
        std::cerr << "Failed to read/decompress " << coalescedInFile
                  << '\n';
        return false;
      }
    }
    is.close();

    //  Reducing AS sets to a single AS may affect coalescing, but the
    //  re-coalesce step is currently disabled:
    // routes.Coalesce();

    //  Save the data in binary form and close the output file.
    ofstream  os(outFile.c_str(),
                 std::ios_base::out | std::ios_base::binary);
    if (! os) {
      return false;
    }
    routes.Write(os);
    os.close();
    rc = (! os.fail());
    if (! rc) {
      std::cerr << "Failed to write " << outFile << '\n';
    }
  }
  catch (const std::exception & ex) {
    //  e.g. std::invalid_argument from stoul() on a malformed record.
    std::cerr << "Exception converting " << coalescedInFile << ": "
              << ex.what() << '\n';
    rc = false;
  }
  return rc;
}

//----------------------------------------------------------------------------
//!  Calls CreatePrefix2ASFile() for every entry of \c files.  The output
//!  name is the input name with "_coalesced.gz" replaced by ".bin"; if the
//!  input name doesn't contain "_coalesced.gz", ".bin" is appended instead
//!  so that the output can never be the input file itself.
//!
//!  All entries are processed even if one fails.  Returns true only if
//!  every conversion succeeded.
//----------------------------------------------------------------------------
bool CreatePrefix2ASFiles(vector<string> & files)
{
  bool  rc = true;
  regex  rgx("_coalesced\\.gz", regex::ECMAScript|regex::optimize);
  
  for (const auto & file : files) {
    string  outFileName = regex_replace(file, rgx, ".bin");
    if (outFileName == file) {
      //  Nothing was replaced.  Writing to the same name would truncate
      //  the input before it is read.
      outFileName += ".bin";
    }
    if (! CreatePrefix2ASFile(file, outFileName)) {
      rc = false;
    }
  }
  return rc;
}

//----------------------------------------------------------------------------
//!  Calls GetDataFile() for every entry of \c files, passing the entry as
//!  the path relative to \c baseURL.  Each entry whose fetch succeeds is
//!  replaced by the file name GetDataFile() reported; entries whose fetch
//!  fails are left unchanged.  Returns true only if every fetch succeeded.
//----------------------------------------------------------------------------
bool GetDataFiles(const string & baseURL, vector<string> & files)
{
  bool  allFetched = true;
  for (string & entry : files) {
    string  localName;
    if (! GetDataFile(baseURL, entry, localName)) {
      allFetched = false;
      continue;
    }
    entry = localName;
  }
  return allFetched;
}

//----------------------------------------------------------------------------
//!  Calls CoalesceDataFile() for every entry of \c fileNames.  Each entry
//!  that is coalesced successfully is replaced by the name of its coalesced
//!  output file; entries that fail are left unchanged.  Returns true only
//!  if every entry was coalesced successfully.
//----------------------------------------------------------------------------
bool CoalesceDataFiles(vector<string> & fileNames)
{
  bool  allCoalesced = true;
  for (string & entry : fileNames) {
    string  coalescedName;
    if (! CoalesceDataFile(entry, coalescedName)) {
      allCoalesced = false;
      continue;
    }
    entry = coalescedName;
  }
  return allCoalesced;
}

//----------------------------------------------------------------------------
//!  Writes a one-line summary of the command-line syntax to cerr, using
//!  \c argv0 as the program name.
//----------------------------------------------------------------------------
void Usage(const string & argv0)
{
  std::ostream & err = cerr;
  err << "usage: " << argv0;
  err << " [[-a | -f filename] [-b] | -l]\n";
}

//----------------------------------------------------------------------------
//!  Fetches and coalesces a CAIDA prefix to AS file, for example:
//!  http://data.caida.org/datasets/routing/routeviews-prefix2as/2017/06/routeviews-rv2-20170614-1200.pfx2as.gz
//!
//!  Options:
//!    -a           fetch every file listed in the remote index
//!    -f filename  fetch only the files whose path contains \c filename
//!    -b           also create a binary file from each coalesced file
//!    -l           only list the files in the remote index
//!  With none of -a, -f or -l, the latest file in the index is fetched.
//!
//!  Returns 0 on success and 1 on any failure (bad option, index not
//!  available, no matching file, fetch/coalesce/convert failure).
//----------------------------------------------------------------------------
int main(int argc, char *argv[])
{
  bool    getall = false;
  bool    createBinFile = false;
  bool    listFiles = false;
  string  matchFile;
  int     optchar;
  
  while ((optchar = getopt(argc, argv, "abf:l")) != -1) {
    switch (optchar) {
      case 'a':
        getall = true;
        break;
      case 'b':
        createBinFile = true;
        break;
      case 'f':
        matchFile = optarg;
        break;
      case 'l':
        listFiles = true;
        break;
      default:
        Usage(argv[0]);
        return 1;
    }
  }
  
  const string  baseURL("http://data.caida.org/datasets/routing/routeviews-prefix2as");
  vector<DataFileTuple>  fileTuples = GetAvailableFiles(baseURL);
  if (fileTuples.empty()) {
    cerr << "Unable to find available files\n";
    return 1;
  }

  if (listFiles) {
    for (const auto & fileTuple : fileTuples) {
      const time_t      fileTime = get<1>(fileTuple);
      const struct tm  *fileTm = localtime(&fileTime);
      //  Placeholder with the same width as a formatted time stamp, shown
      //  if localtime() can't convert the value.
      char  tmbuf[32] = "----/--/--_--:--:--";
      if (fileTm != nullptr) {
        strftime(tmbuf, sizeof(tmbuf), "%Y/%m/%d_%H:%M:%S", fileTm);
      }
      //  Show only the last path component of the file's relative path.
      string        baseName(get<2>(fileTuple));
      const size_t  idx = baseName.find_last_of('/');
      if (idx != string::npos) {
        baseName.erase(0, idx + 1);
      }
      cout << setiosflags(ios::left)
           << setw(6) << get<0>(fileTuple)
           << "  " << tmbuf
           << "  " << baseName << '\n';
    }
    //  Listing succeeded.
    return 0;
  }

  vector<string>  fileNames;
  if (getall || (! matchFile.empty())) {
    //  An empty matchFile (plain -a) selects every file.
    for (const auto & fileTuple : fileTuples) {
      if (matchFile.empty()
          || (get<2>(fileTuple).find(matchFile) != string::npos)) {
        fileNames.push_back(get<2>(fileTuple));
      }
    }
  }
  else {
    fileNames.push_back(GetLatestFilename(fileTuples));
  }

  if (fileNames.empty()) {
    //  Only reachable when -f matched nothing in the index.
    cerr << "No available file matches '" << matchFile << "'\n";
    return 1;
  }
  if (! GetDataFiles(baseURL, fileNames)) {
    cerr << "Failed to fetch data files\n";
    return 1;
  }
  if (! CoalesceDataFiles(fileNames)) {
    cerr << "Failed to coalesce data files\n";
    return 1;
  }
  if (createBinFile && (! CreatePrefix2ASFiles(fileNames))) {
    cerr << "Failed to create binary prefix to AS files\n";
    return 1;
  }
  return 0;
}
