Add unixDate attribute; switch to BDate for pubdates

This commit is contained in:
Jaidyn Ann 2020-07-06 04:39:30 -05:00
parent 27b00a5202
commit dd51380e6f
10 changed files with 229 additions and 84 deletions

View File

@ -12,7 +12,7 @@ Channel::Channel ( BString path, BString outputPath )
homePage = BString(""); homePage = BString("");
xmlUrl = BString(""); xmlUrl = BString("");
filePath = path; filePath = path;
lastDate = BString(""); // lastDate = NULL;
topLevelSubject = ""; topLevelSubject = "";
lastSubject = ""; lastSubject = "";
outputDir = outputPath; outputDir = outputPath;
@ -59,11 +59,10 @@ bool Channel::SetHomePage ( tinyxml2::XMLElement* elem ) {
bool Channel::SetLastDate ( const char* dateCStr ) { bool Channel::SetLastDate ( const char* dateCStr ) {
if ( dateCStr == NULL ) if ( dateCStr == NULL )
return false; return false;
BDateTime date = feedDateToBDate( dateCStr );
BString dateStr = stringDateToBString( dateCStr ); if ( date == NULL )
if ( dateStr == NULL )
return false; return false;
lastDate = dateStr; lastDate = date;
return true; return true;
} }
bool Channel::SetLastDate ( tinyxml2::XMLElement* elem ) { bool Channel::SetLastDate ( tinyxml2::XMLElement* elem ) {

View File

@ -13,7 +13,7 @@ public:
char lang[3]; char lang[3];
BString title; BString title;
BString description; BString description;
BString lastDate; BDateTime lastDate;
BString homePage; BString homePage;
BString xmlUrl; BString xmlUrl;
BList items; BList items;

View File

@ -4,6 +4,7 @@
#include <StorageKit.h> #include <StorageKit.h>
#include "Config.h" #include "Config.h"
#include "Item.h" #include "Item.h"
#include "Util.h"
Item::Item ( BString outputPath ) Item::Item ( BString outputPath )
{ {
@ -12,7 +13,7 @@ Item::Item ( BString outputPath )
homePage = BString(""); homePage = BString("");
postUrl = BString(""); postUrl = BString("");
content = BString(""); content = BString("");
pubDate = BString(""); // pubDate = NULL;
outputDir = outputPath; outputDir = outputPath;
} }
@ -25,65 +26,78 @@ Item::Filetize ( Config* cfg, bool onlyIfNew = false )
dir->CreateFile( title.String(), file ); dir->CreateFile( title.String(), file );
BString betype = cfg->mimetype; BString betype = cfg->mimetype;
if ( pubDate != NULL ) {
int32 unixDate = (int32)pubDate.Time_t();
file->WriteAttr( "unixDate", B_INT32_TYPE, 0,
&unixDate, sizeof(int32) );
file->WriteAttr( "pubDate", B_STRING_TYPE, 0,
dateTo3339String(pubDate).String(),
dateTo3339String(pubDate).CountChars() );
}
file->WriteAttr( "META:title", B_STRING_TYPE, 0, file->WriteAttr( "META:title", B_STRING_TYPE, 0,
title.String(), title.CountChars() ); title.String(), title.CountChars() );
file->WriteAttr( "description", B_STRING_TYPE, 0, file->WriteAttr( "description", B_STRING_TYPE, 0,
description.String(), description.CountChars() ); description.String(), description.CountChars() );
file->WriteAttr( "pubDate", B_STRING_TYPE, 0,
pubDate.String(), pubDate.CountChars() );
file->WriteAttr( "META:url", B_STRING_TYPE, 0, file->WriteAttr( "META:url", B_STRING_TYPE, 0,
postUrl.String(), postUrl.CountChars() ); postUrl.String(), postUrl.CountChars() );
file->WriteAttr( "BEOS:TYPE", B_STRING_TYPE, 0, file->WriteAttr( "BEOS:TYPE", B_STRING_TYPE, 0,
betype.String(), betype.CountChars() ); betype.String(), betype.CountChars() );
file->Write(content.String(), content.Length()); file->Write(content.String(), content.Length());
// using file->Write with content converted to C string messes up length ofc
// this is required to preserve length (because of UTF char substitutions in parsing.cpp)
// const char* strPath = outputDir.String();
// std::string path(strPath);
// path += std::string(title.String());
// std::cout << path << std::endl;
//
// std::ofstream pFile(path);
// pFile << content;
// pFile.close();
return false; return false;
} }
void Item::SetTitle ( const char* titleStr ) { bool Item::SetTitle ( const char* titleStr ) {
if ( titleStr != NULL ) title = BString( titleStr ); if ( titleStr != NULL ) title = BString( titleStr );
else return false;
return true;
} }
void Item::SetTitle ( tinyxml2::XMLElement* elem ) { bool Item::SetTitle ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetTitle( elem->GetText() ); if ( elem != NULL ) return SetTitle( elem->GetText() );
return false;
} }
void Item::SetDesc ( const char* descStr ) { bool Item::SetDesc ( const char* descStr ) {
if ( descStr != NULL ) description = BString( descStr ); if ( descStr != NULL ) description = BString( descStr );
else return false;
return true;
} }
void Item::SetDesc ( tinyxml2::XMLElement* elem ) { bool Item::SetDesc ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetDesc( elem->GetText() ); if ( elem != NULL ) return SetDesc( elem->GetText() );
return false;
} }
void Item::SetContent ( const char* contentStr ) { bool Item::SetContent ( const char* contentStr ) {
if ( contentStr != NULL ) content = BString( contentStr ); if ( contentStr != NULL ) content = BString( contentStr );
else return false;
return true;
} }
void Item::SetContent ( tinyxml2::XMLElement* elem ) { bool Item::SetContent ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetContent( elem->GetText() ); if ( elem != NULL ) return SetContent( elem->GetText() );
return false;
} }
void Item::SetPostUrl ( const char* urlStr ) { bool Item::SetPostUrl ( const char* urlStr ) {
if ( urlStr != NULL ) if ( urlStr != NULL ) postUrl = BString( urlStr );
postUrl = BString( urlStr ); else return false;
return true;
} }
void Item::SetPostUrl ( tinyxml2::XMLElement* elem ) { bool Item::SetPostUrl ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetPostUrl( elem->GetText() ); if ( elem != NULL ) return SetPostUrl( elem->GetText() );
return false;
} }
void Item::SetPubDate ( const char* dateStr ) { bool Item::SetPubDate ( const char* dateStr ) {
if ( dateStr != NULL ) if ( dateStr == NULL )
pubDate = BString( dateStr ); return false;
BDateTime date = feedDateToBDate( dateStr );
if ( date == NULL )
return false;
pubDate = date;
return true;
} }
void Item::SetPubDate ( tinyxml2::XMLElement* elem ) { bool Item::SetPubDate ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetPubDate( elem->GetText() ); if ( elem != NULL ) return SetPubDate( elem->GetText() );
return false;
} }

View File

@ -11,7 +11,7 @@ class Item {
public: public:
BString title; BString title;
BString description; BString description;
BString pubDate; BDateTime pubDate;
BString homePage; BString homePage;
BString postUrl; BString postUrl;
BString content; BString content;
@ -21,16 +21,16 @@ public:
bool Filetize ( Config*, bool ); bool Filetize ( Config*, bool );
void SetTitle ( const char* ); bool SetTitle ( const char* );
void SetTitle ( tinyxml2::XMLElement* ); bool SetTitle ( tinyxml2::XMLElement* );
void SetDesc ( const char* ); bool SetDesc ( const char* );
void SetDesc ( tinyxml2::XMLElement* ); bool SetDesc ( tinyxml2::XMLElement* );
void SetContent ( const char* ); bool SetContent ( const char* );
void SetContent ( tinyxml2::XMLElement* ); bool SetContent ( tinyxml2::XMLElement* );
void SetPostUrl ( const char* ); bool SetPostUrl ( const char* );
void SetPostUrl ( tinyxml2::XMLElement* ); bool SetPostUrl ( tinyxml2::XMLElement* );
void SetPubDate ( const char* ); bool SetPubDate ( const char* );
void SetPubDate ( tinyxml2::XMLElement* ); bool SetPubDate ( tinyxml2::XMLElement* );
}; };

View File

@ -5,7 +5,9 @@
#include "Item.h" #include "Item.h"
#include "parsing.h" #include "parsing.h"
#include "Config.h" #include "Config.h"
#include "Rifen.h" #include "Pogger.h"
#include <StorageKit.h>
Config* main_cfg; Config* main_cfg;
@ -13,11 +15,11 @@ int
main ( int argc, char** argv ) main ( int argc, char** argv )
{ {
main_cfg = new Config; main_cfg = new Config;
usageMsg.ReplaceAll("%app%", "Rifen"); usageMsg.ReplaceAll("%app%", "Pogger");
invocation( argc, argv, &main_cfg ); invocation( argc, argv, &main_cfg );
main_cfg->targetFeeds.DoForEach(&processFeed); main_cfg->targetFeeds.DoForEach(&processFeed);
return 0; return 0;
} }
@ -68,7 +70,8 @@ invocation ( int argc, char** argv, Config** cfgPtr )
break; break;
case '?': case '?':
if ( optopt == 'O' || optopt == 'm' ) if ( optopt == 'O' || optopt == 'm' )
fprintf( stderr, "Option `-%c` requires an argument.\n\n", optopt ); fprintf( stderr, "Option `-%c` requires an argument.\n\n",
optopt );
else else
fprintf( stderr, "Unknown option `-%c`.\n\n", optopt ); fprintf( stderr, "Unknown option `-%c`.\n\n", optopt );
return 2; return 2;

View File

@ -2,40 +2,62 @@
#include <sstream> #include <sstream>
#include <locale> #include <locale>
#include <iomanip> #include <iomanip>
#include <DateTime.h>
#include <HttpRequest.h> #include <HttpRequest.h>
#include "ProtocolListener.h" #include "ProtocolListener.h"
#include "Util.h" #include "Util.h"
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
int BDateTime
stringDateToEpoch ( const char* dateCStr ) feedDateToBDate ( const char* dateCStr )
{ {
std::istringstream ss( dateCStr ); BDateTime date = dateRfc822ToBDate( dateCStr );
std::tm t = {}; if ( date == NULL ) date = dateRfc3339ToBDate( dateCStr );
return date;
if ( ss >> std::get_time( &t, "%a, %d %b %Y %H:%M:%S" ) )
return std::mktime( &t );
return -1;
} }
BString BDateTime
stringDateToBString ( const char* dateCStr ) dateRfc3339ToBDate ( const char* dateCStr )
{ {
std::istringstream ss( dateCStr ); return stringDateToBDate( dateCStr, "%Y-%m-%dT%H:%M:%S" );
std::ostringstream dateStream; }
std::tm t = {};
if ( ss >> std::get_time( &t, "%a, %d %b %Y %H:%M:%S" ) ) { BDateTime
dateStream << std::put_time( &t, "%c" ); dateRfc822ToBDate ( const char* dateCStr )
std::string dateString = dateStream.str(); {
return BString( dateStream.str().c_str() ); return stringDateToBDate( dateCStr, "%a, %d %b %Y %H:%M:%S" );
}
BDateTime
stringDateToBDate ( const char* dateCStr, const char* templateCStr )
{
std::istringstream dateStream( dateCStr );
std::tm time = {};
if ( dateStream >> std::get_time( &time, templateCStr ) ) {
BTime newTime = BTime( time.tm_hour, time.tm_min, time.tm_sec, 0 );
BDate newDate = BDate( time.tm_year + 1900, time.tm_mon + 1, time.tm_mday );
return BDateTime( newDate, newTime );
} }
return NULL; return NULL;
} }
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Format a BDateTime as "YYYY-MM-DDTHH:MM:SS" (RFC 3339 date-time layout,
// without fractional seconds or a time-zone offset).
//   dt       the date and time to format
//   returns  a BString holding the formatted timestamp
BString
dateTo3339String ( BDateTime dt )
{
	// The formatted text is 19 characters plus the terminating NUL, so the
	// buffer must hold at least 20 bytes. It is sized with headroom and
	// written through snprintf so that an unusually wide field (such as a
	// negative or five-digit year) is truncated instead of overrunning it.
	char buffer[32];

	snprintf( buffer, sizeof(buffer), "%i-%02i-%02iT%02i:%02i:%02i",
		dt.Date().Year(), dt.Date().Month(), dt.Date().Day(),
		dt.Time().Hour(), dt.Time().Minute(), dt.Time().Second() );

	return BString( buffer );
}
// ----------------------------------------------------------------------------
int32 int32
webFetch ( char* strUrl, BDataIO* reply ) webFetch ( char* strUrl, BDataIO* reply )
{ {

View File

@ -1,11 +1,18 @@
#ifndef UTIL_H #ifndef UTIL_H
#define UTIL_H #define UTIL_H
#include <DateTime.h>
#include <Url.h> #include <Url.h>
#include "ProtocolListener.h" #include "ProtocolListener.h"
int stringDateToEpoch ( const char* dateStr ); BDateTime feedDateToBDate ( const char* );
BString stringDateToBString ( const char* dateStr ); BDateTime dateRfc3339ToBDate ( const char* );
BDateTime dateRfc822ToBDate ( const char* );
BDateTime stringDateToBDate ( const char*, const char* );
BString dateTo3339String ( BDateTime );
int32 webFetch ( BUrl, BDataIO* ); int32 webFetch ( BUrl, BDataIO* );
int32 webFetch ( char*, BDataIO* ); int32 webFetch ( char*, BDataIO* );

View File

@ -19,9 +19,11 @@ feedParser ( Channel** chanPtr, Config* cfg )
if ( xml.FirstChildElement("rss") ) if ( xml.FirstChildElement("rss") )
rssParser( chanPtr, cfg, &xml ); rssParser( chanPtr, cfg, &xml );
else if ( xml.FirstChildElement("feed") ) else if ( xml.FirstChildElement("feed") )
printf("has atom\n"); atomParser( chanPtr, cfg, &xml );
} }
// ----------------------------------------------------------------------------
void void
rssParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml ) rssParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml )
{ {
@ -86,15 +88,109 @@ rssParseItems ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xchan )
} }
} }
// ----------------------------------------------------------------------------
// Parse an Atom document: read the <feed> root's metadata into the channel
// via atomRootParse, then parse its <entry> children via atomParseEntries.
//   chanPtr  pointer to the Channel* being populated
//   cfg      run configuration, forwarded to the helpers
//   xml      the loaded XML document
void
atomParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml )
{
	tinyxml2::XMLElement* xfeed = xml->FirstChildElement("feed");

	// Both helpers call methods on the <feed> element, so there is nothing
	// safe to do if the document does not have one.
	if ( !xfeed )
		return;

	atomRootParse( chanPtr, cfg, xfeed );
	atomParseEntries( chanPtr, cfg, xfeed );
}
// Fill in the channel's title, description, last date and home page from the
// Atom <feed> root element.
//   chanPtr  pointer to the Channel* to populate
//   cfg      run configuration; only `verbose` is read here
//   xfeed    the <feed> element (must not be null)
void
atomRootParse( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xfeed )
{
	Channel* chan = *(chanPtr);

	tinyxml2::XMLElement* xauthor = xfeed->FirstChildElement("author");
	tinyxml2::XMLElement* xentry = xfeed->FirstChildElement("entry");
	tinyxml2::XMLElement* xlink = xfeed->FirstChildElement("link");

	// FirstChildElement returns null when the feed has no <author>, so only
	// look for the author's <link> when the author element exists.
	tinyxml2::XMLElement* xauthlink = NULL;
	if ( xauthor )
		xauthlink = xauthor->FirstChildElement("link");

	bool set = false;

	chan->SetTitle( xfeed->FirstChildElement("title") );
	chan->SetDesc( xfeed->FirstChildElement("description") );

	// Last date: try the feed's own <updated>/<published> first, then fall
	// back to those of the first <entry>.
	set = chan->SetLastDate( xfeed->FirstChildElement("updated") );
	if ( !set ) set = chan->SetLastDate( xfeed->FirstChildElement("published") );
	if ( !set && xentry ) set = chan->SetLastDate( xentry->FirstChildElement("updated") );
	if ( !set && xentry ) set = chan->SetLastDate( xentry->FirstChildElement("published") );

	// Home page: try the feed's <link href>, then the author's <uri>, then
	// the author's <link href>. A feed without a top-level <link> simply
	// moves on to the author fallbacks.
	set = false;
	if ( xlink ) set = chan->SetHomePage( xlink->Attribute( "href" ) );
	if ( !set && xauthor ) set = chan->SetHomePage( xauthor->FirstChildElement("uri") );
	if ( !set && xauthlink ) set = chan->SetHomePage( xauthlink->Attribute( "href" ) );

	if ( cfg->verbose )
		printf("Channel '%s' at '%s':\n", chan->title.String(), chan->homePage.String());
}
// Build an Item from a single Atom <entry> element and append it to the
// channel's item list.
//   chanPtr  pointer to the Channel* that receives the new item
//   cfg      run configuration; only `verbose` is read here
//   xentry   the <entry> element to read (must not be null)
void
atomEntryParse ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xentry )
{
	Channel* chan = *(chanPtr);

	// Allocate with `new` only: a separate malloc'd buffer would be
	// overwritten by this pointer and leaked.
	Item* newItem = new Item( chan->outputDir );

	tinyxml2::XMLElement* xcontent = xentry->FirstChildElement("content");
	tinyxml2::XMLElement* xlink = xentry->FirstChildElement("link");
	tinyxml2::XMLElement* xmedia = xentry->FirstChildElement("media:group");
	tinyxml2::XMLPrinter xprinter;

	newItem->SetTitle( xentry->FirstChildElement("title") );

	// An entry without a <link> keeps the item's default (empty) post URL.
	if ( xlink )
		newItem->SetPostUrl( xlink->Attribute("href") );

	// Description: <summary>, then <description>, then the
	// <media:description> inside <media:group> when that group exists.
	bool set = false;
	set = newItem->SetDesc( xentry->FirstChildElement("summary") );
	if ( !set ) set = newItem->SetDesc( xentry->FirstChildElement("description"));
	if ( !set && xmedia ) set = newItem->SetDesc( xmedia->FirstChildElement("media:description"));

	// Publication date: <updated>, falling back to <published>.
	set = newItem->SetPubDate( xentry->FirstChildElement("updated") );
	if ( !set ) set = newItem->SetPubDate( xentry->FirstChildElement("published") );

	// The content is stored as the printed XML of the <content> element
	// rather than its text alone.
	if ( xcontent ) {
		xcontent->Accept( &xprinter );
		newItem->SetContent( xprinter.CStr() );
	}

	if ( cfg->verbose )
		printf("\t%s\n", newItem->title.String());

	chan->items.AddItem( newItem );
}
// Visit every <entry> child of the feed element, handing each one to
// atomEntryParse. The channel's item list is replaced beforehand with a
// BList constructed from the entry count.
//   chanPtr  pointer to the Channel* whose items are rebuilt
//   cfg      run configuration; only `verbose` is read here
//   xfeed    the <feed> element
void
atomParseEntries ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xfeed )
{
	Channel* channel = *(chanPtr);
	tinyxml2::XMLElement* firstEntry = xfeed->FirstChildElement("entry");

	int total = xmlCountSiblings( firstEntry, "entry" );
	channel->items = BList(total);

	if ( cfg->verbose )
		printf("\t-%i items-\n", total);

	for ( tinyxml2::XMLElement* cursor = firstEntry; cursor;
			cursor = cursor->NextSiblingElement("entry") )
		atomEntryParse( chanPtr, cfg, cursor );
}
// ----------------------------------------------------------------------------
int int
xmlCountSiblings ( tinyxml2::XMLElement* xsibling, const char* sibling_name ) xmlCountSiblings ( tinyxml2::XMLElement* xsibling, const char* sibling_name )
{ {
int count = 0; int count = 0;
while ( xsibling ) { while ( xsibling ) {
count++; count++;
xsibling = xsibling->NextSiblingElement(sibling_name); xsibling = xsibling->NextSiblingElement(sibling_name);
} }
return count; return count;
} }

View File

@ -10,6 +10,10 @@ void rssParser ( Channel**, Config*, tinyxml2::XMLDocument* );
void rssRootParse ( Channel**, Config*, tinyxml2::XMLElement* ); void rssRootParse ( Channel**, Config*, tinyxml2::XMLElement* );
void rssItemParse ( Channel**, Config*, tinyxml2::XMLElement* ); void rssItemParse ( Channel**, Config*, tinyxml2::XMLElement* );
void rssParseItems ( Channel**, Config*, tinyxml2::XMLElement* ); void rssParseItems ( Channel**, Config*, tinyxml2::XMLElement* );
void atomParser ( Channel**, Config*, tinyxml2::XMLDocument* );
void atomRootParse ( Channel** chanPtr, Config*, tinyxml2::XMLElement* );
void atomEntryParse ( Channel**, Config*, tinyxml2::XMLElement* );
void atomParseEntries ( Channel**, Config*, tinyxml2::XMLElement* );
int xmlCountSiblings ( tinyxml2::XMLElement*, const char* ); int xmlCountSiblings ( tinyxml2::XMLElement*, const char* );
#endif #endif