Add unixDate attribute; switch to BDate for pubdates

This commit is contained in:
Jaidyn Ann 2020-07-06 04:39:30 -05:00
parent 27b00a5202
commit dd51380e6f
10 changed files with 229 additions and 84 deletions

View File

@ -12,7 +12,7 @@ Channel::Channel ( BString path, BString outputPath )
homePage = BString("");
xmlUrl = BString("");
filePath = path;
lastDate = BString("");
// lastDate = NULL;
topLevelSubject = "";
lastSubject = "";
outputDir = outputPath;
@ -59,11 +59,10 @@ bool Channel::SetHomePage ( tinyxml2::XMLElement* elem ) {
bool Channel::SetLastDate ( const char* dateCStr ) {
if ( dateCStr == NULL )
return false;
BString dateStr = stringDateToBString( dateCStr );
if ( dateStr == NULL )
BDateTime date = feedDateToBDate( dateCStr );
if ( date == NULL )
return false;
lastDate = dateStr;
lastDate = date;
return true;
}
bool Channel::SetLastDate ( tinyxml2::XMLElement* elem ) {

View File

@ -13,7 +13,7 @@ public:
char lang[3];
BString title;
BString description;
BString lastDate;
BDateTime lastDate;
BString homePage;
BString xmlUrl;
BList items;

View File

@ -4,6 +4,7 @@
#include <StorageKit.h>
#include "Config.h"
#include "Item.h"
#include "Util.h"
Item::Item ( BString outputPath )
{
@ -12,7 +13,7 @@ Item::Item ( BString outputPath )
homePage = BString("");
postUrl = BString("");
content = BString("");
pubDate = BString("");
// pubDate = NULL;
outputDir = outputPath;
}
@ -25,65 +26,78 @@ Item::Filetize ( Config* cfg, bool onlyIfNew = false )
dir->CreateFile( title.String(), file );
BString betype = cfg->mimetype;
if ( pubDate != NULL ) {
int32 unixDate = (int32)pubDate.Time_t();
file->WriteAttr( "unixDate", B_INT32_TYPE, 0,
&unixDate, sizeof(int32) );
file->WriteAttr( "pubDate", B_STRING_TYPE, 0,
dateTo3339String(pubDate).String(),
dateTo3339String(pubDate).CountChars() );
}
file->WriteAttr( "META:title", B_STRING_TYPE, 0,
title.String(), title.CountChars() );
file->WriteAttr( "description", B_STRING_TYPE, 0,
description.String(), description.CountChars() );
file->WriteAttr( "pubDate", B_STRING_TYPE, 0,
pubDate.String(), pubDate.CountChars() );
file->WriteAttr( "META:url", B_STRING_TYPE, 0,
postUrl.String(), postUrl.CountChars() );
file->WriteAttr( "BEOS:TYPE", B_STRING_TYPE, 0,
betype.String(), betype.CountChars() );
file->Write(content.String(), content.Length());
// using file->Write with content converted to C string messes up length ofc
// this is required to preserve length (because of UTF char substitutions in parsing.cpp)
// const char* strPath = outputDir.String();
// std::string path(strPath);
// path += std::string(title.String());
// std::cout << path << std::endl;
//
// std::ofstream pFile(path);
// pFile << content;
// pFile.close();
return false;
}
void Item::SetTitle ( const char* titleStr ) {
bool Item::SetTitle ( const char* titleStr ) {
if ( titleStr != NULL ) title = BString( titleStr );
else return false;
return true;
}
void Item::SetTitle ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetTitle( elem->GetText() );
bool Item::SetTitle ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) return SetTitle( elem->GetText() );
return false;
}
void Item::SetDesc ( const char* descStr ) {
bool Item::SetDesc ( const char* descStr ) {
if ( descStr != NULL ) description = BString( descStr );
else return false;
return true;
}
void Item::SetDesc ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetDesc( elem->GetText() );
bool Item::SetDesc ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) return SetDesc( elem->GetText() );
return false;
}
void Item::SetContent ( const char* contentStr ) {
bool Item::SetContent ( const char* contentStr ) {
if ( contentStr != NULL ) content = BString( contentStr );
else return false;
return true;
}
void Item::SetContent ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetContent( elem->GetText() );
bool Item::SetContent ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) return SetContent( elem->GetText() );
return false;
}
void Item::SetPostUrl ( const char* urlStr ) {
if ( urlStr != NULL )
postUrl = BString( urlStr );
bool Item::SetPostUrl ( const char* urlStr ) {
if ( urlStr != NULL ) postUrl = BString( urlStr );
else return false;
return true;
}
void Item::SetPostUrl ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetPostUrl( elem->GetText() );
bool Item::SetPostUrl ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) return SetPostUrl( elem->GetText() );
return false;
}
void Item::SetPubDate ( const char* dateStr ) {
if ( dateStr != NULL )
pubDate = BString( dateStr );
bool Item::SetPubDate ( const char* dateStr ) {
if ( dateStr == NULL )
return false;
BDateTime date = feedDateToBDate( dateStr );
if ( date == NULL )
return false;
pubDate = date;
return true;
}
void Item::SetPubDate ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) SetPubDate( elem->GetText() );
bool Item::SetPubDate ( tinyxml2::XMLElement* elem ) {
if ( elem != NULL ) return SetPubDate( elem->GetText() );
return false;
}

View File

@ -11,7 +11,7 @@ class Item {
public:
BString title;
BString description;
BString pubDate;
BDateTime pubDate;
BString homePage;
BString postUrl;
BString content;
@ -21,16 +21,16 @@ public:
bool Filetize ( Config*, bool );
void SetTitle ( const char* );
void SetTitle ( tinyxml2::XMLElement* );
void SetDesc ( const char* );
void SetDesc ( tinyxml2::XMLElement* );
void SetContent ( const char* );
void SetContent ( tinyxml2::XMLElement* );
void SetPostUrl ( const char* );
void SetPostUrl ( tinyxml2::XMLElement* );
void SetPubDate ( const char* );
void SetPubDate ( tinyxml2::XMLElement* );
bool SetTitle ( const char* );
bool SetTitle ( tinyxml2::XMLElement* );
bool SetDesc ( const char* );
bool SetDesc ( tinyxml2::XMLElement* );
bool SetContent ( const char* );
bool SetContent ( tinyxml2::XMLElement* );
bool SetPostUrl ( const char* );
bool SetPostUrl ( tinyxml2::XMLElement* );
bool SetPubDate ( const char* );
bool SetPubDate ( tinyxml2::XMLElement* );
};

View File

@ -5,7 +5,9 @@
#include "Item.h"
#include "parsing.h"
#include "Config.h"
#include "Rifen.h"
#include "Pogger.h"
#include <StorageKit.h>
Config* main_cfg;
@ -13,11 +15,11 @@ int
main ( int argc, char** argv )
{
main_cfg = new Config;
usageMsg.ReplaceAll("%app%", "Rifen");
usageMsg.ReplaceAll("%app%", "Pogger");
invocation( argc, argv, &main_cfg );
main_cfg->targetFeeds.DoForEach(&processFeed);
return 0;
}
@ -68,7 +70,8 @@ invocation ( int argc, char** argv, Config** cfgPtr )
break;
case '?':
if ( optopt == 'O' || optopt == 'm' )
fprintf( stderr, "Option `-%c` requires an argument.\n\n", optopt );
fprintf( stderr, "Option `-%c` requires an argument.\n\n",
optopt );
else
fprintf( stderr, "Unknown option `-%c`.\n\n", optopt );
return 2;

View File

@ -2,40 +2,62 @@
#include <sstream>
#include <locale>
#include <iomanip>
#include <DateTime.h>
#include <HttpRequest.h>
#include "ProtocolListener.h"
#include "Util.h"
// ----------------------------------------------------------------------------
int
stringDateToEpoch ( const char* dateCStr )
BDateTime
feedDateToBDate ( const char* dateCStr )
{
std::istringstream ss( dateCStr );
std::tm t = {};
if ( ss >> std::get_time( &t, "%a, %d %b %Y %H:%M:%S" ) )
return std::mktime( &t );
return -1;
BDateTime date = dateRfc822ToBDate( dateCStr );
if ( date == NULL ) date = dateRfc3339ToBDate( dateCStr );
return date;
}
BString
stringDateToBString ( const char* dateCStr )
BDateTime
dateRfc3339ToBDate ( const char* dateCStr )
{
std::istringstream ss( dateCStr );
std::ostringstream dateStream;
std::tm t = {};
return stringDateToBDate( dateCStr, "%Y-%m-%dT%H:%M:%S" );
}
if ( ss >> std::get_time( &t, "%a, %d %b %Y %H:%M:%S" ) ) {
dateStream << std::put_time( &t, "%c" );
std::string dateString = dateStream.str();
return BString( dateStream.str().c_str() );
// Parse a date written in the RFC 822 style used by RSS feeds
// (weekday, day, month name, year, then H:M:S) into a BDateTime.
// The actual parsing is delegated to stringDateToBDate(); whatever that
// returns on a failed parse is passed straight back to the caller.
BDateTime
dateRfc822ToBDate ( const char* dateCStr )
{
	const char* rfc822Template = "%a, %d %b %Y %H:%M:%S";
	return stringDateToBDate( dateCStr, rfc822Template );
}
// Parse `dateCStr` according to the std::get_time() format `templateCStr`
// and build a BDateTime from the resulting broken-down time.
//
// The tm fields are copied over verbatim (with the usual +1900 / +1 offsets
// for year and month); no time-zone adjustment is performed here.
//
// Returns NULL (implicitly converted to BDateTime) when either argument is
// NULL or the string does not match the template. Callers in this project
// detect failure by comparing the result against NULL.
BDateTime
stringDateToBDate ( const char* dateCStr, const char* templateCStr )
{
	// Constructing a std::istringstream from a null pointer is undefined
	// behaviour, so reject missing input up front.
	if ( dateCStr == NULL || templateCStr == NULL )
		return NULL;

	std::istringstream dateStream( dateCStr );
	std::tm time = {};

	if ( dateStream >> std::get_time( &time, templateCStr ) ) {
		BTime newTime = BTime( time.tm_hour, time.tm_min, time.tm_sec, 0 );
		// std::tm counts years from 1900 and months from 0.
		BDate newDate = BDate( time.tm_year + 1900, time.tm_mon + 1, time.tm_mday );
		return BDateTime( newDate, newTime );
	}
	return NULL;
}
// ----------------------------------------------------------------------------
// Format a BDateTime as "YYYY-MM-DDTHH:MM:SS" (the date/time portion of an
// RFC 3339 timestamp, without fractional seconds or a UTC offset).
BString
dateTo3339String ( BDateTime dt )
{
	// The formatted text is 19 characters for a four-digit year, plus the
	// terminating NUL. Leave headroom for longer years and let snprintf
	// truncate rather than write past the end of the buffer.
	char buffer[32];
	snprintf( buffer, sizeof(buffer), "%i-%02i-%02iT%02i:%02i:%02i",
		dt.Date().Year(), dt.Date().Month(), dt.Date().Day(),
		dt.Time().Hour(), dt.Time().Minute(), dt.Time().Second() );
	return BString( buffer );
}
// ----------------------------------------------------------------------------
int32
webFetch ( char* strUrl, BDataIO* reply )
{

View File

@ -1,11 +1,18 @@
#ifndef UTIL_H
#define UTIL_H
#include <DateTime.h>
#include <Url.h>
#include "ProtocolListener.h"
int stringDateToEpoch ( const char* dateStr );
BString stringDateToBString ( const char* dateStr );
BDateTime feedDateToBDate ( const char* );
BDateTime dateRfc3339ToBDate ( const char* );
BDateTime dateRfc822ToBDate ( const char* );
BDateTime stringDateToBDate ( const char*, const char* );
BString dateTo3339String ( BDateTime );
int32 webFetch ( BUrl, BDataIO* );
int32 webFetch ( char*, BDataIO* );

View File

@ -19,9 +19,11 @@ feedParser ( Channel** chanPtr, Config* cfg )
if ( xml.FirstChildElement("rss") )
rssParser( chanPtr, cfg, &xml );
else if ( xml.FirstChildElement("feed") )
printf("has atom\n");
atomParser( chanPtr, cfg, &xml );
}
// ----------------------------------------------------------------------------
void
rssParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml )
{
@ -86,15 +88,109 @@ rssParseItems ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xchan )
}
}
// ----------------------------------------------------------------------------
// Parse an Atom document: read the feed-level metadata into the channel,
// then parse every <entry> into an Item.
//
// chanPtr - pointer to the Channel* being populated
// cfg     - runtime configuration, forwarded to the helpers
// xml     - parsed document whose root element is expected to be <feed>
void
atomParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml )
{
	tinyxml2::XMLElement* xfeed = xml->FirstChildElement("feed");

	// Both helpers dereference xfeed unconditionally; nothing to do
	// when the document has no <feed> root.
	if ( xfeed == NULL )
		return;

	atomRootParse( chanPtr, cfg, xfeed );
	atomParseEntries( chanPtr, cfg, xfeed );
}
// Fill in the channel's title, description, last-updated date and home page
// from the children of an Atom <feed> element.
//
// Last date: tries the feed's <updated>, then <published>, then falls back
// to the same two elements of the first <entry>.
// Home page: tries the feed's <link href>, then <author><uri>, then
// <author><link href>.
//
// chanPtr - pointer to the Channel* being populated
// cfg     - runtime configuration; only `verbose` is read here
// xfeed   - the <feed> element (must not be NULL)
void
atomRootParse( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xfeed )
{
	Channel* chan = *(chanPtr);

	tinyxml2::XMLElement* xauthor = xfeed->FirstChildElement("author");
	tinyxml2::XMLElement* xentry = xfeed->FirstChildElement("entry");
	tinyxml2::XMLElement* xlink = xfeed->FirstChildElement("link");

	// A feed-level <author> is optional, so only look inside it when present.
	tinyxml2::XMLElement* xauthlink = NULL;
	if ( xauthor )
		xauthlink = xauthor->FirstChildElement("link");

	bool set = false;

	chan->SetTitle( xfeed->FirstChildElement("title") );
	chan->SetDesc( xfeed->FirstChildElement("description") );

	set = chan->SetLastDate( xfeed->FirstChildElement("updated") );
	if ( !set ) set = chan->SetLastDate( xfeed->FirstChildElement("published") );
	if ( !set && xentry ) set = chan->SetLastDate( xentry->FirstChildElement("updated") );
	if ( !set && xentry ) set = chan->SetLastDate( xentry->FirstChildElement("published") );

	// A feed-level <link> may be missing as well; fall through to the
	// author-based candidates in that case instead of dereferencing NULL.
	set = false;
	if ( xlink ) set = chan->SetHomePage( xlink->Attribute( "href" ) );
	if ( !set && xauthor ) set = chan->SetHomePage( xauthor->FirstChildElement("uri") );
	if ( !set && xauthlink ) set = chan->SetHomePage( xauthlink->Attribute( "href" ) );

	if ( cfg->verbose )
		printf("Channel '%s' at '%s':\n", chan->title.String(), chan->homePage.String());
}
// Build an Item from a single Atom <entry> element and append it to the
// channel's item list.
//
// Description: tries <summary>, then <description>, then
// <media:group><media:description>.
// Publication date: tries <updated>, then <published>.
// Content: the whole <content> element, serialised back to XML text.
//
// chanPtr - pointer to the Channel* receiving the new item
// cfg     - runtime configuration; only `verbose` is read here
// xentry  - the <entry> element (must not be NULL)
void
atomEntryParse ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xentry )
{
	Channel* chan = *(chanPtr);

	// Allocate with `new` only; the item pointer is handed to chan->items.
	Item* newItem = new Item( chan->outputDir );

	tinyxml2::XMLElement* xcontent = xentry->FirstChildElement("content");
	tinyxml2::XMLElement* xmedia = xentry->FirstChildElement("media:group");
	tinyxml2::XMLElement* xlink = xentry->FirstChildElement("link");
	tinyxml2::XMLPrinter xprinter;

	newItem->SetTitle( xentry->FirstChildElement("title") );

	// An entry is not guaranteed to carry a <link>; skip the URL if absent.
	if ( xlink )
		newItem->SetPostUrl( xlink->Attribute("href") );

	bool set = false;
	set = newItem->SetDesc( xentry->FirstChildElement("summary") );
	if ( !set ) set = newItem->SetDesc( xentry->FirstChildElement("description"));
	if ( !set && xmedia ) set = newItem->SetDesc( xmedia->FirstChildElement("media:description"));

	set = newItem->SetPubDate( xentry->FirstChildElement("updated") );
	if ( !set ) set = newItem->SetPubDate( xentry->FirstChildElement("published") );

	if ( xcontent ) {
		// Print the element through XMLPrinter so nested markup is kept.
		xcontent->Accept( &xprinter );
		newItem->SetContent( xprinter.CStr() );
	}

	if ( cfg->verbose )
		printf("\t%s\n", newItem->title.String());

	chan->items.AddItem( newItem );
}
// Walk every <entry> child of an Atom <feed> element and parse each one
// into the channel via atomEntryParse(). The channel's item list is
// replaced with a fresh BList constructed from the entry count first.
//
// chanPtr - pointer to the Channel* being populated
// cfg     - runtime configuration; `verbose` enables the count printout
// xfeed   - the <feed> element whose entries are parsed
void
atomParseEntries ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xfeed )
{
	Channel* chan = *(chanPtr);
	tinyxml2::XMLElement* firstEntry = xfeed->FirstChildElement("entry");
	const int total = xmlCountSiblings( firstEntry, "entry" );

	chan->items = BList(total);

	if ( cfg->verbose )
		printf("\t-%i items-\n", total);

	for ( tinyxml2::XMLElement* cur = firstEntry; cur;
		  cur = cur->NextSiblingElement("entry") )
		atomEntryParse( chanPtr, cfg, cur );
}
// ----------------------------------------------------------------------------
// Count the elements in a sibling chain: `xsibling` itself (when non-null)
// plus every following sibling element named `sibling_name`.
// Returns 0 when `xsibling` is null.
int
xmlCountSiblings ( tinyxml2::XMLElement* xsibling, const char* sibling_name )
{
	int total = 0;
	for ( tinyxml2::XMLElement* cur = xsibling; cur;
		  cur = cur->NextSiblingElement(sibling_name) )
		++total;
	return total;
}

View File

@ -10,6 +10,10 @@ void rssParser ( Channel**, Config*, tinyxml2::XMLDocument* );
void rssRootParse ( Channel**, Config*, tinyxml2::XMLElement* );
void rssItemParse ( Channel**, Config*, tinyxml2::XMLElement* );
void rssParseItems ( Channel**, Config*, tinyxml2::XMLElement* );
void atomParser ( Channel**, Config*, tinyxml2::XMLDocument* );
void atomRootParse ( Channel** chanPtr, Config*, tinyxml2::XMLElement* );
void atomEntryParse ( Channel**, Config*, tinyxml2::XMLElement* );
void atomParseEntries ( Channel**, Config*, tinyxml2::XMLElement* );
int xmlCountSiblings ( tinyxml2::XMLElement*, const char* );
#endif