diff --git a/src/Channel.cpp b/src/Channel.cpp index eab225f..d0f9c78 100644 --- a/src/Channel.cpp +++ b/src/Channel.cpp @@ -12,7 +12,7 @@ Channel::Channel ( BString path, BString outputPath ) homePage = BString(""); xmlUrl = BString(""); filePath = path; - lastDate = BString(""); +// lastDate = NULL; topLevelSubject = ""; lastSubject = ""; outputDir = outputPath; @@ -59,11 +59,10 @@ bool Channel::SetHomePage ( tinyxml2::XMLElement* elem ) { bool Channel::SetLastDate ( const char* dateCStr ) { if ( dateCStr == NULL ) return false; - - BString dateStr = stringDateToBString( dateCStr ); - if ( dateStr == NULL ) + BDateTime date = feedDateToBDate( dateCStr ); + if ( date == NULL ) return false; - lastDate = dateStr; + lastDate = date; return true; } bool Channel::SetLastDate ( tinyxml2::XMLElement* elem ) { diff --git a/src/Channel.h b/src/Channel.h index 5ed91ba..95d2a54 100644 --- a/src/Channel.h +++ b/src/Channel.h @@ -13,7 +13,7 @@ public: char lang[3]; BString title; BString description; - BString lastDate; + BDateTime lastDate; BString homePage; BString xmlUrl; BList items; diff --git a/src/Item.cpp b/src/Item.cpp index 66f861d..29bf805 100644 --- a/src/Item.cpp +++ b/src/Item.cpp @@ -4,6 +4,7 @@ #include #include "Config.h" #include "Item.h" +#include "Util.h" Item::Item ( BString outputPath ) { @@ -12,7 +13,7 @@ Item::Item ( BString outputPath ) homePage = BString(""); postUrl = BString(""); content = BString(""); - pubDate = BString(""); +// pubDate = NULL; outputDir = outputPath; } @@ -25,65 +26,78 @@ Item::Filetize ( Config* cfg, bool onlyIfNew = false ) dir->CreateFile( title.String(), file ); BString betype = cfg->mimetype; + if ( pubDate != NULL ) { + int32 unixDate = (int32)pubDate.Time_t(); + file->WriteAttr( "unixDate", B_INT32_TYPE, 0, + &unixDate, sizeof(int32) ); + file->WriteAttr( "pubDate", B_STRING_TYPE, 0, + dateTo3339String(pubDate).String(), + dateTo3339String(pubDate).CountChars() ); + } file->WriteAttr( "META:title", B_STRING_TYPE, 0, title.String(), title.CountChars() ); file->WriteAttr( "description", B_STRING_TYPE, 0, description.String(), description.CountChars() ); - file->WriteAttr( "pubDate", B_STRING_TYPE, 0, - pubDate.String(), pubDate.CountChars() ); file->WriteAttr( "META:url", B_STRING_TYPE, 0, postUrl.String(), postUrl.CountChars() ); file->WriteAttr( "BEOS:TYPE", B_STRING_TYPE, 0, betype.String(), betype.CountChars() ); file->Write(content.String(), content.Length()); - // using file->Write with content converted to C string messes up length ofc - // this is required to preserve length (because of UTF char substitutions in parsing.cpp) -// const char* strPath = outputDir.String(); -// std::string path(strPath); -// path += std::string(title.String()); -// std::cout << path << std::endl; -// -// std::ofstream pFile(path); -// pFile << content; -// pFile.close(); return false; } -void Item::SetTitle ( const char* titleStr ) { +bool Item::SetTitle ( const char* titleStr ) { if ( titleStr != NULL ) title = BString( titleStr ); + else return false; + return true; } -void Item::SetTitle ( tinyxml2::XMLElement* elem ) { - if ( elem != NULL ) SetTitle( elem->GetText() ); +bool Item::SetTitle ( tinyxml2::XMLElement* elem ) { + if ( elem != NULL ) return SetTitle( elem->GetText() ); + return false; } -void Item::SetDesc ( const char* descStr ) { +bool Item::SetDesc ( const char* descStr ) { if ( descStr != NULL ) description = BString( descStr ); + else return false; + return true; } -void Item::SetDesc ( tinyxml2::XMLElement* elem ) { - if ( elem != NULL ) SetDesc( elem->GetText() ); +bool Item::SetDesc ( tinyxml2::XMLElement* elem ) { + if ( elem != NULL ) return SetDesc( elem->GetText() ); + return false; } -void Item::SetContent ( const char* contentStr ) { +bool Item::SetContent ( const char* contentStr ) { if ( contentStr != NULL ) content = BString( contentStr ); + else return false; + return true; } -void Item::SetContent ( tinyxml2::XMLElement* elem ) { - if ( elem != NULL ) SetContent( elem->GetText() ); +bool Item::SetContent ( tinyxml2::XMLElement* elem ) { + if ( elem != NULL ) return SetContent( elem->GetText() ); + return false; } -void Item::SetPostUrl ( const char* urlStr ) { - if ( urlStr != NULL ) - postUrl = BString( urlStr ); +bool Item::SetPostUrl ( const char* urlStr ) { + if ( urlStr != NULL ) postUrl = BString( urlStr ); + else return false; + return true; } -void Item::SetPostUrl ( tinyxml2::XMLElement* elem ) { - if ( elem != NULL ) SetPostUrl( elem->GetText() ); +bool Item::SetPostUrl ( tinyxml2::XMLElement* elem ) { + if ( elem != NULL ) return SetPostUrl( elem->GetText() ); + return false; } -void Item::SetPubDate ( const char* dateStr ) { - if ( dateStr != NULL ) - pubDate = BString( dateStr ); +bool Item::SetPubDate ( const char* dateStr ) { + if ( dateStr == NULL ) + return false; + BDateTime date = feedDateToBDate( dateStr ); + if ( date == NULL ) + return false; + pubDate = date; + return true; } -void Item::SetPubDate ( tinyxml2::XMLElement* elem ) { - if ( elem != NULL ) SetPubDate( elem->GetText() ); +bool Item::SetPubDate ( tinyxml2::XMLElement* elem ) { + if ( elem != NULL ) return SetPubDate( elem->GetText() ); + return false; } diff --git a/src/Item.h b/src/Item.h index e7d7904..06c279e 100644 --- a/src/Item.h +++ b/src/Item.h @@ -11,7 +11,7 @@ class Item { public: BString title; BString description; - BString pubDate; + BDateTime pubDate; BString homePage; BString postUrl; BString content; @@ -21,16 +21,16 @@ public: bool Filetize ( Config*, bool ); - void SetTitle ( const char* ); - void SetTitle ( tinyxml2::XMLElement* ); - void SetDesc ( const char* ); - void SetDesc ( tinyxml2::XMLElement* ); - void SetContent ( const char* ); - void SetContent ( tinyxml2::XMLElement* ); - void SetPostUrl ( const char* ); - void SetPostUrl ( tinyxml2::XMLElement* ); - void SetPubDate ( const char* ); - void SetPubDate ( tinyxml2::XMLElement* ); + bool SetTitle ( const char* ); + bool SetTitle ( tinyxml2::XMLElement* ); + bool SetDesc ( const char* ); + bool SetDesc ( tinyxml2::XMLElement* ); + bool SetContent ( const char* ); + bool SetContent ( tinyxml2::XMLElement* ); + bool SetPostUrl ( const char* ); + bool SetPostUrl ( tinyxml2::XMLElement* ); + bool SetPubDate ( const char* ); + bool SetPubDate ( tinyxml2::XMLElement* ); }; diff --git a/src/Pogger.cpp b/src/Pogger.cpp index 64ad1ea..3e84e72 100644 --- a/src/Pogger.cpp +++ b/src/Pogger.cpp @@ -5,7 +5,9 @@ #include "Item.h" #include "parsing.h" #include "Config.h" -#include "Rifen.h" +#include "Pogger.h" + +#include Config* main_cfg; @@ -13,11 +15,11 @@ int main ( int argc, char** argv ) { main_cfg = new Config; - usageMsg.ReplaceAll("%app%", "Rifen"); + usageMsg.ReplaceAll("%app%", "Pogger"); invocation( argc, argv, &main_cfg ); - main_cfg->targetFeeds.DoForEach(&processFeed); + return 0; } @@ -68,7 +70,8 @@ invocation ( int argc, char** argv, Config** cfgPtr ) break; case '?': if ( optopt == 'O' || optopt == 'm' ) - fprintf( stderr, "Option `-%c` requires an argument.\n\n", optopt ); + fprintf( stderr, "Option `-%c` requires an argument.\n\n", + optopt ); else fprintf( stderr, "Unknown option `-%c`.\n\n", optopt ); return 2; diff --git a/src/Pogger.h b/src/Pogger.h index 328b2ed..6c9f64f 100644 --- a/src/Pogger.h +++ b/src/Pogger.h @@ -3,7 +3,7 @@ int main ( int, char** ); int usage ( ); int invocation ( int, char**, Config** ); -void freeargInvocation ( int, char**, int, Config** ); +void freeargInvocation ( int, char**, int, Config** ); bool processItem ( void* ); bool processFeed ( void* ); diff --git a/src/Util.cpp b/src/Util.cpp index 30636d7..8a9172a 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -2,40 +2,62 @@ #include #include #include +#include #include #include "ProtocolListener.h" #include "Util.h" // ---------------------------------------------------------------------------- -int -stringDateToEpoch ( const char* dateCStr ) +BDateTime +feedDateToBDate ( const char* dateCStr ) { - std::istringstream ss( dateCStr ); - std::tm t = {}; - - if ( ss >> std::get_time( &t, "%a, %d %b %Y %H:%M:%S" ) ) - return std::mktime( &t ); - return -1; + BDateTime date = dateRfc822ToBDate( dateCStr ); + if ( date == NULL ) date = dateRfc3339ToBDate( dateCStr ); + return date; } -BString -stringDateToBString ( const char* dateCStr ) +BDateTime +dateRfc3339ToBDate ( const char* dateCStr ) { - std::istringstream ss( dateCStr ); - std::ostringstream dateStream; - std::tm t = {}; + return stringDateToBDate( dateCStr, "%Y-%m-%dT%H:%M:%S" ); +} - if ( ss >> std::get_time( &t, "%a, %d %b %Y %H:%M:%S" ) ) { - dateStream << std::put_time( &t, "%c" ); - std::string dateString = dateStream.str(); - return BString( dateStream.str().c_str() ); +BDateTime +dateRfc822ToBDate ( const char* dateCStr ) +{ + return stringDateToBDate( dateCStr, "%a, %d %b %Y %H:%M:%S" ); +} + +BDateTime +stringDateToBDate ( const char* dateCStr, const char* templateCStr ) +{ + std::istringstream dateStream( dateCStr ); + std::tm time = {}; + + if ( dateStream >> std::get_time( &time, templateCStr ) ) { + BTime newTime = BTime( time.tm_hour, time.tm_min, time.tm_sec, 0 ); + BDate newDate = BDate( time.tm_year + 1900, time.tm_mon + 1, time.tm_mday ); + return BDateTime( newDate, newTime ); } return NULL; } // ---------------------------------------------------------------------------- +BString +dateTo3339String ( BDateTime dt ) +{ + char buffer[18]; + sprintf( buffer, "%i-%02i-%02iT%02i:%02i:%02i", + dt.Date().Year(), dt.Date().Month(), dt.Date().Day(), + dt.Time().Hour(), dt.Time().Minute(), dt.Time().Second() ); + + return BString( buffer ); +} + +// ---------------------------------------------------------------------------- + int32 webFetch ( char* strUrl, BDataIO* reply ) { diff --git a/src/Util.h b/src/Util.h index d1580e7..ba16471 100644 --- a/src/Util.h +++ b/src/Util.h @@ -1,13 +1,20 @@ #ifndef UTIL_H #define UTIL_H +#include #include #include "ProtocolListener.h" -int stringDateToEpoch ( const char* dateStr ); -BString stringDateToBString ( const char* dateStr ); -int32 webFetch ( BUrl, BDataIO* ); -int32 webFetch ( char*, BDataIO* ); +BDateTime feedDateToBDate ( const char* ); +BDateTime dateRfc3339ToBDate ( const char* ); +BDateTime dateRfc822ToBDate ( const char* ); +BDateTime stringDateToBDate ( const char*, const char* ); + +BString dateTo3339String ( BDateTime ); + + +int32 webFetch ( BUrl, BDataIO* ); +int32 webFetch ( char*, BDataIO* ); #endif diff --git a/src/parsing.cpp b/src/parsing.cpp index db5ab93..4bdf480 100644 --- a/src/parsing.cpp +++ b/src/parsing.cpp @@ -19,9 +19,11 @@ feedParser ( Channel** chanPtr, Config* cfg ) if ( xml.FirstChildElement("rss") ) rssParser( chanPtr, cfg, &xml ); else if ( xml.FirstChildElement("feed") ) - printf("has atom\n"); + atomParser( chanPtr, cfg, &xml ); } +// ---------------------------------------------------------------------------- + void rssParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml ) { @@ -86,15 +88,109 @@ rssParseItems ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xchan ) } } +// ---------------------------------------------------------------------------- + +void +atomParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml ) +{ + Channel* chan = *(chanPtr); + + tinyxml2::XMLElement* xfeed = xml->FirstChildElement("feed"); + + atomRootParse( chanPtr, cfg, xfeed ); + atomParseEntries( chanPtr, cfg, xfeed ); +} + +void +atomRootParse( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xfeed ) +{ + Channel* chan = *(chanPtr); + + tinyxml2::XMLElement* xauthor = xfeed->FirstChildElement("author"); + tinyxml2::XMLElement* xentry = xfeed->FirstChildElement("entry"); + tinyxml2::XMLElement* xlink = xfeed->FirstChildElement("link"); + tinyxml2::XMLElement* xauthlink = xauthor->FirstChildElement("link"); + + bool set = false; + + chan->SetTitle( xfeed->FirstChildElement("title") ); + chan->SetDesc( xfeed->FirstChildElement("description") ); + + set = chan->SetLastDate( xfeed->FirstChildElement("updated") ); + if ( !set ) set = chan->SetLastDate( xfeed->FirstChildElement("published") ); + if ( !set && xentry ) set = chan->SetLastDate( xentry->FirstChildElement("updated") ); + if ( !set && xentry ) set = chan->SetLastDate( xentry->FirstChildElement("published") ); + + set = chan->SetHomePage( xlink->Attribute( "href" ) ); + if ( !set && xauthor ) set = chan->SetHomePage( xauthor->FirstChildElement("uri") ); + if ( !set && xauthlink ) set = chan->SetHomePage( xauthlink->Attribute( "href" ) ); + + if ( cfg->verbose ) + printf("Channel '%s' at '%s':\n", chan->title.String(), chan->homePage.String()); +} + +void +atomEntryParse ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xentry ) +{ + Channel* chan = *(chanPtr); + Item* newItem = (Item*)malloc( sizeof(Item) ); + newItem = new Item( chan->outputDir ); + + tinyxml2::XMLElement* xcontent = xentry->FirstChildElement("content"); + tinyxml2::XMLElement* xmedia = xentry->FirstChildElement("media:group"); + tinyxml2::XMLPrinter xprinter; + + newItem->SetTitle( xentry->FirstChildElement("title") ); + newItem->SetPostUrl( xentry->FirstChildElement("link")->Attribute("href") ); + + bool set = false; + set = newItem->SetDesc( xentry->FirstChildElement("summary") ); + if ( !set ) set = newItem->SetDesc( xentry->FirstChildElement("description")); + if ( !set && xmedia ) set = newItem->SetDesc( xmedia->FirstChildElement("media:description")); + + set = newItem->SetPubDate( xentry->FirstChildElement("updated") ); + if ( !set ) set = newItem->SetPubDate( xentry->FirstChildElement("published") ); + + if ( xcontent ) { + xcontent->Accept( &xprinter ); + newItem->SetContent( xprinter.CStr() ); + } + + if ( cfg->verbose ) + printf("\t%s\n", newItem->title.String()); + + chan->items.AddItem( newItem ); +} + +void +atomParseEntries ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xfeed ) +{ + Channel* chan = *(chanPtr); + tinyxml2::XMLElement* xentry; + + xentry = xfeed->FirstChildElement("entry"); + + int entryCount = xmlCountSiblings( xentry, "entry" ); + chan->items = BList(entryCount); + + if ( cfg->verbose ) + printf("\t-%i items-\n", entryCount); + + while ( xentry ) { + atomEntryParse( chanPtr, cfg, xentry ); + xentry = xentry->NextSiblingElement("entry"); + } +} + +// ---------------------------------------------------------------------------- + int xmlCountSiblings ( tinyxml2::XMLElement* xsibling, const char* sibling_name ) { int count = 0; - while ( xsibling ) { count++; xsibling = xsibling->NextSiblingElement(sibling_name); } - return count; } diff --git a/src/parsing.h b/src/parsing.h index 7827b6d..db4254b 100644 --- a/src/parsing.h +++ b/src/parsing.h @@ -5,11 +5,15 @@ #include "Config.h" #include "Channel.h" -void feedParser ( Channel**, Config* ); -void rssParser ( Channel**, Config*, tinyxml2::XMLDocument* ); -void rssRootParse ( Channel**, Config*, tinyxml2::XMLElement* ); -void rssItemParse ( Channel**, Config*, tinyxml2::XMLElement* ); -void rssParseItems ( Channel**, Config*, tinyxml2::XMLElement* ); -int xmlCountSiblings ( tinyxml2::XMLElement*, const char* ); +void feedParser ( Channel**, Config* ); +void rssParser ( Channel**, Config*, tinyxml2::XMLDocument* ); +void rssRootParse ( Channel**, Config*, tinyxml2::XMLElement* ); +void rssItemParse ( Channel**, Config*, tinyxml2::XMLElement* ); +void rssParseItems ( Channel**, Config*, tinyxml2::XMLElement* ); +void atomParser ( Channel**, Config*, tinyxml2::XMLDocument* ); +void atomRootParse ( Channel** chanPtr, Config*, tinyxml2::XMLElement* ); +void atomEntryParse ( Channel**, Config*, tinyxml2::XMLElement* ); +void atomParseEntries ( Channel**, Config*, tinyxml2::XMLElement* ); +int xmlCountSiblings ( tinyxml2::XMLElement*, const char* ); #endif