Add unixDate attribute; switch to BDate for pubdates
This commit is contained in:
parent
27b00a5202
commit
dd51380e6f
|
@ -12,7 +12,7 @@ Channel::Channel ( BString path, BString outputPath )
|
|||
homePage = BString("");
|
||||
xmlUrl = BString("");
|
||||
filePath = path;
|
||||
lastDate = BString("");
|
||||
// lastDate = NULL;
|
||||
topLevelSubject = "";
|
||||
lastSubject = "";
|
||||
outputDir = outputPath;
|
||||
|
@ -59,11 +59,10 @@ bool Channel::SetHomePage ( tinyxml2::XMLElement* elem ) {
|
|||
bool Channel::SetLastDate ( const char* dateCStr ) {
|
||||
if ( dateCStr == NULL )
|
||||
return false;
|
||||
|
||||
BString dateStr = stringDateToBString( dateCStr );
|
||||
if ( dateStr == NULL )
|
||||
BDateTime date = feedDateToBDate( dateCStr );
|
||||
if ( date == NULL )
|
||||
return false;
|
||||
lastDate = dateStr;
|
||||
lastDate = date;
|
||||
return true;
|
||||
}
|
||||
bool Channel::SetLastDate ( tinyxml2::XMLElement* elem ) {
|
||||
|
|
|
@ -13,7 +13,7 @@ public:
|
|||
char lang[3];
|
||||
BString title;
|
||||
BString description;
|
||||
BString lastDate;
|
||||
BDateTime lastDate;
|
||||
BString homePage;
|
||||
BString xmlUrl;
|
||||
BList items;
|
||||
|
|
78
src/Item.cpp
78
src/Item.cpp
|
@ -4,6 +4,7 @@
|
|||
#include <StorageKit.h>
|
||||
#include "Config.h"
|
||||
#include "Item.h"
|
||||
#include "Util.h"
|
||||
|
||||
Item::Item ( BString outputPath )
|
||||
{
|
||||
|
@ -12,7 +13,7 @@ Item::Item ( BString outputPath )
|
|||
homePage = BString("");
|
||||
postUrl = BString("");
|
||||
content = BString("");
|
||||
pubDate = BString("");
|
||||
// pubDate = NULL;
|
||||
outputDir = outputPath;
|
||||
}
|
||||
|
||||
|
@ -25,65 +26,78 @@ Item::Filetize ( Config* cfg, bool onlyIfNew = false )
|
|||
dir->CreateFile( title.String(), file );
|
||||
|
||||
BString betype = cfg->mimetype;
|
||||
if ( pubDate != NULL ) {
|
||||
int32 unixDate = (int32)pubDate.Time_t();
|
||||
file->WriteAttr( "unixDate", B_INT32_TYPE, 0,
|
||||
&unixDate, sizeof(int32) );
|
||||
file->WriteAttr( "pubDate", B_STRING_TYPE, 0,
|
||||
dateTo3339String(pubDate).String(),
|
||||
dateTo3339String(pubDate).CountChars() );
|
||||
}
|
||||
|
||||
file->WriteAttr( "META:title", B_STRING_TYPE, 0,
|
||||
title.String(), title.CountChars() );
|
||||
file->WriteAttr( "description", B_STRING_TYPE, 0,
|
||||
description.String(), description.CountChars() );
|
||||
file->WriteAttr( "pubDate", B_STRING_TYPE, 0,
|
||||
pubDate.String(), pubDate.CountChars() );
|
||||
file->WriteAttr( "META:url", B_STRING_TYPE, 0,
|
||||
postUrl.String(), postUrl.CountChars() );
|
||||
file->WriteAttr( "BEOS:TYPE", B_STRING_TYPE, 0,
|
||||
betype.String(), betype.CountChars() );
|
||||
|
||||
file->Write(content.String(), content.Length());
|
||||
// using file->Write with content converted to C string messes up length ofc
|
||||
// this is required to preserve length (because of UTF char substitutions in parsing.cpp)
|
||||
// const char* strPath = outputDir.String();
|
||||
// std::string path(strPath);
|
||||
// path += std::string(title.String());
|
||||
// std::cout << path << std::endl;
|
||||
//
|
||||
// std::ofstream pFile(path);
|
||||
// pFile << content;
|
||||
// pFile.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
void Item::SetTitle ( const char* titleStr ) {
|
||||
bool Item::SetTitle ( const char* titleStr ) {
|
||||
if ( titleStr != NULL ) title = BString( titleStr );
|
||||
else return false;
|
||||
return true;
|
||||
}
|
||||
// Store the item's title from the text of an XML element.
// Returns false when elem is NULL; otherwise forwards the result of the
// const char* overload, which rejects a NULL text.
bool Item::SetTitle ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return false;

	return SetTitle( elem->GetText() );
}
|
||||
|
||||
void Item::SetDesc ( const char* descStr ) {
|
||||
bool Item::SetDesc ( const char* descStr ) {
|
||||
if ( descStr != NULL ) description = BString( descStr );
|
||||
else return false;
|
||||
return true;
|
||||
}
|
||||
// Store the item's description from the text of an XML element.
// Returns false when elem is NULL; otherwise forwards the result of the
// const char* overload, which rejects a NULL text.
bool Item::SetDesc ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return false;

	return SetDesc( elem->GetText() );
}
|
||||
|
||||
void Item::SetContent ( const char* contentStr ) {
|
||||
bool Item::SetContent ( const char* contentStr ) {
|
||||
if ( contentStr != NULL ) content = BString( contentStr );
|
||||
else return false;
|
||||
return true;
|
||||
}
|
||||
// Store the item's body content from the text of an XML element.
// Returns false when elem is NULL; otherwise forwards the result of the
// const char* overload, which rejects a NULL text.
bool Item::SetContent ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return false;

	return SetContent( elem->GetText() );
}
|
||||
|
||||
void Item::SetPostUrl ( const char* urlStr ) {
|
||||
if ( urlStr != NULL )
|
||||
postUrl = BString( urlStr );
|
||||
bool Item::SetPostUrl ( const char* urlStr ) {
|
||||
if ( urlStr != NULL ) postUrl = BString( urlStr );
|
||||
else return false;
|
||||
return true;
|
||||
}
|
||||
// Store the post URL from the text of an XML element.
// Returns false when elem is NULL; otherwise forwards the result of the
// const char* overload, which rejects a NULL text.
bool Item::SetPostUrl ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return false;

	return SetPostUrl( elem->GetText() );
}
|
||||
|
||||
void Item::SetPubDate ( const char* dateStr ) {
|
||||
if ( dateStr != NULL )
|
||||
pubDate = BString( dateStr );
|
||||
bool Item::SetPubDate ( const char* dateStr ) {
|
||||
if ( dateStr == NULL )
|
||||
return false;
|
||||
BDateTime date = feedDateToBDate( dateStr );
|
||||
if ( date == NULL )
|
||||
return false;
|
||||
pubDate = date;
|
||||
return true;
|
||||
}
|
||||
// Store the publication date from the text of an XML element.
// Returns false when elem is NULL; otherwise forwards the result of the
// const char* overload.
bool Item::SetPubDate ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return false;

	return SetPubDate( elem->GetText() );
}
|
||||
|
|
22
src/Item.h
22
src/Item.h
|
@ -11,7 +11,7 @@ class Item {
|
|||
public:
|
||||
BString title;
|
||||
BString description;
|
||||
BString pubDate;
|
||||
BDateTime pubDate;
|
||||
BString homePage;
|
||||
BString postUrl;
|
||||
BString content;
|
||||
|
@ -21,16 +21,16 @@ public:
|
|||
|
||||
bool Filetize ( Config*, bool );
|
||||
|
||||
void SetTitle ( const char* );
|
||||
void SetTitle ( tinyxml2::XMLElement* );
|
||||
void SetDesc ( const char* );
|
||||
void SetDesc ( tinyxml2::XMLElement* );
|
||||
void SetContent ( const char* );
|
||||
void SetContent ( tinyxml2::XMLElement* );
|
||||
void SetPostUrl ( const char* );
|
||||
void SetPostUrl ( tinyxml2::XMLElement* );
|
||||
void SetPubDate ( const char* );
|
||||
void SetPubDate ( tinyxml2::XMLElement* );
|
||||
bool SetTitle ( const char* );
|
||||
bool SetTitle ( tinyxml2::XMLElement* );
|
||||
bool SetDesc ( const char* );
|
||||
bool SetDesc ( tinyxml2::XMLElement* );
|
||||
bool SetContent ( const char* );
|
||||
bool SetContent ( tinyxml2::XMLElement* );
|
||||
bool SetPostUrl ( const char* );
|
||||
bool SetPostUrl ( tinyxml2::XMLElement* );
|
||||
bool SetPubDate ( const char* );
|
||||
bool SetPubDate ( tinyxml2::XMLElement* );
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -5,7 +5,9 @@
|
|||
#include "Item.h"
|
||||
#include "parsing.h"
|
||||
#include "Config.h"
|
||||
#include "Rifen.h"
|
||||
#include "Pogger.h"
|
||||
|
||||
#include <StorageKit.h>
|
||||
|
||||
Config* main_cfg;
|
||||
|
||||
|
@ -13,11 +15,11 @@ int
|
|||
main ( int argc, char** argv )
|
||||
{
|
||||
main_cfg = new Config;
|
||||
usageMsg.ReplaceAll("%app%", "Rifen");
|
||||
usageMsg.ReplaceAll("%app%", "Pogger");
|
||||
|
||||
invocation( argc, argv, &main_cfg );
|
||||
|
||||
main_cfg->targetFeeds.DoForEach(&processFeed);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -68,7 +70,8 @@ invocation ( int argc, char** argv, Config** cfgPtr )
|
|||
break;
|
||||
case '?':
|
||||
if ( optopt == 'O' || optopt == 'm' )
|
||||
fprintf( stderr, "Option `-%c` requires an argument.\n\n", optopt );
|
||||
fprintf( stderr, "Option `-%c` requires an argument.\n\n",
|
||||
optopt );
|
||||
else
|
||||
fprintf( stderr, "Unknown option `-%c`.\n\n", optopt );
|
||||
return 2;
|
||||
|
|
56
src/Util.cpp
56
src/Util.cpp
|
@ -2,40 +2,62 @@
|
|||
#include <sstream>
|
||||
#include <locale>
|
||||
#include <iomanip>
|
||||
#include <DateTime.h>
|
||||
#include <HttpRequest.h>
|
||||
#include "ProtocolListener.h"
|
||||
#include "Util.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
int
|
||||
stringDateToEpoch ( const char* dateCStr )
|
||||
BDateTime
|
||||
feedDateToBDate ( const char* dateCStr )
|
||||
{
|
||||
std::istringstream ss( dateCStr );
|
||||
std::tm t = {};
|
||||
|
||||
if ( ss >> std::get_time( &t, "%a, %d %b %Y %H:%M:%S" ) )
|
||||
return std::mktime( &t );
|
||||
return -1;
|
||||
BDateTime date = dateRfc822ToBDate( dateCStr );
|
||||
if ( date == NULL ) date = dateRfc3339ToBDate( dateCStr );
|
||||
return date;
|
||||
}
|
||||
|
||||
BString
|
||||
stringDateToBString ( const char* dateCStr )
|
||||
BDateTime
|
||||
dateRfc3339ToBDate ( const char* dateCStr )
|
||||
{
|
||||
std::istringstream ss( dateCStr );
|
||||
std::ostringstream dateStream;
|
||||
std::tm t = {};
|
||||
return stringDateToBDate( dateCStr, "%Y-%m-%dT%H:%M:%S" );
|
||||
}
|
||||
|
||||
if ( ss >> std::get_time( &t, "%a, %d %b %Y %H:%M:%S" ) ) {
|
||||
dateStream << std::put_time( &t, "%c" );
|
||||
std::string dateString = dateStream.str();
|
||||
return BString( dateStream.str().c_str() );
|
||||
BDateTime
|
||||
dateRfc822ToBDate ( const char* dateCStr )
|
||||
{
|
||||
return stringDateToBDate( dateCStr, "%a, %d %b %Y %H:%M:%S" );
|
||||
}
|
||||
|
||||
// Parse dateCStr according to a std::get_time template and build a
// BDateTime from the parsed fields.
//   dateCStr      text to parse
//   templateCStr  std::get_time conversion specification
// Returns NULL (converted to BDateTime) when either argument is NULL or the
// text does not match the template. Only the calendar date and the
// hour/minute/second fields are carried over; the fourth BTime argument is
// always 0.
BDateTime
stringDateToBDate ( const char* dateCStr, const char* templateCStr )
{
	// A std::istringstream must not be constructed from a null pointer.
	if ( dateCStr == NULL || templateCStr == NULL )
		return NULL;

	std::istringstream dateStream( dateCStr );
	std::tm time = {};

	if ( !( dateStream >> std::get_time( &time, templateCStr ) ) )
		return NULL;

	// struct tm counts years from 1900 and months from 0.
	BDate newDate = BDate( time.tm_year + 1900, time.tm_mon + 1, time.tm_mday );
	BTime newTime = BTime( time.tm_hour, time.tm_min, time.tm_sec, 0 );
	return BDateTime( newDate, newTime );
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Format dt as "YYYY-MM-DDTHH:MM:SS" and return it as a BString.
BString
dateTo3339String ( BDateTime dt )
{
	// A four-digit year gives 19 characters plus the terminating NUL, which
	// did not fit the former 18-byte buffer. The buffer is now roomier and
	// snprintf() truncates rather than writing past the end should a field
	// ever be wider than expected.
	char buffer[32];
	snprintf( buffer, sizeof(buffer), "%i-%02i-%02iT%02i:%02i:%02i",
		dt.Date().Year(), dt.Date().Month(), dt.Date().Day(),
		dt.Time().Hour(), dt.Time().Minute(), dt.Time().Second() );

	return BString( buffer );
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
int32
|
||||
webFetch ( char* strUrl, BDataIO* reply )
|
||||
{
|
||||
|
|
11
src/Util.h
11
src/Util.h
|
@ -1,11 +1,18 @@
|
|||
#ifndef UTIL_H
|
||||
#define UTIL_H
|
||||
|
||||
#include <DateTime.h>
|
||||
#include <Url.h>
|
||||
#include "ProtocolListener.h"
|
||||
|
||||
int stringDateToEpoch ( const char* dateStr );
|
||||
BString stringDateToBString ( const char* dateStr );
|
||||
BDateTime feedDateToBDate ( const char* );
|
||||
BDateTime dateRfc3339ToBDate ( const char* );
|
||||
BDateTime dateRfc822ToBDate ( const char* );
|
||||
BDateTime stringDateToBDate ( const char*, const char* );
|
||||
|
||||
BString dateTo3339String ( BDateTime );
|
||||
|
||||
|
||||
int32 webFetch ( BUrl, BDataIO* );
|
||||
int32 webFetch ( char*, BDataIO* );
|
||||
|
||||
|
|
102
src/parsing.cpp
102
src/parsing.cpp
|
@ -19,9 +19,11 @@ feedParser ( Channel** chanPtr, Config* cfg )
|
|||
if ( xml.FirstChildElement("rss") )
|
||||
rssParser( chanPtr, cfg, &xml );
|
||||
else if ( xml.FirstChildElement("feed") )
|
||||
printf("has atom\n");
|
||||
atomParser( chanPtr, cfg, &xml );
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
rssParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml )
|
||||
{
|
||||
|
@ -86,15 +88,109 @@ rssParseItems ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xchan )
|
|||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
atomParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
|
||||
tinyxml2::XMLElement* xfeed = xml->FirstChildElement("feed");
|
||||
|
||||
atomRootParse( chanPtr, cfg, xfeed );
|
||||
atomParseEntries( chanPtr, cfg, xfeed );
|
||||
}
|
||||
|
||||
void
|
||||
atomRootParse( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xfeed )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
|
||||
tinyxml2::XMLElement* xauthor = xfeed->FirstChildElement("author");
|
||||
tinyxml2::XMLElement* xentry = xfeed->FirstChildElement("entry");
|
||||
tinyxml2::XMLElement* xlink = xfeed->FirstChildElement("link");
|
||||
tinyxml2::XMLElement* xauthlink = xauthor->FirstChildElement("link");
|
||||
|
||||
bool set = false;
|
||||
|
||||
chan->SetTitle( xfeed->FirstChildElement("title") );
|
||||
chan->SetDesc( xfeed->FirstChildElement("description") );
|
||||
|
||||
set = chan->SetLastDate( xfeed->FirstChildElement("updated") );
|
||||
if ( !set ) set = chan->SetLastDate( xfeed->FirstChildElement("published") );
|
||||
if ( !set && xentry ) set = chan->SetLastDate( xentry->FirstChildElement("updated") );
|
||||
if ( !set && xentry ) set = chan->SetLastDate( xentry->FirstChildElement("published") );
|
||||
|
||||
set = chan->SetHomePage( xlink->Attribute( "href" ) );
|
||||
if ( !set && xauthor ) set = chan->SetHomePage( xauthor->FirstChildElement("uri") );
|
||||
if ( !set && xauthlink ) set = chan->SetHomePage( xauthlink->Attribute( "href" ) );
|
||||
|
||||
if ( cfg->verbose )
|
||||
printf("Channel '%s' at '%s':\n", chan->title.String(), chan->homePage.String());
|
||||
}
|
||||
|
||||
void
|
||||
atomEntryParse ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xentry )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
Item* newItem = (Item*)malloc( sizeof(Item) );
|
||||
newItem = new Item( chan->outputDir );
|
||||
|
||||
tinyxml2::XMLElement* xcontent = xentry->FirstChildElement("content");
|
||||
tinyxml2::XMLElement* xmedia = xentry->FirstChildElement("media:group");
|
||||
tinyxml2::XMLPrinter xprinter;
|
||||
|
||||
newItem->SetTitle( xentry->FirstChildElement("title") );
|
||||
newItem->SetPostUrl( xentry->FirstChildElement("link")->Attribute("href") );
|
||||
|
||||
bool set = false;
|
||||
set = newItem->SetDesc( xentry->FirstChildElement("summary") );
|
||||
if ( !set ) set = newItem->SetDesc( xentry->FirstChildElement("description"));
|
||||
if ( !set && xmedia ) set = newItem->SetDesc( xmedia->FirstChildElement("media:description"));
|
||||
|
||||
set = newItem->SetPubDate( xentry->FirstChildElement("updated") );
|
||||
if ( !set ) set = newItem->SetPubDate( xentry->FirstChildElement("published") );
|
||||
|
||||
if ( xcontent ) {
|
||||
xcontent->Accept( &xprinter );
|
||||
newItem->SetContent( xprinter.CStr() );
|
||||
}
|
||||
|
||||
if ( cfg->verbose )
|
||||
printf("\t%s\n", newItem->title.String());
|
||||
|
||||
chan->items.AddItem( newItem );
|
||||
}
|
||||
|
||||
void
|
||||
atomParseEntries ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xfeed )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
tinyxml2::XMLElement* xentry;
|
||||
|
||||
xentry = xfeed->FirstChildElement("entry");
|
||||
|
||||
int entryCount = xmlCountSiblings( xentry, "entry" );
|
||||
chan->items = BList(entryCount);
|
||||
|
||||
if ( cfg->verbose )
|
||||
printf("\t-%i items-\n", entryCount);
|
||||
|
||||
while ( xentry ) {
|
||||
atomEntryParse( chanPtr, cfg, xentry );
|
||||
xentry = xentry->NextSiblingElement("entry");
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
int
|
||||
xmlCountSiblings ( tinyxml2::XMLElement* xsibling, const char* sibling_name )
|
||||
{
|
||||
int count = 0;
|
||||
|
||||
while ( xsibling ) {
|
||||
count++;
|
||||
xsibling = xsibling->NextSiblingElement(sibling_name);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
|
|
@ -10,6 +10,10 @@ void rssParser ( Channel**, Config*, tinyxml2::XMLDocument* );
|
|||
void rssRootParse ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||
void rssItemParse ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||
void rssParseItems ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||
void atomParser ( Channel**, Config*, tinyxml2::XMLDocument* );
|
||||
void atomRootParse ( Channel** chanPtr, Config*, tinyxml2::XMLElement* );
|
||||
void atomEntryParse ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||
void atomParseEntries ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||
int xmlCountSiblings ( tinyxml2::XMLElement*, const char* );
|
||||
|
||||
#endif
|
||||
|
|
Ŝarĝante…
Reference in New Issue