Split Feed into RssFeed and AtomFeed; merged parsing functions into classes

This commit is contained in:
Jaidyn Ann 2020-07-08 04:43:35 -05:00
parent f9e6a53cb7
commit 6b7756f50b
10 changed files with 311 additions and 247 deletions

View File

@ -31,6 +31,8 @@ APP_MIME_SIG = application/x-vnd.Pogger
SRCS = \ SRCS = \
src/Feed.cpp, \ src/Feed.cpp, \
src/Entry.cpp, \ src/Entry.cpp, \
src/AtomFeed.cpp, \
src/RssFeed.cpp, \
src/ProtocolListener.cpp, \ src/ProtocolListener.cpp, \
src/Config.cpp, \ src/Config.cpp, \
src/Util.cpp, \ src/Util.cpp, \

108
src/AtomFeed.cpp Normal file
View File

@ -0,0 +1,108 @@
#include <tinyxml2.h>
#include "Entry.h"
#include "Config.h"
#include "Util.h"
#include "AtomFeed.h"
// Builds an Atom feed that will be read from the XML file at `path`.
// `outputPath` is stored as outputDir, which EntryParse() later hands to every
// Entry it creates. The remaining metadata is given placeholder values.
AtomFeed::AtomFeed ( BString path, BString outputPath )
{
	filePath    = path;
	outputDir   = outputPath;

	title       = "Untitled Feed";
	description = "";
	homeUrl     = "";
	xmlUrl      = "";
}
// ----------------------------------------------------------------------------
// Loads the Atom document at filePath and fills in this feed's metadata and
// entry list from it.
//
// The entry list is always reset first. If the file cannot be loaded, or it
// has no top-level <feed> element, the feed is left with its current metadata
// and an empty entry list instead of dereferencing a null element.
void
AtomFeed::Parse ( Config* cfg )
{
	entries = BList();

	tinyxml2::XMLDocument xml;
	if ( xml.LoadFile( filePath.String() ) != tinyxml2::XML_SUCCESS )
		return;

	tinyxml2::XMLElement* xfeed = xml.FirstChildElement("feed");
	if ( !xfeed )
		return;

	RootParse( cfg, xfeed );
	ParseEntries( cfg, xfeed );
}
// Reads the feed-level metadata (title, description, date, home URL) from the
// <feed> element `xfeed`, which must not be null.
//
// Date fallback order: feed <updated>, feed <published>, then the first
// entry's <updated> and <published>.
// Home URL fallback order: feed <link href>, <author><uri>, then
// <author><link href>.
//
// <author> and <link> are both optional in practice, so neither is
// dereferenced without a null check.
void
AtomFeed::RootParse( Config* cfg, tinyxml2::XMLElement* xfeed )
{
	tinyxml2::XMLElement* xauthor = xfeed->FirstChildElement("author");
	tinyxml2::XMLElement* xentry = xfeed->FirstChildElement("entry");
	tinyxml2::XMLElement* xlink = xfeed->FirstChildElement("link");

	// Only look inside <author> when the feed actually has one.
	tinyxml2::XMLElement* xauthlink = NULL;
	if ( xauthor )
		xauthlink = xauthor->FirstChildElement("link");

	bool set = false;

	SetTitle( xfeed->FirstChildElement("title") );
	SetDesc( xfeed->FirstChildElement("description") );

	set = SetDate( xfeed->FirstChildElement("updated") );
	if ( !set ) set = SetDate( xfeed->FirstChildElement("published") );
	if ( !set && xentry ) set = SetDate( xentry->FirstChildElement("updated") );
	if ( !set && xentry ) set = SetDate( xentry->FirstChildElement("published") );

	// The date result must not leak into the URL fallback chain.
	set = false;
	if ( xlink ) set = SetHomeUrl( xlink->Attribute( "href" ) );
	if ( !set && xauthor ) set = SetHomeUrl( xauthor->FirstChildElement("uri") );
	if ( !set && xauthlink ) set = SetHomeUrl( xauthlink->Attribute( "href" ) );

	if ( cfg->verbose )
		printf("Channel '%s' at '%s':\n", title.String(), homeUrl.String());
}
// Builds one Entry from the <entry> element `xentry` and appends it to
// `entries` when its date passes withinDateRange() for cfg's min/max dates.
//
// Description fallback order: <summary>, <description>, then
// <media:group><media:description>. Date: <updated>, then <published>.
// The <content> element, when present, is serialised back to XML text and
// stored as the entry's content.
//
// The Entry is allocated exactly once; one that falls outside the date range
// is freed here, because nothing else holds a pointer to it.
void
AtomFeed::EntryParse ( Config* cfg, tinyxml2::XMLElement* xentry )
{
	Entry* newEntry = new Entry( outputDir );

	tinyxml2::XMLElement* xcontent = xentry->FirstChildElement("content");
	tinyxml2::XMLElement* xmedia = xentry->FirstChildElement("media:group");
	tinyxml2::XMLElement* xlink = xentry->FirstChildElement("link");
	tinyxml2::XMLPrinter xprinter;

	newEntry->SetTitle( xentry->FirstChildElement("title") );

	// An entry without <link> simply keeps its default post URL.
	if ( xlink )
		newEntry->SetPostUrl( xlink->Attribute("href") );

	bool set = false;
	set = newEntry->SetDesc( xentry->FirstChildElement("summary") );
	if ( !set ) set = newEntry->SetDesc( xentry->FirstChildElement("description"));
	if ( !set && xmedia ) set = newEntry->SetDesc( xmedia->FirstChildElement("media:description"));

	set = newEntry->SetDate( xentry->FirstChildElement("updated") );
	if ( !set ) set = newEntry->SetDate( xentry->FirstChildElement("published") );

	if ( xcontent ) {
		xcontent->Accept( &xprinter );
		newEntry->SetContent( xprinter.CStr() );
	}

	if ( cfg->verbose )
		printf("\t%s\n", newEntry->title.String());

	if ( withinDateRange( cfg->minDate, newEntry->date, cfg->maxDate ) )
		entries.AddItem( newEntry );
	else
		delete newEntry;
}
// Replaces `entries` with a fresh list sized by the number of <entry>
// elements under `xfeed`, then runs EntryParse() on each of them in document
// order. Prints the entry count first when cfg->verbose is set.
void
AtomFeed::ParseEntries ( Config* cfg, tinyxml2::XMLElement* xfeed )
{
	tinyxml2::XMLElement* first = xfeed->FirstChildElement("entry");
	int total = xmlCountSiblings( first, "entry" );

	entries = BList(total);

	if ( cfg->verbose )
		printf("\t-%i entries-\n", total);

	for ( tinyxml2::XMLElement* cur = first; cur; cur = cur->NextSiblingElement("entry") )
		EntryParse( cfg, cur );
}

22
src/AtomFeed.h Normal file
View File

@ -0,0 +1,22 @@
#ifndef ATOM_FEED_H
#define ATOM_FEED_H
#include <tinyxml2.h>
#include <DateTime.h>
#include <String.h>
#include <List.h>
#include <Url.h>
#include "Config.h"
#include "Feed.h"
class AtomFeed: public Feed {
public:
AtomFeed ( BString, BString );
void Parse ( Config* );
void RootParse ( Config*, tinyxml2::XMLElement* );
void EntryParse ( Config*, tinyxml2::XMLElement* );
void ParseEntries ( Config*, tinyxml2::XMLElement* );
};
#endif

View File

@ -1,29 +1,64 @@
#include <tinyxml2.h> #include <tinyxml2.h>
#include "Entry.h" #include "Entry.h"
#include "Config.h" #include "Config.h"
#include "parsing.h"
#include "Util.h" #include "Util.h"
#include "Feed.h" #include "Feed.h"
Feed::Feed ( BString path, BString outputPath ) Feed::Feed ( BString path )
{ {
title = BString("Untitled Feed"); title = BString("Untitled Feed");
description = BString("Nondescript, N/A."); description = BString("Nondescript, N/A.");
homeUrl = BString(""); homeUrl = BString("");
xmlUrl = BString(""); xmlUrl = BString("");
filePath = path; filePath = path;
// lastDate = NULL;
outputDir = outputPath;
} }
void Feed::Feed () {
Feed::Parse ( Config* cfg ) title = BString("");
{ description = BString("");
entries = BList(); homeUrl = BString("");
Feed* feed = this; xmlUrl = BString("");
feedParser(&feed, cfg);
} }
// ----------------------------------------------------------------------------
bool
Feed::IsRss ( )
{
tinyxml2::XMLDocument xml;
xml.LoadFile( filePath.String() );
if ( xml.FirstChildElement("rss") )
return true;
return false;
}
bool
Feed::IsAtom ( )
{
tinyxml2::XMLDocument xml;
xml.LoadFile( filePath.String() );
if ( xml.FirstChildElement("feed") )
return true;
return false;
}
// ----------------------------------------------------------------------------
// Counts `xsibling` itself plus every later sibling element named
// `sibling_name`. A null `xsibling` gives 0. The starting element is counted
// without checking its own name.
int
Feed::xmlCountSiblings ( tinyxml2::XMLElement* xsibling, const char* sibling_name )
{
	int total = 0;

	for ( tinyxml2::XMLElement* cur = xsibling; cur; cur = cur->NextSiblingElement(sibling_name) )
		++total;

	return total;
}
// ----------------------------------------------------------------------------
bool Feed::SetTitle ( const char* titleStr ) { bool Feed::SetTitle ( const char* titleStr ) {
if ( titleStr != NULL ) title = BString( titleStr ); if ( titleStr != NULL ) title = BString( titleStr );
else return false; else return false;
@ -68,3 +103,4 @@ bool Feed::SetDate ( tinyxml2::XMLElement* elem ) {
else return false; else return false;
} }

View File

@ -10,21 +10,17 @@
class Feed { class Feed {
public: public:
char lang[3]; Feed ( BString );
Feed ( );
BString title; BString title;
BString description; BString description;
BDateTime date; BDateTime date;
BString homeUrl; BString homeUrl;
BString xmlUrl; BString xmlUrl;
BList entries;
BString topLevelSubject;
BString lastSubject;
BString filePath; BString filePath;
BString outputDir; BString outputDir;
BList entries;
Feed ( BString, BString );
void Parse ( Config* ); void Parse ( Config* );
@ -36,6 +32,12 @@ public:
bool SetDate ( tinyxml2::XMLElement* ); bool SetDate ( tinyxml2::XMLElement* );
bool SetHomeUrl ( const char* ); bool SetHomeUrl ( const char* );
bool SetHomeUrl ( tinyxml2::XMLElement* ); bool SetHomeUrl ( tinyxml2::XMLElement* );
bool IsRss ( );
bool IsAtom ( );
protected:
int xmlCountSiblings ( tinyxml2::XMLElement*, const char* );
}; };
#endif #endif

View File

@ -1,9 +1,10 @@
#include <StorageKit.h> #include <StorageKit.h>
#include <String.h> #include <String.h>
#include <getopt.h> #include <getopt.h>
#include "AtomFeed.h"
#include "RssFeed.h"
#include "Feed.h" #include "Feed.h"
#include "Entry.h" #include "Entry.h"
#include "parsing.h"
#include "Config.h" #include "Config.h"
#include "Util.h" #include "Util.h"
#include "Pogger.h" #include "Pogger.h"
@ -14,7 +15,6 @@ main ( int argc, char** argv )
main_cfg = new Config; main_cfg = new Config;
usageMsg.ReplaceAll("%app%", "Pogger"); usageMsg.ReplaceAll("%app%", "Pogger");
invocation( argc, argv, &main_cfg ); invocation( argc, argv, &main_cfg );
main_cfg->Load(); main_cfg->Load();
main_cfg->targetFeeds.DoForEach( &processFeed ); main_cfg->targetFeeds.DoForEach( &processFeed );
@ -109,7 +109,7 @@ invocation ( int argc, char** argv, Config** cfgPtr )
} }
} }
// ――――――――――――――――― // -------------------------------------
void void
freeargInvocation ( int argc, char** argv, int optind, Config** cfgPtr ) freeargInvocation ( int argc, char** argv, int optind, Config** cfgPtr )
@ -140,13 +140,22 @@ bool
processFeed ( void* feedArg ) processFeed ( void* feedArg )
{ {
BString* feedStr = (BString*)feedArg; BString* feedStr = (BString*)feedArg;
Feed* testFeed = new Feed( *(feedStr) );
Feed* feed = (Feed*)malloc( sizeof(feed) ); BList entries;
feed = new Feed(*(feedStr), main_cfg->outDir);
feed->Parse(main_cfg);
BList entries = feed->entries;
entries.DoForEach(&processEntry);
free(feed);
if ( testFeed->IsAtom() ) {
AtomFeed* feed = (AtomFeed*)malloc( sizeof(AtomFeed) );
feed = new AtomFeed( *(feedStr), main_cfg->outDir );
feed->Parse(main_cfg);
entries = feed->entries;
}
if ( testFeed->IsRss() ) {
RssFeed* feed = (RssFeed*)malloc( sizeof(RssFeed) );
feed = new RssFeed( *(feedStr), main_cfg->outDir );
feed->Parse(main_cfg);
entries = feed->entries;
}
entries.DoForEach(&processEntry);
return false; return false;
} }

82
src/RssFeed.cpp Normal file
View File

@ -0,0 +1,82 @@
#include <tinyxml2.h>
#include "Entry.h"
#include "Config.h"
#include "Util.h"
#include "RssFeed.h"
// Builds an RSS feed that will be read from the XML file at `path`.
// `outputPath` is stored as outputDir, which EntryParse() later hands to every
// Entry it creates. The remaining metadata is given placeholder values.
RssFeed::RssFeed ( BString path, BString outputPath )
{
	filePath    = path;
	outputDir   = outputPath;

	title       = "Untitled Feed";
	description = "";
	homeUrl     = "";
	xmlUrl      = "";
}
// ----------------------------------------------------------------------------
// Loads the RSS document at filePath and fills in this feed's metadata and
// entry list from its <rss><channel> element.
//
// The entry list is always reset first. If the file cannot be loaded, or it
// lacks <rss> or <channel>, the feed is left with its current metadata and
// an empty entry list instead of dereferencing a null element.
void
RssFeed::Parse ( Config* cfg )
{
	entries = BList();

	tinyxml2::XMLDocument xml;
	if ( xml.LoadFile( filePath.String() ) != tinyxml2::XML_SUCCESS )
		return;

	tinyxml2::XMLElement* xrss = xml.FirstChildElement("rss");
	if ( !xrss )
		return;

	tinyxml2::XMLElement* xchan = xrss->FirstChildElement("channel");
	if ( !xchan )
		return;

	RootParse( cfg, xchan );
	ParseEntries( cfg, xchan );
}
// -------------------------------------
// Copies the channel-level metadata out of the <channel> element `xchan`:
// <title>, <description>, <link> (home URL) and <lastBuildDate> (date).
// Prints a one-line summary when cfg->verbose is set.
void
RssFeed::RootParse ( Config* cfg, tinyxml2::XMLElement* xchan )
{
	tinyxml2::XMLElement* xtitle = xchan->FirstChildElement("title");
	tinyxml2::XMLElement* xdesc  = xchan->FirstChildElement("description");
	tinyxml2::XMLElement* xhome  = xchan->FirstChildElement("link");
	tinyxml2::XMLElement* xbuilt = xchan->FirstChildElement("lastBuildDate");

	SetTitle( xtitle );
	SetDesc( xdesc );
	SetHomeUrl( xhome );
	SetDate( xbuilt );

	if ( cfg->verbose ) {
		printf("Channel '%s' at '%s':\n", title.String(), homeUrl.String());
	}
}
// Builds one Entry from the <item> element `xitem` (<title>, <description>,
// <pubDate>, <link>, <content:encoded>) and appends it to `entries` when its
// date passes withinDateRange() for cfg's min/max dates.
//
// The Entry is allocated exactly once; one that falls outside the date range
// is freed here, because nothing else holds a pointer to it.
void
RssFeed::EntryParse ( Config* cfg, tinyxml2::XMLElement* xitem )
{
	Entry* newEntry = new Entry( outputDir );

	newEntry->SetTitle   ( xitem->FirstChildElement("title") );
	newEntry->SetDesc    ( xitem->FirstChildElement("description") );
	newEntry->SetDate    ( xitem->FirstChildElement("pubDate") );
	newEntry->SetPostUrl ( xitem->FirstChildElement("link") );
	newEntry->SetContent ( xitem->FirstChildElement("content:encoded") );

	if ( cfg->verbose )
		printf("\t%s\n", newEntry->title.String());

	if ( withinDateRange( cfg->minDate, newEntry->date, cfg->maxDate ) )
		entries.AddItem( newEntry );
	else
		delete newEntry;
}
// Replaces `entries` with a fresh list sized by the number of <item>
// elements under `xchan`, then runs EntryParse() on each of them in document
// order. Prints the item count first when cfg->verbose is set.
void
RssFeed::ParseEntries ( Config* cfg, tinyxml2::XMLElement* xchan )
{
	tinyxml2::XMLElement* first = xchan->FirstChildElement("item");
	int total = xmlCountSiblings( first, "item" );

	entries = BList(total);

	if ( cfg->verbose )
		printf("\t-%i entries-\n", total);

	for ( tinyxml2::XMLElement* cur = first; cur; cur = cur->NextSiblingElement("item") )
		EntryParse( cfg, cur );
}

22
src/RssFeed.h Normal file
View File

@ -0,0 +1,22 @@
#ifndef RSS_FEED_H
#define RSS_FEED_H
#include <tinyxml2.h>
#include <DateTime.h>
#include <String.h>
#include <List.h>
#include <Url.h>
#include "Config.h"
#include "Feed.h"
class RssFeed: public Feed {
public:
RssFeed ( BString, BString );
void Parse ( Config* );
void RootParse ( Config*, tinyxml2::XMLElement* );
void EntryParse ( Config*, tinyxml2::XMLElement* );
void ParseEntries ( Config*, tinyxml2::XMLElement* );
};
#endif

View File

@ -1,200 +0,0 @@
#include <iostream>
#include <sstream>
#include <tinyxml2.h>
#include "Feed.h"
#include "Entry.h"
#include "Util.h"
#include "parsing.h"
// ============================================================================
// PARSERS
// Loads the XML file named by the feed's filePath and dispatches on its
// top-level element: <rss> goes to rssParser(), otherwise <feed> goes to
// atomParser(). A document with neither is left unparsed.
void
feedParser ( Feed** feedPtr, Config* cfg )
{
	tinyxml2::XMLDocument doc;
	doc.LoadFile( (*feedPtr)->filePath.String() );

	if ( doc.FirstChildElement("rss") ) {
		rssParser( feedPtr, cfg, &doc );
		return;
	}

	if ( doc.FirstChildElement("feed") ) {
		atomParser( feedPtr, cfg, &doc );
	}
}
// ----------------------------------------------------------------------------
// Parses an RSS document into the feed behind `feedPtr`: channel metadata
// first, then every item.
//
// Does nothing when the document has no <rss> element or the <rss> element
// has no <channel>, rather than handing a null element to the helpers.
void
rssParser ( Feed** feedPtr, Config* cfg, tinyxml2::XMLDocument* xml )
{
	tinyxml2::XMLElement* xrss = xml->FirstChildElement("rss");
	if ( !xrss )
		return;

	tinyxml2::XMLElement* xchan = xrss->FirstChildElement("channel");
	if ( !xchan )
		return;

	rssRootParse( feedPtr, cfg, xchan );
	rssParseEntries( feedPtr, cfg, xchan );
}
// Copies the channel-level metadata out of the <channel> element `xchan` into
// the feed behind `feedPtr`: <title>, <description>, <link> (home URL) and
// <lastBuildDate> (date). Prints a one-line summary when cfg->verbose is set.
void
rssRootParse( Feed** feedPtr, Config* cfg, tinyxml2::XMLElement* xchan )
{
	Feed* target = *(feedPtr);

	tinyxml2::XMLElement* xtitle = xchan->FirstChildElement("title");
	tinyxml2::XMLElement* xdesc  = xchan->FirstChildElement("description");
	tinyxml2::XMLElement* xhome  = xchan->FirstChildElement("link");
	tinyxml2::XMLElement* xbuilt = xchan->FirstChildElement("lastBuildDate");

	target->SetTitle( xtitle );
	target->SetDesc( xdesc );
	target->SetHomeUrl( xhome );
	target->SetDate( xbuilt );

	if ( cfg->verbose ) {
		printf("Channel '%s' at '%s':\n", target->title.String(), target->homeUrl.String());
	}
}
// Builds one Entry from the <item> element `xitem` (<title>, <description>,
// <pubDate>, <link>, <content:encoded>) and appends it to the feed's entries
// when its date passes withinDateRange() for cfg's min/max dates.
//
// The Entry is allocated exactly once; one that falls outside the date range
// is freed here, because nothing else holds a pointer to it.
void
rssEntryParse ( Feed** feedPtr, Config* cfg, tinyxml2::XMLElement* xitem )
{
	Feed* feed = *(feedPtr);
	Entry* newEntry = new Entry( feed->outputDir );

	newEntry->SetTitle   ( xitem->FirstChildElement("title") );
	newEntry->SetDesc    ( xitem->FirstChildElement("description") );
	newEntry->SetDate    ( xitem->FirstChildElement("pubDate") );
	newEntry->SetPostUrl ( xitem->FirstChildElement("link") );
	newEntry->SetContent ( xitem->FirstChildElement("content:encoded") );

	if ( cfg->verbose )
		printf("\t%s\n", newEntry->title.String());

	if ( withinDateRange( cfg->minDate, newEntry->date, cfg->maxDate ) )
		feed->entries.AddItem( newEntry );
	else
		delete newEntry;
}
// Replaces the feed's entry list with a fresh one sized by the number of
// <item> elements under `xchan`, then runs rssEntryParse() on each of them in
// document order. Prints the item count first when cfg->verbose is set.
void
rssParseEntries ( Feed** feedPtr, Config* cfg, tinyxml2::XMLElement* xchan )
{
	Feed* target = *(feedPtr);

	tinyxml2::XMLElement* first = xchan->FirstChildElement("item");
	int total = xmlCountSiblings( first, "item" );

	target->entries = BList(total);

	if ( cfg->verbose )
		printf("\t-%i entries-\n", total);

	for ( tinyxml2::XMLElement* cur = first; cur; cur = cur->NextSiblingElement("item") )
		rssEntryParse( feedPtr, cfg, cur );
}
// ----------------------------------------------------------------------------
// Parses an Atom document into the feed behind `feedPtr`: feed metadata
// first, then every entry.
//
// Does nothing when the document has no top-level <feed> element, rather than
// handing a null element to the helpers.
void
atomParser ( Feed** feedPtr, Config* cfg, tinyxml2::XMLDocument* xml )
{
	tinyxml2::XMLElement* xfeed = xml->FirstChildElement("feed");
	if ( !xfeed )
		return;

	atomRootParse( feedPtr, cfg, xfeed );
	atomParseEntries( feedPtr, cfg, xfeed );
}
// Reads the feed-level metadata (title, description, date, home URL) from the
// <feed> element `xfeed` into the feed behind `feedPtr`. `xfeed` must not be
// null.
//
// Date fallback order: feed <updated>, feed <published>, then the first
// entry's <updated> and <published>.
// Home URL fallback order: feed <link href>, <author><uri>, then
// <author><link href>.
//
// <author> and <link> are both optional in practice, so neither is
// dereferenced without a null check.
void
atomRootParse( Feed** feedPtr, Config* cfg, tinyxml2::XMLElement* xfeed )
{
	Feed* feed = *(feedPtr);

	tinyxml2::XMLElement* xauthor = xfeed->FirstChildElement("author");
	tinyxml2::XMLElement* xentry = xfeed->FirstChildElement("entry");
	tinyxml2::XMLElement* xlink = xfeed->FirstChildElement("link");

	// Only look inside <author> when the feed actually has one.
	tinyxml2::XMLElement* xauthlink = NULL;
	if ( xauthor )
		xauthlink = xauthor->FirstChildElement("link");

	bool set = false;

	feed->SetTitle( xfeed->FirstChildElement("title") );
	feed->SetDesc( xfeed->FirstChildElement("description") );

	set = feed->SetDate( xfeed->FirstChildElement("updated") );
	if ( !set ) set = feed->SetDate( xfeed->FirstChildElement("published") );
	if ( !set && xentry ) set = feed->SetDate( xentry->FirstChildElement("updated") );
	if ( !set && xentry ) set = feed->SetDate( xentry->FirstChildElement("published") );

	// The date result must not leak into the URL fallback chain.
	set = false;
	if ( xlink ) set = feed->SetHomeUrl( xlink->Attribute( "href" ) );
	if ( !set && xauthor ) set = feed->SetHomeUrl( xauthor->FirstChildElement("uri") );
	if ( !set && xauthlink ) set = feed->SetHomeUrl( xauthlink->Attribute( "href" ) );

	if ( cfg->verbose )
		printf("Channel '%s' at '%s':\n", feed->title.String(), feed->homeUrl.String());
}
// Builds one Entry from the <entry> element `xentry` and appends it to the
// feed's entries when its date passes withinDateRange() for cfg's min/max
// dates.
//
// Description fallback order: <summary>, <description>, then
// <media:group><media:description>. Date: <updated>, then <published>.
// The <content> element, when present, is serialised back to XML text and
// stored as the entry's content.
//
// The Entry is allocated exactly once; one that falls outside the date range
// is freed here, because nothing else holds a pointer to it.
void
atomEntryParse ( Feed** feedPtr, Config* cfg, tinyxml2::XMLElement* xentry )
{
	Feed* feed = *(feedPtr);
	Entry* newEntry = new Entry( feed->outputDir );

	tinyxml2::XMLElement* xcontent = xentry->FirstChildElement("content");
	tinyxml2::XMLElement* xmedia = xentry->FirstChildElement("media:group");
	tinyxml2::XMLElement* xlink = xentry->FirstChildElement("link");
	tinyxml2::XMLPrinter xprinter;

	newEntry->SetTitle( xentry->FirstChildElement("title") );

	// An entry without <link> simply keeps its default post URL.
	if ( xlink )
		newEntry->SetPostUrl( xlink->Attribute("href") );

	bool set = false;
	set = newEntry->SetDesc( xentry->FirstChildElement("summary") );
	if ( !set ) set = newEntry->SetDesc( xentry->FirstChildElement("description"));
	if ( !set && xmedia ) set = newEntry->SetDesc( xmedia->FirstChildElement("media:description"));

	set = newEntry->SetDate( xentry->FirstChildElement("updated") );
	if ( !set ) set = newEntry->SetDate( xentry->FirstChildElement("published") );

	if ( xcontent ) {
		xcontent->Accept( &xprinter );
		newEntry->SetContent( xprinter.CStr() );
	}

	if ( cfg->verbose )
		printf("\t%s\n", newEntry->title.String());

	if ( withinDateRange( cfg->minDate, newEntry->date, cfg->maxDate ) )
		feed->entries.AddItem( newEntry );
	else
		delete newEntry;
}
// Replaces the feed's entry list with a fresh one sized by the number of
// <entry> elements under `xfeed`, then runs atomEntryParse() on each of them
// in document order. Prints the entry count first when cfg->verbose is set.
void
atomParseEntries ( Feed** feedPtr, Config* cfg, tinyxml2::XMLElement* xfeed )
{
	Feed* target = *(feedPtr);

	tinyxml2::XMLElement* first = xfeed->FirstChildElement("entry");
	int total = xmlCountSiblings( first, "entry" );

	target->entries = BList(total);

	if ( cfg->verbose )
		printf("\t-%i entries-\n", total);

	for ( tinyxml2::XMLElement* cur = first; cur; cur = cur->NextSiblingElement("entry") )
		atomEntryParse( feedPtr, cfg, cur );
}
// ----------------------------------------------------------------------------
// Counts `xsibling` itself plus every later sibling element named
// `sibling_name`. A null `xsibling` gives 0. The starting element is counted
// without checking its own name.
int
xmlCountSiblings ( tinyxml2::XMLElement* xsibling, const char* sibling_name )
{
	int total = 0;

	for ( tinyxml2::XMLElement* cur = xsibling; cur; cur = cur->NextSiblingElement(sibling_name) )
		++total;

	return total;
}

View File

@ -1,19 +0,0 @@
#ifndef PARSE_H
#define PARSE_H
#include <tinyxml2.h>
#include "Config.h"
#include "Feed.h"
void feedParser ( Feed**, Config* );
void rssParser ( Feed**, Config*, tinyxml2::XMLDocument* );
void rssRootParse ( Feed**, Config*, tinyxml2::XMLElement* );
void rssEntryParse ( Feed**, Config*, tinyxml2::XMLElement* );
void rssParseEntries ( Feed**, Config*, tinyxml2::XMLElement* );
void atomParser ( Feed**, Config*, tinyxml2::XMLDocument* );
void atomRootParse ( Feed**, Config*, tinyxml2::XMLElement* );
void atomEntryParse ( Feed**, Config*, tinyxml2::XMLElement* );
void atomParseEntries ( Feed**, Config*, tinyxml2::XMLElement* );
int xmlCountSiblings ( tinyxml2::XMLElement*, const char* );
#endif