Start replacing raptor with tinyxml (finally!)
This commit is contained in:
parent
52908aef68
commit
39d5842e7c
2
Makefile
2
Makefile
|
@ -68,7 +68,7 @@ RSRCS = \
|
||||||
# - if your library does not follow the standard library naming scheme,
|
# - if your library does not follow the standard library naming scheme,
|
||||||
# you need to specify the path to the library and its name.
|
# you need to specify the path to the library and its name.
|
||||||
# (e.g. for mylib.a, specify "mylib.a" or "path/mylib.a")
|
# (e.g. for mylib.a, specify "mylib.a" or "path/mylib.a")
|
||||||
LIBS = be tracker shared raptor2 bnetapi network $(STDCPPLIBS)
|
LIBS = be tracker shared tinyxml2 bnetapi network $(STDCPPLIBS)
|
||||||
|
|
||||||
# Specify additional paths to directories following the standard libXXX.so
|
# Specify additional paths to directories following the standard libXXX.so
|
||||||
# or libXXX.a naming scheme. You can specify full paths or paths relative
|
# or libXXX.a naming scheme. You can specify full paths or paths relative
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <raptor2/raptor2.h>
|
#include <tinyxml2.h>
|
||||||
#include "Channel.h"
|
#include "Channel.h"
|
||||||
#include "Item.h"
|
#include "Item.h"
|
||||||
#include "Config.h"
|
#include "Config.h"
|
||||||
|
@ -12,6 +12,7 @@ Channel::Channel ( BString path, BString outputPath )
|
||||||
homePage = BString("");
|
homePage = BString("");
|
||||||
xmlUrl = BString("");
|
xmlUrl = BString("");
|
||||||
filePath = path;
|
filePath = path;
|
||||||
|
lastDate = BString("");
|
||||||
topLevelSubject = "";
|
topLevelSubject = "";
|
||||||
lastSubject = "";
|
lastSubject = "";
|
||||||
outputDir = outputPath;
|
outputDir = outputPath;
|
||||||
|
@ -20,8 +21,36 @@ Channel::Channel ( BString path, BString outputPath )
|
||||||
void
|
void
|
||||||
Channel::Parse ( Config* cfg )
|
Channel::Parse ( Config* cfg )
|
||||||
{
|
{
|
||||||
int itemCount = countItemParser( filePath.String() );
|
items = BList();
|
||||||
items = BList(itemCount);
|
|
||||||
Channel* chan = this;
|
Channel* chan = this;
|
||||||
feedParser(&chan);
|
feedParser(&chan, cfg);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Channel::SetTitle ( const char* titleStr ) {
|
||||||
|
if ( titleStr != NULL ) title = BString( titleStr );
|
||||||
|
}
|
||||||
|
void Channel::SetTitle ( tinyxml2::XMLElement* elem ) {
|
||||||
|
if ( elem != NULL ) SetTitle( elem->GetText() );
|
||||||
|
}
|
||||||
|
|
||||||
|
void Channel::SetDesc ( const char* descStr ) {
|
||||||
|
if ( descStr != NULL ) description = BString( descStr );
|
||||||
|
}
|
||||||
|
void Channel::SetDesc ( tinyxml2::XMLElement* elem ) {
|
||||||
|
if ( elem != NULL ) SetDesc( elem->GetText() );
|
||||||
|
}
|
||||||
|
|
||||||
|
void Channel::SetHomePage ( const char* homepageStr ) {
|
||||||
|
if ( homepageStr != NULL )
|
||||||
|
homePage = BString( homepageStr );
|
||||||
|
}
|
||||||
|
void Channel::SetHomePage ( tinyxml2::XMLElement* elem ) {
|
||||||
|
if ( elem != NULL ) SetHomePage( elem->GetText() );
|
||||||
|
}
|
||||||
|
|
||||||
|
void Channel::SetLastDate ( const char* dateStr ) {
|
||||||
|
if ( dateStr != NULL ) lastDate = BString( dateStr );
|
||||||
|
}
|
||||||
|
void Channel::SetLastDate ( tinyxml2::XMLElement* elem ) {
|
||||||
|
if ( elem != NULL ) SetLastDate( elem->GetText() );
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#ifndef CHANNEL_H
|
#ifndef CHANNEL_H
|
||||||
#define CHANNEL_H
|
#define CHANNEL_H
|
||||||
|
|
||||||
|
#include <tinyxml2.h>
|
||||||
#include <DateTime.h>
|
#include <DateTime.h>
|
||||||
#include <String.h>
|
#include <String.h>
|
||||||
#include <List.h>
|
#include <List.h>
|
||||||
|
@ -12,7 +13,7 @@ public:
|
||||||
char lang[3];
|
char lang[3];
|
||||||
BString title;
|
BString title;
|
||||||
BString description;
|
BString description;
|
||||||
BDate lastBuildDate;
|
BString lastDate;
|
||||||
BString homePage;
|
BString homePage;
|
||||||
BString xmlUrl;
|
BString xmlUrl;
|
||||||
BList items;
|
BList items;
|
||||||
|
@ -27,6 +28,15 @@ public:
|
||||||
// Channel ( BEntry );
|
// Channel ( BEntry );
|
||||||
// Channel ( BUrl );
|
// Channel ( BUrl );
|
||||||
void Parse ( Config* );
|
void Parse ( Config* );
|
||||||
|
|
||||||
|
void SetTitle ( const char* );
|
||||||
|
void SetTitle ( tinyxml2::XMLElement* );
|
||||||
|
void SetDesc ( const char* );
|
||||||
|
void SetDesc ( tinyxml2::XMLElement* );
|
||||||
|
void SetLastDate ( const char* );
|
||||||
|
void SetLastDate ( tinyxml2::XMLElement* );
|
||||||
|
void SetHomePage ( const char* );
|
||||||
|
void SetHomePage ( tinyxml2::XMLElement* );
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#ifndef CONFIG_H
|
#ifndef CONFIG_H
|
||||||
#define CONFIG_H
|
#define CONFIG_H
|
||||||
|
|
||||||
|
#include <String.h>
|
||||||
#include <StorageKit.h>
|
#include <StorageKit.h>
|
||||||
|
|
||||||
class Config {
|
class Config {
|
||||||
|
|
66
src/Item.cpp
66
src/Item.cpp
|
@ -1,30 +1,30 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <raptor2/raptor2.h>
|
#include <tinyxml2.h>
|
||||||
#include <StorageKit.h>
|
#include <StorageKit.h>
|
||||||
|
#include "Config.h"
|
||||||
#include "Item.h"
|
#include "Item.h"
|
||||||
|
|
||||||
Item::Item ( BString localSubject, BString outputPath )
|
Item::Item ( BString outputPath )
|
||||||
{
|
{
|
||||||
subject = localSubject;
|
|
||||||
title = BString("");
|
title = BString("");
|
||||||
description = BString("");
|
description = BString("");
|
||||||
homePage = BString("");
|
homePage = BString("");
|
||||||
postUrl = BString("");
|
postUrl = BString("");
|
||||||
content = "";
|
content = BString("");
|
||||||
pubDate = BString("");
|
pubDate = BString("");
|
||||||
outputDir = outputPath;
|
outputDir = outputPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
Item::Filetize ( bool onlyIfNew = false )
|
Item::Filetize ( Config* cfg, bool onlyIfNew = false )
|
||||||
{
|
{
|
||||||
BDirectory* dir = new BDirectory( outputDir );
|
BDirectory* dir = new BDirectory( outputDir );
|
||||||
BFile* file = new BFile( title.String(), B_READ_WRITE );
|
BFile* file = new BFile( title.String(), B_READ_WRITE );
|
||||||
|
|
||||||
dir->CreateFile( title.String(), file );
|
dir->CreateFile( title.String(), file );
|
||||||
|
|
||||||
BString betype = "text/html";
|
BString betype = cfg->mimetype;
|
||||||
|
|
||||||
file->WriteAttr( "META:title", B_STRING_TYPE, 0,
|
file->WriteAttr( "META:title", B_STRING_TYPE, 0,
|
||||||
title.String(), title.CountChars() );
|
title.String(), title.CountChars() );
|
||||||
|
@ -37,15 +37,53 @@ Item::Filetize ( bool onlyIfNew = false )
|
||||||
file->WriteAttr( "BEOS:TYPE", B_STRING_TYPE, 0,
|
file->WriteAttr( "BEOS:TYPE", B_STRING_TYPE, 0,
|
||||||
betype.String(), betype.CountChars() );
|
betype.String(), betype.CountChars() );
|
||||||
|
|
||||||
|
file->Write(content.String(), content.Length());
|
||||||
// using file->Write with content converted to C string messes up length ofc
|
// using file->Write with content converted to C string messes up length ofc
|
||||||
// this is required to preserve length (because of UTF char substitutions in parsing.cpp)
|
// this is required to preserve length (because of UTF char substitutions in parsing.cpp)
|
||||||
const char* strPath = outputDir.String();
|
// const char* strPath = outputDir.String();
|
||||||
std::string path(strPath);
|
// std::string path(strPath);
|
||||||
path += std::string(title.String());
|
// path += std::string(title.String());
|
||||||
std::cout << path << std::endl;
|
// std::cout << path << std::endl;
|
||||||
|
//
|
||||||
std::ofstream pFile(path);
|
// std::ofstream pFile(path);
|
||||||
pFile << content;
|
// pFile << content;
|
||||||
pFile.close();
|
// pFile.close();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Item::SetTitle ( const char* titleStr ) {
|
||||||
|
if ( titleStr != NULL ) title = BString( titleStr );
|
||||||
|
}
|
||||||
|
void Item::SetTitle ( tinyxml2::XMLElement* elem ) {
|
||||||
|
if ( elem != NULL ) SetTitle( elem->GetText() );
|
||||||
|
}
|
||||||
|
|
||||||
|
void Item::SetDesc ( const char* descStr ) {
|
||||||
|
if ( descStr != NULL ) description = BString( descStr );
|
||||||
|
}
|
||||||
|
void Item::SetDesc ( tinyxml2::XMLElement* elem ) {
|
||||||
|
if ( elem != NULL ) SetDesc( elem->GetText() );
|
||||||
|
}
|
||||||
|
|
||||||
|
void Item::SetContent ( const char* contentStr ) {
|
||||||
|
if ( contentStr != NULL ) content = BString( contentStr );
|
||||||
|
}
|
||||||
|
void Item::SetContent ( tinyxml2::XMLElement* elem ) {
|
||||||
|
if ( elem != NULL ) SetContent( elem->GetText() );
|
||||||
|
}
|
||||||
|
|
||||||
|
void Item::SetPostUrl ( const char* urlStr ) {
|
||||||
|
if ( urlStr != NULL )
|
||||||
|
postUrl = BString( urlStr );
|
||||||
|
}
|
||||||
|
void Item::SetPostUrl ( tinyxml2::XMLElement* elem ) {
|
||||||
|
if ( elem != NULL ) SetPostUrl( elem->GetText() );
|
||||||
|
}
|
||||||
|
|
||||||
|
void Item::SetPubDate ( const char* dateStr ) {
|
||||||
|
if ( dateStr != NULL )
|
||||||
|
pubDate = BString( dateStr );
|
||||||
|
}
|
||||||
|
void Item::SetPubDate ( tinyxml2::XMLElement* elem ) {
|
||||||
|
if ( elem != NULL ) SetPubDate( elem->GetText() );
|
||||||
|
}
|
||||||
|
|
19
src/Item.h
19
src/Item.h
|
@ -14,14 +14,25 @@ public:
|
||||||
BString pubDate;
|
BString pubDate;
|
||||||
BString homePage;
|
BString homePage;
|
||||||
BString postUrl;
|
BString postUrl;
|
||||||
std::string content;
|
BString content;
|
||||||
BString outputDir;
|
BString outputDir;
|
||||||
|
|
||||||
BString subject;
|
Item ( BString );
|
||||||
|
|
||||||
Item ( BString, BString );
|
bool Filetize ( Config*, bool );
|
||||||
|
|
||||||
bool Filetize ( bool );
|
void SetTitle ( const char* );
|
||||||
|
void SetTitle ( tinyxml2::XMLElement* );
|
||||||
|
void SetDesc ( const char* );
|
||||||
|
void SetDesc ( tinyxml2::XMLElement* );
|
||||||
|
void SetContent ( const char* );
|
||||||
|
void SetContent ( tinyxml2::XMLElement* );
|
||||||
|
void SetPostUrl ( const char* );
|
||||||
|
void SetPostUrl ( tinyxml2::XMLElement* );
|
||||||
|
void SetPubDate ( const char* );
|
||||||
|
void SetPubDate ( tinyxml2::XMLElement* );
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
#include <raptor2/raptor2.h>
|
|
||||||
#include <StorageKit.h>
|
#include <StorageKit.h>
|
||||||
#include <String.h>
|
#include <String.h>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
|
@ -8,6 +7,8 @@
|
||||||
#include "Config.h"
|
#include "Config.h"
|
||||||
#include "Rifen.h"
|
#include "Rifen.h"
|
||||||
|
|
||||||
|
Config* main_cfg;
|
||||||
|
|
||||||
int
|
int
|
||||||
usage ()
|
usage ()
|
||||||
{
|
{
|
||||||
|
@ -18,9 +19,8 @@ usage ()
|
||||||
bool
|
bool
|
||||||
create_item ( void* item )
|
create_item ( void* item )
|
||||||
{
|
{
|
||||||
printf("hi");
|
|
||||||
Item* itemPtr = (Item*)item;
|
Item* itemPtr = (Item*)item;
|
||||||
itemPtr->Filetize( false );
|
itemPtr->Filetize( main_cfg, false );
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,16 +78,14 @@ invocation ( int argc, char** argv, Config** cfgPtr )
|
||||||
int
|
int
|
||||||
main ( int argc, char** argv )
|
main ( int argc, char** argv )
|
||||||
{
|
{
|
||||||
//
|
main_cfg = new Config;
|
||||||
//
|
|
||||||
Config* cfg = new Config;
|
|
||||||
usageMsg.ReplaceAll("%app%", "Rifen");
|
usageMsg.ReplaceAll("%app%", "Rifen");
|
||||||
|
|
||||||
invocation( argc, argv, &cfg );
|
invocation( argc, argv, &main_cfg );
|
||||||
|
|
||||||
Channel* chan = (Channel*)malloc( sizeof(Channel) );
|
Channel* chan = (Channel*)malloc( sizeof(Channel) );
|
||||||
chan = new Channel(cfg->targetFeed, cfg->outDir);
|
chan = new Channel(main_cfg->targetFeed, main_cfg->outDir);
|
||||||
chan->Parse(cfg);
|
chan->Parse(main_cfg);
|
||||||
|
|
||||||
BList items = chan->items;
|
BList items = chan->items;
|
||||||
items.DoForEach(&create_item);
|
items.DoForEach(&create_item);
|
||||||
|
|
|
@ -37,5 +37,12 @@ BString usageMsg =
|
||||||
"Both -t and -T use the ISO 8601 format for specifying datetimes:\n"
|
"Both -t and -T use the ISO 8601 format for specifying datetimes:\n"
|
||||||
" YYYY-MM-DDTHH:MM:SS - 2020-01-01T07:07:07\n"
|
" YYYY-MM-DDTHH:MM:SS - 2020-01-01T07:07:07\n"
|
||||||
"You can leave out seconds, minutes, or hours, but YMD are required.\n"
|
"You can leave out seconds, minutes, or hours, but YMD are required.\n"
|
||||||
|
"\n"
|
||||||
|
"NOTE: This message doesn't reflect reality. This is more of a spec of\n"
|
||||||
|
" what I hope this program will be. As of now -t and -T aren't\n"
|
||||||
|
" implemented, and running %app% without a file/url free-argument\n"
|
||||||
|
" is invalid, as the daemon isn't implemented at all. As such,\n"
|
||||||
|
" -D is also non-functional.\n"
|
||||||
|
" But it sure can turn an XML feed into files! Lol.\n"
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
332
src/parsing.cpp
332
src/parsing.cpp
|
@ -1,6 +1,6 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <raptor2/raptor2.h>
|
#include <tinyxml2.h>
|
||||||
#include "Channel.h"
|
#include "Channel.h"
|
||||||
#include "Item.h"
|
#include "Item.h"
|
||||||
#include "parsing.h"
|
#include "parsing.h"
|
||||||
|
@ -9,300 +9,92 @@
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// PARSERS
|
// PARSERS
|
||||||
void
|
void
|
||||||
feedParser ( Channel** chanPtr )
|
feedParser ( Channel** chanPtr, Config* cfg )
|
||||||
{
|
{
|
||||||
Channel* chan = *(chanPtr);
|
Channel* chan = *(chanPtr);
|
||||||
raptor_parser* rss_parser = NULL;
|
|
||||||
raptor_world* world;
|
|
||||||
world = raptor_new_world();
|
|
||||||
|
|
||||||
unsigned char *uri_string;
|
tinyxml2::XMLDocument xml;
|
||||||
raptor_uri *uri, *base_uri;
|
xml.LoadFile( chan->filePath.String() );
|
||||||
|
|
||||||
rss_parser = raptor_new_parser( world, "rss-tag-soup" );
|
if ( xml.FirstChildElement("rss") )
|
||||||
uri_string = raptor_uri_filename_to_uri_string( chan->filePath.String() );
|
rssParser( chanPtr, cfg, &xml );
|
||||||
uri = raptor_new_uri( world, uri_string );
|
else if ( xml.FirstChildElement("feed") )
|
||||||
base_uri = raptor_uri_copy( uri );
|
printf("has atom\n");
|
||||||
|
|
||||||
raptor_parser_set_statement_handler( rss_parser, &chan, feedHandler );
|
|
||||||
raptor_parser_parse_file( rss_parser, uri, base_uri );
|
|
||||||
|
|
||||||
raptor_free_parser( rss_parser );
|
|
||||||
raptor_free_uri( base_uri );
|
|
||||||
raptor_free_uri( uri );
|
|
||||||
raptor_free_memory( uri_string );
|
|
||||||
raptor_free_world( world );
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------
|
|
||||||
int
|
|
||||||
countItemParser ( const char* filePath )
|
|
||||||
{
|
|
||||||
raptor_parser* rss_parser = NULL;
|
|
||||||
raptor_world* world;
|
|
||||||
world = raptor_new_world();
|
|
||||||
|
|
||||||
unsigned char *uri_string;
|
|
||||||
raptor_uri *uri, *base_uri;
|
|
||||||
|
|
||||||
rss_parser = raptor_new_parser(world, "rss-tag-soup");
|
|
||||||
uri_string = raptor_uri_filename_to_uri_string( filePath );
|
|
||||||
uri = raptor_new_uri( world, uri_string );
|
|
||||||
base_uri = raptor_uri_copy( uri );
|
|
||||||
|
|
||||||
int* itemCount = (int*)malloc( sizeof(int) );
|
|
||||||
*itemCount = 0;
|
|
||||||
raptor_parser_set_statement_handler( rss_parser, &itemCount, countItemHandler );
|
|
||||||
raptor_parser_parse_file( rss_parser, uri, base_uri );
|
|
||||||
|
|
||||||
free( itemCount );
|
|
||||||
raptor_free_parser( rss_parser );
|
|
||||||
raptor_free_uri( base_uri );
|
|
||||||
raptor_free_uri( uri );
|
|
||||||
raptor_free_memory( uri_string );
|
|
||||||
raptor_free_world( world );
|
|
||||||
|
|
||||||
return *(itemCount);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
printStatementParser ( const char* filePath )
|
rssParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml )
|
||||||
{
|
|
||||||
raptor_parser* rss_parser = NULL;
|
|
||||||
raptor_world* world;
|
|
||||||
world = raptor_new_world();
|
|
||||||
|
|
||||||
unsigned char *uri_string;
|
|
||||||
raptor_uri *uri, *base_uri;
|
|
||||||
|
|
||||||
rss_parser = raptor_new_parser(world, "rss-tag-soup");
|
|
||||||
uri_string = raptor_uri_filename_to_uri_string( filePath );
|
|
||||||
uri = raptor_new_uri( world, uri_string );
|
|
||||||
base_uri = raptor_uri_copy( uri );
|
|
||||||
|
|
||||||
raptor_parser_set_statement_handler( rss_parser, NULL, printStatementHandler );
|
|
||||||
raptor_parser_parse_file( rss_parser, uri, base_uri );
|
|
||||||
|
|
||||||
raptor_free_parser( rss_parser );
|
|
||||||
raptor_free_uri( base_uri );
|
|
||||||
raptor_free_uri( uri );
|
|
||||||
raptor_free_memory( uri_string );
|
|
||||||
raptor_free_world( world );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// HANDLERS
|
|
||||||
void
|
|
||||||
feedHandler ( void* user_data, raptor_statement* statement )
|
|
||||||
{
|
|
||||||
if ( user_data != NULL ) {
|
|
||||||
Channel** chanPtr = (Channel**)user_data;
|
|
||||||
handleFeedStatement( chanPtr, statement );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
countItemHandler ( void* user_data, raptor_statement* statement )
|
|
||||||
{
|
|
||||||
int** countPtr = ( int** )user_data;
|
|
||||||
int* count = *(countPtr);
|
|
||||||
|
|
||||||
const char* object = ( const char* )raptor_term_to_string( statement->object );
|
|
||||||
const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
|
|
||||||
|
|
||||||
if (getPredicateTag(predicate) == "type"
|
|
||||||
&& getPredicateTag(object) == "item")
|
|
||||||
*count += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
printStatementHandler ( void* user_data, raptor_statement* statement )
|
|
||||||
{
|
|
||||||
int** countPtr = (int**)user_data;
|
|
||||||
int* count = *(countPtr);
|
|
||||||
|
|
||||||
const char* subject = ( const char* )raptor_term_to_string( statement->subject );
|
|
||||||
const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
|
|
||||||
const char* object = ( const char* )raptor_term_to_string( statement->object );
|
|
||||||
|
|
||||||
printf("%s\t-%s\n%.50s\n", subject, predicate, object);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
|
||||||
// FEEDHANDLER HELPERS
|
|
||||||
void
|
|
||||||
handleFeedStatement ( Channel** chanPtr, raptor_statement* statement )
|
|
||||||
{
|
{
|
||||||
Channel* chan = *(chanPtr);
|
Channel* chan = *(chanPtr);
|
||||||
const char* cpredicate = (const char*)raptor_term_to_string( statement->predicate );
|
|
||||||
const char* csubject = (const char*)raptor_term_to_string( statement->subject );
|
|
||||||
const char* cobject = (const char*)raptor_term_to_string( statement->object );
|
|
||||||
|
|
||||||
BString predicate = BString(cpredicate);
|
tinyxml2::XMLElement* xchan = xml->FirstChildElement("rss")->FirstChildElement("channel");
|
||||||
BString subject = BString(csubject);
|
|
||||||
BString bobject = BString(cobject);
|
|
||||||
|
|
||||||
bobject.ReplaceAll("\\\"","\"");
|
rssRootParse( chanPtr, cfg, xchan );
|
||||||
bobject.ReplaceFirst("\"","");
|
rssParseItems( chanPtr, cfg, xchan );
|
||||||
bobject.ReplaceLast("\"","");
|
|
||||||
|
|
||||||
std::string object = unescape(bobject.String());
|
|
||||||
|
|
||||||
predicate = getPredicateTag( predicate );
|
|
||||||
|
|
||||||
if ( predicate == "type" && getPredicateTag( object ) == "channel" )
|
|
||||||
chan->topLevelSubject = subject;
|
|
||||||
|
|
||||||
if ( subject != chan->topLevelSubject )
|
|
||||||
// handleChannelStatement( chanPtr, predicate, object );
|
|
||||||
// else
|
|
||||||
handleItemStatement( chanPtr, subject, predicate, object );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
handleChannelStatement ( Channel** chanPtr, BString predicate, BString object )
|
rssRootParse( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xchan )
|
||||||
{
|
{
|
||||||
Channel* chan = *(chanPtr);
|
Channel* chan = *(chanPtr);
|
||||||
|
|
||||||
|
chan->SetTitle( xchan->FirstChildElement("title") );
|
||||||
|
chan->SetDesc( xchan->FirstChildElement("description") );
|
||||||
|
chan->SetHomePage( xchan->FirstChildElement("link") );
|
||||||
|
chan->SetLastDate( xchan->FirstChildElement("lastBuildDate") );
|
||||||
|
|
||||||
|
if ( cfg->verbose )
|
||||||
|
printf("Channel '%s' at '%s':\n", chan->title.String(), chan->homePage.String());
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
handleItemStatement ( Channel** chanPtr, BString subject, BString predicate, std::string object )
|
rssItemParse ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xitem )
|
||||||
{
|
{
|
||||||
Channel* chan = *(chanPtr);
|
Channel* chan = *(chanPtr);
|
||||||
if ( subject.StartsWith("_:genid") )
|
|
||||||
return;
|
|
||||||
|
|
||||||
chan->title = BString("dad");
|
|
||||||
|
|
||||||
if ( subject != chan->lastSubject ) {
|
|
||||||
chan->lastSubject = subject;
|
|
||||||
|
|
||||||
Item* newItem = (Item*)malloc( sizeof(Item) );
|
Item* newItem = (Item*)malloc( sizeof(Item) );
|
||||||
newItem = new Item( subject, chan->outputDir );
|
newItem = new Item( chan->outputDir );
|
||||||
|
|
||||||
|
newItem->SetTitle( xitem->FirstChildElement("title") );
|
||||||
|
newItem->SetDesc( xitem->FirstChildElement("description") );
|
||||||
|
newItem->SetPubDate( xitem->FirstChildElement("pubDate") );
|
||||||
|
newItem->SetContent( xitem->FirstChildElement("content:encoded") );
|
||||||
|
|
||||||
|
if (cfg->verbose )
|
||||||
|
printf("\t%s\n", newItem->title.String());
|
||||||
|
|
||||||
chan->items.AddItem( newItem );
|
chan->items.AddItem( newItem );
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
rssParseItems ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xchan )
|
||||||
|
{
|
||||||
|
Channel* chan = *(chanPtr);
|
||||||
|
tinyxml2::XMLElement* xitem;
|
||||||
|
|
||||||
|
xitem = xchan->FirstChildElement("item");
|
||||||
|
|
||||||
|
int itemCount = xmlCountSiblings( xitem, "item" );
|
||||||
|
chan->items = BList(itemCount);
|
||||||
|
|
||||||
|
if ( cfg->verbose )
|
||||||
|
printf("\t-%i items-\n", itemCount);
|
||||||
|
|
||||||
|
while ( xitem ) {
|
||||||
|
rssItemParse( chanPtr, cfg, xitem );
|
||||||
|
xitem = xitem->NextSiblingElement("item");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
xmlCountSiblings ( tinyxml2::XMLElement* xsibling, const char* sibling_name )
|
||||||
|
{
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
while ( xsibling ) {
|
||||||
|
count++;
|
||||||
|
xsibling = xsibling->NextSiblingElement(sibling_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
Item* nowItem = (Item*)chan->items.LastItem();
|
return count;
|
||||||
|
|
||||||
if ( predicate == "title" )
|
|
||||||
nowItem->title = BString(object.c_str());
|
|
||||||
if ( predicate == "encoded" || predicate == "Atomcontent" )
|
|
||||||
nowItem->content = object;
|
|
||||||
if ( predicate == "description" )
|
|
||||||
nowItem->description = BString(object.c_str());
|
|
||||||
if ( predicate == "link" || predicate == "Atomlink" )
|
|
||||||
nowItem->postUrl = BString(object.c_str());
|
|
||||||
if ( predicate == "Atomhref" )
|
|
||||||
nowItem->postUrl = BString(object.c_str());
|
|
||||||
if ( predicate == "date" || predicate == "Atompublished" ) // 2019-02-18T01:43:43Z
|
|
||||||
nowItem->pubDate = BString(object.c_str());
|
|
||||||
if ( predicate == "pubDate" ) // Sun, 17 Feb 2019 19:43:43 -0600
|
|
||||||
nowItem->pubDate = BString(object.c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// UTIL
|
|
||||||
BString
|
|
||||||
getPredicateTag ( BString spec )
|
|
||||||
{
|
|
||||||
int32 lastSlash = spec.FindLast( '/' );
|
|
||||||
spec.RemoveChars( 0, lastSlash + 1 );
|
|
||||||
int32 lastHash = spec.FindLast( '#' );
|
|
||||||
spec.RemoveChars( 0, lastHash + 1 );
|
|
||||||
spec.RemoveLast( ">" );
|
|
||||||
|
|
||||||
return spec;
|
|
||||||
}
|
|
||||||
BString
|
|
||||||
getPredicateTag ( const char* spec )
|
|
||||||
{
|
|
||||||
return getPredicateTag( BString(spec) );
|
|
||||||
}
|
|
||||||
BString
|
|
||||||
getPredicateTag ( std::string spec )
|
|
||||||
{
|
|
||||||
return getPredicateTag( spec.c_str() );
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/* What ensues is a terrifying violation of the human form.
|
|
||||||
* Just atrocious. I deserve to be impaled by an ice-pick.
|
|
||||||
* ... something (unfortunately), directly ripped from StackOverflow.
|
|
||||||
* So when getting a raptor_statement's object, it's a char array filled
|
|
||||||
* with escaped characters (\U2901, etc).
|
|
||||||
* I'm really not sure how to best manage this, so SO.
|
|
||||||
* Thanks remy-lebeau, I owe you.
|
|
||||||
* https://stackoverflow.com/questions/28534221 */
|
|
||||||
std::string
|
|
||||||
toUtf8 ( uint32_t cp )
|
|
||||||
{
|
|
||||||
std::string result;
|
|
||||||
|
|
||||||
int count;
|
|
||||||
if (cp <= 0x007F)
|
|
||||||
count = 1;
|
|
||||||
else if (cp <= 0x07FF)
|
|
||||||
count = 2;
|
|
||||||
else if (cp <= 0xFFFF)
|
|
||||||
count = 3;
|
|
||||||
else if (cp <= 0x10FFFF)
|
|
||||||
count = 4;
|
|
||||||
else
|
|
||||||
return result; // or throw an exception
|
|
||||||
|
|
||||||
result.resize(count);
|
|
||||||
|
|
||||||
if (count > 1) {
|
|
||||||
for (int i = count-1; i > 0; --i) {
|
|
||||||
result[i] = (char) (0x80 | (cp & 0x3F));
|
|
||||||
cp >>= 6;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < count; ++i)
|
|
||||||
cp |= (1 << (7-i));
|
|
||||||
}
|
|
||||||
|
|
||||||
result[0] = (char) cp;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string
|
|
||||||
unescape ( std::string str, std::string escape )
|
|
||||||
{
|
|
||||||
std::string::size_type startIdx = 0;
|
|
||||||
do
|
|
||||||
{
|
|
||||||
startIdx = str.find(escape, startIdx);
|
|
||||||
if (startIdx == std::string::npos) break;
|
|
||||||
|
|
||||||
std::string::size_type endIdx = str.find_first_not_of("0123456789abcdefABCDEF",
|
|
||||||
startIdx+2);
|
|
||||||
if (endIdx == std::string::npos) break;
|
|
||||||
|
|
||||||
std::string tmpStr = str.substr(startIdx+2, endIdx-(startIdx+2));
|
|
||||||
std::istringstream iss(tmpStr);
|
|
||||||
|
|
||||||
uint32_t cp;
|
|
||||||
if (iss >> std::hex >> cp)
|
|
||||||
{
|
|
||||||
std::string utf8 = toUtf8(cp);
|
|
||||||
str.replace(startIdx, 2+tmpStr.length(), utf8);
|
|
||||||
startIdx += utf8.length();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
startIdx += 2;
|
|
||||||
}
|
|
||||||
while (true);
|
|
||||||
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string
|
|
||||||
unescape (const char* str )
|
|
||||||
{
|
|
||||||
return unescape(std::string( unescape(std::string(str), "\\u") ), "\\U");
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,28 +1,15 @@
|
||||||
#ifndef PARSE_H
|
#ifndef PARSE_H
|
||||||
#define PARSE_H
|
#define PARSE_H
|
||||||
|
|
||||||
#include <iostream>
|
#include <tinyxml2.h>
|
||||||
#include <raptor2/raptor2.h>
|
#include "Config.h"
|
||||||
#include "Channel.h"
|
#include "Channel.h"
|
||||||
|
|
||||||
|
void feedParser ( Channel**, Config* );
|
||||||
void feedParser (Channel**);
|
void rssParser ( Channel**, Config*, tinyxml2::XMLDocument* );
|
||||||
void feedHandler ( void*, raptor_statement* );
|
void rssRootParse ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||||
void handleFeedStatement ( Channel**, raptor_statement* );
|
void rssItemParse ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||||
void handleChannelStatement ( Channel**, BString, BString );
|
void rssParseItems ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||||
void handleItemStatement ( Channel**, BString, BString, std::string );
|
int xmlCountSiblings ( tinyxml2::XMLElement*, const char* );
|
||||||
|
|
||||||
int countItemParser ( const char* );
|
|
||||||
void countItemHandler ( void*, raptor_statement* );
|
|
||||||
|
|
||||||
void printStatementParser ( const char* );
|
|
||||||
void printStatementHandler ( void*, raptor_statement* );
|
|
||||||
|
|
||||||
BString getPredicateTag ( const char* );
|
|
||||||
BString getPredicateTag ( BString );
|
|
||||||
BString getPredicateTag ( std::string );
|
|
||||||
std::string to_utf ( uint32 );
|
|
||||||
std::string unescape ( std::string, std::string );
|
|
||||||
std::string unescape ( const char* );
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Ŝarĝante…
Reference in New Issue