Start replacing raptor with tinyxml (finally!)
This commit is contained in:
parent
52908aef68
commit
39d5842e7c
2
Makefile
2
Makefile
|
@ -68,7 +68,7 @@ RSRCS = \
|
|||
# - if your library does not follow the standard library naming scheme,
|
||||
# you need to specify the path to the library and its name.
|
||||
# (e.g. for mylib.a, specify "mylib.a" or "path/mylib.a")
|
||||
LIBS = be tracker shared raptor2 bnetapi network $(STDCPPLIBS)
|
||||
LIBS = be tracker shared tinyxml2 bnetapi network $(STDCPPLIBS)
|
||||
|
||||
# Specify additional paths to directories following the standard libXXX.so
|
||||
# or libXXX.a naming scheme. You can specify full paths or paths relative
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include <cstdio>
|
||||
#include <raptor2/raptor2.h>
|
||||
#include <tinyxml2.h>
|
||||
#include "Channel.h"
|
||||
#include "Item.h"
|
||||
#include "Config.h"
|
||||
|
@ -12,6 +12,7 @@ Channel::Channel ( BString path, BString outputPath )
|
|||
homePage = BString("");
|
||||
xmlUrl = BString("");
|
||||
filePath = path;
|
||||
lastDate = BString("");
|
||||
topLevelSubject = "";
|
||||
lastSubject = "";
|
||||
outputDir = outputPath;
|
||||
|
@ -20,8 +21,36 @@ Channel::Channel ( BString path, BString outputPath )
|
|||
void
|
||||
Channel::Parse ( Config* cfg )
|
||||
{
|
||||
int itemCount = countItemParser( filePath.String() );
|
||||
items = BList(itemCount);
|
||||
items = BList();
|
||||
Channel* chan = this;
|
||||
feedParser(&chan);
|
||||
feedParser(&chan, cfg);
|
||||
}
|
||||
|
||||
void Channel::SetTitle ( const char* titleStr ) {
|
||||
if ( titleStr != NULL ) title = BString( titleStr );
|
||||
}
|
||||
// Sets the channel title from the text of an XML element.
// A NULL element is ignored; the text itself is forwarded to the
// const char* overload.
void Channel::SetTitle ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return;
	SetTitle( elem->GetText() );
}
|
||||
|
||||
void Channel::SetDesc ( const char* descStr ) {
|
||||
if ( descStr != NULL ) description = BString( descStr );
|
||||
}
|
||||
// Sets the channel description from the text of an XML element.
// A NULL element is ignored; the text itself is forwarded to the
// const char* overload.
void Channel::SetDesc ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return;
	SetDesc( elem->GetText() );
}
|
||||
|
||||
void Channel::SetHomePage ( const char* homepageStr ) {
|
||||
if ( homepageStr != NULL )
|
||||
homePage = BString( homepageStr );
|
||||
}
|
||||
// Sets the channel home page from the text of an XML element.
// A NULL element is ignored; the text itself is forwarded to the
// const char* overload.
void Channel::SetHomePage ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return;
	SetHomePage( elem->GetText() );
}
|
||||
|
||||
void Channel::SetLastDate ( const char* dateStr ) {
|
||||
if ( dateStr != NULL ) lastDate = BString( dateStr );
|
||||
}
|
||||
// Sets the channel's last-date string from the text of an XML element.
// A NULL element is ignored; the text itself is forwarded to the
// const char* overload.
void Channel::SetLastDate ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return;
	SetLastDate( elem->GetText() );
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef CHANNEL_H
|
||||
#define CHANNEL_H
|
||||
|
||||
#include <tinyxml2.h>
|
||||
#include <DateTime.h>
|
||||
#include <String.h>
|
||||
#include <List.h>
|
||||
|
@ -12,7 +13,7 @@ public:
|
|||
char lang[3];
|
||||
BString title;
|
||||
BString description;
|
||||
BDate lastBuildDate;
|
||||
BString lastDate;
|
||||
BString homePage;
|
||||
BString xmlUrl;
|
||||
BList items;
|
||||
|
@ -27,6 +28,15 @@ public:
|
|||
// Channel ( BEntry );
|
||||
// Channel ( BUrl );
|
||||
void Parse ( Config* );
|
||||
|
||||
void SetTitle ( const char* );
|
||||
void SetTitle ( tinyxml2::XMLElement* );
|
||||
void SetDesc ( const char* );
|
||||
void SetDesc ( tinyxml2::XMLElement* );
|
||||
void SetLastDate ( const char* );
|
||||
void SetLastDate ( tinyxml2::XMLElement* );
|
||||
void SetHomePage ( const char* );
|
||||
void SetHomePage ( tinyxml2::XMLElement* );
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#include <String.h>
|
||||
#include <StorageKit.h>
|
||||
|
||||
class Config {
|
||||
|
|
66
src/Item.cpp
66
src/Item.cpp
|
@ -1,30 +1,30 @@
|
|||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <raptor2/raptor2.h>
|
||||
#include <tinyxml2.h>
|
||||
#include <StorageKit.h>
|
||||
#include "Config.h"
|
||||
#include "Item.h"
|
||||
|
||||
Item::Item ( BString localSubject, BString outputPath )
|
||||
Item::Item ( BString outputPath )
|
||||
{
|
||||
subject = localSubject;
|
||||
title = BString("");
|
||||
description = BString("");
|
||||
homePage = BString("");
|
||||
postUrl = BString("");
|
||||
content = "";
|
||||
content = BString("");
|
||||
pubDate = BString("");
|
||||
outputDir = outputPath;
|
||||
}
|
||||
|
||||
bool
|
||||
Item::Filetize ( bool onlyIfNew = false )
|
||||
Item::Filetize ( Config* cfg, bool onlyIfNew = false )
|
||||
{
|
||||
BDirectory* dir = new BDirectory( outputDir );
|
||||
BFile* file = new BFile( title.String(), B_READ_WRITE );
|
||||
|
||||
dir->CreateFile( title.String(), file );
|
||||
|
||||
BString betype = "text/html";
|
||||
BString betype = cfg->mimetype;
|
||||
|
||||
file->WriteAttr( "META:title", B_STRING_TYPE, 0,
|
||||
title.String(), title.CountChars() );
|
||||
|
@ -37,15 +37,53 @@ Item::Filetize ( bool onlyIfNew = false )
|
|||
file->WriteAttr( "BEOS:TYPE", B_STRING_TYPE, 0,
|
||||
betype.String(), betype.CountChars() );
|
||||
|
||||
file->Write(content.String(), content.Length());
|
||||
// using file->Write with content converted to C string messes up length ofc
|
||||
// this is required to preserve length (because of UTF char substitutions in parsing.cpp)
|
||||
const char* strPath = outputDir.String();
|
||||
std::string path(strPath);
|
||||
path += std::string(title.String());
|
||||
std::cout << path << std::endl;
|
||||
|
||||
std::ofstream pFile(path);
|
||||
pFile << content;
|
||||
pFile.close();
|
||||
// const char* strPath = outputDir.String();
|
||||
// std::string path(strPath);
|
||||
// path += std::string(title.String());
|
||||
// std::cout << path << std::endl;
|
||||
//
|
||||
// std::ofstream pFile(path);
|
||||
// pFile << content;
|
||||
// pFile.close();
|
||||
return false;
|
||||
}
|
||||
|
||||
void Item::SetTitle ( const char* titleStr ) {
|
||||
if ( titleStr != NULL ) title = BString( titleStr );
|
||||
}
|
||||
// Sets the item title from the text of an XML element.
// A NULL element is ignored; the text itself is forwarded to the
// const char* overload.
void Item::SetTitle ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return;
	SetTitle( elem->GetText() );
}
|
||||
|
||||
void Item::SetDesc ( const char* descStr ) {
|
||||
if ( descStr != NULL ) description = BString( descStr );
|
||||
}
|
||||
// Sets the item description from the text of an XML element.
// A NULL element is ignored; the text itself is forwarded to the
// const char* overload.
void Item::SetDesc ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return;
	SetDesc( elem->GetText() );
}
|
||||
|
||||
void Item::SetContent ( const char* contentStr ) {
|
||||
if ( contentStr != NULL ) content = BString( contentStr );
|
||||
}
|
||||
// Sets the item content from the text of an XML element.
// A NULL element is ignored; the text itself is forwarded to the
// const char* overload.
void Item::SetContent ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return;
	SetContent( elem->GetText() );
}
|
||||
|
||||
void Item::SetPostUrl ( const char* urlStr ) {
|
||||
if ( urlStr != NULL )
|
||||
postUrl = BString( urlStr );
|
||||
}
|
||||
// Sets the item's post URL from the text of an XML element.
// A NULL element is ignored; the text itself is forwarded to the
// const char* overload.
void Item::SetPostUrl ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return;
	SetPostUrl( elem->GetText() );
}
|
||||
|
||||
void Item::SetPubDate ( const char* dateStr ) {
|
||||
if ( dateStr != NULL )
|
||||
pubDate = BString( dateStr );
|
||||
}
|
||||
// Sets the item's publication-date string from the text of an XML element.
// A NULL element is ignored; the text itself is forwarded to the
// const char* overload.
void Item::SetPubDate ( tinyxml2::XMLElement* elem ) {
	if ( elem == NULL )
		return;
	SetPubDate( elem->GetText() );
}
|
||||
|
|
19
src/Item.h
19
src/Item.h
|
@ -14,14 +14,25 @@ public:
|
|||
BString pubDate;
|
||||
BString homePage;
|
||||
BString postUrl;
|
||||
std::string content;
|
||||
BString content;
|
||||
BString outputDir;
|
||||
|
||||
BString subject;
|
||||
Item ( BString );
|
||||
|
||||
Item ( BString, BString );
|
||||
bool Filetize ( Config*, bool );
|
||||
|
||||
bool Filetize ( bool );
|
||||
void SetTitle ( const char* );
|
||||
void SetTitle ( tinyxml2::XMLElement* );
|
||||
void SetDesc ( const char* );
|
||||
void SetDesc ( tinyxml2::XMLElement* );
|
||||
void SetContent ( const char* );
|
||||
void SetContent ( tinyxml2::XMLElement* );
|
||||
void SetPostUrl ( const char* );
|
||||
void SetPostUrl ( tinyxml2::XMLElement* );
|
||||
void SetPubDate ( const char* );
|
||||
void SetPubDate ( tinyxml2::XMLElement* );
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
#include <raptor2/raptor2.h>
|
||||
#include <StorageKit.h>
|
||||
#include <String.h>
|
||||
#include <getopt.h>
|
||||
|
@ -8,6 +7,8 @@
|
|||
#include "Config.h"
|
||||
#include "Rifen.h"
|
||||
|
||||
Config* main_cfg;
|
||||
|
||||
int
|
||||
usage ()
|
||||
{
|
||||
|
@ -18,9 +19,8 @@ usage ()
|
|||
bool
|
||||
create_item ( void* item )
|
||||
{
|
||||
printf("hi");
|
||||
Item* itemPtr = (Item*)item;
|
||||
itemPtr->Filetize( false );
|
||||
itemPtr->Filetize( main_cfg, false );
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -78,16 +78,14 @@ invocation ( int argc, char** argv, Config** cfgPtr )
|
|||
int
|
||||
main ( int argc, char** argv )
|
||||
{
|
||||
//
|
||||
//
|
||||
Config* cfg = new Config;
|
||||
main_cfg = new Config;
|
||||
usageMsg.ReplaceAll("%app%", "Rifen");
|
||||
|
||||
invocation( argc, argv, &cfg );
|
||||
invocation( argc, argv, &main_cfg );
|
||||
|
||||
Channel* chan = (Channel*)malloc( sizeof(Channel) );
|
||||
chan = new Channel(cfg->targetFeed, cfg->outDir);
|
||||
chan->Parse(cfg);
|
||||
chan = new Channel(main_cfg->targetFeed, main_cfg->outDir);
|
||||
chan->Parse(main_cfg);
|
||||
|
||||
BList items = chan->items;
|
||||
items.DoForEach(&create_item);
|
||||
|
|
|
@ -37,5 +37,12 @@ BString usageMsg =
|
|||
"Both -t and -T use the ISO 8601 format for specifying datetimes:\n"
|
||||
" YYYY-MM-DDTHH:MM:SS - 2020-01-01T07:07:07\n"
|
||||
"You can leave out seconds, minutes, or hours, but YMD are required.\n"
|
||||
"\n"
|
||||
"NOTE: This message doesn't reflect reality. This is more of a spec of\n"
|
||||
" what I hope this program will be. As of now -t and -T aren't\n"
|
||||
" implemented, and running %app% without a file/url free-argument\n"
|
||||
" is invalid, as the daemon isn't implemented at all. As such,\n"
|
||||
" -D is also non-functional.\n"
|
||||
" But it sure can turn an XML feed into files! Lol.\n"
|
||||
;
|
||||
|
||||
|
|
364
src/parsing.cpp
364
src/parsing.cpp
|
@ -1,6 +1,6 @@
|
|||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <raptor2/raptor2.h>
|
||||
#include <tinyxml2.h>
|
||||
#include "Channel.h"
|
||||
#include "Item.h"
|
||||
#include "parsing.h"
|
||||
|
@ -9,300 +9,92 @@
|
|||
// ============================================================================
|
||||
// PARSERS
|
||||
void
|
||||
feedParser ( Channel** chanPtr )
|
||||
feedParser ( Channel** chanPtr, Config* cfg )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
raptor_parser* rss_parser = NULL;
|
||||
raptor_world* world;
|
||||
world = raptor_new_world();
|
||||
|
||||
unsigned char *uri_string;
|
||||
raptor_uri *uri, *base_uri;
|
||||
tinyxml2::XMLDocument xml;
|
||||
xml.LoadFile( chan->filePath.String() );
|
||||
|
||||
rss_parser = raptor_new_parser( world, "rss-tag-soup" );
|
||||
uri_string = raptor_uri_filename_to_uri_string( chan->filePath.String() );
|
||||
uri = raptor_new_uri( world, uri_string );
|
||||
base_uri = raptor_uri_copy( uri );
|
||||
|
||||
raptor_parser_set_statement_handler( rss_parser, &chan, feedHandler );
|
||||
raptor_parser_parse_file( rss_parser, uri, base_uri );
|
||||
|
||||
raptor_free_parser( rss_parser );
|
||||
raptor_free_uri( base_uri );
|
||||
raptor_free_uri( uri );
|
||||
raptor_free_memory( uri_string );
|
||||
raptor_free_world( world );
|
||||
if ( xml.FirstChildElement("rss") )
|
||||
rssParser( chanPtr, cfg, &xml );
|
||||
else if ( xml.FirstChildElement("feed") )
|
||||
printf("has atom\n");
|
||||
}
|
||||
|
||||
void
|
||||
rssParser ( Channel** chanPtr, Config* cfg, tinyxml2::XMLDocument* xml )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
|
||||
tinyxml2::XMLElement* xchan = xml->FirstChildElement("rss")->FirstChildElement("channel");
|
||||
|
||||
rssRootParse( chanPtr, cfg, xchan );
|
||||
rssParseItems( chanPtr, cfg, xchan );
|
||||
}
|
||||
|
||||
void
|
||||
rssRootParse( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xchan )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
|
||||
chan->SetTitle( xchan->FirstChildElement("title") );
|
||||
chan->SetDesc( xchan->FirstChildElement("description") );
|
||||
chan->SetHomePage( xchan->FirstChildElement("link") );
|
||||
chan->SetLastDate( xchan->FirstChildElement("lastBuildDate") );
|
||||
|
||||
if ( cfg->verbose )
|
||||
printf("Channel '%s' at '%s':\n", chan->title.String(), chan->homePage.String());
|
||||
}
|
||||
|
||||
void
|
||||
rssItemParse ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xitem )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
|
||||
Item* newItem = (Item*)malloc( sizeof(Item) );
|
||||
newItem = new Item( chan->outputDir );
|
||||
|
||||
newItem->SetTitle( xitem->FirstChildElement("title") );
|
||||
newItem->SetDesc( xitem->FirstChildElement("description") );
|
||||
newItem->SetPubDate( xitem->FirstChildElement("pubDate") );
|
||||
newItem->SetContent( xitem->FirstChildElement("content:encoded") );
|
||||
|
||||
if (cfg->verbose )
|
||||
printf("\t%s\n", newItem->title.String());
|
||||
|
||||
chan->items.AddItem( newItem );
|
||||
}
|
||||
|
||||
void
|
||||
rssParseItems ( Channel** chanPtr, Config* cfg, tinyxml2::XMLElement* xchan )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
tinyxml2::XMLElement* xitem;
|
||||
|
||||
xitem = xchan->FirstChildElement("item");
|
||||
|
||||
int itemCount = xmlCountSiblings( xitem, "item" );
|
||||
chan->items = BList(itemCount);
|
||||
|
||||
if ( cfg->verbose )
|
||||
printf("\t-%i items-\n", itemCount);
|
||||
|
||||
while ( xitem ) {
|
||||
rssItemParse( chanPtr, cfg, xitem );
|
||||
xitem = xitem->NextSiblingElement("item");
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
int
|
||||
countItemParser ( const char* filePath )
|
||||
xmlCountSiblings ( tinyxml2::XMLElement* xsibling, const char* sibling_name )
|
||||
{
|
||||
raptor_parser* rss_parser = NULL;
|
||||
raptor_world* world;
|
||||
world = raptor_new_world();
|
||||
int count = 0;
|
||||
|
||||
unsigned char *uri_string;
|
||||
raptor_uri *uri, *base_uri;
|
||||
|
||||
rss_parser = raptor_new_parser(world, "rss-tag-soup");
|
||||
uri_string = raptor_uri_filename_to_uri_string( filePath );
|
||||
uri = raptor_new_uri( world, uri_string );
|
||||
base_uri = raptor_uri_copy( uri );
|
||||
|
||||
int* itemCount = (int*)malloc( sizeof(int) );
|
||||
*itemCount = 0;
|
||||
raptor_parser_set_statement_handler( rss_parser, &itemCount, countItemHandler );
|
||||
raptor_parser_parse_file( rss_parser, uri, base_uri );
|
||||
|
||||
free( itemCount );
|
||||
raptor_free_parser( rss_parser );
|
||||
raptor_free_uri( base_uri );
|
||||
raptor_free_uri( uri );
|
||||
raptor_free_memory( uri_string );
|
||||
raptor_free_world( world );
|
||||
|
||||
return *(itemCount);
|
||||
}
|
||||
|
||||
void
|
||||
printStatementParser ( const char* filePath )
|
||||
{
|
||||
raptor_parser* rss_parser = NULL;
|
||||
raptor_world* world;
|
||||
world = raptor_new_world();
|
||||
|
||||
unsigned char *uri_string;
|
||||
raptor_uri *uri, *base_uri;
|
||||
|
||||
rss_parser = raptor_new_parser(world, "rss-tag-soup");
|
||||
uri_string = raptor_uri_filename_to_uri_string( filePath );
|
||||
uri = raptor_new_uri( world, uri_string );
|
||||
base_uri = raptor_uri_copy( uri );
|
||||
|
||||
raptor_parser_set_statement_handler( rss_parser, NULL, printStatementHandler );
|
||||
raptor_parser_parse_file( rss_parser, uri, base_uri );
|
||||
|
||||
raptor_free_parser( rss_parser );
|
||||
raptor_free_uri( base_uri );
|
||||
raptor_free_uri( uri );
|
||||
raptor_free_memory( uri_string );
|
||||
raptor_free_world( world );
|
||||
}
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// HANDLERS
|
||||
void
|
||||
feedHandler ( void* user_data, raptor_statement* statement )
|
||||
{
|
||||
if ( user_data != NULL ) {
|
||||
Channel** chanPtr = (Channel**)user_data;
|
||||
handleFeedStatement( chanPtr, statement );
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
countItemHandler ( void* user_data, raptor_statement* statement )
|
||||
{
|
||||
int** countPtr = ( int** )user_data;
|
||||
int* count = *(countPtr);
|
||||
|
||||
const char* object = ( const char* )raptor_term_to_string( statement->object );
|
||||
const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
|
||||
|
||||
if (getPredicateTag(predicate) == "type"
|
||||
&& getPredicateTag(object) == "item")
|
||||
*count += 1;
|
||||
}
|
||||
|
||||
void
|
||||
printStatementHandler ( void* user_data, raptor_statement* statement )
|
||||
{
|
||||
int** countPtr = (int**)user_data;
|
||||
int* count = *(countPtr);
|
||||
|
||||
const char* subject = ( const char* )raptor_term_to_string( statement->subject );
|
||||
const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
|
||||
const char* object = ( const char* )raptor_term_to_string( statement->object );
|
||||
|
||||
printf("%s\t-%s\n%.50s\n", subject, predicate, object);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// FEEDHANDLER HELPERS
|
||||
void
|
||||
handleFeedStatement ( Channel** chanPtr, raptor_statement* statement )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
const char* cpredicate = (const char*)raptor_term_to_string( statement->predicate );
|
||||
const char* csubject = (const char*)raptor_term_to_string( statement->subject );
|
||||
const char* cobject = (const char*)raptor_term_to_string( statement->object );
|
||||
|
||||
BString predicate = BString(cpredicate);
|
||||
BString subject = BString(csubject);
|
||||
BString bobject = BString(cobject);
|
||||
|
||||
bobject.ReplaceAll("\\\"","\"");
|
||||
bobject.ReplaceFirst("\"","");
|
||||
bobject.ReplaceLast("\"","");
|
||||
|
||||
std::string object = unescape(bobject.String());
|
||||
|
||||
predicate = getPredicateTag( predicate );
|
||||
|
||||
if ( predicate == "type" && getPredicateTag( object ) == "channel" )
|
||||
chan->topLevelSubject = subject;
|
||||
|
||||
if ( subject != chan->topLevelSubject )
|
||||
// handleChannelStatement( chanPtr, predicate, object );
|
||||
// else
|
||||
handleItemStatement( chanPtr, subject, predicate, object );
|
||||
}
|
||||
|
||||
void
|
||||
handleChannelStatement ( Channel** chanPtr, BString predicate, BString object )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
}
|
||||
|
||||
void
|
||||
handleItemStatement ( Channel** chanPtr, BString subject, BString predicate, std::string object )
|
||||
{
|
||||
Channel* chan = *(chanPtr);
|
||||
if ( subject.StartsWith("_:genid") )
|
||||
return;
|
||||
|
||||
chan->title = BString("dad");
|
||||
|
||||
if ( subject != chan->lastSubject ) {
|
||||
chan->lastSubject = subject;
|
||||
|
||||
Item* newItem = (Item*)malloc( sizeof(Item) );
|
||||
newItem = new Item( subject, chan->outputDir );
|
||||
|
||||
chan->items.AddItem( newItem );
|
||||
while ( xsibling ) {
|
||||
count++;
|
||||
xsibling = xsibling->NextSiblingElement(sibling_name);
|
||||
}
|
||||
|
||||
Item* nowItem = (Item*)chan->items.LastItem();
|
||||
|
||||
if ( predicate == "title" )
|
||||
nowItem->title = BString(object.c_str());
|
||||
if ( predicate == "encoded" || predicate == "Atomcontent" )
|
||||
nowItem->content = object;
|
||||
if ( predicate == "description" )
|
||||
nowItem->description = BString(object.c_str());
|
||||
if ( predicate == "link" || predicate == "Atomlink" )
|
||||
nowItem->postUrl = BString(object.c_str());
|
||||
if ( predicate == "Atomhref" )
|
||||
nowItem->postUrl = BString(object.c_str());
|
||||
if ( predicate == "date" || predicate == "Atompublished" ) // 2019-02-18T01:43:43Z
|
||||
nowItem->pubDate = BString(object.c_str());
|
||||
if ( predicate == "pubDate" ) // Sun, 17 Feb 2019 19:43:43 -0600
|
||||
nowItem->pubDate = BString(object.c_str());
|
||||
}
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// UTIL
|
||||
// Reduces a term string to its trailing tag: removes everything up to and
// including the last '/', then everything up to and including the last
// '#' of what remains, then the last ">" if present.
BString
getPredicateTag ( BString spec )
{
	spec.RemoveChars( 0, spec.FindLast( '/' ) + 1 );
	spec.RemoveChars( 0, spec.FindLast( '#' ) + 1 );
	spec.RemoveLast( ">" );

	return spec;
}
|
||||
// C-string convenience overload: wraps spec in a BString and defers to
// the BString overload.
BString
getPredicateTag ( const char* spec )
{
	BString wrapped( spec );
	return getPredicateTag( wrapped );
}
|
||||
// std::string convenience overload: defers to the C-string overload.
BString
getPredicateTag ( std::string spec )
{
	const char* raw = spec.c_str();
	return getPredicateTag( raw );
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/* What ensues is a terrifying violation of the human form.
|
||||
* Just atrocious. I deserve to be impaled by an ice-pick.
|
||||
* ... something (unfortunately), directly ripped from StackOverflow.
|
||||
* So when getting a raptor_statement's object, it's a char array filled
|
||||
* with escaped characters (\U2901, etc).
|
||||
* I'm really not sure how to best manage this, so SO.
|
||||
* Thanks remy-lebeau, I owe you.
|
||||
* https://stackoverflow.com/questions/28534221 */
|
||||
// Encodes a single Unicode code point as a UTF-8 byte sequence.
// Returns an empty string for values above U+10FFFF.
std::string
toUtf8 ( uint32_t cp )
{
	std::string result;

	int count;
	if (cp <= 0x007F)
		count = 1;
	else if (cp <= 0x07FF)
		count = 2;
	else if (cp <= 0xFFFF)
		count = 3;
	else if (cp <= 0x10FFFF)
		count = 4;
	else
		return result; // not a valid code point

	result.resize(count);

	if (count > 1) {
		// Fill the continuation bytes from the back, six bits at a time.
		for (int i = count-1; i > 0; --i) {
			result[i] = (char) (0x80 | (cp & 0x3F));
			cp >>= 6;
		}

		// Set the `count` leading 1-bits of the first byte.
		for (int i = 0; i < count; ++i)
			cp |= (1 << (7-i));
	}

	result[0] = (char) cp;
	return result;
}

// Replaces every occurrence of `escape` followed by hexadecimal digits
// with the UTF-8 encoding of the code point those digits spell.
//
//   str    - text to process (taken by value; the converted copy is returned)
//   escape - escape introducer, e.g. "\\u" or "\\U"
//
// For "\\u" at most 4 digits are consumed and for "\\U" at most 8, so
// text such as "\u00e9d" becomes "é" followed by "d" rather than being
// read as the single value 0xE9D. Any other introducer consumes the whole
// run of hex digits. An escape whose digits run to the end of the string
// is converted too. An introducer with no usable digits is left in place.
std::string
unescape ( std::string str, std::string escape )
{
	static const char* const kHexDigits = "0123456789abcdefABCDEF";

	const std::string::size_type escLen = escape.length();
	if (escLen == 0)
		return str;

	std::string::size_type maxDigits = std::string::npos;
	if (escape == "\\u")
		maxDigits = 4;
	else if (escape == "\\U")
		maxDigits = 8;

	std::string::size_type pos = 0;
	while ((pos = str.find(escape, pos)) != std::string::npos) {
		const std::string::size_type digitsStart = pos + escLen;

		std::string::size_type digitsEnd = str.find_first_not_of(kHexDigits, digitsStart);
		if (digitsEnd == std::string::npos)
			digitsEnd = str.length(); // digits run to the end of the string

		if (maxDigits != std::string::npos && digitsEnd - digitsStart > maxDigits)
			digitsEnd = digitsStart + maxDigits;

		const std::string digits = str.substr(digitsStart, digitsEnd - digitsStart);
		std::istringstream iss(digits);

		uint32_t cp;
		if (!digits.empty() && (iss >> std::hex >> cp)) {
			const std::string utf8 = toUtf8(cp);
			str.replace(pos, escLen + digits.length(), utf8);
			pos += utf8.length();
		}
		else
			pos += escLen; // not a usable escape; skip past the introducer
	}

	return str;
}
|
||||
|
||||
std::string
|
||||
unescape (const char* str )
|
||||
{
|
||||
return unescape(std::string( unescape(std::string(str), "\\u") ), "\\U");
|
||||
return count;
|
||||
}
|
||||
|
|
|
@ -1,28 +1,15 @@
|
|||
#ifndef PARSE_H
|
||||
#define PARSE_H
|
||||
|
||||
#include <iostream>
|
||||
#include <raptor2/raptor2.h>
|
||||
#include <tinyxml2.h>
|
||||
#include "Config.h"
|
||||
#include "Channel.h"
|
||||
|
||||
|
||||
void feedParser (Channel**);
|
||||
void feedHandler ( void*, raptor_statement* );
|
||||
void handleFeedStatement ( Channel**, raptor_statement* );
|
||||
void handleChannelStatement ( Channel**, BString, BString );
|
||||
void handleItemStatement ( Channel**, BString, BString, std::string );
|
||||
|
||||
int countItemParser ( const char* );
|
||||
void countItemHandler ( void*, raptor_statement* );
|
||||
|
||||
void printStatementParser ( const char* );
|
||||
void printStatementHandler ( void*, raptor_statement* );
|
||||
|
||||
BString getPredicateTag ( const char* );
|
||||
BString getPredicateTag ( BString );
|
||||
BString getPredicateTag ( std::string );
|
||||
std::string to_utf ( uint32 );
|
||||
std::string unescape ( std::string, std::string );
|
||||
std::string unescape ( const char* );
|
||||
void feedParser ( Channel**, Config* );
|
||||
void rssParser ( Channel**, Config*, tinyxml2::XMLDocument* );
|
||||
void rssRootParse ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||
void rssItemParse ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||
void rssParseItems ( Channel**, Config*, tinyxml2::XMLElement* );
|
||||
int xmlCountSiblings ( tinyxml2::XMLElement*, const char* );
|
||||
|
||||
#endif
|
||||
|
|
Ŝarĝante…
Reference in New Issue