Reorganize parsing

This commit is contained in:
Jaidyn Ann 2020-06-20 01:01:28 -05:00
parent f921d9f6c3
commit 00c42a860c
3 changed files with 85 additions and 35 deletions

View File

@ -18,8 +18,8 @@ Channel::Channel ( BString path )
void void
Channel::Parse ( ) Channel::Parse ( )
{ {
int itemCount = countFeedItems( filePath.String() ); int itemCount = countItemParser( filePath.String() );
items = BList(itemCount); items = BList(itemCount);
Channel* chan = this; Channel* chan = this;
processFeedItems(&chan); feedParser(&chan);
} }

View File

@ -3,12 +3,41 @@
#include "Item.h" #include "Item.h"
#include "parsing.h" #include "parsing.h"
// predicate == sweet https version of tag (e.g. <http://purl.org/rss/1.0/modules/rss091#language> ) /* predicate == sweet http version of tag
// subject == parent subject == parent
// object == data object == data */
// ============================================================================
// PARSERS
void
feedParser ( Channel** chanPtr )
{
Channel* chan = *(chanPtr);
raptor_parser* rss_parser = NULL;
raptor_world* world;
world = raptor_new_world();
unsigned char *uri_string;
raptor_uri *uri, *base_uri;
rss_parser = raptor_new_parser(world, "rss-tag-soup");
uri_string = raptor_uri_filename_to_uri_string( chan->filePath.String() );
uri = raptor_new_uri( world, uri_string );
base_uri = raptor_uri_copy( uri );
raptor_parser_set_statement_handler( rss_parser, &chan, feedHandler );
raptor_parser_parse_file( rss_parser, uri, base_uri );
raptor_free_parser(rss_parser);
raptor_free_uri(base_uri);
raptor_free_uri(uri);
raptor_free_memory(uri_string);
raptor_free_world( world );
}
// -------------------------------------
int int
countFeedItems ( const char* filePath ) countItemParser ( const char* filePath )
{ {
raptor_parser* rss_parser = NULL; raptor_parser* rss_parser = NULL;
raptor_world* world; raptor_world* world;
@ -38,9 +67,8 @@ countFeedItems ( const char* filePath )
} }
void void
processFeedItems ( Channel** chanPtr ) printStatementParser ( const char* filePath )
{ {
Channel* chan = *(chanPtr);
raptor_parser* rss_parser = NULL; raptor_parser* rss_parser = NULL;
raptor_world* world; raptor_world* world;
world = raptor_new_world(); world = raptor_new_world();
@ -49,11 +77,11 @@ processFeedItems ( Channel** chanPtr )
raptor_uri *uri, *base_uri; raptor_uri *uri, *base_uri;
rss_parser = raptor_new_parser(world, "rss-tag-soup"); rss_parser = raptor_new_parser(world, "rss-tag-soup");
uri_string = raptor_uri_filename_to_uri_string( chan->filePath.String() ); uri_string = raptor_uri_filename_to_uri_string( filePath );
uri = raptor_new_uri( world, uri_string ); uri = raptor_new_uri( world, uri_string );
base_uri = raptor_uri_copy( uri ); base_uri = raptor_uri_copy( uri );
raptor_parser_set_statement_handler( rss_parser, &chan, channelHandler ); raptor_parser_set_statement_handler( rss_parser, NULL, printStatementHandler );
raptor_parser_parse_file( rss_parser, uri, base_uri ); raptor_parser_parse_file( rss_parser, uri, base_uri );
raptor_free_parser(rss_parser); raptor_free_parser(rss_parser);
@ -63,58 +91,73 @@ processFeedItems ( Channel** chanPtr )
raptor_free_world( world ); raptor_free_world( world );
} }
// ============================================================================
// HANDLERS
void void
channelHandler ( void* user_data, raptor_statement* statement ) feedHandler ( void* user_data, raptor_statement* statement )
{ {
if ( user_data != NULL ) { if ( user_data != NULL ) {
Channel** chanPtr = (Channel**)user_data; Channel** chanPtr = (Channel**)user_data;
parseRssStatement( chanPtr, statement ); handleFeedStatement( chanPtr, statement );
} }
} }
void void
countItemHandler ( void* user_data, raptor_statement* statement ) countItemHandler ( void* user_data, raptor_statement* statement )
{ {
int** countPtr = (int**)user_data; int** countPtr = ( int** )user_data;
int* count = *(countPtr); int* count = *(countPtr);
const char* object = (const char*)raptor_term_to_string(statement->object); const char* object = ( const char* )raptor_term_to_string( statement->object );
const char* predicate = (const char*)raptor_term_to_string(statement->predicate); const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
if (getPredicateTag(predicate) == "type" if (getPredicateTag(predicate) == "type"
&& getPredicateTag(object) == "item") && getPredicateTag(object) == "item")
*count += 1; *count += 1;
} }
// ============================================================================
void void
parseRssStatement ( Channel** chanPtr, raptor_statement* statement ) printStatementHandler ( void* user_data, raptor_statement* statement )
{
int** countPtr = (int**)user_data;
int* count = *(countPtr);
const char* subject = ( const char* )raptor_term_to_string( statement->subject );
const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
const char* object = ( const char* )raptor_term_to_string( statement->object );
printf("%s\t-%s\n%.5s\n", subject, predicate, object);
}
// ----------------------------------------------------------------------------
// FEEDHANDLER HELPERS
void
handleFeedStatement ( Channel** chanPtr, raptor_statement* statement )
{ {
Channel* chan = *(chanPtr); Channel* chan = *(chanPtr);
BString predicate = BString( (const char*)raptor_term_to_string(statement->predicate) ); BString predicate = BString(( const char* )raptor_term_to_string( statement->predicate ));
BString subject = BString( (const char*)raptor_term_to_string(statement->subject) ); BString subject = BString(( const char* )raptor_term_to_string( statement->subject ));
BString object = BString( (const char*)raptor_term_to_string(statement->object) ); BString object = BString(( const char* )raptor_term_to_string( statement->object ));
predicate = getPredicateTag( predicate ); predicate = getPredicateTag( predicate );
if (predicate == "type" && getPredicateTag(object) == "channel") if ( predicate == "type" && getPredicateTag( object ) == "channel" )
chan->topLevelSubject = subject; chan->topLevelSubject = subject;
if ( subject != chan->topLevelSubject ) if ( subject != chan->topLevelSubject )
// parseChannelStatement( chanPtr, predicate, object ); // handleChannelStatement( chanPtr, predicate, object );
// else // else
parseItemStatement( chanPtr, subject, predicate, object ); handleItemStatement( chanPtr, subject, predicate, object );
} }
void void
parseChannelStatement ( Channel** chanPtr, BString predicate, BString object ) handleChannelStatement ( Channel** chanPtr, BString predicate, BString object )
{ {
Channel* chan = *(chanPtr); Channel* chan = *(chanPtr);
} }
void void
parseItemStatement ( Channel** chanPtr, BString subject, BString predicate, BString object ) handleItemStatement ( Channel** chanPtr, BString subject, BString predicate, BString object )
{ {
Channel* chan = *(chanPtr); Channel* chan = *(chanPtr);
if ( subject.StartsWith("_:genid") ) if ( subject.StartsWith("_:genid") )
@ -140,6 +183,9 @@ parseItemStatement ( Channel** chanPtr, BString subject, BString predicate, BStr
nowItem->content = object; nowItem->content = object;
} }
// ============================================================================
// UTIL
BString BString
getPredicateTag ( BString spec ) getPredicateTag ( BString spec )
{ {

View File

@ -5,15 +5,19 @@
#include "Channel.h" #include "Channel.h"
void channelHandler ( void*, raptor_statement* ); void feedParser (Channel**);
void feedHandler ( void*, raptor_statement* );
void handleFeedStatement ( Channel**, raptor_statement* );
void handleChannelStatement ( Channel**, BString, BString );
void handleItemStatement ( Channel**, BString, BString, BString );
int countItemParser ( const char* );
void countItemHandler ( void*, raptor_statement* ); void countItemHandler ( void*, raptor_statement* );
void parseRssStatement ( Channel**, raptor_statement* );
void printStatementParser ( const char* );
void printStatementHandler ( void*, raptor_statement* );
BString getPredicateTag ( char* ); BString getPredicateTag ( char* );
BString getPredicateTag ( BString ); BString getPredicateTag ( BString );
void parseChannelStatement ( Channel**, BString, BString );
void parseItemStatement ( Channel**, BString, BString, BString );
int countFeedItems ( const char* );
void processFeedItems (Channel**);
#endif #endif