From 00c42a860c8644eae2c170afb078613471dd84c6 Mon Sep 17 00:00:00 2001
From: Jaidyn Ann
Date: Sat, 20 Jun 2020 01:01:28 -0500
Subject: [PATCH] Reorganize parsing

---
Review notes (this section is ignored when the patch is applied):

* Layout. This patch had lost its line structure. Line breaks were
  rebuilt so that every hunk matches the old/new line counts in its
  "@@" header and the diffstat below (85 insertions, 35 deletions).
  Indentation (tabs assumed), the exact position of blank context
  lines and any column alignment inside a line are best-effort; if
  context whitespace does not match the tree, try
  `git apply --ignore-whitespace`.

* Fix in printStatementHandler (src/parsing.cpp). The handler started
  with the prologue copied from countItemHandler:
      int** countPtr = (int**)user_data;
      int* count = *(countPtr);
  but printStatementParser registers it with NULL as user data, so
  the second line dereferenced a null pointer and `count` was never
  used. Both lines are replaced by two lines that do not read
  user_data; the hunk counts and the diffstat are unchanged. As a
  result the post-image of src/parsing.cpp no longer corresponds to
  blob b6f5e84 / commit 00c42a8.

* NOTE(review), not changed here: countItemHandler,
  printStatementHandler and handleFeedStatement keep the pointers
  returned by raptor_term_to_string() without ever releasing them.
  The raptor2 documentation describes the result as a newly allocated
  string to be released with raptor_free_memory() -- confirm and free
  them in a follow-up.

 src/Channel.cpp |  4 +-
 src/parsing.cpp | 98 ++++++++++++++++++++++++++++++++++++-------------
 src/parsing.h   | 18 +++++----
 3 files changed, 85 insertions(+), 35 deletions(-)

diff --git a/src/Channel.cpp b/src/Channel.cpp
index d386a79..5bc88aa 100644
--- a/src/Channel.cpp
+++ b/src/Channel.cpp
@@ -18,8 +18,8 @@ Channel::Channel ( BString path )
 void
 Channel::Parse ( )
 {
-	int itemCount = countFeedItems( filePath.String() );
+	int itemCount = countItemParser( filePath.String() );
 	items = BList(itemCount);
 	Channel* chan = this;
-	processFeedItems(&chan);
+	feedParser(&chan);
 }
diff --git a/src/parsing.cpp b/src/parsing.cpp
index 5bd9d44..b6f5e84 100644
--- a/src/parsing.cpp
+++ b/src/parsing.cpp
@@ -3,12 +3,41 @@
 #include "Item.h"
 #include "parsing.h"
 
-// predicate == sweet https version of tag (e.g. )
-// subject == parent
-// object == data
+/* predicate == sweet http version of tag
+   subject == parent
+   object == data */
 
+// ============================================================================
+// PARSERS
+void
+feedParser ( Channel** chanPtr )
+{
+	Channel* chan = *(chanPtr);
+	raptor_parser* rss_parser = NULL;
+	raptor_world* world;
+	world = raptor_new_world();
+
+	unsigned char *uri_string;
+	raptor_uri *uri, *base_uri;
+
+	rss_parser = raptor_new_parser(world, "rss-tag-soup");
+	uri_string = raptor_uri_filename_to_uri_string( chan->filePath.String() );
+	uri = raptor_new_uri( world, uri_string );
+	base_uri = raptor_uri_copy( uri );
+
+	raptor_parser_set_statement_handler( rss_parser, &chan, feedHandler );
+	raptor_parser_parse_file( rss_parser, uri, base_uri );
+
+	raptor_free_parser(rss_parser);
+	raptor_free_uri(base_uri);
+	raptor_free_uri(uri);
+	raptor_free_memory(uri_string);
+	raptor_free_world( world );
+}
+
+// -------------------------------------
 int
-countFeedItems ( const char* filePath )
+countItemParser ( const char* filePath )
 {
 	raptor_parser* rss_parser = NULL;
 	raptor_world* world;
@@ -38,9 +67,8 @@ countFeedItems ( const char* filePath )
 }
 
 void
-processFeedItems ( Channel** chanPtr )
+printStatementParser ( const char* filePath )
 {
-	Channel* chan = *(chanPtr);
 	raptor_parser* rss_parser = NULL;
 	raptor_world* world;
 	world = raptor_new_world();
@@ -49,11 +77,11 @@ processFeedItems ( Channel** chanPtr )
 	raptor_uri *uri, *base_uri;
 
 	rss_parser = raptor_new_parser(world, "rss-tag-soup");
-	uri_string = raptor_uri_filename_to_uri_string( chan->filePath.String() );
+	uri_string = raptor_uri_filename_to_uri_string( filePath );
 	uri = raptor_new_uri( world, uri_string );
 	base_uri = raptor_uri_copy( uri );
 
-	raptor_parser_set_statement_handler( rss_parser, &chan, channelHandler );
+	raptor_parser_set_statement_handler( rss_parser, NULL, printStatementHandler );
 	raptor_parser_parse_file( rss_parser, uri, base_uri );
 
 	raptor_free_parser(rss_parser);
@@ -63,58 +91,73 @@ processFeedItems ( Channel** chanPtr )
 	raptor_free_world( world );
 }
 
+
+// ============================================================================
+// HANDLERS
 void
-channelHandler ( void* user_data, raptor_statement* statement )
+feedHandler ( void* user_data, raptor_statement* statement )
 {
 	if ( user_data != NULL ) {
 		Channel** chanPtr = (Channel**)user_data;
-		parseRssStatement( chanPtr, statement );
+		handleFeedStatement( chanPtr, statement );
 	}
 }
 
 void
 countItemHandler ( void* user_data, raptor_statement* statement )
 {
-	int** countPtr = (int**)user_data;
-	int* count = *(countPtr);
+	int** countPtr = ( int** )user_data;
+	int* count = *(countPtr);
 
-	const char* object = (const char*)raptor_term_to_string(statement->object);
-	const char* predicate = (const char*)raptor_term_to_string(statement->predicate);
+	const char* object = ( const char* )raptor_term_to_string( statement->object );
+	const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
 
 	if (getPredicateTag(predicate) == "type" && getPredicateTag(object) == "item")
 		*count += 1;
 }
 
-
-// ============================================================================
-
 void
-parseRssStatement ( Channel** chanPtr, raptor_statement* statement )
+printStatementHandler ( void* user_data, raptor_statement* statement )
+{
+	// printStatementParser registers this handler with NULL user data: nothing to unpack.
+	(void)user_data;
+
+	const char* subject = ( const char* )raptor_term_to_string( statement->subject );
+	const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
+	const char* object = ( const char* )raptor_term_to_string( statement->object );
+
+	printf("%s\t-%s\n%.5s\n", subject, predicate, object);
+}
+
+// ----------------------------------------------------------------------------
+// FEEDHANDLER HELPERS
+void
+handleFeedStatement ( Channel** chanPtr, raptor_statement* statement )
 {
 	Channel* chan = *(chanPtr);
-	BString predicate = BString( (const char*)raptor_term_to_string(statement->predicate) );
-	BString subject = BString( (const char*)raptor_term_to_string(statement->subject) );
-	BString object = BString( (const char*)raptor_term_to_string(statement->object) );
+	BString predicate = BString(( const char* )raptor_term_to_string( statement->predicate ));
+	BString subject = BString(( const char* )raptor_term_to_string( statement->subject ));
+	BString object = BString(( const char* )raptor_term_to_string( statement->object ));
 
 	predicate = getPredicateTag( predicate );
 
-	if (predicate == "type" && getPredicateTag(object) == "channel")
+	if ( predicate == "type" && getPredicateTag( object ) == "channel" )
 		chan->topLevelSubject = subject;
 
 	if ( subject != chan->topLevelSubject )
-//		parseChannelStatement( chanPtr, predicate, object );
+//		handleChannelStatement( chanPtr, predicate, object );
 //	else
-		parseItemStatement( chanPtr, subject, predicate, object );
+		handleItemStatement( chanPtr, subject, predicate, object );
 }
 
 void
-parseChannelStatement ( Channel** chanPtr, BString predicate, BString object )
+handleChannelStatement ( Channel** chanPtr, BString predicate, BString object )
 {
 	Channel* chan = *(chanPtr);
 }
 
 void
-parseItemStatement ( Channel** chanPtr, BString subject, BString predicate, BString object )
+handleItemStatement ( Channel** chanPtr, BString subject, BString predicate, BString object )
 {
 	Channel* chan = *(chanPtr);
 	if ( subject.StartsWith("_:genid") )
@@ -140,6 +183,9 @@ parseItemStatement ( Channel** chanPtr, BString subject, BString predicate, BStr
 		nowItem->content = object;
 }
 
+
+// ============================================================================
+// UTIL
 BString
 getPredicateTag ( BString spec )
 {
diff --git a/src/parsing.h b/src/parsing.h
index ae9db58..e299c7a 100644
--- a/src/parsing.h
+++ b/src/parsing.h
@@ -5,15 +5,19 @@
 #include "Channel.h"
 
 
-void channelHandler ( void*, raptor_statement* );
+void feedParser (Channel**);
+void feedHandler ( void*, raptor_statement* );
+void handleFeedStatement ( Channel**, raptor_statement* );
+void handleChannelStatement ( Channel**, BString, BString );
+void handleItemStatement ( Channel**, BString, BString, BString );
+
+int countItemParser ( const char* );
 void countItemHandler ( void*, raptor_statement* );
-void parseRssStatement ( Channel**, raptor_statement* );
+
+void printStatementParser ( const char* );
+void printStatementHandler ( void*, raptor_statement* );
+
 BString getPredicateTag ( char* );
 BString getPredicateTag ( BString );
-void parseChannelStatement ( Channel**, BString, BString );
-void parseItemStatement ( Channel**, BString, BString, BString );
-
-int countFeedItems ( const char* );
-void processFeedItems (Channel**);
 
 #endif