Reorganize parsing
This commit is contained in:
parent
f921d9f6c3
commit
00c42a860c
|
@ -18,8 +18,8 @@ Channel::Channel ( BString path )
|
||||||
void
|
void
|
||||||
Channel::Parse ( )
|
Channel::Parse ( )
|
||||||
{
|
{
|
||||||
int itemCount = countFeedItems( filePath.String() );
|
int itemCount = countItemParser( filePath.String() );
|
||||||
items = BList(itemCount);
|
items = BList(itemCount);
|
||||||
Channel* chan = this;
|
Channel* chan = this;
|
||||||
processFeedItems(&chan);
|
feedParser(&chan);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,12 +3,41 @@
|
||||||
#include "Item.h"
|
#include "Item.h"
|
||||||
#include "parsing.h"
|
#include "parsing.h"
|
||||||
|
|
||||||
/* Each raptor statement handled below is a triple:
   predicate == URI form of the tag (e.g. <http://purl.org/rss/1.0/modules/rss091#language>)
   subject   == parent
   object    == data */
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// PARSERS
|
||||||
|
void
|
||||||
|
feedParser ( Channel** chanPtr )
|
||||||
|
{
|
||||||
|
Channel* chan = *(chanPtr);
|
||||||
|
raptor_parser* rss_parser = NULL;
|
||||||
|
raptor_world* world;
|
||||||
|
world = raptor_new_world();
|
||||||
|
|
||||||
|
unsigned char *uri_string;
|
||||||
|
raptor_uri *uri, *base_uri;
|
||||||
|
|
||||||
|
rss_parser = raptor_new_parser(world, "rss-tag-soup");
|
||||||
|
uri_string = raptor_uri_filename_to_uri_string( chan->filePath.String() );
|
||||||
|
uri = raptor_new_uri( world, uri_string );
|
||||||
|
base_uri = raptor_uri_copy( uri );
|
||||||
|
|
||||||
|
raptor_parser_set_statement_handler( rss_parser, &chan, feedHandler );
|
||||||
|
raptor_parser_parse_file( rss_parser, uri, base_uri );
|
||||||
|
|
||||||
|
raptor_free_parser(rss_parser);
|
||||||
|
raptor_free_uri(base_uri);
|
||||||
|
raptor_free_uri(uri);
|
||||||
|
raptor_free_memory(uri_string);
|
||||||
|
raptor_free_world( world );
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------
|
||||||
int
|
int
|
||||||
countFeedItems ( const char* filePath )
|
countItemParser ( const char* filePath )
|
||||||
{
|
{
|
||||||
raptor_parser* rss_parser = NULL;
|
raptor_parser* rss_parser = NULL;
|
||||||
raptor_world* world;
|
raptor_world* world;
|
||||||
|
@ -38,9 +67,8 @@ countFeedItems ( const char* filePath )
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
processFeedItems ( Channel** chanPtr )
|
printStatementParser ( const char* filePath )
|
||||||
{
|
{
|
||||||
Channel* chan = *(chanPtr);
|
|
||||||
raptor_parser* rss_parser = NULL;
|
raptor_parser* rss_parser = NULL;
|
||||||
raptor_world* world;
|
raptor_world* world;
|
||||||
world = raptor_new_world();
|
world = raptor_new_world();
|
||||||
|
@ -49,11 +77,11 @@ processFeedItems ( Channel** chanPtr )
|
||||||
raptor_uri *uri, *base_uri;
|
raptor_uri *uri, *base_uri;
|
||||||
|
|
||||||
rss_parser = raptor_new_parser(world, "rss-tag-soup");
|
rss_parser = raptor_new_parser(world, "rss-tag-soup");
|
||||||
uri_string = raptor_uri_filename_to_uri_string( chan->filePath.String() );
|
uri_string = raptor_uri_filename_to_uri_string( filePath );
|
||||||
uri = raptor_new_uri( world, uri_string );
|
uri = raptor_new_uri( world, uri_string );
|
||||||
base_uri = raptor_uri_copy( uri );
|
base_uri = raptor_uri_copy( uri );
|
||||||
|
|
||||||
raptor_parser_set_statement_handler( rss_parser, &chan, channelHandler );
|
raptor_parser_set_statement_handler( rss_parser, NULL, printStatementHandler );
|
||||||
raptor_parser_parse_file( rss_parser, uri, base_uri );
|
raptor_parser_parse_file( rss_parser, uri, base_uri );
|
||||||
|
|
||||||
raptor_free_parser(rss_parser);
|
raptor_free_parser(rss_parser);
|
||||||
|
@ -63,58 +91,73 @@ processFeedItems ( Channel** chanPtr )
|
||||||
raptor_free_world( world );
|
raptor_free_world( world );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// HANDLERS
|
||||||
void
|
void
|
||||||
channelHandler ( void* user_data, raptor_statement* statement )
|
feedHandler ( void* user_data, raptor_statement* statement )
|
||||||
{
|
{
|
||||||
if ( user_data != NULL ) {
|
if ( user_data != NULL ) {
|
||||||
Channel** chanPtr = (Channel**)user_data;
|
Channel** chanPtr = (Channel**)user_data;
|
||||||
parseRssStatement( chanPtr, statement );
|
handleFeedStatement( chanPtr, statement );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
countItemHandler ( void* user_data, raptor_statement* statement )
|
countItemHandler ( void* user_data, raptor_statement* statement )
|
||||||
{
|
{
|
||||||
int** countPtr = (int**)user_data;
|
int** countPtr = ( int** )user_data;
|
||||||
int* count = *(countPtr);
|
int* count = *(countPtr);
|
||||||
|
|
||||||
const char* object = (const char*)raptor_term_to_string(statement->object);
|
const char* object = ( const char* )raptor_term_to_string( statement->object );
|
||||||
const char* predicate = (const char*)raptor_term_to_string(statement->predicate);
|
const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
|
||||||
|
|
||||||
if (getPredicateTag(predicate) == "type"
|
if (getPredicateTag(predicate) == "type"
|
||||||
&& getPredicateTag(object) == "item")
|
&& getPredicateTag(object) == "item")
|
||||||
*count += 1;
|
*count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
void
|
void
|
||||||
parseRssStatement ( Channel** chanPtr, raptor_statement* statement )
|
printStatementHandler ( void* user_data, raptor_statement* statement )
|
||||||
|
{
|
||||||
|
int** countPtr = (int**)user_data;
|
||||||
|
int* count = *(countPtr);
|
||||||
|
|
||||||
|
const char* subject = ( const char* )raptor_term_to_string( statement->subject );
|
||||||
|
const char* predicate = ( const char* )raptor_term_to_string( statement->predicate );
|
||||||
|
const char* object = ( const char* )raptor_term_to_string( statement->object );
|
||||||
|
|
||||||
|
printf("%s\t-%s\n%.5s\n", subject, predicate, object);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
// FEEDHANDLER HELPERS
|
||||||
|
void
|
||||||
|
handleFeedStatement ( Channel** chanPtr, raptor_statement* statement )
|
||||||
{
|
{
|
||||||
Channel* chan = *(chanPtr);
|
Channel* chan = *(chanPtr);
|
||||||
BString predicate = BString( (const char*)raptor_term_to_string(statement->predicate) );
|
BString predicate = BString(( const char* )raptor_term_to_string( statement->predicate ));
|
||||||
BString subject = BString( (const char*)raptor_term_to_string(statement->subject) );
|
BString subject = BString(( const char* )raptor_term_to_string( statement->subject ));
|
||||||
BString object = BString( (const char*)raptor_term_to_string(statement->object) );
|
BString object = BString(( const char* )raptor_term_to_string( statement->object ));
|
||||||
predicate = getPredicateTag( predicate );
|
predicate = getPredicateTag( predicate );
|
||||||
|
|
||||||
if (predicate == "type" && getPredicateTag(object) == "channel")
|
if ( predicate == "type" && getPredicateTag( object ) == "channel" )
|
||||||
chan->topLevelSubject = subject;
|
chan->topLevelSubject = subject;
|
||||||
|
|
||||||
if ( subject != chan->topLevelSubject )
|
if ( subject != chan->topLevelSubject )
|
||||||
// parseChannelStatement( chanPtr, predicate, object );
|
// handleChannelStatement( chanPtr, predicate, object );
|
||||||
// else
|
// else
|
||||||
parseItemStatement( chanPtr, subject, predicate, object );
|
handleItemStatement( chanPtr, subject, predicate, object );
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
parseChannelStatement ( Channel** chanPtr, BString predicate, BString object )
|
handleChannelStatement ( Channel** chanPtr, BString predicate, BString object )
|
||||||
{
|
{
|
||||||
Channel* chan = *(chanPtr);
|
Channel* chan = *(chanPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
parseItemStatement ( Channel** chanPtr, BString subject, BString predicate, BString object )
|
handleItemStatement ( Channel** chanPtr, BString subject, BString predicate, BString object )
|
||||||
{
|
{
|
||||||
Channel* chan = *(chanPtr);
|
Channel* chan = *(chanPtr);
|
||||||
if ( subject.StartsWith("_:genid") )
|
if ( subject.StartsWith("_:genid") )
|
||||||
|
@ -140,6 +183,9 @@ parseItemStatement ( Channel** chanPtr, BString subject, BString predicate, BStr
|
||||||
nowItem->content = object;
|
nowItem->content = object;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// UTIL
|
||||||
BString
|
BString
|
||||||
getPredicateTag ( BString spec )
|
getPredicateTag ( BString spec )
|
||||||
{
|
{
|
||||||
|
|
|
@ -5,15 +5,19 @@
|
||||||
#include "Channel.h"
|
#include "Channel.h"
|
||||||
|
|
||||||
|
|
||||||
void channelHandler ( void*, raptor_statement* );
|
void feedParser (Channel**);
|
||||||
|
void feedHandler ( void*, raptor_statement* );
|
||||||
|
void handleFeedStatement ( Channel**, raptor_statement* );
|
||||||
|
void handleChannelStatement ( Channel**, BString, BString );
|
||||||
|
void handleItemStatement ( Channel**, BString, BString, BString );
|
||||||
|
|
||||||
|
int countItemParser ( const char* );
|
||||||
void countItemHandler ( void*, raptor_statement* );
|
void countItemHandler ( void*, raptor_statement* );
|
||||||
void parseRssStatement ( Channel**, raptor_statement* );
|
|
||||||
|
void printStatementParser ( const char* );
|
||||||
|
void printStatementHandler ( void*, raptor_statement* );
|
||||||
|
|
||||||
BString getPredicateTag ( char* );
|
BString getPredicateTag ( char* );
|
||||||
BString getPredicateTag ( BString );
|
BString getPredicateTag ( BString );
|
||||||
void parseChannelStatement ( Channel**, BString, BString );
|
|
||||||
void parseItemStatement ( Channel**, BString, BString, BString );
|
|
||||||
|
|
||||||
int countFeedItems ( const char* );
|
|
||||||
void processFeedItems (Channel**);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Ŝarĝante…
Reference in New Issue