From 0438c097bf5d1a3c7fa32fcb5b9b7c31a4cce162 Mon Sep 17 00:00:00 2001 From: Jaidyn Ann Date: Sun, 2 Aug 2020 02:05:20 -0500 Subject: [PATCH] Hash cache entries; replace string/unixdate attrs with TIME 'when' attr --- Makefile | 2 +- src/Feed.cpp | 42 +++++++++++++++++++++++++++++----------- src/Feed.h | 3 +++ src/Pogger.cpp | 2 -- src/ProtocolListener.cpp | 36 ++++++++++++++++++++++++++++++++-- src/ProtocolListener.h | 7 +++++++ src/Util.cpp | 12 +++++++++--- src/Util.h | 3 +-- 8 files changed, 86 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 2b67a44..364ec4c 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,7 @@ RSRCS = \ # - if your library does not follow the standard library naming scheme, # you need to specify the path to the library and it's name. # (e.g. for mylib.a, specify "mylib.a" or "path/mylib.a") -LIBS = be tracker shared tinyxml2 bnetapi network $(STDCPPLIBS) +LIBS = be tracker bnetapi network shared tinyxml2 $(STDCPPLIBS) # Specify additional paths to directories following the standard libXXX.so # or libXXX.a naming scheme. You can specify full paths or paths relative diff --git a/src/Feed.cpp b/src/Feed.cpp index 758c3d1..711d1c5 100644 --- a/src/Feed.cpp +++ b/src/Feed.cpp @@ -10,7 +10,7 @@ Feed::Feed ( BString path, Config* cfg ) description = BString( "Nondescript, N/A." ); homeUrl = BString(""); xmlUrl = BString(""); - + updated = true; filePath = GetCachePath( path, cfg ); } @@ -24,28 +24,48 @@ Feed::Feed ( ) { // ---------------------------------------------------------------------------- BString -Feed::GetCachePath ( BString falsePath, Config* cfg ) +Feed::GetCachePath ( BString givenPath, Config* cfg ) { - BUrl falseUrl = BUrl(falsePath); - BString protocol = falseUrl.Protocol().String(); + BUrl givenUrl = BUrl( givenPath ); + BString protocol = givenUrl.Protocol().String(); - if ( protocol == NULL && falseUrl.UrlString() != NULL ) - return falsePath; + if ( protocol == NULL && givenUrl.UrlString() != NULL ) + return givenPath; if ( protocol != BString("http") && protocol != BString("https") ) return NULL; - BString splitName = falseUrl.Host( ); - splitName.Append( falseUrl.Path() ); + return FetchRemoteFeed( givenPath, cfg ); +} + +BString +Feed::FetchRemoteFeed ( BString givenPath, Config* cfg ) +{ + BUrl givenUrl = BUrl( givenPath ); + BString* newHash = new BString(); + char oldHash[41]; + + BString splitName = givenUrl.Host( ); + splitName.Append( givenUrl.Path() ); splitName.ReplaceAll("/", "_"); BString filename = cfg->cacheDir; filename.Append(splitName); BFile* cacheFile = new BFile( filename, B_READ_WRITE | B_CREATE_FILE ); - if ( cfg->verbose ) - printf( "Saving %s to %s...\n", falsePath.String(), filename.String() ); + cacheFile->ReadAttr( "FeedSum", B_STRING_TYPE, 0, + oldHash, 41 ); + + if ( cfg->verbose ) + printf( "Saving %s...\n", givenPath.String() ); + + webFetch( givenUrl, cacheFile, newHash ); + + cacheFile->WriteAttr( "FeedSum", B_STRING_TYPE, 0, + newHash->String(), newHash->CountChars() ); + + if ( *(newHash) == BString(oldHash) ) + updated = false; - webFetch( falseUrl, cacheFile ); return filename; } diff --git a/src/Feed.h b/src/Feed.h index 749a57d..2b67c03 100644 --- a/src/Feed.h +++ b/src/Feed.h @@ -16,11 +16,13 @@ public: BString title; BString description; BDateTime date; + BDateTime lastDate; BString homeUrl; BString xmlUrl; BString filePath; BString outputDir; BList entries; + bool updated; void Parse ( Config* ); @@ -38,6 +40,7 @@ public: protected: BString GetCachePath ( BString, Config* ); + BString FetchRemoteFeed ( BString, Config* ); int xmlCountSiblings ( tinyxml2::XMLElement*, const char* ); }; diff --git a/src/Pogger.cpp b/src/Pogger.cpp index a491fde..3055f6f 100644 --- a/src/Pogger.cpp +++ b/src/Pogger.cpp @@ -148,14 +148,12 @@ processFeed ( void* feedArg ) BList entries; if ( testFeed->IsAtom() ) { - printf("Atom\n"); AtomFeed* feed = (AtomFeed*)malloc( sizeof(AtomFeed) ); feed = new AtomFeed( testFeed->filePath, main_cfg ); feed->Parse(main_cfg); entries = feed->entries; } if ( testFeed->IsRss() ) { - printf("RSS\n"); RssFeed* feed = (RssFeed*)malloc( sizeof(RssFeed) ); feed = new RssFeed( testFeed->filePath, main_cfg ); feed->Parse(main_cfg); diff --git a/src/ProtocolListener.cpp b/src/ProtocolListener.cpp index 48e5d87..beb9f9d 100644 --- a/src/ProtocolListener.cpp +++ b/src/ProtocolListener.cpp @@ -1,5 +1,8 @@ +#include +#include #include #include +#include #include "ProtocolListener.h" ProtocolListener::ProtocolListener ( bool traceLogging ) @@ -14,8 +17,10 @@ ProtocolListener::~ProtocolListener ( ) void ProtocolListener::DataReceived ( BUrlRequest* caller, const char* data, off_t position, ssize_t size ) { - if (fDownloadIO != NULL) - fDownloadIO->Write(data, size); + if ( fDownloadIO != NULL ) + fDownloadIO->Write( data, size ); + if ( fSha1 != NULL ) + fSha1->process_bytes( data, size ); } void @@ -29,3 +34,30 @@ ProtocolListener::GetDownloadIO ( ) { return fDownloadIO; } + +void +ProtocolListener::SetSha1 ( boost::uuids::detail::sha1* sha1 ) +{ + fSha1 = sha1; +} + +boost::uuids::detail::sha1* +ProtocolListener::GetSha1 ( ) +{ + return fSha1; +} + +BString +ProtocolListener::GetHash ( ) +{ + unsigned int hashInt[5]; + fSha1->get_digest( hashInt ); + + std::ostringstream hashStr; + for(std::size_t i=0; i #include +#include class ProtocolListener : public BUrlProtocolListener { @@ -11,11 +12,17 @@ public: virtual ~ProtocolListener(); virtual void DataReceived(BUrlRequest*, const char*, off_t, ssize_t); + void SetDownloadIO ( BDataIO* ); BDataIO* GetDownloadIO ( ); + void SetSha1 ( boost::uuids::detail::sha1* ); + boost::uuids::detail::sha1* GetSha1 ( ); + BString GetHash ( ); + private: BDataIO* fDownloadIO; + boost::uuids::detail::sha1* fSha1; bool fTraceLogging; }; diff --git a/src/Util.cpp b/src/Util.cpp index 98d6cd2..99b3c6e 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "ProtocolListener.h" #include "Util.h" @@ -73,25 +74,30 @@ withinDateRange ( BDateTime minDate, BDateTime nowDate, BDateTime maxDate ) // ---------------------------------------------------------------------------- int32 -webFetch ( char* strUrl, BDataIO* reply ) +webFetch ( BUrl url, BDataIO* reply ) { - return webFetch( BUrl(strUrl), reply ); + BString* ignored = new BString(); + return webFetch( url, reply, ignored ); } int32 -webFetch ( BUrl url, BDataIO* reply ) +webFetch ( BUrl url, BDataIO* reply, BString* hash ) { ProtocolListener listener(true); BUrlContext context; + boost::uuids::detail::sha1 sha1; BHttpRequest request( url, true, "HTTP", &listener, &context ); listener.SetDownloadIO( reply ); + listener.SetSha1( &sha1 ); thread_id thread = request.Run(); wait_for_thread( thread, NULL ); + *(hash) = listener.GetHash(); + const BHttpResult& result = dynamic_cast( request.Result() ); return result.StatusCode(); } diff --git a/src/Util.h b/src/Util.h index 21621a8..a838c79 100644 --- a/src/Util.h +++ b/src/Util.h @@ -14,8 +14,7 @@ BString dateTo3339String ( BDateTime ); bool withinDateRange ( BDateTime, BDateTime, BDateTime ); +int32 webFetch ( BUrl, BDataIO*, BString* ); int32 webFetch ( BUrl, BDataIO* ); -int32 webFetch ( char*, BDataIO* ); - #endif