Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
rss2email
rss2email-
rss2email.log
42 changes: 22 additions & 20 deletions config_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,26 +95,28 @@ to appear before a URL.
Per-Feed Configuration Options
------------------------------

Key | Purpose
--------------+--------------------------------------------------------------
delay | The amount of time to sleep before retrying a failed HTTP-fetch
| in seconds - "retry" configures the number of attempts to be made.
exclude | Exclude any item which matches the given regular-expression.
exclude-title | Exclude any item with a title matching the given regular-expression.
exclude-older | Exclude any items whose publication date is older than the
| specified number of days.
frequency | How frequently to poll this feed, in minutes.
include | Include only items which match the given regular-expression.
include-title | Include only items with a title matching the given regular-expression.
insecure | Ignore TLS failures when fetching feeds over https.
| Disable the checks by setting this value to "true", or "yes".
notify | Comma-delimited list of emails to send notifications to (if set,
| replaces the emails specified in the cron/daemon command-line).
retry | The maximum number of times to retry a failing HTTP-fetch.
sleep | Sleep the specified number of seconds, before making the request.
tag | Setup a tag for this feed, which can be accessed in the template.
template | The path to a feed-specific email template to use.
user-agent | Configure a specific User-Agent when making HTTP requests.
Key | Purpose
-----------------+--------------------------------------------------------------
delay | The amount of time to sleep before retrying a failed HTTP-fetch
| in seconds - "retry" configures the number of attempts to be made.
exclude | Exclude any item which matches the given regular-expression.
exclude-category | Exclude any item with a category matching the given regular-expression.
exclude-title | Exclude any item with a title matching the given regular-expression.
exclude-older | Exclude any items whose publication date is older than the
| specified number of days.
frequency | How frequently to poll this feed, in minutes.
include | Include only items which match the given regular-expression.
include-category | Include only items with a category matching the given regular-expression.
include-title | Include only items with a title matching the given regular-expression.
insecure | Ignore TLS failures when fetching feeds over https.
| Disable the checks by setting this value to "true", or "yes".
notify | Comma-delimited list of emails to send notifications to (if set,
| replaces the emails specified in the cron/daemon command-line).
retry | The maximum number of times to retry a failing HTTP-fetch.
sleep | Sleep the specified number of seconds, before making the request.
tag | Setup a tag for this feed, which can be accessed in the template.
template | The path to a feed-specific email template to use.
user-agent | Configure a specific User-Agent when making HTTP requests.


Polling Frequency
Expand Down
78 changes: 77 additions & 1 deletion processor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func New() (*Processor, error) {

// Ensure we have a state-directory.
dir := state.Directory()
errM := os.MkdirAll(dir, 0666)
errM := os.MkdirAll(dir, 0755)
if errM != nil {
return nil, errM
}
Expand Down Expand Up @@ -406,6 +406,9 @@ func (p *Processor) processFeed(entry configfile.Feed, recipients []string) erro
// check for age (exclude-older)
skip = skip || p.shouldSkipOlder(logger, entry, item.Published)

// check for category filtering
skip = skip || p.shouldSkipCategory(logger, entry, item.Categories)

if !skip {
// Convert the content to text.
text := html2text.HTML2Text(content)
Expand Down Expand Up @@ -832,6 +835,79 @@ func (p *Processor) shouldSkipOlder(logger *slog.Logger, config configfile.Feed,
return false
}

// shouldSkipCategory returns true if this entry should be skipped based on
// its categories.
//
// Our configuration file allows a series of per-feed configuration items,
// and those allow skipping the entry by regular expression matches on
// the item categories:
//
//   - If `exclude-category` is set and any category matches, the item is
//     skipped. All exclusions are evaluated before any inclusion, so an
//     exclude match wins regardless of the order of the options.
//   - If `include-category` is set and no category matches any of the
//     include patterns, the item is skipped.
//
// Each pattern is compiled once per option rather than once per category.
// An invalid pattern is reported with a single warning and then treated as
// matching nothing; an invalid `include-category` still counts as an
// include filter having been configured, so the item is skipped unless
// another include pattern matches.
func (p *Processor) shouldSkipCategory(logger *slog.Logger, config configfile.Feed, categories []string) bool {

	// First pass: any exclude-category match skips the entry outright.
	for _, opt := range config.Options {
		if opt.Name != "exclude-category" {
			continue
		}

		cat, matched, err := firstCategoryMatch(opt.Value, categories)
		if err != nil {
			logger.Warn("invalid regular expression in exclude-category",
				slog.String("exclude-category", opt.Value),
				slog.String("error", err.Error()))
			continue
		}
		if matched {
			logger.Debug("excluding entry due to exclude-category",
				slog.String("exclude-category", opt.Value),
				slog.String("matched-category", cat))
			return true
		}
	}

	// Second pass: if we have an include-category setting then we must
	// skip the entry unless at least one category matches.
	//
	// There might be more than one include-category setting and a match
	// against any will suffice.
	includeCategory := false

	for _, opt := range config.Options {
		if opt.Name != "include-category" {
			continue
		}

		// Record the filter before validating the pattern, so a broken
		// pattern still restricts the feed instead of letting everything in.
		includeCategory = true

		cat, matched, err := firstCategoryMatch(opt.Value, categories)
		if err != nil {
			logger.Warn("invalid regular expression in include-category",
				slog.String("include-category", opt.Value),
				slog.String("error", err.Error()))
			continue
		}
		if matched {
			logger.Debug("including entry due to 'include-category'",
				slog.String("include-category", opt.Value),
				slog.String("matched-category", cat))
			return false
		}
	}

	// If we had at least one "include-category" setting and we reach here
	// then we had no match.
	if includeCategory {
		logger.Debug("excluding entry due to 'include-category' (no match)",
			slog.String("categories", strings.Join(categories, ", ")))
		return true
	}

	// False: Do not skip/ignore this entry
	return false
}

// firstCategoryMatch compiles pattern once and returns the first entry of
// categories that it matches, along with true. It returns false when nothing
// matches, and a non-nil error when pattern is not a valid regular expression.
func firstCategoryMatch(pattern string, categories []string) (string, bool, error) {
	re, err := regexp.Compile(pattern)
	if err != nil {
		return "", false, err
	}

	for _, cat := range categories {
		if re.MatchString(cat) {
			return cat, true, nil
		}
	}

	return "", false, nil
}

// SetSendEmail updates the state of this object, when the send-flag
// is false zero emails are generated.
func (p *Processor) SetSendEmail(state bool) {
Expand Down
171 changes: 171 additions & 0 deletions processor/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,16 @@ func init() {
logger = slog.New(handler)
}

// setupTestHome points the HOME environment variable at a fresh temporary
// directory for the calling test.
//
// This ensures tests can create the .rss2email directory and state.db.
// Both the directory (t.TempDir) and the variable (t.Setenv) are cleaned up
// by the testing package when the test finishes.
func setupTestHome(t *testing.T) {
	t.Helper()
	t.Setenv("HOME", t.TempDir())
}

func TestSendEmail(t *testing.T) {
setupTestHome(t)

p, err := New()

Expand All @@ -51,6 +60,7 @@ func TestSendEmail(t *testing.T) {
}

func TestVerbose(t *testing.T) {
setupTestHome(t)

p, err := New()

Expand All @@ -63,6 +73,7 @@ func TestVerbose(t *testing.T) {

// TestSkipExclude ensures that we can exclude items by regexp
func TestSkipExclude(t *testing.T) {
setupTestHome(t)

feed := configfile.Feed{
URL: "blah",
Expand Down Expand Up @@ -102,6 +113,7 @@ func TestSkipExclude(t *testing.T) {

// TestSkipInclude ensures that we can include items by regexp
func TestSkipInclude(t *testing.T) {
setupTestHome(t)

feed := configfile.Feed{
URL: "blah",
Expand Down Expand Up @@ -140,6 +152,7 @@ func TestSkipInclude(t *testing.T) {

// TestSkipIncludeTitle ensures that we can include items by title regexp
func TestSkipIncludeTitle(t *testing.T) {
setupTestHome(t)

feed := configfile.Feed{
URL: "blah",
Expand Down Expand Up @@ -204,6 +217,7 @@ func TestSkipIncludeTitle(t *testing.T) {

// TestSkipOlder ensures that we can exclude items by age
func TestSkipOlder(t *testing.T) {
setupTestHome(t)

feed := configfile.Feed{
URL: "blah",
Expand Down Expand Up @@ -246,3 +260,160 @@ func TestSkipOlder(t *testing.T) {
t.Fatalf("skipped age with no options!")
}
}

// TestSkipExcludeCategory checks that an `exclude-category` option causes an
// item to be skipped when any of its categories matches the regexp, and that
// nothing is skipped when there is no match, no category, or no option.
func TestSkipExcludeCategory(t *testing.T) {
	setupTestHome(t)

	// Create the new processor
	x, err := New()
	if err != nil {
		t.Fatalf("error creating processor %s", err.Error())
	}
	defer x.Close()

	// Feed which excludes anything in a sports category (case-insensitive).
	excludeSports := configfile.Feed{
		URL: "blah",
		Options: []configfile.Option{
			{Name: "exclude-category", Value: "(?i)sports"},
		},
	}

	// Feed with no options at all.
	noOptions := configfile.Feed{
		URL:     "blah",
		Options: []configfile.Option{},
	}

	checks := []struct {
		feed       configfile.Feed
		categories []string
		wantSkip   bool
		failure    string
	}{
		// "Sports" matches "(?i)sports"
		{excludeSports, []string{"News", "Sports", "Entertainment"}, true,
			"failed to skip entry by category regexp"},
		// No category matches "(?i)sports"
		{excludeSports, []string{"News", "Entertainment"}, false,
			"skipped entry that doesn't match category regexp"},
		// Empty categories should not be skipped
		{excludeSports, []string{}, false,
			"skipped entry with empty categories"},
		// With no options we're not going to skip
		{noOptions, []string{"Sports", "News"}, false,
			"skipped something with no options!"},
	}

	for _, c := range checks {
		if x.shouldSkipCategory(logger, c.feed, c.categories) != c.wantSkip {
			t.Fatalf("%s", c.failure)
		}
	}
}

// TestSkipIncludeCategory checks that an `include-category` option keeps an
// item only when one of its categories matches the regexp, and that nothing
// is skipped when no option is configured.
func TestSkipIncludeCategory(t *testing.T) {
	setupTestHome(t)

	// Create the new processor
	x, err := New()
	if err != nil {
		t.Fatalf("error creating processor %s", err.Error())
	}
	defer x.Close()

	// Feed which only admits tech categories (case-insensitive).
	includeTech := configfile.Feed{
		URL: "blah",
		Options: []configfile.Option{
			{Name: "include-category", Value: "(?i)tech"},
		},
	}

	// Feed with no options at all.
	noOptions := configfile.Feed{
		URL:     "blah",
		Options: []configfile.Option{},
	}

	checks := []struct {
		feed       configfile.Feed
		categories []string
		wantSkip   bool
		failure    string
	}{
		// "Technology" matches "(?i)tech"
		{includeTech, []string{"Technology", "News"}, false,
			"skipped entry that should be included by category"},
		// No category matches "(?i)tech"
		{includeTech, []string{"Sports", "Entertainment"}, true,
			"failed to skip entry that doesn't match include-category"},
		// With no options we're not going to skip
		{noOptions, []string{"Sports", "News"}, false,
			"skipped something with no options!"},
	}

	for _, c := range checks {
		if x.shouldSkipCategory(logger, c.feed, c.categories) != c.wantSkip {
			t.Fatalf("%s", c.failure)
		}
	}
}

// TestSkipMultipleIncludeCategory checks that when several
// `include-category` options are present, a match against any one of them is
// enough to keep the item, and that the item is skipped when none match.
func TestSkipMultipleIncludeCategory(t *testing.T) {
	setupTestHome(t)

	// Create the new processor
	x, err := New()
	if err != nil {
		t.Fatalf("error creating processor %s", err.Error())
	}
	defer x.Close()

	feed := configfile.Feed{
		URL: "blah",
		Options: []configfile.Option{
			{Name: "include-category", Value: "(?i)tech"},
			{Name: "include-category", Value: "(?i)programming"},
		},
	}

	checks := []struct {
		categories []string
		wantSkip   bool
		failure    string
	}{
		// "Programming" matches the second include-category
		{[]string{"Programming"}, false,
			"skipped entry that should be included by second include-category"},
		// "Technology" matches the first include-category
		{[]string{"Technology"}, false,
			"skipped entry that should be included by first include-category"},
		// No category matches any include-category
		{[]string{"Sports", "Entertainment"}, true,
			"failed to skip entry that doesn't match any include-category"},
	}

	for _, c := range checks {
		if x.shouldSkipCategory(logger, feed, c.categories) != c.wantSkip {
			t.Fatalf("%s", c.failure)
		}
	}
}

// TestSkipInvalidCategoryRegex checks that an invalid regular expression in
// a category option does not panic: a broken `exclude-category` excludes
// nothing, while a broken `include-category` matches nothing and therefore
// causes the item to be skipped.
func TestSkipInvalidCategoryRegex(t *testing.T) {
	setupTestHome(t)

	// Create the new processor
	x, err := New()
	if err != nil {
		t.Fatalf("error creating processor %s", err.Error())
	}
	defer x.Close()

	// brokenFeed builds a feed whose only option carries an invalid regexp.
	brokenFeed := func(option string) configfile.Feed {
		return configfile.Feed{
			URL: "blah",
			Options: []configfile.Option{
				{Name: option, Value: "[invalid"},
			},
		}
	}

	// Invalid regex in exclude-category: should not panic and should not
	// skip (the invalid regex is logged as a warning).
	skipped := x.shouldSkipCategory(logger, brokenFeed("exclude-category"), []string{"Sports", "Entertainment"})
	if skipped {
		t.Fatalf("skipped entry with invalid regex pattern")
	}

	// Invalid regex in include-category: should skip because
	// include-category was specified but none matched (an invalid regex
	// fails to match).
	skipped = x.shouldSkipCategory(logger, brokenFeed("include-category"), []string{"Sports"})
	if !skipped {
		t.Fatalf("failed to skip entry when include-category has invalid regex")
	}
}
Loading