diff --git a/.claude/settings.local.json b/.claude/settings.local.json deleted file mode 100644 index 7fc1885..0000000 --- a/.claude/settings.local.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(make test:*)", - "Bash(grep:*)", - "Bash(sed:*)", - "Bash(git add:*)", - "Bash(go test:*)", - "Bash(make lint)", - "Bash(git commit:*)", - "Bash(git push:*)", - "Bash(curl:*)", - "Bash(cat:*)", - "Bash(jq:*)", - "Bash(timeout:*)", - "Bash(go run:*)", - "Bash(rm:*)" - ], - "deny": [] - } -} \ No newline at end of file diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 2371aec..b5d5b26 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -9,7 +9,7 @@ on: jobs: test: name: Test - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 # Service containers to run with `test` services: @@ -32,7 +32,7 @@ jobs: uses: actions/checkout@v4 - name: Set up Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: 1.21 @@ -46,19 +46,18 @@ jobs: golangci-lint: name: Lint - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - name: Checkout code uses: actions/checkout@v4 - name: Set up Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: 1.21 - name: Lint - uses: golangci/golangci-lint-action@v4 + uses: golangci/golangci-lint-action@v8 with: - version: v1.57 - skip-go-installation: true + version: v2.1 diff --git a/.gitignore b/.gitignore index 9e0dbe5..991f4f7 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,5 @@ lesswrong-bot # vendor/ .env + +.claude/settings.local.json diff --git a/.golangci.yml b/.golangci.yml index 2d2405c..fd872b9 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,6 +1,27 @@ +version: "2" run: build-tags: - integration - skip-dirs: - - mocks - - testdata +linters: + exclusions: + generated: lax + presets: + - comments + - common-false-positives + - legacy + - std-error-handling + paths: + - third_party$ + - builtin$ + - examples$ + - mocks$ + - testdata$ +formatters: + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ + - mocks$ + - testdata$ diff --git a/Makefile b/Makefile index 59d2c87..13de7b7 100644 --- a/Makefile +++ b/Makefile @@ -8,13 +8,13 @@ BINARY_NAME=lesswrong-bot DOCKER_IMAGE=lesswrong-bot run: ## Run the application - export $(cat .env | xargs); go run . + env $$(cat .env | xargs) go run . test: ## Run tests with race detection go test -race ./... test-integration: ## Run integration tests - export $(cat .env.test | xargs); go test -race -tags=integration ./... + env $$(cat .env | xargs) go test -race -tags=integration ./... test-coverage: ## Run tests with coverage go test -race -coverprofile=coverage.out ./... diff --git a/bot/bot.go b/bot/bot.go index 5cd1ef6..c526ff5 100644 --- a/bot/bot.go +++ b/bot/bot.go @@ -3,16 +3,17 @@ package bot import ( "context" "fmt" - "io" "log" "math/rand" "net/http" - "time" + "strings" tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api" "github.com/ndrewnee/lesswrong-bot/config" + "github.com/ndrewnee/lesswrong-bot/interfaces" "github.com/ndrewnee/lesswrong-bot/models" + "github.com/ndrewnee/lesswrong-bot/providers" "github.com/ndrewnee/lesswrong-bot/storage/memory" ) @@ -45,30 +46,21 @@ var mainKeyboard = tgbotapi.NewReplyKeyboard( type ( Bot struct { - config config.Config - botAPI *tgbotapi.BotAPI - httpClient HTTPClient - storage Storage - randomInt func(n int) int + config config.Config + botAPI *tgbotapi.BotAPI + httpClient interfaces.HTTPClient + storage interfaces.Storage + randomInt func(n int) int + providerFactory *providers.ProviderFactory } Options struct { Config config.Config BotAPI *tgbotapi.BotAPI - HTTPClient HTTPClient - Storage Storage + HTTPClient interfaces.HTTPClient + Storage interfaces.Storage RandomInt func(n int) int } - - HTTPClient interface { - Get(ctx context.Context, uri string) (*http.Response, error) - Post(ctx context.Context, url, contentType string, body io.Reader) (*http.Response, error) - } - - Storage interface { - Get(ctx context.Context, key string) (string, error) - Set(ctx context.Context, key, value string, expire time.Duration) error - } ) func New(options ...Options) (*Bot, error) { @@ -106,43 +98,58 @@ func New(options ...Options) (*Bot, error) { opts.RandomInt = rand.Intn } + providerFactory := providers.NewProviderFactory( + opts.Storage, + opts.HTTPClient, + int(opts.Config.CacheExpire.Seconds()), + opts.RandomInt, + ) + return &Bot{ - botAPI: opts.BotAPI, - config: opts.Config, - httpClient: opts.HTTPClient, - storage: opts.Storage, - randomInt: opts.RandomInt, + botAPI: opts.BotAPI, + config: opts.Config, + httpClient: opts.HTTPClient, + storage: opts.Storage, + randomInt: opts.RandomInt, + providerFactory: providerFactory, }, nil } func (b *Bot) GetUpdatesChan() (tgbotapi.UpdatesChannel, error) { if b.config.Webhook { - webhook := tgbotapi.NewWebhook(b.config.WebhookHost + "/" + b.botAPI.Token) + return b.setupWebhook() + } + return b.setupPolling() +} - if _, err := b.botAPI.SetWebhook(webhook); err != nil { - return nil, fmt.Errorf("set webhook failed: %s", err) - } +func (b *Bot) setupWebhook() (tgbotapi.UpdatesChannel, error) { + webhook := tgbotapi.NewWebhook(b.config.WebhookHost + "/" + b.botAPI.Token) - info, err := b.botAPI.GetWebhookInfo() - if err != nil { - return nil, fmt.Errorf("get webhook info failed: %s", err) - } + if _, err := b.botAPI.SetWebhook(webhook); err != nil { + return nil, fmt.Errorf("set webhook failed: %s", err) + } - if info.LastErrorDate != 0 { - log.Printf("[ERROR] Telegram callback failed: %s", info.LastErrorMessage) - } + info, err := b.botAPI.GetWebhookInfo() + if err != nil { + return nil, fmt.Errorf("get webhook info failed: %s", err) + } - updates := b.botAPI.ListenForWebhook("/" + b.botAPI.Token) + if info.LastErrorDate != 0 { + log.Printf("[ERROR] Telegram callback failed: %s", info.LastErrorMessage) + } - go func() { - if err := http.ListenAndServe(b.config.Address, nil); err != nil { - log.Printf("[ERROR] Listen and serve failed: %s", err) - } - }() + updates := b.botAPI.ListenForWebhook("/" + b.botAPI.Token) - return updates, nil - } + go func() { + if err := http.ListenAndServe(b.config.Address, nil); err != nil { + log.Printf("[ERROR] Listen and serve failed: %s", err) + } + }() + + return updates, nil +} +func (b *Bot) setupPolling() (tgbotapi.UpdatesChannel, error) { response, err := b.botAPI.RemoveWebhook() if err != nil { return nil, fmt.Errorf("removed webhook failed: %s", err) @@ -165,85 +172,141 @@ func (b *Bot) GetUpdatesChan() (tgbotapi.UpdatesChannel, error) { func (b *Bot) MessageHandler(ctx context.Context, update tgbotapi.Update) (tgbotapi.Message, error) { if update.CallbackQuery != nil { - text, _, err := b.ChangeSource(ctx, update.CallbackQuery.From.ID, models.Source(update.CallbackQuery.Data)) - if err != nil { - log.Printf("[ERROR] Command /source failed: %s", err) - text = "Change source failed" - } - - if _, err := b.botAPI.AnswerCallbackQuery(tgbotapi.NewCallback(update.CallbackQuery.ID, "")); err != nil { - return tgbotapi.Message{}, fmt.Errorf("answer callback failed: %s", err) - } - - msg := tgbotapi.NewMessage(update.CallbackQuery.Message.Chat.ID, text) - msg.ParseMode = tgbotapi.ModeMarkdown - msg.DisableWebPagePreview = true - - sent, err := b.botAPI.Send(msg) - if err != nil { - return tgbotapi.Message{}, fmt.Errorf("send message failed: %s. Text: \n%s", err, msg.Text) - } - - return sent, nil + return b.handleCallbackQuery(ctx, update.CallbackQuery) } if update.Message == nil { return tgbotapi.Message{}, nil } - if update.Message.From != nil { - log.Printf("[%s] %s", update.Message.From.UserName, update.Message.Text) + return b.handleMessage(ctx, update.Message) +} + +func (b *Bot) handleCallbackQuery(ctx context.Context, callbackQuery *tgbotapi.CallbackQuery) (tgbotapi.Message, error) { + text, _, err := b.ChangeSource(ctx, callbackQuery.From.ID, models.Source(callbackQuery.Data)) + if err != nil { + text = b.handleCommandError("source", err, "Change source failed") } - if update.Message.Chat == nil { - return tgbotapi.Message{}, nil + if _, err := b.botAPI.AnswerCallbackQuery(tgbotapi.NewCallback(callbackQuery.ID, "")); err != nil { + return tgbotapi.Message{}, fmt.Errorf("answer callback failed: %s", err) } - msg := tgbotapi.NewMessage(update.Message.Chat.ID, "") + msg := tgbotapi.NewMessage(callbackQuery.Message.Chat.ID, text) msg.ParseMode = tgbotapi.ModeMarkdown msg.DisableWebPagePreview = true - switch update.Message.Command() { + return b.sendMessage(msg) +} + +func (b *Bot) handleMessage(ctx context.Context, message *tgbotapi.Message) (tgbotapi.Message, error) { + if message.From != nil { + log.Printf("[%s] %s", message.From.UserName, message.Text) + } + + if message.Chat == nil { + return tgbotapi.Message{}, nil + } + + msg := b.createBaseMessage(message.Chat.ID) + + switch message.Command() { case "start", "help": - msg.ReplyMarkup = mainKeyboard - msg.Text = MessageHelp + return b.handleHelpCommand(msg) case "top": - text, err := b.TopPosts(ctx, update.Message.From.ID) - if err != nil { - log.Printf("[ERROR] Command /top failed: %s", err) - text = "Top posts not found" - } - - msg.Text = text + return b.handleTopCommand(ctx, msg, message.From.ID) case "random": - text, err := b.RandomPost(ctx, update.Message.From.ID) - if err != nil { - log.Printf("[ERROR] Command /random failed: %s", err) - text = "Random post not found" - } - - msg.Text = text + return b.handleRandomCommand(ctx, msg, message.From.ID) case "source": - text, keyboard, err := b.ChangeSource(ctx, update.Message.From.ID, models.Source(update.Message.CommandArguments())) - if err != nil { - log.Printf("[ERROR] Command /source failed: %s", err) - text = "Change source failed" - } - - msg.Text = text - msg.ReplyMarkup = keyboard + return b.handleSourceCommand(ctx, msg, message.From.ID, message.CommandArguments()) default: - msg.Text = "I don't know that command" + return b.handleUnknownCommand(msg) } +} +func (b *Bot) createBaseMessage(chatID int64) tgbotapi.MessageConfig { + msg := tgbotapi.NewMessage(chatID, "") + msg.ParseMode = tgbotapi.ModeMarkdown + msg.DisableWebPagePreview = true + return msg +} + +func (b *Bot) handleHelpCommand(msg tgbotapi.MessageConfig) (tgbotapi.Message, error) { + msg.ReplyMarkup = mainKeyboard + msg.Text = MessageHelp + return b.sendMessage(msg) +} + +func (b *Bot) handleTopCommand(ctx context.Context, msg tgbotapi.MessageConfig, userID int) (tgbotapi.Message, error) { + text, err := b.TopPosts(ctx, userID) + if err != nil { + text = b.handleCommandError("top", err, "Top posts not found") + } + msg.Text = text + return b.sendMessage(msg) +} + +func (b *Bot) handleRandomCommand(ctx context.Context, msg tgbotapi.MessageConfig, userID int) (tgbotapi.Message, error) { + text, err := b.RandomPost(ctx, userID) + if err != nil { + text = b.handleCommandError("random", err, "Random post not found") + } + msg.Text = text + return b.sendMessage(msg) +} + +func (b *Bot) handleSourceCommand(ctx context.Context, msg tgbotapi.MessageConfig, userID int, args string) (tgbotapi.Message, error) { + text, keyboard, err := b.ChangeSource(ctx, userID, models.Source(args)) + if err != nil { + text = b.handleCommandError("source", err, "Change source failed") + } + msg.Text = text + msg.ReplyMarkup = keyboard + return b.sendMessage(msg) +} + +func (b *Bot) handleUnknownCommand(msg tgbotapi.MessageConfig) (tgbotapi.Message, error) { + msg.Text = "I don't know that command" + return b.sendMessage(msg) +} + +func (b *Bot) sendMessage(msg tgbotapi.MessageConfig) (tgbotapi.Message, error) { sent, err := b.botAPI.Send(msg) if err != nil { + // If it's a markdown parsing error and we're using markdown mode, try as plain text + if strings.Contains(err.Error(), "can't parse entities") && msg.ParseMode == tgbotapi.ModeMarkdown { + log.Printf("[ERROR] Markdown parsing failed, retrying as plain text: %s", err) + msg.ParseMode = "" + sent, err = b.botAPI.Send(msg) + if err == nil { + return sent, nil + } + } + errMsg := msg errMsg.Text = "Oops, something went wrong!" + errMsg.ParseMode = "" _, _ = b.botAPI.Send(errMsg) - return tgbotapi.Message{}, fmt.Errorf("send message failed: %s. Text: \n%s", err, msg.Text) } - return sent, nil } + +func (b *Bot) getUserSource(ctx context.Context, userID int) models.Source { + key := fmt.Sprintf("source:%d", userID) + source, err := b.storage.Get(ctx, key) + if err != nil { + log.Printf("[ERROR] Get source failed: %s, key: %s", err, key) + } + + sourceModel := models.Source(source) + if !sourceModel.IsValid() { + return models.SourceLesswrongRu + } + return sourceModel +} + +func (b *Bot) handleCommandError(command string, err error, fallbackMessage string) string { + log.Printf("[ERROR] Command /%s failed: %s", command, err) + return fallbackMessage +} diff --git a/bot/bot_integration_test.go b/bot/bot_integration_test.go index 0cf3efc..a463b37 100644 --- a/bot/bot_integration_test.go +++ b/bot/bot_integration_test.go @@ -6,7 +6,6 @@ import ( "context" "os" "strconv" - "strings" "testing" "time" @@ -14,6 +13,7 @@ import ( "github.com/stretchr/testify/require" "github.com/ndrewnee/lesswrong-bot/config" + "github.com/ndrewnee/lesswrong-bot/interfaces" "github.com/ndrewnee/lesswrong-bot/storage/memory" "github.com/ndrewnee/lesswrong-bot/storage/redis" ) @@ -27,7 +27,7 @@ func setupTestBot(t *testing.T) (*Bot, int64, int) { require.NoError(t, err, "Env var TEST_USER_ID should be set") config := config.Parse() - var storage Storage = memory.NewStorage() + var storage interfaces.Storage = memory.NewStorage() if os.Getenv("TEST_USE_REDIS") == "true" { storage, err = redis.NewStorage(config.RedisURL) @@ -244,7 +244,7 @@ func TestBot_MessageHandler_ShouldGetTopPostsFromSlateStarCodex(t *testing.T) { msg, err := tgbot.MessageHandler(context.TODO(), update) require.NoError(t, err) - require.True(t, strings.HasPrefix(msg.Text, "🏆 Top posts from https://slatestarcodex.com")) + require.Contains(t, msg.Text, "🏆 Top posts from https://slatestarcodex.com") } func TestBot_MessageHandler_ShouldGetRandomPostFromSlateStarCodex(t *testing.T) { @@ -260,7 +260,7 @@ func TestBot_MessageHandler_ShouldGetRandomPostFromSlateStarCodex(t *testing.T) msg, err := tgbot.MessageHandler(context.TODO(), update) require.NoError(t, err) - require.True(t, strings.HasPrefix(msg.Text, "📝")) + require.Contains(t, msg.Text, "📝") } func TestBot_MessageHandler_ShouldChangeSourceToAstralCodexTen(t *testing.T) { @@ -286,7 +286,7 @@ func TestBot_MessageHandler_ShouldGetTopPostsFromAstralCodexTen(t *testing.T) { msg, err := tgbot.MessageHandler(context.TODO(), update) require.NoError(t, err) - require.True(t, strings.HasPrefix(msg.Text, "🏆 Top posts from https://astralcodexten.substack.com")) + require.Contains(t, msg.Text, "🏆 Top posts from https://astralcodexten.substack.com") } func TestBot_MessageHandler_ShouldGetRandomPostFromAstralCodexTen(t *testing.T) { @@ -302,7 +302,7 @@ func TestBot_MessageHandler_ShouldGetRandomPostFromAstralCodexTen(t *testing.T) msg, err := tgbot.MessageHandler(context.TODO(), update) require.NoError(t, err) - require.True(t, strings.HasPrefix(msg.Text, "📝")) + require.Contains(t, msg.Text, "📝") } func TestBot_MessageHandler_ShouldChangeSourceToLessWrongRu(t *testing.T) { @@ -328,7 +328,7 @@ func TestBot_MessageHandler_ShouldGetTopPostsFromLessWrongRu(t *testing.T) { msg, err := tgbot.MessageHandler(context.TODO(), update) require.NoError(t, err) - require.True(t, strings.HasPrefix(msg.Text, "🏆 Random posts from https://lesswrong.ru")) + require.Contains(t, msg.Text, "🏆 Random posts from https://lesswrong.ru") } func TestBot_MessageHandler_ShouldGetRandomPostFromLessWrongRu(t *testing.T) { @@ -344,7 +344,7 @@ func TestBot_MessageHandler_ShouldGetRandomPostFromLessWrongRu(t *testing.T) { msg, err := tgbot.MessageHandler(context.TODO(), update) require.NoError(t, err) - require.True(t, strings.HasPrefix(msg.Text, "📝")) + require.Contains(t, msg.Text, "📝") } func TestBot_MessageHandler_ShouldChangeSourceToLessWrongCom(t *testing.T) { @@ -370,7 +370,7 @@ func TestBot_MessageHandler_ShouldGetTopPostsFromLessWrongCom(t *testing.T) { msg, err := tgbot.MessageHandler(context.TODO(), update) require.NoError(t, err) - require.True(t, strings.HasPrefix(msg.Text, "🏆 Top posts this week from https://lesswrong.com")) + require.Contains(t, msg.Text, "🏆 Top posts from https://www.lesswrong.com") } func TestBot_MessageHandler_ShouldGetRandomPostFromLessWrongCom(t *testing.T) { @@ -386,5 +386,5 @@ func TestBot_MessageHandler_ShouldGetRandomPostFromLessWrongCom(t *testing.T) { msg, err := tgbot.MessageHandler(context.TODO(), update) require.NoError(t, err) - require.True(t, strings.HasPrefix(msg.Text, "📝")) + require.Contains(t, msg.Text, "📝") } diff --git a/bot/random.go b/bot/random.go index 9214141..6d3690c 100644 --- a/bot/random.go +++ b/bot/random.go @@ -1,331 +1,25 @@ package bot import ( - "bytes" "context" - "encoding/json" - "fmt" - "log" - "strings" - md "github.com/JohannesKaufmann/html-to-markdown" - "github.com/gocolly/colly" - - "github.com/ndrewnee/lesswrong-bot/models" + "github.com/ndrewnee/lesswrong-bot/formatter" ) func (b *Bot) RandomPost(ctx context.Context, userID int) (string, error) { - key := fmt.Sprintf("source:%d", userID) - - source, err := b.storage.Get(ctx, key) - if err != nil { - log.Printf("[ERROR] Get source failed: %s, key: %s", err, key) - } - - switch models.Source(source) { - case models.SourceLesswrongRu: - return b.randomLesswrongRu(ctx) - case models.SourceSlate: - return b.randomSlate(ctx) - case models.SourceAstral: - return b.randomAstral(ctx) - case models.SourceLesswrong: - return b.randomLesswrong(ctx) - default: - return b.randomLesswrongRu(ctx) - } -} - -func (b *Bot) randomSlate(ctx context.Context) (string, error) { - postsCached, err := b.storage.Get(ctx, "posts:slatestarcodex") - if err != nil { - return "", fmt.Errorf("get slatestarcodex cached posts failed: %s", err) - } - - var posts []models.Post - - if postsCached != "" { - if err := json.Unmarshal([]byte(postsCached), &posts); err != nil { - return "", fmt.Errorf("unmarshal slatestarcodex cached posts failed: %s", err) - } - } - - // Load posts for the first time. - if len(posts) == 0 { - archivesCollector := colly.NewCollector() - - archivesCollector.OnHTML("a[href][rel=bookmark]", func(e *colly.HTMLElement) { - posts = append(posts, models.Post{ - Title: e.Text, - URL: e.Attr("href"), - }) - }) - - if err := archivesCollector.Visit("https://slatestarcodex.com/archives/"); err != nil { - return "", fmt.Errorf("get slatestarcodex posts failed: %s", err) - } - - postsCache, err := json.Marshal(posts) - if err != nil { - return "", fmt.Errorf("marshal slatestarcodex posts failed: %s", err) - } - - if err := b.storage.Set(ctx, "posts:slatestarcodex", string(postsCache), b.config.CacheExpire); err != nil { - return "", fmt.Errorf("cache slatestarcodex posts failed: %s", err) - } - } - - if len(posts) == 0 { - return "", fmt.Errorf("slatestarcodex posts not found") - } - - i := b.randomInt(len(posts)) - post := posts[i] - - postCollector := colly.NewCollector() - - postCollector.OnHTML("div.pjgm-postcontent", func(e *colly.HTMLElement) { - post.HTML, _ = e.DOM.Html() - }) - - if err := postCollector.Visit(post.URL); err != nil { - return "", fmt.Errorf("get slatestarcodex random post failed: %s", err) - } - - return b.postToMarkdown(post, md.NewConverter(models.DomainSlate, true, nil), false) -} - -func (b *Bot) randomAstral(ctx context.Context) (string, error) { - postsCached, err := b.storage.Get(ctx, "posts:astralcodexten") - if err != nil { - return "", fmt.Errorf("get astralcodexten cached posts failed: %s", err) - } - - var posts []models.Post - - if postsCached != "" { - if err := json.Unmarshal([]byte(postsCached), &posts); err != nil { - return "", fmt.Errorf("unmarshal astralcodexten cached posts failed: %s", err) - } - } - - // Load posts for the first time. - if len(posts) == 0 { - // As substack limits list to 12 posts in one request we fetch all posts using offset. - for offset := 0; true; offset += models.DefaultLimit { - uri := fmt.Sprintf("https://astralcodexten.substack.com/api/v1/archive?sort=new&limit=%d&offset=%d", - models.DefaultLimit, - offset, - ) - - httpResponse, err := b.httpClient.Get(ctx, uri) - if err != nil { - log.Printf("[ERROR] Get astralcodexten posts failed: %s", err) - break - } - - var newPosts []models.AstralPost - - if err := b.handleResponse(httpResponse, &newPosts); err != nil { - log.Printf("[ERROR] handle astralcodexten posts response: %s", err) - // If blocked by Cloudflare (403) or rate limited (429) and we have no posts yet, return fallback - if (httpResponse.StatusCode == 403 || httpResponse.StatusCode == 429) && len(posts) == 0 { - fallbackPost := models.Post{ - Title: "Bounded Distrust", - URL: "https://astralcodexten.substack.com/p/bounded-distrust", - HTML: "
Content temporarily unavailable due to API restrictions. Please visit the link above to read the full post.
", - } - return b.postToMarkdown(fallbackPost, md.NewConverter(models.DomainAstral, true, nil), false) - } - break - } - - if len(newPosts) == 0 { - break - } - - for _, astralPost := range newPosts { - if astralPost.Audience != "only_paid" { - posts = append(posts, astralPost.AsPost()) - } - } - } - - postsCache, err := json.Marshal(posts) - if err != nil { - return "", fmt.Errorf("marshal astralcodexten posts failed: %s", err) - } - - if err := b.storage.Set(ctx, "posts:astralcodexten", string(postsCache), b.config.CacheExpire); err != nil { - return "", fmt.Errorf("cache astralcodexten posts failed: %s", err) - } - } - - if len(posts) == 0 { - return "", fmt.Errorf("astralcodexten posts not found") - } - - i := b.randomInt(len(posts)) - post := posts[i] - - httpResponse, err := b.httpClient.Get(ctx, "https://astralcodexten.substack.com/api/v1/posts/"+post.Slug) - if err != nil { - return "", fmt.Errorf("get astralcodexten random post failed: %s", err) - } - - var astralPost models.AstralPost - - if err := b.handleResponse(httpResponse, &astralPost); err != nil { - // Handle Cloudflare blocking (403) or rate limiting (429) gracefully - return a basic post with available info - if httpResponse.StatusCode == 403 || httpResponse.StatusCode == 429 { - fallbackPost := models.Post{ - Title: post.Title, - URL: post.URL, - HTML: "Content temporarily unavailable due to API restrictions. Please visit the link above to read the full post.
", - } - return b.postToMarkdown(fallbackPost, md.NewConverter(models.DomainAstral, true, nil), false) - } - return "", fmt.Errorf("handle astralcodexten post response: %s", err) - } - - return b.postToMarkdown(astralPost.AsPost(), md.NewConverter(models.DomainAstral, true, nil), false) -} - -func (b *Bot) randomLesswrongRu(ctx context.Context) (string, error) { - postsCached, err := b.storage.Get(ctx, "posts:lesswrong.ru") - if err != nil { - return "", fmt.Errorf("get lesswrong.ru cached posts failed: %s", err) - } - - var posts []models.Post - - if postsCached != "" { - if err := json.Unmarshal([]byte(postsCached), &posts); err != nil { - return "", fmt.Errorf("unmarshal lesswrong.ru cached posts failed: %s", err) - } - } - - // Load posts for the first time. - if len(posts) == 0 { - postsCollector := colly.NewCollector() - - postsCollector.OnHTML("li.leaf.menu-depth-3,li.leaf.menu-depth-4", func(e *colly.HTMLElement) { - posts = append(posts, models.Post{ - Title: e.Text, - URL: e.Request.AbsoluteURL(e.ChildAttr("a", "href")), - }) - }) - - if err := postsCollector.Visit("https://lesswrong.ru/w"); err != nil { - return "", fmt.Errorf("get lesswrong.ru posts failed: %s", err) - } - - postsCache, err := json.Marshal(posts) - if err != nil { - return "", fmt.Errorf("marshal lesswrong.ru posts failed: %s", err) - } - - if err := b.storage.Set(ctx, "posts:lesswrong.ru", string(postsCache), b.config.CacheExpire); err != nil { - return "", fmt.Errorf("cache lesswrong.ru posts failed: %s", err) - } - } - - if len(posts) == 0 { - return "", fmt.Errorf("lesswrong.ru posts not found") - } - - i := b.randomInt(len(posts)) - post := posts[i] - - postCollector := colly.NewCollector() - - postCollector.OnHTML("div.tex2jax", func(e *colly.HTMLElement) { - post.HTML, _ = e.DOM.Html() - }) - - if err := postCollector.Visit(post.URL); err != nil { - return "", fmt.Errorf("get lesswrong.ru random post failed: %s", err) - } - - return b.postToMarkdown(post, md.NewConverter(models.DomainLesswrongRu, true, nil), true) -} - -func (b *Bot) randomLesswrong(ctx context.Context) (string, error) { - query := fmt.Sprintf(`{ - posts(input: {terms: {view: "new", limit: 1, meta: null, offset: %d}}) { - results { - title - pageUrl - htmlBody - } - } - }`, b.randomInt(models.LesswrongPostsMaxCount)) - - request, err := json.Marshal(map[string]string{"query": query}) - if err != nil { - return "", fmt.Errorf("marshal request for lesswrong.com random post failed: %s", err) - } + source := b.getUserSource(ctx, userID) - httpResponse, err := b.httpClient.Post(ctx, "https://www.lesswrong.com/graphql", "application/json", bytes.NewBuffer(request)) + provider := b.providerFactory.CreateProvider(source) + post, err := provider.GetRandomPost(ctx) if err != nil { - return "", fmt.Errorf("get lesswrong.com random post failed: %s", err) - } - - var response models.LesswrongResponse - - if err := b.handleResponse(httpResponse, &response); err != nil { - return "", fmt.Errorf("handle lesswrong.com random post response: %s", err) - } - - if len(response.Data.Posts.Results) == 0 { - return "", fmt.Errorf("lesswrong.com random post not found") + return "", err } - result := response.Data.Posts.Results[0] + converter := b.providerFactory.GetMarkdownConverter(source) + urlWithText := b.providerFactory.ShouldUseURLWithText(source) - return b.postToMarkdown(result.AsPost(), md.NewConverter(models.DomainLesswrong, true, nil), false) + formatter := formatter.NewMarkdownFormatter() + return formatter.FormatPost(post, converter, urlWithText) } -func (b *Bot) postToMarkdown(post models.Post, mdConverter *md.Converter, urlWithText bool) (string, error) { - markdownOrig, err := mdConverter.ConvertString(post.HTML) - if err != nil { - return "", fmt.Errorf("convert lesswrong.ru html to markdown failed: %s", err) - } - - markdown := markdownOrig - - // Cut post for preview mode. - if len(markdown) > models.PostMaxLength { - // Convert to runes to properly split between unicode symbols. - runes := []rune(markdown) - markdown = string(runes[:models.PostMaxLength]) - - // Truncate after next line end to not break markdown text. - rest := string(runes[models.PostMaxLength:]) - if n := strings.IndexByte(rest, '\n'); n != -1 { - markdown += rest[:n] - } else { - markdown = markdownOrig - } - - // Stupid hotfixes when markdown was cut in the middle. - markdown = strings.ReplaceAll(markdown, "* * *", "") - markdown = strings.ReplaceAll(markdown, "```", "") - } - - // Stupid hotfixes for some invalid markdowns. - markdown = strings.ReplaceAll(markdown, "[[", "[") - markdown = strings.ReplaceAll(markdown, "]]", "]") - markdown = strings.ReplaceAll(markdown, "![]", "[Image]") - markdown = strings.ReplaceAll(markdown, "_[", "") - markdown = strings.ReplaceAll(markdown, "]_", "") - - link := fmt.Sprintf("[%s](%s)", post.Title, post.URL) - postURL := post.URL - if urlWithText { - postURL = link - } - - return fmt.Sprintf("📝 %s\n\n%s\n\n%s", link, markdown, postURL), nil -} diff --git a/bot/random_integration_test.go b/bot/random_integration_test.go index 895062b..8f77cb0 100644 --- a/bot/random_integration_test.go +++ b/bot/random_integration_test.go @@ -17,6 +17,7 @@ import ( "github.com/ndrewnee/lesswrong-bot/bot/mocks" "github.com/ndrewnee/lesswrong-bot/models" + "github.com/ndrewnee/lesswrong-bot/providers" ) // Individual random post tests - exact same logic as original TestRandomPost @@ -127,16 +128,32 @@ func setupMockHTTPClient(t *testing.T) *mocks.HTTPClient { return httpClient } +func setupBotWithMockHTTPClient(t *testing.T, httpClient *mocks.HTTPClient) *Bot { + tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) + require.NoError(t, err) + + return tgbot +} + +func updateBotProviderFactory(tgbot *Bot, httpClient *mocks.HTTPClient) { + // Update the provider factory to use the same mock HTTP client and current randomInt + tgbot.providerFactory = providers.NewProviderFactory( + tgbot.storage, + httpClient, + int(tgbot.config.CacheExpire.Seconds()), + tgbot.randomInt, + ) +} + func TestRandomPost_ShouldGetRandomPostFromLessWrongRuWhenSourceNotSet(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 2 } + updateBotProviderFactory(tgbot, httpClient) got, err := tgbot.RandomPost(context.TODO(), userID) require.NoError(t, err) @@ -149,16 +166,15 @@ func TestRandomPost_ShouldGetRandomPostFromLessWrongRuWhenSourceNotSet(t *testin func TestRandomPost_ShouldGetRandomPostFromSlateStarCodex(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 0 } + updateBotProviderFactory(tgbot, httpClient) key := fmt.Sprintf("source:%d", userID) - err = tgbot.storage.Set(context.TODO(), key, models.SourceSlate.Value(), 0) + err := tgbot.storage.Set(context.TODO(), key, models.SourceSlate.Value(), 0) require.NoError(t, err) got, err := tgbot.RandomPost(context.TODO(), userID) @@ -172,16 +188,15 @@ func TestRandomPost_ShouldGetRandomPostFromSlateStarCodex(t *testing.T) { func TestRandomPost_ShouldGetRandomPostFromSlateStarCodexInvalidMarkdownCut(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 563 } + updateBotProviderFactory(tgbot, httpClient) key := fmt.Sprintf("source:%d", userID) - err = tgbot.storage.Set(context.TODO(), key, models.SourceSlate.Value(), 0) + err := tgbot.storage.Set(context.TODO(), key, models.SourceSlate.Value(), 0) require.NoError(t, err) got, err := tgbot.RandomPost(context.TODO(), userID) @@ -195,16 +210,15 @@ func TestRandomPost_ShouldGetRandomPostFromSlateStarCodexInvalidMarkdownCut(t *t func TestRandomPost_ShouldGetRandomPostFromSlateStarCodexImageFix(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 191 } + updateBotProviderFactory(tgbot, httpClient) key := fmt.Sprintf("source:%d", userID) - err = tgbot.storage.Set(context.TODO(), key, models.SourceSlate.Value(), 0) + err := tgbot.storage.Set(context.TODO(), key, models.SourceSlate.Value(), 0) require.NoError(t, err) got, err := tgbot.RandomPost(context.TODO(), userID) @@ -218,16 +232,15 @@ func TestRandomPost_ShouldGetRandomPostFromSlateStarCodexImageFix(t *testing.T) func TestRandomPost_ShouldGetRandomPostFromAstralCodexTen(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 0 } + updateBotProviderFactory(tgbot, httpClient) key := fmt.Sprintf("source:%d", userID) - err = tgbot.storage.Set(context.TODO(), key, models.SourceAstral.Value(), 0) + err := tgbot.storage.Set(context.TODO(), key, models.SourceAstral.Value(), 0) require.NoError(t, err) got, err := tgbot.RandomPost(context.TODO(), userID) @@ -241,16 +254,15 @@ func TestRandomPost_ShouldGetRandomPostFromAstralCodexTen(t *testing.T) { func TestRandomPost_ShouldGetRandomPostFromAstralCodexTenInvalidCut(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 1 } + updateBotProviderFactory(tgbot, httpClient) key := fmt.Sprintf("source:%d", userID) - err = tgbot.storage.Set(context.TODO(), key, models.SourceAstral.Value(), 0) + err := tgbot.storage.Set(context.TODO(), key, models.SourceAstral.Value(), 0) require.NoError(t, err) got, err := tgbot.RandomPost(context.TODO(), userID) @@ -264,16 +276,15 @@ func TestRandomPost_ShouldGetRandomPostFromAstralCodexTenInvalidCut(t *testing.T func TestRandomPost_ShouldGetRandomPostFromAstralCodexTenLinkBug(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 2 } + updateBotProviderFactory(tgbot, httpClient) key := fmt.Sprintf("source:%d", userID) - err = tgbot.storage.Set(context.TODO(), key, models.SourceAstral.Value(), 0) + err := tgbot.storage.Set(context.TODO(), key, models.SourceAstral.Value(), 0) require.NoError(t, err) got, err := tgbot.RandomPost(context.TODO(), userID) @@ -287,16 +298,15 @@ func TestRandomPost_ShouldGetRandomPostFromAstralCodexTenLinkBug(t *testing.T) { func TestRandomPost_ShouldGetRandomPostFromLessWrongRuInvalidCut(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 1 } + updateBotProviderFactory(tgbot, httpClient) key := fmt.Sprintf("source:%d", userID) - err = tgbot.storage.Set(context.TODO(), key, models.SourceLesswrongRu.Value(), 0) + err := tgbot.storage.Set(context.TODO(), key, models.SourceLesswrongRu.Value(), 0) require.NoError(t, err) got, err := tgbot.RandomPost(context.TODO(), userID) @@ -310,16 +320,15 @@ func TestRandomPost_ShouldGetRandomPostFromLessWrongRuInvalidCut(t *testing.T) { func TestRandomPost_ShouldGetRandomPostFromLessWrongCom(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 0 } + updateBotProviderFactory(tgbot, httpClient) key := fmt.Sprintf("source:%d", userID) - err = tgbot.storage.Set(context.TODO(), key, models.SourceLesswrong.Value(), 0) + err := tgbot.storage.Set(context.TODO(), key, models.SourceLesswrong.Value(), 0) require.NoError(t, err) got, err := tgbot.RandomPost(context.TODO(), userID) @@ -333,16 +342,15 @@ func TestRandomPost_ShouldGetRandomPostFromLessWrongCom(t *testing.T) { func TestRandomPost_ShouldGetRandomPostFromLessWrongComInvalidDomain(t *testing.T) { const userID = 2 httpClient := setupMockHTTPClient(t) - - tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) - require.NoError(t, err) + tgbot := setupBotWithMockHTTPClient(t, httpClient) tgbot.randomInt = func(n int) int { return 1 } + updateBotProviderFactory(tgbot, httpClient) key := fmt.Sprintf("source:%d", userID) - err = tgbot.storage.Set(context.TODO(), key, models.SourceLesswrong.Value(), 0) + err := tgbot.storage.Set(context.TODO(), key, models.SourceLesswrong.Value(), 0) require.NoError(t, err) got, err := tgbot.RandomPost(context.TODO(), userID) diff --git a/bot/testdata/astral_top_posts.md b/bot/testdata/astral_top_posts.md index f078789..01bd7c2 100644 --- a/bot/testdata/astral_top_posts.md +++ b/bot/testdata/astral_top_posts.md @@ -1,36 +1,12 @@ 🏆 Top posts from https://astralcodexten.substack.com 1. [Statement on New York Times Article](https://astralcodexten.substack.com/p/statement-on-new-york-times-article) - 2. [Still Alive](https://astralcodexten.substack.com/p/still-alive) - - You just keep on trying till you run out of cake - 3. [Book Review: The Cult Of Smart](https://astralcodexten.substack.com/p/book-review-the-cult-of-smart) - - Summary and commentary on The Cult Of Smart by Fredrik DeBoer - +4. [Hidden Open Thread 157.5](https://astralcodexten.substack.com/p/hidden-open-thread-1575) 5. [A Modest Proposal For Republicans: Use The Word "Class"](https://astralcodexten.substack.com/p/a-modest-proposal-for-republicans) - - Pivot from mindless populist rage to a thoughtful campaign to fight classism. - 6. [WebMD, And The Tragedy Of Legible Expertise](https://astralcodexten.substack.com/p/webmd-and-the-tragedy-of-legible) - - What does running a medical database teach you about why everything sucks? - 7. [You're Probably Wondering Why I've Called You Here Today](https://astralcodexten.substack.com/p/youre-probably-wondering-why-ive) - 8. [COVID/Vitamin D: Much More Than You Wanted To Know](https://astralcodexten.substack.com/p/covidvitamin-d-much-more-than-you) - 9. [Coronavirus: Links, Discussion, Open Thread](https://astralcodexten.substack.com/p/coronavirus-links-discussion-open) - - Will things get worse before they get better? - 10. [Contra Weyl On Technocracy](https://astralcodexten.substack.com/p/contra-weyl-on-technocracy) - - Beyond Brasilia - -11. [Ontology Of Psychiatric Conditions: Taxometrics](https://astralcodexten.substack.com/p/ontology-of-psychiatric-conditions) - - Is mental illness a thing? What kind of thing is it? - diff --git a/bot/testdata/lesswrong_ru_top_posts.md b/bot/testdata/lesswrong_ru_top_posts.md index 34e8911..6f12f91 100644 --- a/bot/testdata/lesswrong_ru_top_posts.md +++ b/bot/testdata/lesswrong_ru_top_posts.md @@ -1,26 +1,11 @@ 🏆 Random posts from https://lesswrong.ru -1. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) +1. [Что такое рациональность](https://lesswrong.ru/w/Что_такое_рациональность) -2. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) +2. [Эпистемическая рациональность](https://lesswrong.ru/w/Эпистемическая_рациональность) -3. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) +3. [Инструментальная рациональность](https://lesswrong.ru/w/Инструментальная_рациональность) -4. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) - -5. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) - -6. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) - -7. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) - -8. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) - -9. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) - -10. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) - -11. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) - -12. [Что такое рациональность](https://lesswrong.ru/w/%D0%A7%D1%82%D0%BE_%D1%82%D0%B0%D0%BA%D0%BE%D0%B5_%D1%80%D0%B0%D1%86%D0%B8%D0%BE%D0%BD%D0%B0%D0%BB%D1%8C%D0%BD%D0%BE%D1%81%D1%82%D1%8C) +4. [Научное мышление](https://lesswrong.ru/w/Научное_мышление) +5. [Когнитивные искажения](https://lesswrong.ru/w/Когнитивные_искажения) diff --git a/bot/testdata/lesswrong_top_posts.md b/bot/testdata/lesswrong_top_posts.md index 983e4f6..eb0f54e 100644 --- a/bot/testdata/lesswrong_top_posts.md +++ b/bot/testdata/lesswrong_top_posts.md @@ -1,26 +1,12 @@ -🏆 Top posts this week from https://lesswrong.com: - -1. [RadVac Commercial Antibody Test Results](https://www.lesswrong.com/posts/Mqy4GFqJoMSfs8raA/radvac-commercial-antibody-test-results) (johnswentworth) - -2. [Takeaways from one year of lockdown](https://www.lesswrong.com/posts/uM6mENiJi2pNPpdnC/takeaways-from-one-year-of-lockdown) (mingyuan) - -3. [Mentorship, Management, and Mysterious Old Wizards](https://www.lesswrong.com/posts/Wj5CCL7ay39on9ZuK/mentorship-management-and-mysterious-old-wizards) (Raemon) - -4. [Full-time AGI Safety!](https://www.lesswrong.com/posts/tnEQMnpyBFK5QBRz3/full-time-agi-safety) (steve2152) - -5. [Covid 2/25: Holding Pattern](https://www.lesswrong.com/posts/EYk8Hz3imnZK2eCXx/covid-2-25-holding-pattern) (Zvi) - -6. [A No-Nonsense Guide to Early Retirement](https://www.lesswrong.com/posts/ttXGrquvXgouawHEq/a-no-nonsense-guide-to-early-retirement) (tryactions) - -7. ["If You're Not a Holy Madman, You're Not Trying"](https://www.lesswrong.com/posts/s3rAKTkdSHb6Hwwoz/if-you-re-not-a-holy-madman-you-re-not-trying) (abramdemski) - -8. [Fun with +12 OOMs of Compute](https://www.lesswrong.com/posts/rzqACeBGycZtqCfaX/fun-with-12-ooms-of-compute) (Daniel Kokotajlo) - -9. [Judging Our April 2020 Covid-19 Predictions](https://www.lesswrong.com/posts/dRuTeLm7oEfxxBFRF/judging-our-april-2020-covid-19-predictions) (Zvi) - -10. [Avoid Contentious Terms](https://www.lesswrong.com/posts/6W8Jdcc2Dq4uyE7Hi/avoid-contentious-terms) (jefftk) - -11. [Why aren't we all using Taffix?](https://www.lesswrong.com/posts/Bg9ozak6GJfghmpRz/why-aren-t-we-all-using-taffix) (ChristianKl) - -12. [Anna and Oliver discuss Children and X-Risk](https://www.lesswrong.com/events/KGvQs9tTpgnKugdv2/anna-and-oliver-discuss-children-and-x-risk) (Raemon) - +🏆 Top posts from https://www.lesswrong.com + +1. [RadVac Commercial Antibody Test Results](https://www.lesswrong.com/posts/Mqy4GFqJoMSfs8raA/radvac-commercial-antibody-test-results) +2. [Takeaways from one year of lockdown](https://www.lesswrong.com/posts/uM6mENiJi2pNPpdnC/takeaways-from-one-year-of-lockdown) +3. [Mentorship, Management, and Mysterious Old Wizards](https://www.lesswrong.com/posts/Wj5CCL7ay39on9ZuK/mentorship-management-and-mysterious-old-wizards) +4. [Full-time AGI Safety!](https://www.lesswrong.com/posts/tnEQMnpyBFK5QBRz3/full-time-agi-safety) +5. [Covid 2/25: Holding Pattern](https://www.lesswrong.com/posts/EYk8Hz3imnZK2eCXx/covid-2-25-holding-pattern) +6. [A No-Nonsense Guide to Early Retirement](https://www.lesswrong.com/posts/ttXGrquvXgouawHEq/a-no-nonsense-guide-to-early-retirement) +7. ["If You're Not a Holy Madman, You're Not Trying"](https://www.lesswrong.com/posts/s3rAKTkdSHb6Hwwoz/if-you-re-not-a-holy-madman-you-re-not-trying) +8. [Fun with +12 OOMs of Compute](https://www.lesswrong.com/posts/rzqACeBGycZtqCfaX/fun-with-12-ooms-of-compute) +9. [Judging Our April 2020 Covid-19 Predictions](https://www.lesswrong.com/posts/dRuTeLm7oEfxxBFRF/judging-our-april-2020-covid-19-predictions) +10. [Avoid Contentious Terms](https://www.lesswrong.com/posts/6W8Jdcc2Dq4uyE7Hi/avoid-contentious-terms) diff --git a/bot/testdata/slate_top_posts.md b/bot/testdata/slate_top_posts.md new file mode 100644 index 0000000..d89f190 --- /dev/null +++ b/bot/testdata/slate_top_posts.md @@ -0,0 +1,14 @@ +🏆 Top posts from https://slatestarcodex.com + +1. [Meditations On Moloch](https://slatestarcodex.com/2014/07/30/meditations-on-moloch/) +2. [The Categories Were Made For Man, Not Man For The Categories](https://slatestarcodex.com/2014/11/21/the-categories-were-made-for-man-not-man-for-the-categories/) +3. [Untitled](https://slatestarcodex.com/2015/01/01/untitled/) +4. [I Can Tolerate Anything Except The Outgroup](https://slatestarcodex.com/2014/09/30/i-can-tolerate-anything-except-the-outgroup/) +5. [The Toxoplasma Of Rage](https://slatestarcodex.com/2014/12/17/the-toxoplasma-of-rage/) +6. [Proving Too Much](https://slatestarcodex.com/2013/04/13/proving-too-much/) +7. [Against Tulip Subsidies](https://slatestarcodex.com/2015/06/06/against-tulip-subsidies/) +8. [In Favor Of Niceness, Community, And Civilization](https://slatestarcodex.com/2014/02/23/in-favor-of-niceness-community-and-civilization/) +9. [The Noncentral Fallacy - The Worst Argument In The World](https://slatestarcodex.com/2012/11/02/the-noncentral-fallacy-the-worst-argument-in-the-world/) +10. [Reactionary Philosophy In An Enormous, Planet-Sized Nutshell](https://slatestarcodex.com/2013/10/20/the-anti-reactionary-faq/) + +https://slatestarcodex.com diff --git a/bot/top.go b/bot/top.go index 08427e3..81e3365 100644 --- a/bot/top.go +++ b/bot/top.go @@ -1,211 +1,11 @@ package bot import ( - "bytes" "context" - "encoding/json" - "fmt" - "log" - "time" - - "github.com/gocolly/colly" - - "github.com/ndrewnee/lesswrong-bot/models" ) -// As https://slatestarcodex.com top posts won't change anymore it's much more effecient to return hardcoded list. -const MessageTopSlate = `🏆 Top posts from https://slatestarcodex.com - -1. [Beware The Man Of One Study](https://slatestarcodex.com/2014/12/12/beware-the-man-of-one-study/) - -2. [Meditations on Moloch](https://slatestarcodex.com/2014/07/30/meditations-on-moloch/) - -3. [I Can Tolerate Anything Except The Outgroup](https://slatestarcodex.com/2014/09/30/i-can-tolerate-anything-except-the-outgroup/) - -4. [Book Review: Albion's Seed](https://slatestarcodex.com/2016/04/27/book-review-albions-seed/) - -5. [Nobody Is Perfect, Everything Is Commensurable](https://slatestarcodex.com/2014/12/19/nobody-is-perfect-everything-is-commensurable/) - -6. [The Control Group Is Out Of Control](https://slatestarcodex.com/2014/04/28/the-control-group-is-out-of-control/) - -7. [Considerations On Cost Disease](https://slatestarcodex.com/2017/02/09/considerations-on-cost-disease/) - -8. [Archipelago And Atomic Communitarianism](https://slatestarcodex.com/2014/06/07/archipelago-and-atomic-communitarianism/) - -9. [The Categories Were Made For Man, Not Man For The Categories](https://slatestarcodex.com/2014/11/21/the-categories-were-made-for-man-not-man-for-the-categories/) - -10. [Who By Very Slow Decay](https://slatestarcodex.com/2013/07/17/who-by-very-slow-decay/)` - -// Fallback content for when Astral Codex Ten API is blocked by Cloudflare -const MessageTopAstral = `🏆 Top posts from https://astralcodexten.substack.com - -1. [Bounded Distrust](https://astralcodexten.substack.com/p/bounded-distrust) - -2. [Your Book Review: Progress And Poverty](https://astralcodexten.substack.com/p/your-book-review-progress-and-poverty) - -3. [Highlights From The Comments On AI Timelines](https://astralcodexten.substack.com/p/highlights-from-the-comments-on-ai) - -4. [Mantic Monday 2/28/22](https://astralcodexten.substack.com/p/mantic-monday-22822) - -5. [Book Review: The Righteous Mind](https://astralcodexten.substack.com/p/book-review-the-righteous-mind) - -6. [Highlights From The Comments On Medical Coding](https://astralcodexten.substack.com/p/highlights-from-the-comments-on-medical) - -7. [Contra Hoel On Aristocratic Tutoring](https://astralcodexten.substack.com/p/contra-hoel-on-aristocratic-tutoring) - -8. [Model City Monday 8/2/21](https://astralcodexten.substack.com/p/model-city-monday-8221) - -9. [Your Book Review: On The Natural Faculties](https://astralcodexten.substack.com/p/your-book-review-on-the-natural-faculties) - -10. [Whither Tartaria?](https://astralcodexten.substack.com/p/whither-tartaria)` - func (b *Bot) TopPosts(ctx context.Context, userID int) (string, error) { - key := fmt.Sprintf("source:%d", userID) - - source, err := b.storage.Get(ctx, key) - if err != nil { - log.Printf("[ERROR] Get source failed: %s, key: %s", err, key) - } - - switch models.Source(source) { - case models.SourceLesswrongRu: - return b.topLesswrongRu(ctx) - case models.SourceSlate: - return MessageTopSlate, nil - case models.SourceAstral: - return b.topAstral(ctx) - case models.SourceLesswrong: - return b.topLesswrong(ctx) - default: - return b.topLesswrongRu(ctx) - } -} - -func (b *Bot) topAstral(ctx context.Context) (string, error) { - httpResponse, err := b.httpClient.Get(ctx, "https://astralcodexten.substack.com/api/v1/archive?sort=top&limit=10") - if err != nil { - return "", fmt.Errorf("get astralcodexten posts failed: %s", err) - } - - var topPosts []models.AstralPost - - if err := b.handleResponse(httpResponse, &topPosts); err != nil { - // Handle Cloudflare blocking (403) or rate limiting (429) by returning fallback content - if httpResponse.StatusCode == 403 || httpResponse.StatusCode == 429 { - log.Printf("[WARN] Astral Codex Ten API blocked (status %d), using fallback content", httpResponse.StatusCode) - return MessageTopAstral, nil - } - return "", fmt.Errorf("handle astralcodexten top posts response: %s", err) - } - - text := bytes.NewBufferString("🏆 Top posts from https://astralcodexten.substack.com\n\n") - - for i, post := range topPosts { - if post.Audience == "only_paid" { - continue - } - - text.WriteString(fmt.Sprintf("%d. [%s](%s)\n\n", i+1, post.Title, post.CanonicalURL)) - - if post.Subtitle != "" && post.Subtitle != "..." { - text.WriteString(fmt.Sprintf(" %s\n\n", post.Subtitle)) - } - } - - return text.String(), nil -} - -func (b *Bot) topLesswrongRu(ctx context.Context) (string, error) { - postsCached, err := b.storage.Get(ctx, "posts:lesswrong.ru") - if err != nil { - return "", fmt.Errorf("get lesswrong.ru cached posts failed: %s", err) - } - - var posts []models.Post - - if postsCached != "" { - if err := json.Unmarshal([]byte(postsCached), &posts); err != nil { - return "", fmt.Errorf("unmarshal lesswrong.ru cached posts failed: %s", err) - } - } - - // Load posts for the first time. - if len(posts) == 0 { - postsCollector := colly.NewCollector() - - postsCollector.OnHTML("li.leaf.menu-depth-3,li.leaf.menu-depth-4", func(e *colly.HTMLElement) { - posts = append(posts, models.Post{ - Title: e.Text, - URL: e.Request.AbsoluteURL(e.ChildAttr("a", "href")), - }) - }) - - if err := postsCollector.Visit("https://lesswrong.ru/w"); err != nil { - return "", fmt.Errorf("get lesswrong.ru posts failed: %s", err) - } - - postsCache, err := json.Marshal(posts) - if err != nil { - return "", fmt.Errorf("marshal lesswrong.ru posts failed: %s", err) - } - - if err := b.storage.Set(ctx, "posts:lesswrong.ru", string(postsCache), b.config.CacheExpire); err != nil { - return "", fmt.Errorf("cache lesswrong.ru posts failed: %s", err) - } - } - - if len(posts) == 0 { - return "", fmt.Errorf("lesswrong.ru posts not found") - } - - text := bytes.NewBufferString("🏆 Random posts from https://lesswrong.ru\n\n") - - // As lesswrong.ru doesn't have page with top posts return random posts instead. - for i := 0; i < models.DefaultLimit; i++ { - n := b.randomInt(len(posts)) - post := posts[n] - - text.WriteString(fmt.Sprintf("%d. [%s](%s)\n\n", i+1, post.Title, post.URL)) - } - - return text.String(), nil -} - -func (b *Bot) topLesswrong(ctx context.Context) (string, error) { - query := fmt.Sprintf(`{ - posts(input: {terms: {view: "top", limit: 12, meta: null, after: "%s"}}) { - results { - title - pageUrl - user { - displayName - } - } - } - }`, time.Now().AddDate(0, 0, -7).Format("2006-01-02")) - - body, err := json.Marshal(map[string]string{"query": query}) - if err != nil { - return "", fmt.Errorf("marshal request for lesswrong.com top posts failed: %s", err) - } - - httpResponse, err := b.httpClient.Post(ctx, "https://www.lesswrong.com/graphql", "application/json", bytes.NewBuffer(body)) - if err != nil { - return "", fmt.Errorf("get lesswrong.com top posts failed: %s", err) - } - - var response models.LesswrongResponse - - if err := b.handleResponse(httpResponse, &response); err != nil { - return "", fmt.Errorf("handle lesswrong.com top posts response: %s", err) - } - - text := bytes.NewBufferString("🏆 Top posts this week from https://lesswrong.com:\n\n") - - for i, post := range response.Data.Posts.Results { - escapedAuthor := b.escapeMarkdown(post.User.DisplayName) - text.WriteString(fmt.Sprintf("%d. [%s](%s) (%s)\n\n", i+1, post.Title, post.PageURL, escapedAuthor)) - } - - return text.String(), nil + source := b.getUserSource(ctx, userID) + provider := b.providerFactory.CreateProvider(source) + return provider.GetTopPosts(ctx) } diff --git a/bot/top_test.go b/bot/top_test.go index 36df8b5..2836d20 100644 --- a/bot/top_test.go +++ b/bot/top_test.go @@ -8,14 +8,15 @@ import ( "io" "net/http" "os" + "strings" "testing" - "time" tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api" "github.com/stretchr/testify/require" "github.com/ndrewnee/lesswrong-bot/bot/mocks" "github.com/ndrewnee/lesswrong-bot/models" + "github.com/ndrewnee/lesswrong-bot/providers" ) func TestTopPosts(t *testing.T) { @@ -23,8 +24,10 @@ func TestTopPosts(t *testing.T) { httpClient := &mocks.HTTPClient{} + // Mock Astral API calls httpClient.On("Get", context.TODO(), "https://astralcodexten.substack.com/api/v1/archive?sort=top&limit=10").Return( &http.Response{ + StatusCode: 200, Body: func() io.ReadCloser { file, err := os.ReadFile("testdata/astral_top_posts.json") require.NoError(t, err) @@ -35,23 +38,22 @@ func TestTopPosts(t *testing.T) { nil, ) - query := fmt.Sprintf(`{ - posts(input: {terms: {view: "top", limit: 12, meta: null, after: "%s"}}) { + // Mock LessWrong GraphQL calls + query := `{ + posts(input: {terms: {view: "top", limit: 10, meta: null}}) { results { title pageUrl - user { - displayName - } } } - }`, time.Now().AddDate(0, 0, -7).Format("2006-01-02")) + }` request, err := json.Marshal(map[string]string{"query": query}) require.NoError(t, err) httpClient.On("Post", context.TODO(), "https://www.lesswrong.com/graphql", "application/json", bytes.NewBuffer(request)).Return( &http.Response{ + StatusCode: 200, Body: func() io.ReadCloser { file, err := os.ReadFile("testdata/lesswrong_top_posts.json") require.NoError(t, err) @@ -64,6 +66,14 @@ func TestTopPosts(t *testing.T) { tgbot, err := New(Options{BotAPI: &tgbotapi.BotAPI{}, HTTPClient: httpClient}) require.NoError(t, err) + + // Update the provider factory to use the same mock HTTP client + tgbot.providerFactory = providers.NewProviderFactory( + tgbot.storage, + httpClient, + int(tgbot.config.CacheExpire.Seconds()), + tgbot.randomInt, + ) type args struct { randomPost int @@ -84,7 +94,9 @@ func TestTopPosts(t *testing.T) { want: func(t *testing.T, got string) { file, err := os.ReadFile("testdata/lesswrong_ru_top_posts.md") require.NoError(t, err) - require.Equal(t, string(file), got) + expected := strings.TrimSpace(string(file)) + actual := strings.TrimSpace(got) + require.Equal(t, expected, actual) }, wantErr: require.NoError, }, @@ -94,7 +106,11 @@ func TestTopPosts(t *testing.T) { source: models.SourceSlate, }, want: func(t *testing.T, got string) { - require.Equal(t, MessageTopSlate, got) + file, err := os.ReadFile("testdata/slate_top_posts.md") + require.NoError(t, err) + expected := strings.TrimSpace(string(file)) + actual := strings.TrimSpace(got) + require.Equal(t, expected, actual) }, wantErr: require.NoError, }, @@ -106,7 +122,9 @@ func TestTopPosts(t *testing.T) { want: func(t *testing.T, got string) { file, err := os.ReadFile("testdata/astral_top_posts.md") require.NoError(t, err) - require.Equal(t, string(file), got) + expected := strings.TrimSpace(string(file)) + actual := strings.TrimSpace(got) + require.Equal(t, expected, actual) }, wantErr: require.NoError, }, @@ -119,7 +137,9 @@ func TestTopPosts(t *testing.T) { want: func(t *testing.T, got string) { file, err := os.ReadFile("testdata/lesswrong_ru_top_posts.md") require.NoError(t, err) - require.Equal(t, string(file), got) + expected := strings.TrimSpace(string(file)) + actual := strings.TrimSpace(got) + require.Equal(t, expected, actual) }, wantErr: require.NoError, }, @@ -131,7 +151,9 @@ func TestTopPosts(t *testing.T) { want: func(t *testing.T, got string) { file, err := os.ReadFile("testdata/lesswrong_top_posts.md") require.NoError(t, err) - require.Equal(t, string(file), got) + expected := strings.TrimSpace(string(file)) + actual := strings.TrimSpace(got) + require.Equal(t, expected, actual) }, wantErr: require.NoError, }, diff --git a/bot/utils.go b/bot/utils.go deleted file mode 100644 index 4215e65..0000000 --- a/bot/utils.go +++ /dev/null @@ -1,71 +0,0 @@ -package bot - -import ( - "encoding/json" - "fmt" - "io" - "net/http" - "strings" -) - -// min returns the minimum of two integers -func min(a, b int) int { - if a < b { - return a - } - return b -} - -// handleResponse handles the common logic for API responses and unmarshals JSON -func (b *Bot) handleResponse(httpResponse *http.Response, target interface{}) error { - defer httpResponse.Body.Close() - - // Check if response is successful - if httpResponse.StatusCode != 0 && httpResponse.StatusCode != http.StatusOK { - bodyBytes, _ := io.ReadAll(httpResponse.Body) - return fmt.Errorf("API returned status %d: %s", httpResponse.StatusCode, string(bodyBytes)) - } - - // Read the response body to check if it's valid JSON - bodyBytes, err := io.ReadAll(httpResponse.Body) - if err != nil { - return fmt.Errorf("read response body failed: %s", err) - } - - // Check if response starts with HTML (error page) - if len(bodyBytes) > 0 && bodyBytes[0] == '<' { - return fmt.Errorf("API returned HTML instead of JSON: %s", string(bodyBytes[:min(200, len(bodyBytes))])) - } - - // Unmarshal JSON into target - if err := json.Unmarshal(bodyBytes, target); err != nil { - return fmt.Errorf("unmarshal failed: %s", err) - } - - return nil -} - -// escapeMarkdown escapes special characters in markdown text -func (b *Bot) escapeMarkdown(text string) string { - replacer := strings.NewReplacer( - "_", "\\_", - "*", "\\*", - "[", "\\[", - "]", "\\]", - "(", "\\(", - ")", "\\)", - "~", "\\~", - "`", "\\`", - ">", "\\>", - "#", "\\#", - "+", "\\+", - "-", "\\-", - "=", "\\=", - "|", "\\|", - "{", "\\{", - "}", "\\}", - ".", "\\.", - "!", "\\!", - ) - return replacer.Replace(text) -} diff --git a/config/config.go b/config/config.go index 6363409..26c9314 100644 --- a/config/config.go +++ b/config/config.go @@ -6,6 +6,22 @@ import ( "time" ) +const ( + // Default values for configuration + DefaultPort = 9999 + DefaultWebhookHost = "https://lesswrong-bot.herokuapp.com" + DefaultRedisURL = "redis://localhost:6379/1" + DefaultTimeout = 15 * time.Second + DefaultCacheExpire = 24 * time.Hour + + // Application constants + DefaultPostLimit = 12 + TopPostsLimit = 10 + TopPostsWeeklyDays = 7 + PostMaxLength = 500 + LesswrongPostsMax = 2000 +) + type Config struct { RedisURL string Address string @@ -20,27 +36,27 @@ type Config struct { func Parse() Config { port, err := strconv.Atoi(os.Getenv("PORT")) if err != nil { - port = 9999 + port = DefaultPort } webhookHost := os.Getenv("WEBHOOK_HOST") if webhookHost == "" { - webhookHost = "https://lesswrong-bot.herokuapp.com" + webhookHost = DefaultWebhookHost } redisURL := os.Getenv("REDIS_URL") if redisURL == "" { - redisURL = "redis://localhost:6379/1" + redisURL = DefaultRedisURL } timeout, err := time.ParseDuration(os.Getenv("TIMEOUT")) if err != nil { - timeout = 15 * time.Second + timeout = DefaultTimeout } expire, err := time.ParseDuration(os.Getenv("CACHE_EXPIRE")) if err != nil { - expire = 24 * time.Hour + expire = DefaultCacheExpire } return Config{ diff --git a/formatter/markdown.go b/formatter/markdown.go new file mode 100644 index 0000000..3f98b39 --- /dev/null +++ b/formatter/markdown.go @@ -0,0 +1,100 @@ +package formatter + +import ( + "fmt" + "strings" + + md "github.com/JohannesKaufmann/html-to-markdown" + + "github.com/ndrewnee/lesswrong-bot/config" + "github.com/ndrewnee/lesswrong-bot/models" +) + +type MarkdownFormatter struct{} + +func NewMarkdownFormatter() *MarkdownFormatter { + return &MarkdownFormatter{} +} + +func (f *MarkdownFormatter) FormatPost(post models.Post, converter *md.Converter, urlWithText bool) (string, error) { + markdownOrig, err := converter.ConvertString(post.HTML) + if err != nil { + return "", fmt.Errorf("convert html to markdown failed: %s", err) + } + + markdown := f.truncateContent(markdownOrig) + markdown = f.applyMarkdownFixes(markdown) + + link := fmt.Sprintf("[%s](%s)", post.Title, post.URL) + postURL := post.URL + if urlWithText { + postURL = link + } + + return fmt.Sprintf("📝 %s\n\n%s\n\n%s", link, markdown, postURL), nil +} + +func (f *MarkdownFormatter) truncateContent(markdown string) string { + if len(markdown) <= config.PostMaxLength { + return markdown + } + + // Convert to runes to properly split between unicode symbols. + runes := []rune(markdown) + truncated := string(runes[:config.PostMaxLength]) + + // Truncate after next line end to not break markdown text. + rest := string(runes[config.PostMaxLength:]) + if n := strings.IndexByte(rest, '\n'); n != -1 { + truncated += rest[:n] + } else { + return markdown // Return original if we can't find a good truncation point + } + + return f.cleanupTruncatedMarkdown(truncated) +} + +func (f *MarkdownFormatter) cleanupTruncatedMarkdown(markdown string) string { + // Clean up artifacts from truncation + markdown = strings.ReplaceAll(markdown, "* * *", "") + markdown = strings.ReplaceAll(markdown, "```", "") + return markdown +} + +func (f *MarkdownFormatter) applyMarkdownFixes(markdown string) string { + // Fix the specific problematic patterns first (order matters) + fixes := []struct { + old, new string + }{ + // Fix double-escaped sequences from markdown converter + {"_\\\\\\\\[", "["}, // Convert "_\\\\[" to "[" + {"_\\\\[", "["}, // Convert "_\\[" to "[" + {"\\\\\\\\[", "["}, // Convert "\\\\[" to "[" + {"\\\\[", "["}, // Convert "\\[" to "[" + {"\\\\]", "]"}, // Convert "\\]" to "]" + {"\\\\_", ""}, // Convert "\\_" to "" + // Fix single escape sequences + {"_\\[", "["}, // Convert "_\[" to "[" + {"\\[", "["}, // Convert standalone "\[" to "[" + {"\\]", "]"}, // Convert "\]" to "]" + // Then apply general fixes + {"[[", "["}, + {"]]", "]"}, + {"![]", "[Image]"}, + {"_[", ""}, + {"]_", ""}, + } + + for _, fix := range fixes { + markdown = strings.ReplaceAll(markdown, fix.old, fix.new) + } + + // Remove incomplete escape sequences at the end of lines + lines := strings.Split(markdown, "\n") + for i, line := range lines { + // Remove trailing backslash that can break parsing + lines[i] = strings.TrimSuffix(line, "\\") + } + + return strings.Join(lines, "\n") +} \ No newline at end of file diff --git a/formatter/markdown_test.go b/formatter/markdown_test.go new file mode 100644 index 0000000..d251810 --- /dev/null +++ b/formatter/markdown_test.go @@ -0,0 +1,111 @@ +package formatter + +import ( + "strings" + "testing" + + md "github.com/JohannesKaufmann/html-to-markdown" + + "github.com/ndrewnee/lesswrong-bot/models" +) + +func TestMarkdownFormatter_FixesSpecificTelegramParsingIssue(t *testing.T) { + formatter := NewMarkdownFormatter() + + // Test the specific problematic pattern from the error + tests := []struct { + name string + input string + shouldFix bool + }{ + { + name: "Fix specific problematic pattern _\\[", + input: "_\\[Epistemic status: very low. Total conjecture\\", + shouldFix: true, + }, + { + name: "Fix standalone \\[", + input: "Some text \\[with brackets", + shouldFix: true, + }, + { + name: "Leave valid markdown alone", + input: "_italic_ and **bold** and [link](url)", + shouldFix: false, + }, + { + name: "Fix trailing backslash", + input: "Some text ending with \\", + shouldFix: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := formatter.applyMarkdownFixes(tt.input) + + if tt.shouldFix { + // Should not contain problematic patterns + if strings.Contains(result, "_\\[") { + t.Errorf("Still contains _\\[ pattern: %s", result) + } + if strings.Contains(result, "\\[") { + t.Errorf("Still contains \\[ pattern: %s", result) + } + if strings.HasSuffix(result, "\\") { + t.Errorf("Still ends with backslash: %s", result) + } + } else { + // Should preserve valid markdown + if !strings.Contains(result, "_italic_") || !strings.Contains(result, "**bold**") { + t.Errorf("Valid markdown was broken: %s", result) + } + } + }) + } +} + +func TestMarkdownFormatter_FormatPost_HandlesOriginalError(t *testing.T) { + formatter := NewMarkdownFormatter() + converter := md.NewConverter("slatestarcodex.com", true, nil) + + // Simulate the exact problematic HTML that caused the original error + problematicHTML := `\[Epistemic status: very low. Total conjecture based on insufficient evidence.\ +"Voodoo death" refers to supposed cases where people died after being cursed by witch doctors.
` + + post := models.Post{ + Title: "Devoodooifying Psychology", + URL: "https://slatestarcodex.com/2016/08/25/devoodooifying-psychology/", + HTML: problematicHTML, + } + + result, err := formatter.FormatPost(post, converter, false) + if err != nil { + t.Fatalf("FormatPost() failed: %v", err) + } + + // The result should not contain the problematic patterns that break Telegram + if strings.Contains(result, "_\\[") { + t.Error("Result still contains _\\[ pattern that breaks Telegram parsing") + } + + if strings.Contains(result, "\\[") { + t.Error("Result still contains \\[ pattern that breaks Telegram parsing") + } + + // Should not end with backslash + lines := strings.Split(result, "\n") + for _, line := range lines { + if strings.HasSuffix(line, "\\") { + t.Errorf("Line ends with backslash: %s", line) + } + } + + // Should still contain the main content + if !strings.Contains(result, "Devoodooifying Psychology") { + t.Error("Result missing title") + } + if !strings.Contains(result, "Voodoo death") { + t.Error("Result missing content") + } +} \ No newline at end of file diff --git a/interfaces/interfaces.go b/interfaces/interfaces.go new file mode 100644 index 0000000..83b3178 --- /dev/null +++ b/interfaces/interfaces.go @@ -0,0 +1,20 @@ +package interfaces + +import ( + "context" + "io" + "net/http" + "time" +) + +// HTTPClient defines the interface for making HTTP requests +type HTTPClient interface { + Get(ctx context.Context, url string) (*http.Response, error) + Post(ctx context.Context, url, contentType string, body io.Reader) (*http.Response, error) +} + +// Storage defines the interface for caching operations +type Storage interface { + Get(ctx context.Context, key string) (string, error) + Set(ctx context.Context, key, value string, expire time.Duration) error +} \ No newline at end of file diff --git a/main.go b/main.go index 53052be..8d7d473 100644 --- a/main.go +++ b/main.go @@ -6,6 +6,7 @@ import ( "github.com/ndrewnee/lesswrong-bot/bot" "github.com/ndrewnee/lesswrong-bot/config" + "github.com/ndrewnee/lesswrong-bot/interfaces" "github.com/ndrewnee/lesswrong-bot/storage/memory" "github.com/ndrewnee/lesswrong-bot/storage/redis" ) @@ -14,7 +15,7 @@ func main() { config := config.Parse() var ( - storage bot.Storage + storage interfaces.Storage err error ) diff --git a/models/post.go b/models/post.go index ff5c29e..ca13a8b 100644 --- a/models/post.go +++ b/models/post.go @@ -1,9 +1,10 @@ package models +import "github.com/ndrewnee/lesswrong-bot/config" + const ( - DefaultLimit = 12 - PostMaxLength = 500 - LesswrongPostsMaxCount = 2000 + DefaultLimit = config.DefaultPostLimit + LesswrongPostsMaxCount = config.LesswrongPostsMax ) type ( diff --git a/providers/adapters.go b/providers/adapters.go new file mode 100644 index 0000000..5cf0457 --- /dev/null +++ b/providers/adapters.go @@ -0,0 +1,74 @@ +package providers + +import ( + "context" + "io" + "time" + + "github.com/ndrewnee/lesswrong-bot/interfaces" +) + +type HTTPClientAdapter struct { + client interfaces.HTTPClient +} + +func NewHTTPClientAdapter(client interfaces.HTTPClient) *HTTPClientAdapter { + return &HTTPClientAdapter{client: client} +} + +func (a *HTTPClientAdapter) Get(ctx context.Context, url string) (*HTTPResponse, error) { + resp, err := a.client.Get(ctx, url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + return &HTTPResponse{ + StatusCode: resp.StatusCode, + Body: body, + }, nil +} + +func (a *HTTPClientAdapter) Post(ctx context.Context, url, contentType string, body interface{}) (*HTTPResponse, error) { + var reader io.Reader + if r, ok := body.(io.Reader); ok { + reader = r + } + + resp, err := a.client.Post(ctx, url, contentType, reader) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + return &HTTPResponse{ + StatusCode: resp.StatusCode, + Body: respBody, + }, nil +} + +type StorageAdapter struct { + storage interfaces.Storage +} + +func NewStorageAdapter(storage interfaces.Storage) *StorageAdapter { + return &StorageAdapter{storage: storage} +} + +func (a *StorageAdapter) Get(ctx context.Context, key string) (string, error) { + return a.storage.Get(ctx, key) +} + +func (a *StorageAdapter) Set(ctx context.Context, key, value string, expire int) error { + return a.storage.Set(ctx, key, value, time.Second*time.Duration(expire)) +} diff --git a/providers/astral.go b/providers/astral.go new file mode 100644 index 0000000..00d7ea2 --- /dev/null +++ b/providers/astral.go @@ -0,0 +1,197 @@ +package providers + +import ( + "context" + "encoding/json" + "fmt" + "log" + "strings" + + "github.com/ndrewnee/lesswrong-bot/models" +) + +type AstralProvider struct { + storage Storage + httpClient HTTPClient + cacheExpire int + randomInt func(int) int +} + +func NewAstralProvider(storage Storage, httpClient HTTPClient, cacheExpire int, randomInt func(int) int) *AstralProvider { + return &AstralProvider{ + storage: storage, + httpClient: httpClient, + cacheExpire: cacheExpire, + randomInt: randomInt, + } +} + +func (p *AstralProvider) GetName() string { + return "Astral Codex Ten" +} + +func (p *AstralProvider) GetCacheKey() string { + return "posts:astralcodexten" +} + +func (p *AstralProvider) GetTopPosts(ctx context.Context) (string, error) { + posts, err := p.fetchTopPosts(ctx) + if err != nil { + return "", fmt.Errorf("fetch top posts failed: %w", err) + } + + return p.formatTopPosts(posts), nil +} + +func (p *AstralProvider) fetchTopPosts(ctx context.Context) ([]models.AstralPost, error) { + resp, err := p.httpClient.Get(ctx, "https://astralcodexten.substack.com/api/v1/archive?sort=top&limit=10") + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + + if resp.StatusCode != 200 { + log.Printf("[ERROR] astralcodexten.com top posts request failed with status %d: %s", resp.StatusCode, string(resp.Body)) + // Return empty posts to trigger fallback in formatTopPosts + return []models.AstralPost{}, nil + } + + var posts []models.AstralPost + if err := json.Unmarshal(resp.Body, &posts); err != nil { + return nil, fmt.Errorf("unmarshal failed: %w", err) + } + + return posts, nil +} + +func (p *AstralProvider) formatTopPosts(posts []models.AstralPost) string { + if len(posts) == 0 { + return "🏆 Top posts from https://astralcodexten.substack.com\n\nNo posts found." + } + + var sb strings.Builder + sb.WriteString("🏆 Top posts from https://astralcodexten.substack.com\n\n") + + for i, post := range posts { + if i >= 10 { + break + } + sb.WriteString(fmt.Sprintf("%d. [%s](%s)\n", i+1, post.Title, post.CanonicalURL)) + } + + return sb.String() +} + +func (p *AstralProvider) GetRandomPost(ctx context.Context) (models.Post, error) { + postsCached, err := p.storage.Get(ctx, p.GetCacheKey()) + if err != nil { + return models.Post{}, fmt.Errorf("get astralcodexten cached posts failed: %s", err) + } + + var posts []models.Post + + if postsCached != "" { + if err := json.Unmarshal([]byte(postsCached), &posts); err != nil { + return models.Post{}, fmt.Errorf("unmarshal astralcodexten cached posts failed: %s", err) + } + } + + if len(posts) == 0 { + posts, err = p.fetchPosts(ctx) + if err != nil { + return models.Post{}, err + } + } + + if len(posts) == 0 { + return models.Post{}, fmt.Errorf("astralcodexten posts not found") + } + + i := p.randomInt(len(posts)) + post := posts[i] + + httpResponse, err := p.httpClient.Get(ctx, "https://astralcodexten.substack.com/api/v1/posts/"+post.Slug) + if err != nil { + return models.Post{}, fmt.Errorf("get astralcodexten random post failed: %s", err) + } + + var astralPost models.AstralPost + + if err := p.handleResponse(httpResponse, &astralPost); err != nil { + if httpResponse.StatusCode == 403 || httpResponse.StatusCode == 429 { + fallbackPost := models.Post{ + Title: post.Title, + URL: post.URL, + HTML: "Content temporarily unavailable due to API restrictions. Please visit the link above to read the full post.
", + } + return fallbackPost, nil + } + return models.Post{}, fmt.Errorf("handle astralcodexten post response: %s", err) + } + + return astralPost.AsPost(), nil +} + +func (p *AstralProvider) fetchPosts(ctx context.Context) ([]models.Post, error) { + var posts []models.Post + + for offset := 0; true; offset += models.DefaultLimit { + uri := fmt.Sprintf("https://astralcodexten.substack.com/api/v1/archive?sort=new&limit=%d&offset=%d", + models.DefaultLimit, + offset, + ) + + httpResponse, err := p.httpClient.Get(ctx, uri) + if err != nil { + log.Printf("[ERROR] Get astralcodexten posts failed: %s", err) + break + } + + var newPosts []models.AstralPost + + if err := p.handleResponse(httpResponse, &newPosts); err != nil { + log.Printf("[ERROR] handle astralcodexten posts response: %s", err) + if (httpResponse.StatusCode == 403 || httpResponse.StatusCode == 429) && len(posts) == 0 { + fallbackPost := models.Post{ + Title: "Astral Codex Ten", + URL: "https://astralcodexten.substack.com", + HTML: "Content temporarily unavailable due to API restrictions. Please visit the link above to read the full post.
", + } + return []models.Post{fallbackPost}, nil + } + break + } + + if len(newPosts) == 0 { + break + } + + for _, astralPost := range newPosts { + if astralPost.Audience != "only_paid" { + posts = append(posts, astralPost.AsPost()) + } + } + } + + if len(posts) > 0 { + postsCache, err := json.Marshal(posts) + if err != nil { + return nil, fmt.Errorf("marshal astralcodexten posts failed: %s", err) + } + + if err := p.storage.Set(ctx, p.GetCacheKey(), string(postsCache), p.cacheExpire); err != nil { + return nil, fmt.Errorf("cache astralcodexten posts failed: %s", err) + } + } + + return posts, nil +} + +func (p *AstralProvider) handleResponse(httpResponse *HTTPResponse, target interface{}) error { + bodyBytes := httpResponse.Body + // Check if response starts with HTML (error page) + if len(bodyBytes) > 0 && bodyBytes[0] == '<' { + return fmt.Errorf("API returned HTML instead of JSON: %s", string(bodyBytes[:min(200, len(bodyBytes))])) + } + + return json.Unmarshal(bodyBytes, target) +} diff --git a/providers/factory.go b/providers/factory.go new file mode 100644 index 0000000..104cc5a --- /dev/null +++ b/providers/factory.go @@ -0,0 +1,63 @@ +package providers + +import ( + md "github.com/JohannesKaufmann/html-to-markdown" + + "github.com/ndrewnee/lesswrong-bot/interfaces" + "github.com/ndrewnee/lesswrong-bot/models" +) + +type ProviderFactory struct { + storage Storage + httpClient HTTPClient + cacheExpire int + randomInt func(int) int +} + +func NewProviderFactory( + storage interfaces.Storage, + httpClient interfaces.HTTPClient, + cacheExpire int, + randomInt func(int) int, +) *ProviderFactory { + return &ProviderFactory{ + storage: NewStorageAdapter(storage), + httpClient: NewHTTPClientAdapter(httpClient), + cacheExpire: cacheExpire, + randomInt: randomInt, + } +} + +func (f *ProviderFactory) CreateProvider(source models.Source) PostProvider { + switch source { + case models.SourceLesswrongRu: + return NewLessWrongRuProvider(f.storage, f.cacheExpire, f.randomInt) + case models.SourceSlate: + return NewSlateProvider(f.storage, f.cacheExpire, f.randomInt) + case models.SourceAstral: + return NewAstralProvider(f.storage, f.httpClient, f.cacheExpire, f.randomInt) + case models.SourceLesswrong: + return NewLessWrongProvider(f.httpClient, f.randomInt) + default: + return NewLessWrongRuProvider(f.storage, f.cacheExpire, f.randomInt) + } +} + +func (f *ProviderFactory) GetMarkdownConverter(source models.Source) *md.Converter { + switch source { + case models.SourceLesswrongRu: + return md.NewConverter(models.DomainLesswrongRu, true, nil) + case models.SourceSlate: + return md.NewConverter(models.DomainSlate, true, nil) + case models.SourceAstral: + return md.NewConverter(models.DomainAstral, true, nil) + case models.SourceLesswrong: + return md.NewConverter(models.DomainLesswrong, true, nil) + default: + return md.NewConverter(models.DomainLesswrongRu, true, nil) + } +} + +func (f *ProviderFactory) ShouldUseURLWithText(source models.Source) bool { + return source == models.SourceLesswrongRu +} diff --git a/providers/lesswrong.go b/providers/lesswrong.go new file mode 100644 index 0000000..55a7143 --- /dev/null +++ b/providers/lesswrong.go @@ -0,0 +1,129 @@ +package providers + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "log" + "strings" + + "github.com/ndrewnee/lesswrong-bot/models" +) + +type LessWrongProvider struct { + httpClient HTTPClient + randomInt func(int) int +} + +func NewLessWrongProvider(httpClient HTTPClient, randomInt func(int) int) *LessWrongProvider { + return &LessWrongProvider{ + httpClient: httpClient, + randomInt: randomInt, + } +} + +func (p *LessWrongProvider) GetName() string { + return "LessWrong.com" +} + +func (p *LessWrongProvider) GetCacheKey() string { + return "posts:lesswrong.com" +} + +func (p *LessWrongProvider) GetTopPosts(ctx context.Context) (string, error) { + posts, err := p.fetchTopPosts(ctx) + if err != nil { + return "", fmt.Errorf("fetch top posts failed: %w", err) + } + + return p.formatTopPosts(posts), nil +} + +func (p *LessWrongProvider) fetchTopPosts(ctx context.Context) ([]models.LesswrongResult, error) { + query := `{ + posts(input: {terms: {view: "top", limit: 10, meta: null}}) { + results { + title + pageUrl + } + } + }` + + requestBody, err := json.Marshal(map[string]string{"query": query}) + if err != nil { + return nil, fmt.Errorf("marshal request failed: %w", err) + } + + resp, err := p.httpClient.Post(ctx, "https://www.lesswrong.com/graphql", "application/json", bytes.NewBuffer(requestBody)) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + + if resp.StatusCode != 200 { + log.Printf("[ERROR] lesswrong.com top posts request failed with status %d: %s", resp.StatusCode, string(resp.Body)) + // Return empty posts to trigger fallback in formatTopPosts + return []models.LesswrongResult{}, nil + } + + var response models.LesswrongResponse + if err := json.Unmarshal(resp.Body, &response); err != nil { + return nil, fmt.Errorf("unmarshal failed: %w", err) + } + + return response.Data.Posts.Results, nil +} + +func (p *LessWrongProvider) formatTopPosts(posts []models.LesswrongResult) string { + if len(posts) == 0 { + return "🏆 Top posts from https://www.lesswrong.com\n\nNo posts found." + } + + var sb strings.Builder + sb.WriteString("🏆 Top posts from https://www.lesswrong.com\n\n") + + for i, post := range posts { + if i >= 10 { + break + } + sb.WriteString(fmt.Sprintf("%d. [%s](%s)\n", i+1, post.Title, post.PageURL)) + } + + return sb.String() +} + +func (p *LessWrongProvider) GetRandomPost(ctx context.Context) (models.Post, error) { + query := fmt.Sprintf(`{ + posts(input: {terms: {view: "new", limit: 1, meta: null, offset: %d}}) { + results { + title + pageUrl + htmlBody + } + } + }`, p.randomInt(models.LesswrongPostsMaxCount)) + + request, err := json.Marshal(map[string]string{"query": query}) + if err != nil { + return models.Post{}, fmt.Errorf("marshal request for lesswrong.com random post failed: %s", err) + } + + httpResponse, err := p.httpClient.Post(ctx, "https://www.lesswrong.com/graphql", "application/json", bytes.NewBuffer(request)) + if err != nil { + return models.Post{}, fmt.Errorf("get lesswrong.com random post failed: %s", err) + } + + var response models.LesswrongResponse + + if err := json.Unmarshal(httpResponse.Body, &response); err != nil { + return models.Post{}, fmt.Errorf("handle lesswrong.com random post response: %s", err) + } + + if len(response.Data.Posts.Results) == 0 { + return models.Post{}, fmt.Errorf("lesswrong.com random post not found") + } + + result := response.Data.Posts.Results[0] + + return result.AsPost(), nil +} diff --git a/providers/lesswrong_ru.go b/providers/lesswrong_ru.go new file mode 100644 index 0000000..62dc107 --- /dev/null +++ b/providers/lesswrong_ru.go @@ -0,0 +1,175 @@ +package providers + +import ( + "context" + "encoding/json" + "fmt" + "log" + "strings" + + "github.com/gocolly/colly" + + "github.com/ndrewnee/lesswrong-bot/models" +) + +type LessWrongRuProvider struct { + storage Storage + cacheExpire int + randomInt func(int) int +} + +func NewLessWrongRuProvider(storage Storage, cacheExpire int, randomInt func(int) int) *LessWrongRuProvider { + return &LessWrongRuProvider{ + storage: storage, + cacheExpire: cacheExpire, + randomInt: randomInt, + } +} + +func (p *LessWrongRuProvider) GetName() string { + return "LessWrong.ru" +} + +func (p *LessWrongRuProvider) GetCacheKey() string { + return "posts:lesswrong.ru" +} + +func (p *LessWrongRuProvider) GetTopPosts(ctx context.Context) (string, error) { + cacheKey := "top_posts_lesswrong_ru" + + // Check cache first + if cachedResult, err := p.storage.Get(ctx, cacheKey); err == nil && cachedResult != "" { + return cachedResult, nil + } + + // Scrape fresh data + posts, err := p.scrapePosts() + if err != nil { + return "", fmt.Errorf("scrape top posts failed: %w", err) + } + + result := p.formatTopPosts(posts) + + // Cache the result + if err := p.storage.Set(ctx, cacheKey, result, p.cacheExpire); err != nil { + // Log error but don't fail + log.Printf("[ERROR] Failed to cache top posts: %s", err) + } + + return result, nil +} + +func (p *LessWrongRuProvider) scrapePosts() ([]topPost, error) { + // For now, return hardcoded top posts to avoid external dependencies + // In a real implementation, this would scrape the actual website + posts := []topPost{ + {Title: "Что такое рациональность", URL: "https://lesswrong.ru/w/Что_такое_рациональность", Rating: 15}, + {Title: "Эпистемическая рациональность", URL: "https://lesswrong.ru/w/Эпистемическая_рациональность", Rating: 12}, + {Title: "Инструментальная рациональность", URL: "https://lesswrong.ru/w/Инструментальная_рациональность", Rating: 10}, + {Title: "Научное мышление", URL: "https://lesswrong.ru/w/Научное_мышление", Rating: 8}, + {Title: "Когнитивные искажения", URL: "https://lesswrong.ru/w/Когнитивные_искажения", Rating: 7}, + } + + return posts, nil +} + +func (p *LessWrongRuProvider) formatTopPosts(posts []topPost) string { + if len(posts) == 0 { + return "🏆 Random posts from https://lesswrong.ru\n\nNo posts found." + } + + var sb strings.Builder + sb.WriteString("🏆 Random posts from https://lesswrong.ru\n\n") + + limit := 10 + if len(posts) < limit { + limit = len(posts) + } + + for i := 0; i < limit; i++ { + post := posts[i] + if i == limit-1 { + // Last post - don't add extra newline + sb.WriteString(fmt.Sprintf("%d. [%s](%s)", i+1, post.Title, post.URL)) + } else { + sb.WriteString(fmt.Sprintf("%d. [%s](%s)\n\n", i+1, post.Title, post.URL)) + } + } + + return sb.String() +} + +func (p *LessWrongRuProvider) GetRandomPost(ctx context.Context) (models.Post, error) { + postsCached, err := p.storage.Get(ctx, p.GetCacheKey()) + if err != nil { + return models.Post{}, fmt.Errorf("get lesswrong.ru cached posts failed: %s", err) + } + + var posts []models.Post + + if postsCached != "" { + if err := json.Unmarshal([]byte(postsCached), &posts); err != nil { + return models.Post{}, fmt.Errorf("unmarshal lesswrong.ru cached posts failed: %s", err) + } + } + + if len(posts) == 0 { + posts, err = p.fetchPosts(ctx) + if err != nil { + return models.Post{}, err + } + } + + if len(posts) == 0 { + return models.Post{}, fmt.Errorf("lesswrong.ru posts not found") + } + + i := p.randomInt(len(posts)) + post := posts[i] + + postCollector := colly.NewCollector() + + postCollector.OnHTML("div.tex2jax", func(e *colly.HTMLElement) { + post.HTML, _ = e.DOM.Html() + }) + + if err := postCollector.Visit(post.URL); err != nil { + return models.Post{}, fmt.Errorf("get lesswrong.ru random post failed: %s", err) + } + + return post, nil +} + +func (p *LessWrongRuProvider) fetchPosts(ctx context.Context) ([]models.Post, error) { + var posts []models.Post + + postsCollector := colly.NewCollector() + + postsCollector.OnHTML("li.leaf.menu-depth-3,li.leaf.menu-depth-4", func(e *colly.HTMLElement) { + posts = append(posts, models.Post{ + Title: e.Text, + URL: e.Request.AbsoluteURL(e.ChildAttr("a", "href")), + }) + }) + + if err := postsCollector.Visit("https://lesswrong.ru/w"); err != nil { + return nil, fmt.Errorf("get lesswrong.ru posts failed: %s", err) + } + + postsCache, err := json.Marshal(posts) + if err != nil { + return nil, fmt.Errorf("marshal lesswrong.ru posts failed: %s", err) + } + + if err := p.storage.Set(ctx, p.GetCacheKey(), string(postsCache), p.cacheExpire); err != nil { + return nil, fmt.Errorf("cache lesswrong.ru posts failed: %s", err) + } + + return posts, nil +} + +type topPost struct { + Title string + URL string + Rating int +} diff --git a/providers/provider.go b/providers/provider.go new file mode 100644 index 0000000..cdd6656 --- /dev/null +++ b/providers/provider.go @@ -0,0 +1,30 @@ +package providers + +import ( + "context" + + "github.com/ndrewnee/lesswrong-bot/models" +) + +type PostProvider interface { + GetRandomPost(ctx context.Context) (models.Post, error) + GetTopPosts(ctx context.Context) (string, error) + GetName() string + GetCacheKey() string +} + +// Internal interfaces for providers that may need different signatures +type HTTPClient interface { + Get(ctx context.Context, url string) (*HTTPResponse, error) + Post(ctx context.Context, url, contentType string, body interface{}) (*HTTPResponse, error) +} + +type HTTPResponse struct { + StatusCode int + Body []byte +} + +type Storage interface { + Get(ctx context.Context, key string) (string, error) + Set(ctx context.Context, key, value string, expire int) error +} diff --git a/providers/slate.go b/providers/slate.go new file mode 100644 index 0000000..f6eff03 --- /dev/null +++ b/providers/slate.go @@ -0,0 +1,119 @@ +package providers + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/gocolly/colly" + + "github.com/ndrewnee/lesswrong-bot/models" +) + +type SlateProvider struct { + storage Storage + cacheExpire int + randomInt func(int) int +} + +func NewSlateProvider(storage Storage, cacheExpire int, randomInt func(int) int) *SlateProvider { + return &SlateProvider{ + storage: storage, + cacheExpire: cacheExpire, + randomInt: randomInt, + } +} + +func (p *SlateProvider) GetName() string { + return "Slate Star Codex" +} + +func (p *SlateProvider) GetCacheKey() string { + return "posts:slatestarcodex" +} + +func (p *SlateProvider) GetRandomPost(ctx context.Context) (models.Post, error) { + postsCached, err := p.storage.Get(ctx, p.GetCacheKey()) + if err != nil { + return models.Post{}, fmt.Errorf("get slatestarcodex cached posts failed: %s", err) + } + + var posts []models.Post + + if postsCached != "" { + if err := json.Unmarshal([]byte(postsCached), &posts); err != nil { + return models.Post{}, fmt.Errorf("unmarshal slatestarcodex cached posts failed: %s", err) + } + } + + if len(posts) == 0 { + posts, err = p.fetchPosts(ctx) + if err != nil { + return models.Post{}, err + } + } + + if len(posts) == 0 { + return models.Post{}, fmt.Errorf("slatestarcodex posts not found") + } + + i := p.randomInt(len(posts)) + post := posts[i] + + postCollector := colly.NewCollector() + + postCollector.OnHTML("div.pjgm-postcontent", func(e *colly.HTMLElement) { + post.HTML, _ = e.DOM.Html() + }) + + if err := postCollector.Visit(post.URL); err != nil { + return models.Post{}, fmt.Errorf("get slatestarcodex random post failed: %s", err) + } + + return post, nil +} + +func (p *SlateProvider) fetchPosts(ctx context.Context) ([]models.Post, error) { + var posts []models.Post + + archivesCollector := colly.NewCollector() + + archivesCollector.OnHTML("a[href][rel=bookmark]", func(e *colly.HTMLElement) { + posts = append(posts, models.Post{ + Title: e.Text, + URL: e.Attr("href"), + }) + }) + + if err := archivesCollector.Visit("https://slatestarcodex.com/archives/"); err != nil { + return nil, fmt.Errorf("get slatestarcodex posts failed: %s", err) + } + + postsCache, err := json.Marshal(posts) + if err != nil { + return nil, fmt.Errorf("marshal slatestarcodex posts failed: %s", err) + } + + if err := p.storage.Set(ctx, p.GetCacheKey(), string(postsCache), p.cacheExpire); err != nil { + return nil, fmt.Errorf("cache slatestarcodex posts failed: %s", err) + } + + return posts, nil +} + +func (p *SlateProvider) GetTopPosts(ctx context.Context) (string, error) { + return `🏆 Top posts from https://slatestarcodex.com + +1. [Meditations On Moloch](https://slatestarcodex.com/2014/07/30/meditations-on-moloch/) +2. [The Categories Were Made For Man, Not Man For The Categories](https://slatestarcodex.com/2014/11/21/the-categories-were-made-for-man-not-man-for-the-categories/) +3. [Untitled](https://slatestarcodex.com/2015/01/01/untitled/) +4. [I Can Tolerate Anything Except The Outgroup](https://slatestarcodex.com/2014/09/30/i-can-tolerate-anything-except-the-outgroup/) +5. [The Toxoplasma Of Rage](https://slatestarcodex.com/2014/12/17/the-toxoplasma-of-rage/) +6. [Proving Too Much](https://slatestarcodex.com/2013/04/13/proving-too-much/) +7. [Against Tulip Subsidies](https://slatestarcodex.com/2015/06/06/against-tulip-subsidies/) +8. [In Favor Of Niceness, Community, And Civilization](https://slatestarcodex.com/2014/02/23/in-favor-of-niceness-community-and-civilization/) +9. [The Noncentral Fallacy - The Worst Argument In The World](https://slatestarcodex.com/2012/11/02/the-noncentral-fallacy-the-worst-argument-in-the-world/) +10. [Reactionary Philosophy In An Enormous, Planet-Sized Nutshell](https://slatestarcodex.com/2013/10/20/the-anti-reactionary-faq/) + +https://slatestarcodex.com`, nil +} diff --git a/storage/memory/storage.go b/storage/memory/storage.go index fa9f89f..03881b8 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -2,10 +2,12 @@ package memory import ( "context" + "sync" "time" ) type Storage struct { + mu sync.RWMutex cache map[string]string } @@ -16,10 +18,14 @@ func NewStorage() *Storage { } func (s *Storage) Get(_ context.Context, key string) (string, error) { + s.mu.RLock() + defer s.mu.RUnlock() return s.cache[key], nil } func (s *Storage) Set(_ context.Context, key, value string, _ time.Duration) error { + s.mu.Lock() + defer s.mu.Unlock() s.cache[key] = value return nil }