diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 32e07bc..ea9a8a7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -48,3 +48,59 @@ jobs:
- name: Run Go tests
run: go test -v ./feedfetcher/...
+
+  # Integration tests for the Go feed fetcher. They need a live Redis and an
+  # S3-compatible object store (MinIO), both reachable on localhost.
+  test-go-integration:
+    runs-on: ubuntu-latest
+
+    services:
+      redis:
+        image: redis:7-alpine
+        ports:
+          - 6379:6379
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - uses: actions/checkout@v3
+
+      # NOTE(review): go.mod declares `go 1.25.2` while this job installs 1.23;
+      # confirm the intended toolchain (or switch to `go-version-file: go.mod`).
+      - name: Set up Go
+        uses: actions/setup-go@v4
+        with:
+          go-version: '1.23'
+
+      # MinIO is started with `docker run` instead of a `services:` entry:
+      # service containers cannot be given a command, and the minio/minio image
+      # only starts a server when run as `server <data-dir>`.
+      - name: Start MinIO
+        run: |
+          docker run -d --name minio \
+            -p 9000:9000 \
+            -e MINIO_ROOT_USER=minioadmin \
+            -e MINIO_ROOT_PASSWORD=minioadmin \
+            minio/minio:latest server /data
+          # Poll the liveness endpoint from the runner until the server is up.
+          for attempt in $(seq 1 30); do
+            if curl -fsS http://localhost:9000/minio/health/live > /dev/null; then
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "MinIO did not become healthy in time" >&2
+          docker logs minio
+          exit 1
+
+      - name: Install MinIO Client
+        run: |
+          wget https://dl.min.io/client/mc/release/linux-amd64/mc
+          chmod +x mc
+          sudo mv mc /usr/local/bin/
+
+      # --ignore-existing tolerates a pre-existing bucket without hiding other
+      # failures (the previous `|| true` swallowed every error).
+      - name: Configure MinIO
+        run: |
+          mc alias set local http://localhost:9000 minioadmin minioadmin
+          mc mb --ignore-existing local/feedreader2018-articles
+
+      - name: Run integration tests
+        env:
+          REDIS_HOST: localhost
+          REDIS_PORT: 6379
+          S3_ENDPOINT: http://localhost:9000
+          S3_ACCESS_KEY: minioadmin
+          S3_SECRET_KEY: minioadmin
+          S3_BUCKET: feedreader2018-articles
+        run: go test -v -tags=integration ./feedfetcher/...
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..53db908
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,32 @@
+# Local backing services for the integration tests: Redis on 6379 and MinIO
+# (S3-compatible storage) on 9000, with the MinIO console on 9001.
+version: '3.8'
+
+services:
+  redis:
+    image: redis:7-alpine
+    ports:
+      - "6379:6379"
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 5s
+      timeout: 3s
+      retries: 5
+
+  minio:
+    image: minio/minio:latest
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    environment:
+      # Same credentials the integration tests fall back to when
+      # S3_ACCESS_KEY / S3_SECRET_KEY are unset.
+      MINIO_ROOT_USER: minioadmin
+      MINIO_ROOT_PASSWORD: minioadmin
+    command: server /data --console-address ":9001"
+    healthcheck:
+      # NOTE(review): this runs curl inside the container, so it relies on the
+      # image shipping curl - confirm for the `latest` tag.
+      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+      interval: 5s
+      timeout: 3s
+      retries: 5
+    volumes:
+      # Named volume so stored objects survive container restarts.
+      - minio-data:/data
+
+volumes:
+  minio-data:
diff --git a/feedfetcher/fetcher.go b/feedfetcher/fetcher.go
new file mode 100644
index 0000000..0fe3a86
--- /dev/null
+++ b/feedfetcher/fetcher.go
@@ -0,0 +1,220 @@
+package feedfetcher
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "io"
+ "net/http"
+ "strconv"
+
+ "github.com/aws/aws-sdk-go-v2/aws"
+ "github.com/aws/aws-sdk-go-v2/service/s3"
+ "github.com/go-redis/redis/v8"
+ "github.com/mmcdole/gofeed"
+)
+
+// Fetcher downloads feeds over HTTP, records feed and article state in Redis,
+// and writes article bodies to an S3 bucket.
+type Fetcher struct {
+	// redisClient holds the per-feed metadata hash and article sorted set.
+	redisClient *redis.Client
+	// s3Client is used to upload article JSON objects.
+	s3Client *s3.Client
+	// httpClient performs every feed request.
+	httpClient *http.Client
+	// s3Bucket is the bucket article objects are written to.
+	s3Bucket string
+}
+
+// FeedResponse is the JSON-serialisable outcome of a single FetchFeed call.
+type FeedResponse struct {
+	// Success is false when the request, the HTTP status, or parsing failed.
+	Success bool `json:"success"`
+	// Title and Link come from the parsed feed, or from the metadata stored in
+	// Redis when the server answered 304 Not Modified.
+	Title string `json:"title,omitempty"`
+	Link  string `json:"link,omitempty"`
+	// LastModified and Etag are the cache validators for the feed.
+	LastModified string `json:"lastModified,omitempty"`
+	Etag         string `json:"etag,omitempty"`
+	// Articles lists the article IDs read back from the feed's sorted set; it
+	// is left nil on failure responses.
+	Articles []string `json:"articles"`
+	// StatusCode is the HTTP status of the feed request, when one was received.
+	StatusCode int `json:"statusCode,omitempty"`
+	// StatusMessage carries the HTTP status line or error text on failure.
+	StatusMessage string `json:"statusMessage,omitempty"`
+}
+
+// NewFetcher wires a Fetcher to the given Redis client, S3 client and bucket.
+//
+// The Fetcher gets its own zero-value http.Client, which has no timeout of its
+// own: feed requests are bounded only by the context passed to FetchFeed.
+func NewFetcher(redisClient *redis.Client, s3Client *s3.Client, s3Bucket string) *Fetcher {
+	fetcher := new(Fetcher)
+	fetcher.redisClient = redisClient
+	fetcher.s3Client = s3Client
+	fetcher.s3Bucket = s3Bucket
+	fetcher.httpClient = &http.Client{}
+	return fetcher
+}
+
+// FetchFeed downloads feedURI, stores any new or changed articles, and returns
+// the feed's metadata together with the IDs of all articles known for it.
+//
+// The request is conditional: the Last-Modified / ETag values saved by the
+// previous successful fetch are sent back, and a 304 response is answered from
+// the state already in Redis.
+//
+// Failures attributable to the remote feed (transport error, non-200 status,
+// unparseable body) are reported as a FeedResponse with Success == false and a
+// nil error. A non-nil error means Redis, S3, request construction, or reading
+// the response body failed.
+//
+// Feed metadata is written to Redis only after every article has been stored.
+// If article processing fails part-way, the old validators stay in place and
+// the next call fetches and processes the feed again instead of receiving 304.
+func (f *Fetcher) FetchFeed(ctx context.Context, feedURI string) (*FeedResponse, error) {
+	keys := BuildRedisKeys(feedURI)
+
+	// Metadata from the previous fetch; empty when the feed has never been seen.
+	storedFeed, err := f.redisClient.HGetAll(ctx, keys.FeedKey).Result()
+	if err != nil && err != redis.Nil {
+		return nil, fmt.Errorf("failed to get stored feed: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURI, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+	// Send the cached validators so the server can answer 304.
+	for key, value := range BuildRequestHeaders(storedFeed["lastModified"], storedFeed["etag"]) {
+		req.Header.Set(key, value)
+	}
+
+	resp, err := f.httpClient.Do(req)
+	if err != nil {
+		return &FeedResponse{
+			Success:       false,
+			StatusMessage: err.Error(),
+		}, nil
+	}
+	defer resp.Body.Close()
+
+	switch resp.StatusCode {
+	case http.StatusOK:
+		// Fall through to parsing below.
+	case http.StatusNotModified:
+		// Nothing changed upstream: answer from what Redis already holds.
+		articles, err := f.getArticleIds(ctx, keys.ArticlesKey)
+		if err != nil {
+			return nil, err
+		}
+		return &FeedResponse{
+			Success:      true,
+			Title:        storedFeed["title"],
+			Link:         storedFeed["link"],
+			LastModified: storedFeed["lastModified"],
+			Etag:         storedFeed["etag"],
+			Articles:     articles,
+			StatusCode:   resp.StatusCode,
+		}, nil
+	default:
+		return &FeedResponse{
+			Success:       false,
+			StatusCode:    resp.StatusCode,
+			StatusMessage: resp.Status,
+		}, nil
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	// Parse straight from the bytes; converting to a string would copy the body.
+	feed, err := gofeed.NewParser().Parse(bytes.NewReader(body))
+	if err != nil {
+		return &FeedResponse{
+			Success:       false,
+			StatusMessage: fmt.Sprintf("failed to parse feed: %v", err),
+		}, nil
+	}
+
+	for _, item := range feed.Items {
+		if err := f.storeItem(ctx, keys.ArticlesKey, feedURI, item); err != nil {
+			return nil, err
+		}
+	}
+
+	// Persist the validators last, so they only ever describe a feed version
+	// whose articles were all stored.
+	lastModified := resp.Header.Get("Last-Modified")
+	etag := resp.Header.Get("Etag")
+	feedMeta := map[string]interface{}{
+		"title":        feed.Title,
+		"link":         feed.Link,
+		"lastModified": lastModified,
+		"etag":         etag,
+	}
+	if err := f.redisClient.HMSet(ctx, keys.FeedKey, feedMeta).Err(); err != nil {
+		return nil, fmt.Errorf("failed to store feed metadata: %w", err)
+	}
+
+	articles, err := f.getArticleIds(ctx, keys.ArticlesKey)
+	if err != nil {
+		return nil, err
+	}
+
+	return &FeedResponse{
+		Success:      true,
+		Title:        feed.Title,
+		Link:         feed.Link,
+		LastModified: lastModified,
+		Etag:         etag,
+		Articles:     articles,
+		StatusCode:   resp.StatusCode,
+	}, nil
+}
+
+// storeItem converts one parsed feed item into an Article and records it: the
+// article body goes to S3 when ShouldStoreArticle says so, then its score is
+// written to the sorted set at articlesKey. Items that fail IsValidArticle are
+// skipped without error.
+func (f *Fetcher) storeItem(ctx context.Context, articlesKey, feedURI string, item *gofeed.Item) error {
+	// Same layout as time.RFC3339, spelled out to keep the import set unchanged.
+	const pubDateLayout = "2006-01-02T15:04:05Z07:00"
+
+	article := Article{
+		GUID:        item.GUID,
+		Title:       item.Title,
+		Description: item.Description,
+	}
+	// Prefer the published date and fall back to the updated date.
+	switch {
+	case item.PublishedParsed != nil:
+		article.PubDate = item.PublishedParsed.Format(pubDateLayout)
+	case item.UpdatedParsed != nil:
+		article.PubDate = item.UpdatedParsed.Format(pubDateLayout)
+	}
+
+	if !IsValidArticle(&article) {
+		return nil
+	}
+
+	processed := ProcessArticle(article, feedURI)
+	articleKey := BuildArticleKey(processed.Hash)
+
+	// redis.Nil means the article is new; any other error is a real failure and
+	// must not be mistaken for "no previous score".
+	var oldScore *string
+	score, err := f.redisClient.ZScore(ctx, articlesKey, articleKey).Result()
+	switch {
+	case err == nil:
+		formatted := strconv.FormatInt(int64(score), 10)
+		oldScore = &formatted
+	case err != redis.Nil:
+		return fmt.Errorf("failed to get article score: %w", err)
+	}
+
+	// Write to S3 before updating the score. If the upload fails the old score
+	// is still in Redis, so the next fetch sees the change again and retries.
+	if ShouldStoreArticle(oldScore, processed.Score) {
+		if err := f.storeArticleInS3(ctx, processed); err != nil {
+			return fmt.Errorf("failed to store article in S3: %w", err)
+		}
+	}
+
+	err = f.redisClient.ZAdd(ctx, articlesKey, &redis.Z{
+		Score:  float64(processed.Score),
+		Member: articleKey,
+	}).Err()
+	if err != nil {
+		return fmt.Errorf("failed to add article to sorted set: %w", err)
+	}
+	return nil
+}
+
+// storeArticleInS3 serialises the article with GenerateArticleBody and uploads
+// it to the fetcher's bucket under "<hash>.json" as application/json. Errors
+// from serialisation or upload are returned unwrapped.
+func (f *Fetcher) storeArticleInS3(ctx context.Context, article Article) error {
+	payload, err := GenerateArticleBody(article)
+	if err != nil {
+		return err
+	}
+
+	input := &s3.PutObjectInput{
+		Bucket:      aws.String(f.s3Bucket),
+		Key:         aws.String(article.Hash + ".json"),
+		Body:        bytes.NewReader([]byte(payload)),
+		ContentType: aws.String("application/json"),
+	}
+	if _, err := f.s3Client.PutObject(ctx, input); err != nil {
+		return err
+	}
+	return nil
+}
+
+// getArticleIds reads the whole sorted set at articlesKey, highest score first,
+// and returns its members after passing them through ExtractArticleIds.
+func (f *Fetcher) getArticleIds(ctx context.Context, articlesKey string) ([]string, error) {
+	cmd := f.redisClient.ZRevRange(ctx, articlesKey, 0, -1)
+	if err := cmd.Err(); err != nil {
+		return nil, fmt.Errorf("failed to get articles from sorted set: %w", err)
+	}
+
+	ids := ExtractArticleIds(cmd.Val())
+	return ids, nil
+}
diff --git a/feedfetcher/fetcher_integration_test.go b/feedfetcher/fetcher_integration_test.go
new file mode 100644
index 0000000..eb39088
--- /dev/null
+++ b/feedfetcher/fetcher_integration_test.go
@@ -0,0 +1,362 @@
+//go:build integration
+// +build integration
+
+package feedfetcher
+
+import (
+ "context"
+ "fmt"
+ "net/http"
+ "net/http/httptest"
+ "os"
+ "testing"
+
+ "github.com/aws/aws-sdk-go-v2/aws"
+ "github.com/aws/aws-sdk-go-v2/config"
+ "github.com/aws/aws-sdk-go-v2/credentials"
+ "github.com/aws/aws-sdk-go-v2/service/s3"
+ "github.com/go-redis/redis/v8"
+ "gopkg.in/yaml.v3"
+)
+
+// FetchFeedTestCases mirrors the layout of testdata/feed-get-tests.yaml, the
+// fixture file that drives the integration tests in this file.
+type FetchFeedTestCases struct {
+	FeedGetTests []struct {
+		// Description doubles as the subtest name.
+		Description string `yaml:"description"`
+		// FeedFixture is the path of the feed document served by the test server.
+		FeedFixture string `yaml:"feed_fixture"`
+		FeedURI     string `yaml:"feed_uri"`
+		// ExpectedFeedMetadata fields are only asserted when non-empty.
+		ExpectedFeedMetadata struct {
+			Title string `yaml:"title"`
+			Link  string `yaml:"link"`
+		} `yaml:"expected_feed_metadata"`
+		ExpectedArticlesCount int `yaml:"expected_articles_count"`
+		// ExpectedArticles are checked by hash in the response, by score in
+		// Redis, and by "<hash>.json" object in S3.
+		ExpectedArticles []struct {
+			GUID    string `yaml:"guid"`
+			Title   string `yaml:"title"`
+			Hash    string `yaml:"hash"`
+			Score   int64  `yaml:"score"`
+			FeedURL string `yaml:"feedurl"`
+		} `yaml:"expected_articles"`
+		// CachingTest is nil for cases without a caching_test section.
+		CachingTest *struct {
+			// ResponseHeaders are sent by the test server on the first response.
+			ResponseHeaders struct {
+				LastModified string `yaml:"last_modified"`
+				Etag         string `yaml:"etag"`
+			} `yaml:"response_headers"`
+			ExpectedArticlesCount          int  `yaml:"expected_articles_count"`
+			ShouldReturn304OnSecondRequest bool `yaml:"should_return_304_on_second_request"`
+		} `yaml:"caching_test"`
+	} `yaml:"feed_get_tests"`
+}
+
+// setupRedisClient connects to the Redis instance named by REDIS_HOST and
+// REDIS_PORT (defaulting to localhost:6379) and verifies it with a PING.
+//
+// On a failed PING the client is closed before the error is returned, so the
+// connection pool is not leaked.
+func setupRedisClient() (*redis.Client, error) {
+	host := os.Getenv("REDIS_HOST")
+	if host == "" {
+		host = "localhost"
+	}
+	port := os.Getenv("REDIS_PORT")
+	if port == "" {
+		port = "6379"
+	}
+
+	client := redis.NewClient(&redis.Options{
+		Addr: fmt.Sprintf("%s:%s", host, port),
+	})
+
+	if err := client.Ping(context.Background()).Err(); err != nil {
+		// The caller never receives this client, so release it here. The
+		// close error is dropped: the ping failure is the one worth reporting.
+		_ = client.Close()
+		return nil, fmt.Errorf("failed to connect to Redis: %w", err)
+	}
+
+	return client, nil
+}
+
+// setupS3Client builds a path-style S3 client for the endpoint in S3_ENDPOINT,
+// authenticated with S3_ACCESS_KEY / S3_SECRET_KEY. Unset or empty variables
+// fall back to http://localhost:9000 and minioadmin/minioadmin.
+func setupS3Client() (*s3.Client, error) {
+	// envOr returns the variable's value, or fallback when it is unset or empty.
+	envOr := func(name, fallback string) string {
+		if value := os.Getenv(name); value != "" {
+			return value
+		}
+		return fallback
+	}
+
+	endpoint := envOr("S3_ENDPOINT", "http://localhost:9000")
+	accessKey := envOr("S3_ACCESS_KEY", "minioadmin")
+	secretKey := envOr("S3_SECRET_KEY", "minioadmin")
+
+	staticCreds := credentials.NewStaticCredentialsProvider(accessKey, secretKey, "")
+	cfg, err := config.LoadDefaultConfig(
+		context.Background(),
+		config.WithRegion("us-east-1"),
+		config.WithCredentialsProvider(staticCreds),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to load AWS config: %w", err)
+	}
+
+	// Point the client at the custom endpoint and address buckets by path.
+	withEndpoint := func(o *s3.Options) {
+		o.BaseEndpoint = aws.String(endpoint)
+		o.UsePathStyle = true
+	}
+	return s3.NewFromConfig(cfg, withEndpoint), nil
+}
+
+// TestFetchFeedIntegration runs every case in feed-get-tests.yaml against a
+// real Redis and S3 endpoint. Each case serves its fixture from a local HTTP
+// server, fetches it through a Fetcher, and checks the response metadata, the
+// article count, and - for each expected article - its presence in the
+// response, its score in Redis, and its object in S3.
+func TestFetchFeedIntegration(t *testing.T) {
+	data, err := os.ReadFile("../testdata/feed-get-tests.yaml")
+	if err != nil {
+		t.Fatalf("Failed to read test data: %v", err)
+	}
+
+	var testCases FetchFeedTestCases
+	if err := yaml.Unmarshal(data, &testCases); err != nil {
+		t.Fatalf("Failed to parse test data: %v", err)
+	}
+
+	redisClient, err := setupRedisClient()
+	if err != nil {
+		t.Fatalf("Failed to setup Redis client: %v", err)
+	}
+	defer redisClient.Close()
+
+	s3Client, err := setupS3Client()
+	if err != nil {
+		t.Fatalf("Failed to setup S3 client: %v", err)
+	}
+
+	bucket := os.Getenv("S3_BUCKET")
+	if bucket == "" {
+		bucket = "feedreader2018-articles"
+	}
+
+	for _, tc := range testCases.FeedGetTests {
+		t.Run(tc.Description, func(t *testing.T) {
+			ctx := context.Background()
+
+			feedData, err := os.ReadFile(tc.FeedFixture)
+			if err != nil {
+				t.Fatalf("Failed to read feed fixture: %v", err)
+			}
+
+			// Serve the fixture from a throwaway HTTP server.
+			server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				w.Header().Set("Content-Type", "application/xml")
+				w.Write(feedData)
+			}))
+			defer server.Close()
+
+			// The fetcher derives its Redis keys from the URI it is asked to
+			// fetch, which is the test server's URL and not tc.FeedURI, so the
+			// keys cleared and inspected here must be built from that URL too.
+			// NOTE(review): expected hashes and scores in the YAML must not
+			// depend on the feed URI for these checks to hold - confirm
+			// against ProcessArticle.
+			keys := BuildRedisKeys(server.URL)
+			if err := redisClient.Del(ctx, keys.FeedKey, keys.ArticlesKey).Err(); err != nil {
+				t.Fatalf("Failed to clear Redis keys: %v", err)
+			}
+			t.Cleanup(func() {
+				// Best effort: the server URL differs per run, so leftover
+				// keys would otherwise accumulate.
+				redisClient.Del(context.Background(), keys.FeedKey, keys.ArticlesKey)
+			})
+
+			fetcher := NewFetcher(redisClient, s3Client, bucket)
+			result, err := fetcher.FetchFeed(ctx, server.URL)
+			if err != nil {
+				t.Fatalf("Fetcher.FetchFeed() failed: %v", err)
+			}
+
+			// A failed fetch makes every later check meaningless, so stop here.
+			if !result.Success {
+				t.Fatalf("Expected success=true, got success=%v, message=%s", result.Success, result.StatusMessage)
+			}
+
+			// Metadata expectations are optional in the YAML.
+			if want := tc.ExpectedFeedMetadata.Title; want != "" && result.Title != want {
+				t.Errorf("Title mismatch: got %s, want %s", result.Title, want)
+			}
+			if want := tc.ExpectedFeedMetadata.Link; want != "" && result.Link != want {
+				t.Errorf("Link mismatch: got %s, want %s", result.Link, want)
+			}
+
+			if len(result.Articles) != tc.ExpectedArticlesCount {
+				t.Errorf("Article count mismatch: got %d, want %d", len(result.Articles), tc.ExpectedArticlesCount)
+			}
+
+			for _, expectedArticle := range tc.ExpectedArticles {
+				articleKey := BuildArticleKey(expectedArticle.Hash)
+
+				found := false
+				for _, article := range result.Articles {
+					if article == expectedArticle.Hash {
+						found = true
+						break
+					}
+				}
+				if !found {
+					t.Errorf("Expected article %s not found in results", expectedArticle.Hash)
+				}
+
+				// The article must be in the feed's sorted set with the expected score.
+				score, err := redisClient.ZScore(ctx, keys.ArticlesKey, articleKey).Result()
+				if err != nil {
+					t.Errorf("Article %s not found in Redis sorted set: %v", articleKey, err)
+				} else if int64(score) != expectedArticle.Score {
+					t.Errorf("Article score mismatch: got %d, want %d", int64(score), expectedArticle.Score)
+				}
+
+				// The article body must have been uploaded.
+				_, err = s3Client.HeadObject(ctx, &s3.HeadObjectInput{
+					Bucket: aws.String(bucket),
+					Key:    aws.String(expectedArticle.Hash + ".json"),
+				})
+				if err != nil {
+					t.Errorf("Article %s not found in S3: %v", expectedArticle.Hash, err)
+				}
+			}
+		})
+	}
+}
+
+// TestFetchFeedCaching checks conditional fetching end to end. The first
+// request gets the full feed together with Last-Modified / ETag headers. The
+// second must send those values back as If-Modified-Since / If-None-Match,
+// receive 304, and still report the same articles from Redis.
+//
+// It uses the first case in feed-get-tests.yaml that has a caching_test
+// section, and is skipped when there is none.
+func TestFetchFeedCaching(t *testing.T) {
+	ctx := context.Background()
+
+	data, err := os.ReadFile("../testdata/feed-get-tests.yaml")
+	if err != nil {
+		t.Fatalf("Failed to read test data: %v", err)
+	}
+
+	var testCases FetchFeedTestCases
+	if err := yaml.Unmarshal(data, &testCases); err != nil {
+		t.Fatalf("Failed to parse test data: %v", err)
+	}
+
+	// Index into the slice instead of copying the element: this needs no
+	// duplicate declaration of the anonymous element type and takes no address
+	// of a loop variable.
+	caseIndex := -1
+	for i := range testCases.FeedGetTests {
+		if testCases.FeedGetTests[i].CachingTest != nil {
+			caseIndex = i
+			break
+		}
+	}
+	if caseIndex < 0 {
+		t.Skip("No caching test case found in YAML")
+	}
+	cachingTest := &testCases.FeedGetTests[caseIndex]
+
+	redisClient, err := setupRedisClient()
+	if err != nil {
+		t.Fatalf("Failed to setup Redis client: %v", err)
+	}
+	defer redisClient.Close()
+
+	s3Client, err := setupS3Client()
+	if err != nil {
+		t.Fatalf("Failed to setup S3 client: %v", err)
+	}
+
+	bucket := os.Getenv("S3_BUCKET")
+	if bucket == "" {
+		bucket = "feedreader2018-articles"
+	}
+
+	feedData, err := os.ReadFile(cachingTest.FeedFixture)
+	if err != nil {
+		t.Fatalf("Failed to read feed fixture: %v", err)
+	}
+
+	requestCount := 0
+	lastModified := cachingTest.CachingTest.ResponseHeaders.LastModified
+	etag := cachingTest.CachingTest.ResponseHeaders.Etag
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requestCount++
+
+		// Every request after the first must carry the validators and gets 304.
+		if requestCount > 1 {
+			if got := r.Header.Get("If-Modified-Since"); got != lastModified {
+				t.Errorf("Expected If-Modified-Since header: %s, got: %s", lastModified, got)
+			}
+			if got := r.Header.Get("If-None-Match"); got != etag {
+				t.Errorf("Expected If-None-Match header: %s, got: %s", etag, got)
+			}
+			w.WriteHeader(http.StatusNotModified)
+			return
+		}
+
+		// First request: full feed plus the validators to cache.
+		w.Header().Set("Content-Type", "application/xml")
+		w.Header().Set("Last-Modified", lastModified)
+		w.Header().Set("Etag", etag)
+		w.Write(feedData)
+	}))
+	defer server.Close()
+
+	fetcher := NewFetcher(redisClient, s3Client, bucket)
+
+	// Start from a clean slate, and remove the keys afterwards: the server URL
+	// changes on every run, so leftovers would otherwise accumulate.
+	keys := BuildRedisKeys(server.URL)
+	if err := redisClient.Del(ctx, keys.FeedKey, keys.ArticlesKey).Err(); err != nil {
+		t.Fatalf("Failed to clear Redis keys: %v", err)
+	}
+	defer redisClient.Del(ctx, keys.FeedKey, keys.ArticlesKey)
+
+	result1, err := fetcher.FetchFeed(ctx, server.URL)
+	if err != nil {
+		t.Fatalf("First request failed: %v", err)
+	}
+	if !result1.Success {
+		t.Errorf("First request should succeed")
+	}
+
+	expectedCount := cachingTest.CachingTest.ExpectedArticlesCount
+	if len(result1.Articles) != expectedCount {
+		t.Errorf("Expected %d articles, got %d", expectedCount, len(result1.Articles))
+	}
+
+	// The second request should be answered from cache via 304.
+	result2, err := fetcher.FetchFeed(ctx, server.URL)
+	if err != nil {
+		t.Fatalf("Second request failed: %v", err)
+	}
+	if !result2.Success {
+		t.Errorf("Second request should succeed")
+	}
+	if result2.StatusCode != http.StatusNotModified {
+		t.Errorf("Expected 304 Not Modified, got %d", result2.StatusCode)
+	}
+	if len(result2.Articles) != expectedCount {
+		t.Errorf("Expected %d articles from cache, got %d", expectedCount, len(result2.Articles))
+	}
+}
diff --git a/go.mod b/go.mod
index e83a97a..12d968e 100644
--- a/go.mod
+++ b/go.mod
@@ -3,3 +3,36 @@ module github.com/feedreaderco/api
go 1.25.2
require gopkg.in/yaml.v3 v3.0.1
+
+// Modules imported directly by the feedfetcher package and its tests.
+require (
+	github.com/aws/aws-sdk-go-v2 v1.39.2
+	github.com/aws/aws-sdk-go-v2/config v1.31.12
+	github.com/aws/aws-sdk-go-v2/credentials v1.18.16
+	github.com/aws/aws-sdk-go-v2/service/s3 v1.88.4
+	github.com/go-redis/redis/v8 v8.11.5
+	github.com/mmcdole/gofeed v1.3.0
+)
+
+// Transitive dependencies.
+require (
+	github.com/PuerkitoBio/goquery v1.8.0 // indirect
+	github.com/andybalholm/cascadia v1.3.1 // indirect
+	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1 // indirect
+	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.9 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.9 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.9 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.9 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.0 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.9 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.9 // indirect
+	github.com/aws/aws-sdk-go-v2/service/sso v1.29.6 // indirect
+	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.1 // indirect
+	github.com/aws/aws-sdk-go-v2/service/sts v1.38.6 // indirect
+	github.com/aws/smithy-go v1.23.0 // indirect
+	github.com/cespare/xxhash/v2 v2.1.2 // indirect
+	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	golang.org/x/net v0.4.0 // indirect
+	golang.org/x/text v0.5.0 // indirect
+)
diff --git a/go.sum b/go.sum
index a62c313..95f8a69 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,76 @@
+github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
+github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+github.com/aws/aws-sdk-go-v2 v1.39.2 h1:EJLg8IdbzgeD7xgvZ+I8M1e0fL0ptn/M47lianzth0I=
+github.com/aws/aws-sdk-go-v2 v1.39.2/go.mod h1:sDioUELIUO9Znk23YVmIk86/9DOpkbyyVb1i/gUNFXY=
+github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1 h1:i8p8P4diljCr60PpJp6qZXNlgX4m2yQFpYk+9ZT+J4E=
+github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1/go.mod h1:ddqbooRZYNoJ2dsTwOty16rM+/Aqmk/GOXrK8cg7V00=
+github.com/aws/aws-sdk-go-v2/config v1.31.12 h1:pYM1Qgy0dKZLHX2cXslNacbcEFMkDMl+Bcj5ROuS6p8=
+github.com/aws/aws-sdk-go-v2/config v1.31.12/go.mod h1:/MM0dyD7KSDPR+39p9ZNVKaHDLb9qnfDurvVS2KAhN8=
+github.com/aws/aws-sdk-go-v2/credentials v1.18.16 h1:4JHirI4zp958zC026Sm+V4pSDwW4pwLefKrc0bF2lwI=
+github.com/aws/aws-sdk-go-v2/credentials v1.18.16/go.mod h1:qQMtGx9OSw7ty1yLclzLxXCRbrkjWAM7JnObZjmCB7I=
+github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.9 h1:Mv4Bc0mWmv6oDuSWTKnk+wgeqPL5DRFu5bQL9BGPQ8Y=
+github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.9/go.mod h1:IKlKfRppK2a1y0gy1yH6zD+yX5uplJ6UuPlgd48dJiQ=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.9 h1:se2vOWGD3dWQUtfn4wEjRQJb1HK1XsNIt825gskZ970=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.9/go.mod h1:hijCGH2VfbZQxqCDN7bwz/4dzxV+hkyhjawAtdPWKZA=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.9 h1:6RBnKZLkJM4hQ+kN6E7yWFveOTg8NLPHAkqrs4ZPlTU=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.9/go.mod h1:V9rQKRmK7AWuEsOMnHzKj8WyrIir1yUJbZxDuZLFvXI=
+github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo=
+github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo=
+github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.9 h1:w9LnHqTq8MEdlnyhV4Bwfizd65lfNCNgdlNC6mM5paE=
+github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.9/go.mod h1:LGEP6EK4nj+bwWNdrvX/FnDTFowdBNwcSPuZu/ouFys=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 h1:oegbebPEMA/1Jny7kvwejowCaHz1FWZAQ94WXFNCyTM=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1/go.mod h1:kemo5Myr9ac0U9JfSjMo9yHLtw+pECEHsFtJ9tqCEI8=
+github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.0 h1:X0FveUndcZ3lKbSpIC6rMYGRiQTcUVRNH6X4yYtIrlU=
+github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.0/go.mod h1:IWjQYlqw4EX9jw2g3qnEPPWvCE6bS8fKzhMed1OK7c8=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.9 h1:5r34CgVOD4WZudeEKZ9/iKpiT6cM1JyEROpXjOcdWv8=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.9/go.mod h1:dB12CEbNWPbzO2uC6QSWHteqOg4JfBVJOojbAoAUb5I=
+github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.9 h1:wuZ5uW2uhJR63zwNlqWH2W4aL4ZjeJP3o92/W+odDY4=
+github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.9/go.mod h1:/G58M2fGszCrOzvJUkDdY8O9kycodunH4VdT5oBAqls=
+github.com/aws/aws-sdk-go-v2/service/s3 v1.88.4 h1:mUI3b885qJgfqKDUSj6RgbRqLdX0wGmg8ruM03zNfQA=
+github.com/aws/aws-sdk-go-v2/service/s3 v1.88.4/go.mod h1:6v8ukAxc7z4x4oBjGUsLnH7KGLY9Uhcgij19UJNkiMg=
+github.com/aws/aws-sdk-go-v2/service/sso v1.29.6 h1:A1oRkiSQOWstGh61y4Wc/yQ04sqrQZr1Si/oAXj20/s=
+github.com/aws/aws-sdk-go-v2/service/sso v1.29.6/go.mod h1:5PfYspyCU5Vw1wNPsxi15LZovOnULudOQuVxphSflQA=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.1 h1:5fm5RTONng73/QA73LhCNR7UT9RpFH3hR6HWL6bIgVY=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.1/go.mod h1:xBEjWD13h+6nq+z4AkqSfSvqRKFgDIQeaMguAJndOWo=
+github.com/aws/aws-sdk-go-v2/service/sts v1.38.6 h1:p3jIvqYwUZgu/XYeI48bJxOhvm47hZb5HUQ0tn6Q9kA=
+github.com/aws/aws-sdk-go-v2/service/sts v1.38.6/go.mod h1:WtKK+ppze5yKPkZ0XwqIVWD4beCwv056ZbPQNoeHqM8=
+github.com/aws/smithy-go v1.23.0 h1:8n6I3gXzWJB2DxBDnfxgBaSX6oe0d/t10qGz7OKqMCE=
+github.com/aws/smithy-go v1.23.0/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI=
+github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE=
+github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
+github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC0oI=
+github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/mmcdole/gofeed v1.3.0 h1:5yn+HeqlcvjMeAI4gu6T+crm7d0anY85+M+v6fIFNG4=
+github.com/mmcdole/gofeed v1.3.0/go.mod h1:9TGv2LcJhdXePDzxiuMnukhV2/zb6VtnZt1mS+SjkLE=
+github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 h1:Zr92CAlFhy2gL+V1F+EyIuzbQNbSgP4xhTODZtrXUtk=
+github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.4.0 h1:Q5QPcMlvfxFTAPV0+07Xz/MpK9NTXu2VDUuy0FeMfaU=
+golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM=
+golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/src/lib/feeds.integration.test.js b/src/lib/feeds.integration.test.js
new file mode 100644
index 0000000..c29f31f
--- /dev/null
+++ b/src/lib/feeds.integration.test.js
@@ -0,0 +1,339 @@
+// Integration tests for feed fetching
+// Run from the repository root with: INTEGRATION=true node src/lib/feeds.integration.test.js
+// Requires Redis and MinIO to be running (use docker-compose up)
+
+const fs = require('fs');
+const http = require('http');
+const yaml = require('js-yaml');
+const redis = require('redis');
+const AWS = require('aws-sdk');
+
+// Exit if not running in integration mode
+if (process.env.INTEGRATION !== 'true') {
+ console.log('Skipping integration tests (set INTEGRATION=true to run)');
+ process.exit(0);
+}
+
+// Load test cases from YAML
+const testCasesYaml = fs.readFileSync('./testdata/feed-get-tests.yaml', 'utf8');
+const testCases = yaml.load(testCasesYaml);
+
+// Setup Redis client
+const redisHost = process.env.REDIS_HOST || 'localhost';
+const redisPort = process.env.REDIS_PORT || '6379';
+const redisClient = redis.createClient({
+ host: redisHost,
+ port: redisPort,
+});
+
+// Setup S3 client (MinIO)
+const s3Endpoint = process.env.S3_ENDPOINT || 'http://localhost:9000';
+const s3AccessKey = process.env.S3_ACCESS_KEY || 'minioadmin';
+const s3SecretKey = process.env.S3_SECRET_KEY || 'minioadmin';
+const s3Bucket = process.env.S3_BUCKET || 'feedreader2018-articles';
+
+AWS.config.update({
+ accessKeyId: s3AccessKey,
+ secretAccessKey: s3SecretKey,
+ s3ForcePathStyle: true,
+ signatureVersion: 'v4',
+});
+
+const s3 = new AWS.S3({
+ endpoint: s3Endpoint,
+ params: { Bucket: s3Bucket },
+});
+
+// Import feed utilities
+const { buildRedisKeys, buildArticleKey } = require('./feedUtils.js');
+
+// Simple test runner
+let passed = 0;
+let failed = 0;
+let testServer = null;
+
+function test(name, fn) {
+ return new Promise((resolve) => {
+ fn()
+ .then(() => {
+ passed++;
+ console.log(`✓ ${name}`);
+ resolve();
+ })
+ .catch((error) => {
+ failed++;
+ console.error(`✗ ${name}`);
+ console.error(` ${error.message}`);
+ if (error.stack) {
+ console.error(` ${error.stack}`);
+ }
+ resolve();
+ });
+ });
+}
+
+// Helper to call feed.get with mock req/res
+function callFeedGet(feedURI) {
+ return new Promise((resolve, reject) => {
+ // Mock Express request object
+ const req = {
+ url: '/api/feed/' + encodeURIComponent(feedURI),
+ };
+
+ // Mock Express response object
+ let responseData = null;
+ let statusCode = 200;
+
+ const res = {
+ json: (data) => {
+ responseData = data;
+ resolve(responseData);
+ },
+ status: (code) => {
+ statusCode = code;
+ return res; // Allow chaining
+ },
+ };
+
+ // Import feeds.js and call feed.get
+ // We need to set up the module with our Redis client
+ const feedsModule = require('../feeds.js');
+
+ // Call the feed.get function
+ try {
+ feedsModule.feed.get(req, res);
+ } catch (error) {
+ reject(error);
+ }
+ });
+}
+
+// Helper to clear Redis keys
+function clearRedisKeys(feedURI, callback) {
+ const { feedKey, articlesKey } = buildRedisKeys(feedURI);
+ redisClient.del([feedKey, articlesKey], callback);
+}
+
+// Run all integration tests
+async function runTests() {
+ console.log('\n=== Testing Feed Fetching (Integration) ===\n');
+
+ // Test: Fetch and process Atom feed
+ const atomTest = testCases.feed_get_tests.find(tc => tc.description.includes('Atom'));
+ if (atomTest) {
+ await test(atomTest.description, () => {
+ return new Promise((resolve, reject) => {
+ // Clear Redis
+ clearRedisKeys('http://localhost:8888/xkcd', (clearErr) => {
+ if (clearErr) return reject(clearErr);
+
+ // Read feed fixture
+ const feedData = fs.readFileSync(atomTest.feed_fixture, 'utf8');
+
+ // Create test HTTP server
+ testServer = http.createServer((req, res) => {
+ res.writeHead(200, { 'Content-Type': 'application/xml' });
+ res.end(feedData);
+ });
+
+ testServer.listen(8888, async () => {
+ try {
+ const result = await callFeedGet('http://localhost:8888/xkcd');
+
+ // Verify success
+ if (!result.success) {
+ throw new Error(`Expected success=true, got ${result.success}`);
+ }
+
+ // Verify feed metadata
+ if (result.title !== atomTest.expected_feed_metadata.title) {
+ throw new Error(`Title mismatch: got ${result.title}, want ${atomTest.expected_feed_metadata.title}`);
+ }
+
+ if (result.link !== atomTest.expected_feed_metadata.link) {
+ throw new Error(`Link mismatch: got ${result.link}, want ${atomTest.expected_feed_metadata.link}`);
+ }
+
+ // Verify article count
+ if (result.articles.length !== atomTest.expected_articles_count) {
+ throw new Error(`Article count mismatch: got ${result.articles.length}, want ${atomTest.expected_articles_count}`);
+ }
+
+ // Verify specific articles
+ for (const expectedArticle of atomTest.expected_articles) {
+ if (!result.articles.includes(expectedArticle.hash)) {
+ throw new Error(`Expected article ${expectedArticle.hash} not found`);
+ }
+
+ // Verify article is in Redis
+ const { articlesKey } = buildRedisKeys('http://localhost:8888/xkcd');
+ const articleKey = buildArticleKey(expectedArticle.hash);
+
+ await new Promise((res, rej) => {
+ redisClient.zscore(articlesKey, articleKey, (err, score) => {
+ if (err) return rej(err);
+ if (parseInt(score) !== expectedArticle.score) {
+ return rej(new Error(`Score mismatch: got ${score}, want ${expectedArticle.score}`));
+ }
+ res();
+ });
+ });
+
+ // Verify article is in S3
+ await new Promise((res, rej) => {
+ s3.headObject({ Key: expectedArticle.hash + '.json' }, (err) => {
+ if (err) return rej(new Error(`Article ${expectedArticle.hash} not found in S3`));
+ res();
+ });
+ });
+ }
+
+ testServer.close();
+ resolve();
+ } catch (error) {
+ testServer.close();
+ reject(error);
+ }
+ });
+ });
+ });
+ });
+ }
+
+ // Test: Fetch and process RSS feed
+ const rssTest = testCases.feed_get_tests.find(tc => tc.description.includes('RSS'));
+ if (rssTest) {
+ await test(rssTest.description, () => {
+ return new Promise((resolve, reject) => {
+ clearRedisKeys('http://localhost:8889/hn', (clearErr) => {
+ if (clearErr) return reject(clearErr);
+
+ const feedData = fs.readFileSync(rssTest.feed_fixture, 'utf8');
+
+ testServer = http.createServer((req, res) => {
+ res.writeHead(200, { 'Content-Type': 'application/xml' });
+ res.end(feedData);
+ });
+
+ testServer.listen(8889, async () => {
+ try {
+ const result = await callFeedGet('http://localhost:8889/hn');
+
+ if (!result.success) {
+ throw new Error(`Expected success=true, got ${result.success}`);
+ }
+
+ if (result.title !== rssTest.expected_feed_metadata.title) {
+ throw new Error(`Title mismatch: got ${result.title}, want ${rssTest.expected_feed_metadata.title}`);
+ }
+
+ if (result.articles.length !== rssTest.expected_articles_count) {
+ throw new Error(`Article count mismatch: got ${result.articles.length}, want ${rssTest.expected_articles_count}`);
+ }
+
+ testServer.close();
+ resolve();
+ } catch (error) {
+ testServer.close();
+ reject(error);
+ }
+ });
+ });
+ });
+ });
+ }
+
+ // Test: HTTP 304 caching
+ const cachingTest = testCases.feed_get_tests.find(tc => tc.caching_test);
+ if (cachingTest) {
+ await test(cachingTest.description, () => {
+ return new Promise((resolve, reject) => {
+ clearRedisKeys('http://localhost:8890/cache-test', (clearErr) => {
+ if (clearErr) return reject(clearErr);
+
+ const feedData = fs.readFileSync(cachingTest.feed_fixture, 'utf8');
+ const { last_modified, etag } = cachingTest.caching_test.response_headers;
+ const expectedCount = cachingTest.caching_test.expected_articles_count;
+
+ let requestCount = 0;
+
+ testServer = http.createServer((req, res) => {
+ requestCount++;
+
+ // Second request should get 304
+ if (requestCount > 1) {
+ if (req.headers['if-modified-since'] !== last_modified) {
+ testServer.close();
+ return reject(new Error(`Expected If-Modified-Since: ${last_modified}, got ${req.headers['if-modified-since']}`));
+ }
+ if (req.headers['if-none-match'] !== etag) {
+ testServer.close();
+ return reject(new Error(`Expected If-None-Match: ${etag}, got ${req.headers['if-none-match']}`));
+ }
+ res.writeHead(304);
+ res.end();
+ return;
+ }
+
+ // First request returns full feed
+ res.writeHead(200, {
+ 'Content-Type': 'application/xml',
+ 'Last-Modified': last_modified,
+ 'Etag': etag,
+ });
+ res.end(feedData);
+ });
+
+ testServer.listen(8890, async () => {
+ try {
+ // First request
+ const result1 = await callFeedGet('http://localhost:8890/cache-test');
+
+ if (!result1.success) {
+ throw new Error('First request should succeed');
+ }
+
+ if (result1.articles.length !== expectedCount) {
+ throw new Error(`Expected ${expectedCount} articles, got ${result1.articles.length}`);
+ }
+
+ // Second request (should use cache and get 304)
+ const result2 = await callFeedGet('http://localhost:8890/cache-test');
+
+ if (!result2.success) {
+ throw new Error('Second request should succeed');
+ }
+
+ if (result2.articles.length !== expectedCount) {
+ throw new Error(`Expected ${expectedCount} cached articles, got ${result2.articles.length}`);
+ }
+
+ testServer.close();
+ resolve();
+ } catch (error) {
+ testServer.close();
+ reject(error);
+ }
+ });
+ });
+ });
+ });
+ }
+
+ // Print summary
+ console.log(`\n=== Test Summary ===`);
+ console.log(`Passed: ${passed}`);
+ console.log(`Failed: ${failed}`);
+ console.log(`Total: ${passed + failed}\n`);
+
+ redisClient.quit();
+ process.exit(failed > 0 ? 1 : 0);
+}
+
+// Run tests
+runTests().catch((error) => {
+ console.error('Test runner error:', error);
+ if (testServer) testServer.close();
+ redisClient.quit();
+ process.exit(1);
+});
diff --git a/testdata/feed-get-tests.yaml b/testdata/feed-get-tests.yaml
new file mode 100644
index 0000000..0c71d07
--- /dev/null
+++ b/testdata/feed-get-tests.yaml
@@ -0,0 +1,92 @@
+# Test cases for feed fetching and processing
+# These tests use real feed fixtures and verify Redis/S3 storage
+
+feed_get_tests:
+ - description: "Fetch and process Atom feed (XKCD)"
+ feed_fixture: "testdata/feeds/xkcd.atom.xml"
+ feed_uri: "https://xkcd.com/atom.xml"
+ expected_feed_metadata:
+ title: "xkcd.com"
+ link: "https://xkcd.com/"
+ expected_articles_count: 3
+ expected_articles:
+ - guid: "https://xkcd.com/3153/"
+ title: "Test Comic 1"
+ hash: "13a0bebeed5b348147d880a1a4917587"
+ score: 1728518400000
+ feedurl: "https://xkcd.com/atom.xml"
+ - guid: "https://xkcd.com/3152/"
+ title: "Test Comic 2"
+ hash: "21664da7ee05988c62d1f516f3442411"
+ score: 1728345600000
+ feedurl: "https://xkcd.com/atom.xml"
+ - guid: "https://xkcd.com/3151/"
+ title: "Test Comic 3"
+ hash: "3fa08ba1591ba3683e87265ee9300946"
+ score: 1728172800000
+ feedurl: "https://xkcd.com/atom.xml"
+
+ - description: "Fetch and process RSS feed (Hacker News)"
+ feed_fixture: "testdata/feeds/hn.rss.xml"
+ feed_uri: "https://news.ycombinator.com/rss"
+ expected_feed_metadata:
+ title: "Hacker News"
+ link: "https://news.ycombinator.com/"
+ expected_articles_count: 3
+ expected_articles:
+ - guid: "https://news.ycombinator.com/item?id=12345"
+ title: "Show HN: My Project"
+ hash: "e6d98eb69fa44b3807ce21bea815869b"
+ score: 1728475200000
+ feedurl: "https://news.ycombinator.com/rss"
+ - guid: "https://news.ycombinator.com/item?id=12346"
+ title: "Ask HN: What are you working on?"
+ hash: "11eeb3caf6e5906bc165618b0f00575f"
+ score: 1728471600000
+ feedurl: "https://news.ycombinator.com/rss"
+ - guid: "https://news.ycombinator.com/item?id=12347"
+ title: "New JavaScript Framework Released"
+ hash: "73b3dc5e5cb0b970a6eaf06b29b96c4a"
+ score: 1728468000000
+ feedurl: "https://news.ycombinator.com/rss"
+
+ - description: "Skip invalid articles without guid"
+ feed_fixture: "testdata/feeds/invalid.xml"
+ feed_uri: "https://example.com/invalid.xml"
+ expected_articles_count: 0
+
+ - description: "Use cached feed metadata on subsequent fetch"
+ feed_fixture: "testdata/feeds/xkcd.atom.xml"
+ feed_uri: "https://xkcd.com/atom.xml"
+ cached_feed:
+ lastModified: "Wed, 09 Oct 2024 12:00:00 GMT"
+ etag: "\"abc123\""
+ expected_request_headers:
+ If-Modified-Since: "Wed, 09 Oct 2024 12:00:00 GMT"
+ If-None-Match: "\"abc123\""
+
+ - description: "Update article score when changed"
+ feed_fixture: "testdata/feeds/xkcd.atom.xml"
+ feed_uri: "https://xkcd.com/atom.xml"
+ existing_articles:
+ - hash: "13a0bebeed5b348147d880a1a4917587"
+ score: 1728432000000 # Different score
+ should_update_s3: true
+
+ - description: "Skip S3 update when score unchanged"
+ feed_fixture: "testdata/feeds/xkcd.atom.xml"
+ feed_uri: "https://xkcd.com/atom.xml"
+ existing_articles:
+ - hash: "13a0bebeed5b348147d880a1a4917587"
+ score: 1728518400000 # Same score
+ should_update_s3: false
+
+ - description: "HTTP 304 caching with lastModified and etag"
+ feed_fixture: "testdata/feeds/xkcd.atom.xml"
+ feed_uri: "https://xkcd.com/atom.xml"
+ caching_test:
+ response_headers:
+ last_modified: "Wed, 09 Oct 2024 12:00:00 GMT"
+ etag: "\"test-etag-123\""
+ expected_articles_count: 3
+ should_return_304_on_second_request: true
diff --git a/testdata/feeds/hn.rss.xml b/testdata/feeds/hn.rss.xml
new file mode 100644
index 0000000..20aa879
--- /dev/null
+++ b/testdata/feeds/hn.rss.xml
@@ -0,0 +1,33 @@
+
+
+
+ Hacker News
+ https://news.ycombinator.com/
+ Links for the intellectually curious, ranked by readers.
+ Wed, 09 Oct 2024 12:00:00 GMT
+
+ -
+ Show HN: My Project
+ https://news.ycombinator.com/item?id=12345
+ https://news.ycombinator.com/item?id=12345
+ Wed, 09 Oct 2024 12:00:00 GMT
+ A description of my project
+
+
+ -
+ Ask HN: What are you working on?
+ https://news.ycombinator.com/item?id=12346
+ https://news.ycombinator.com/item?id=12346
+ Wed, 09 Oct 2024 11:00:00 GMT
+ Monthly thread about projects
+
+
+ -
+ New JavaScript Framework Released
+ https://news.ycombinator.com/item?id=12347
+ https://news.ycombinator.com/item?id=12347
+ Wed, 09 Oct 2024 10:00:00 GMT
+ Yet another JavaScript framework
+
+
+
diff --git a/testdata/feeds/invalid.xml b/testdata/feeds/invalid.xml
new file mode 100644
index 0000000..fb5319e
--- /dev/null
+++ b/testdata/feeds/invalid.xml
@@ -0,0 +1,23 @@
+
+
+ Invalid Feed
+
+ https://example.com/
+ 2024-10-10T00:00:00Z
+
+
+ Article without GUID
+
+ 2024-10-10T00:00:00Z
+
+ This article has no GUID and should be skipped
+
+
+
+
+ Article without description
+
+ 2024-10-10T00:00:00Z
+ https://example.com/article2
+
+
diff --git a/testdata/feeds/xkcd.atom.xml b/testdata/feeds/xkcd.atom.xml
new file mode 100644
index 0000000..39b918b
--- /dev/null
+++ b/testdata/feeds/xkcd.atom.xml
@@ -0,0 +1,31 @@
+
+
+ xkcd.com
+
+ https://xkcd.com/
+ 2024-10-10T00:00:00Z
+
+
+ Test Comic 1
+
+ 2024-10-10T00:00:00Z
+ https://xkcd.com/3153/
+ First test comic description
+
+
+
+ Test Comic 2
+
+ 2024-10-08T00:00:00Z
+ https://xkcd.com/3152/
+ Second test comic description
+
+
+
+ Test Comic 3
+
+ 2024-10-06T00:00:00Z
+ https://xkcd.com/3151/
+ Third test comic description
+
+