---

2026-06-24 16:12:16 +02:00
commit 9fed7b2cbc
19 changed files with 2968 additions and 0 deletions
@@ -0,0 +1 @@
 .env
@@ -0,0 +1,5 @@
 OPENAI_API_KEY=
 PINECONE_API_KEY=
 PINECONE_INDEX=
 PINECONE_NAMESPACE=
 API_PORT=
@@ -0,0 +1,4 @@
 node_modules
 .env
 .env.production
 redeploy.sh
@@ -0,0 +1,15 @@
 # Image for the FoxGPT backend: installs dependencies, copies the sources and
 # starts the server through the package's "start" script.
 # NOTE(review): `latest` is a moving tag; pin a specific Node major so rebuilds are reproducible.
 FROM node:latest
 WORKDIR /usr/src/app
 # Copy only the manifests first so the dependency layer stays cached until they change.
 COPY package.json package-lock.json ./
 # `npm ci` installs exactly what package-lock.json records instead of re-resolving version ranges.
 RUN npm ci
 COPY . .
 # NOTE(review): this bakes the production secrets into an image layer that is later pushed
 # to a registry; prefer injecting them at runtime (e.g. via the host's app configuration).
 COPY .env.production .env
 # key=value form; the space-separated form is legacy syntax.
 ENV NODE_ENV=production
 EXPOSE 4000
 CMD ["npm", "run", "start"]
@@ -0,0 +1 @@
 This backend server doesn't store any user data. Data sent to the server is only used temporarily to fetch the related news articles. Chat content might be logged by OpenAI, which is not the creator of this GPT.
@@ -0,0 +1 @@
 # Backend for FoxGPT 🦊
@@ -0,0 +1,65 @@
 openapi: 3.1.0
 info:
  title: Fox News API for GPT
  description: This API allows the GPT to retrieve the latest Fox News articles stored in a vector database.
  version: 1.0.0
 servers:
  - url: https://foxgpt-backend.elliot-at-zuri.ch
    description: Server to query Fox News articles
 paths:
  /query:
    post:
      operationId: getArticles
      summary: Get newest news articles from Fox News.
      description: This endpoint takes a list of keywords and returns the top `k` most similar news articles.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                keywords:
                  type: array
                  items:
                    type: string
                  description: The keywords to search for relevant news articles.
                topK:
                  type: integer
                  default: 1
                  description: The number of top articles to retrieve.
      responses:
        '200':
          description: A list of news articles.
          content:
            application/json:
              schema:
                # The /query handler responds with a bare JSON array of articles
                # (it is not wrapped in an object), so the schema is an array.
                type: array
                items:
                  type: object
                  properties:
                    headline:
                      type: string
                      description: Title of the news article.
                    articleBody:
                      type: string
                      description: Content of the news article.
                    datePublished:
                      type: string
                      description: The time the article was published.
                    dateModified:
                      type: string
                      description: The time the article was last modified.
                    description:
                      type: string
                      description: A short summary of the news article.
                    url:
                      type: string
                      description: The URL at which the article is published.
        '400':
          description: Invalid request
        '500':
          description: Internal server error
@@ -0,0 +1,13 @@
 // Ambient typing for the environment variables this backend reads from
 // `process.env`, so each of them is typed as `string` rather than `string | undefined`.
 // NOTE(review): presumably generated by the `env:generate` script (gen-env-types) — confirm before hand-editing.
 declare global {
  namespace NodeJS {
    interface ProcessEnv {
      // API key handed to the OpenAI embeddings client.
      OPENAI_API_KEY: string;
      // API key handed to the Pinecone client.
      PINECONE_API_KEY: string;
      // Name of the Pinecone index that stores the article vectors.
      PINECONE_INDEX: string;
      // Namespace inside that index used for reads and writes.
      PINECONE_NAMESPACE: string;
      // Port the HTTP server listens on; a string here, parsed to an integer at startup.
      API_PORT: string;
    }
  }
 }
 // An empty export turns this file into a module, which `declare global` requires.
 export {}
@@ -0,0 +1,13 @@
 #!/bin/bash
 # Builds and pushes the backend image, then configures and deploys the dokku
 # app on the remote host over SSH.
 #
 # Abort on the first failing command (or unset variable / failed pipe stage) so
 # a broken build is never pushed or deployed.
 set -euo pipefail
 docker build --no-cache -t elliotathelsinki/foxgpt:latest .
 docker push elliotathelsinki/foxgpt:latest
 # The remote commands are chained with && so a failure stops the rollout.
 # App creation is guarded with apps:exists so the script can be run again
 # after the app has already been created.
 ssh -i /home/elliot/.ssh/id_rsa ubuntu@elliot-at-zuri.ch "
  sudo docker pull elliotathelsinki/foxgpt:latest &&
  (sudo dokku apps:exists foxgpt || sudo dokku apps:create foxgpt) &&
  sudo dokku domains:set foxgpt foxgpt-backend.elliot-at-zuri.ch &&
  sudo dokku ports:set foxgpt http:80:4000 &&
  sudo dokku letsencrypt:enable foxgpt &&
  sudo dokku git:from-image foxgpt elliotathelsinki/foxgpt:latest &&
  sudo dokku ps:rebuild foxgpt
 "
@@ -0,0 +1 @@
 FoxGPT is your go-to news host from Fox News, designed to keep you informed about the latest events and stories. With access to the latest news articles through the getArticles action, FoxGPT provides timely updates and answers your questions with detailed summaries of the latest news. Whether you're looking for a quick headline, an in-depth summary, or need specific information about current events, FoxGPT is here to deliver reliable and up-to-date news, keeping the tone professional, informative, and engaging. FoxGPT can also retrieve and summarize the latest articles for you upon request. FoxGPT should automatically correct any spelling or grammatical error in the retrieved articles. FoxGPT should start the conversation by asking the user what they would like to know about.
@@ -0,0 +1,30 @@
 {
  "name": "foxgpt-backend",
  "type": "module",
  "scripts": {
    "dev": "tsx watch --env-file=.env --require reflect-metadata --require tsconfig-paths/register --require dotenv-safe/config src/index.ts",
    "start": "tsx --env-file=.env --require reflect-metadata --require tsconfig-paths/register --require dotenv-safe/config src/index.ts",
    "fetch": "tsx --env-file=.env --require reflect-metadata --require tsconfig-paths/register --require dotenv-safe/config src/fetch.ts",
    "env:generate": "gen-env-types .env -o env.d.ts -e ."
  },
  "dependencies": {
    "@langchain/openai": "^0.2.6",
    "@pinecone-database/pinecone": "^3.0.0",
    "cheerio": "^1.0.0",
    "dotenv-safe": "^9.1.0",
    "express": "^4.19.2",
    "node-cron": "^3.0.3",
    "reflect-metadata": "^0.2.2",
    "tsconfig-paths": "^4.2.0",
    "tsx": "^4.17.0",
    "uuid": "^10.0.0"
  },
  "devDependencies": {
    "@types/dotenv-safe": "^8.1.6",
    "@types/express": "^4.17.21",
    "@types/node": "^20.14.10",
    "@types/node-cron": "^3.0.11",
    "gen-env-types": "^1.3.4",
    "typescript": "^5.5.3"
  }
 }
@@ -0,0 +1 @@
 /** `true` only when the process runs with `NODE_ENV` set to exactly `production`. */
 const { NODE_ENV } = process.env
 export const __prod__ = NODE_ENV === 'production'
@@ -0,0 +1,82 @@
 import * as cheerio from 'cheerio'
 import { embeddings, ns, pc } from '@/src/libs'
 import * as fs from 'fs/promises'
 import path, { dirname } from 'path'
 import { fileURLToPath } from 'url'
 // One-off seeding script: scrapes the Fox News "world" listing, extracts the
 // structured data of every linked article, embeds it and upserts the vectors
 // into the configured Pinecone namespace (creating the index when missing).
 const __filename = fileURLToPath(import.meta.url)
 const __dirname = dirname(__filename)
 const FOX_NEWS_ORIGIN = 'https://www.foxnews.com'

 /**
  * Downloads `url` and returns the response body as text.
  * @throws Error when the server answers with a non-2xx status, so an error
  *   page is never parsed as if it were real content.
  */
 async function fetchText(url: string): Promise<string> {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with status ${response.status}`)
  }
  return response.text()
 }

 /**
  * Returns the absolute, de-duplicated article URLs linked from a listing page,
  * in document order. Video pages are skipped.
  */
 function collectArticleURLs(listingHtml: string): string[] {
  const $ = cheerio.load(listingHtml)
  // A Set keeps insertion order and makes the duplicate check O(1).
  const unique = new Set<string>()
  $('article a').each((_, element) => {
    const href = $(element).attr('href')
    // Fox News uses site-relative links; `//host/...` is protocol-relative and
    // must not be glued onto the origin.
    if (!href || !href.startsWith('/') || href.startsWith('//')) {
      return
    }
    const url = `${FOX_NEWS_ORIGIN}${href}`
    if (!url.startsWith(`${FOX_NEWS_ORIGIN}/video/`)) {
      unique.add(url)
    }
  })
  return [...unique]
 }

 /**
  * Returns the serialized article fields from the first JSON-LD script that
  * carries all of them, or `undefined` when the page has none.
  * Only the first match is used so one URL never yields two records with the same id.
  */
 function extractArticle(articleHtml: string): string | undefined {
  const $ = cheerio.load(articleHtml)
  for (const element of $('script[type="application/ld+json"]').toArray()) {
    const raw = $(element).html()
    if (!raw) {
      continue
    }
    try {
      const { headline, articleBody, datePublished, dateModified, description } = JSON.parse(raw)
      if (headline && articleBody && datePublished && dateModified && description) {
        return JSON.stringify({ headline, articleBody, datePublished, dateModified, description })
      }
    } catch (error) {
      console.error('Failed to parse JSON:', error)
    }
  }
  return undefined
 }

 const articleURLs = collectArticleURLs(await fetchText(`${FOX_NEWS_ORIGIN}/world`))
 const objects: { url: string, json: string }[] = []
 for (const url of articleURLs) {
  console.log(`Fetching ${url}`)
  try {
    const json = extractArticle(await fetchText(url))
    if (json) {
      objects.push({ url, json })
    }
  } catch (error) {
    // A single unreachable article must not abort the whole run.
    console.error(`Skipping ${url}:`, error)
  }
 }
 // Skip the embeddings call entirely when nothing was scraped.
 const vectorEmbeddings = objects.length > 0
  ? await embeddings.embedDocuments(objects.map(o => o.json))
  : []
 await fs.writeFile(path.join(__dirname, './embeddings.json'), JSON.stringify(vectorEmbeddings))
 const existingIndexes = (await pc.listIndexes()).indexes ?? []
 if (!existingIndexes.some(index => index.name === process.env.PINECONE_INDEX)) {
  console.log(await pc.createIndex({
    name: process.env.PINECONE_INDEX,
    metric: 'cosine',
    dimension: 1536,
    spec: {
      serverless: {
        cloud: 'aws',
        region: 'us-east-1'
      }
    },
    // A freshly created index is not immediately usable; wait before upserting.
    waitUntilReady: true
  }))
 }
 if (objects.length === 0) {
  // Upserting an empty batch is rejected by the client, so stop here.
  console.log('No articles found, nothing to upsert.')
 } else {
  await ns.upsert(objects.map((o, i) => ({ id: o.url, values: vectorEmbeddings[i], metadata: { payload: o.json } })))
 }
@@ -0,0 +1,119 @@
 import { __prod__ } from '@/src/constants'
 import { embeddings, ns } from '@/src/libs'
 import * as cheerio from 'cheerio'
 import express from 'express'
 import http from 'http'
 import cron from 'node-cron'
 // Express application; `app.listen` below creates the HTTP server itself, so
 // no separate `http.createServer(app)` call is needed.
 const app = express()
 app.use(express.json())
 // Parse the port once, with an explicit radix, and fail fast on a bad value
 // instead of handing NaN to `listen`.
 const port = Number.parseInt(process.env.API_PORT, 10)
 if (Number.isNaN(port)) {
  throw new Error(`API_PORT must be an integer, got "${process.env.API_PORT}".`)
 }
 app.listen(port, () => {
  if (!__prod__) {
    console.log(`Server started on localhost:${process.env.API_PORT}.`)
  }
  else {
    // BACKEND_ORIGIN may be unset; fall back to the port rather than logging "undefined".
    console.log(`Server started at ${process.env.BACKEND_ORIGIN ?? `port ${port}`}.`)
  }
 })
 // Landing route: answers every GET / with a fixed greeting.
 app.get('/', (_request, response) => {
  const greeting = 'Welcome to Express.'
  response.send(greeting)
 })
 /**
  * POST /query — semantic search over the stored articles.
  *
  * Body: `{ keywords?: string[], topK?: number }`.
  *  - `keywords` missing or empty falls back to `['Latest news']`.
  *  - `topK` missing falls back to 1; it must be a positive integer.
  *
  * Responds with a JSON array of articles (stored payload plus `url`),
  * 400 on malformed input and 500 when the embedding or vector query fails.
  */
 app.post('/query', async (req, res) => {
  const { keywords: rawKeywords, topK: rawTopK } = req.body ?? {}

  const hasKeywords = rawKeywords !== undefined && rawKeywords !== null
  if (hasKeywords && !(Array.isArray(rawKeywords) && rawKeywords.every(k => typeof k === 'string'))) {
    res.status(400).json({ error: '`keywords` must be an array of strings.' })
    return
  }
  const keywords: string[] = hasKeywords && rawKeywords.length > 0 ? rawKeywords : ['Latest news']

  // The vector query requires topK, so apply the default when the client omits it.
  const topK = rawTopK ?? 1
  if (!Number.isInteger(topK) || topK < 1) {
    res.status(400).json({ error: '`topK` must be a positive integer.' })
    return
  }

  // Express 4 does not catch rejections from async handlers, so handle them here.
  try {
    const embedding = await embeddings.embedQuery(JSON.stringify(keywords))
    const result = await ns.query({
      topK,
      vector: embedding,
      // The vectors themselves are never used in the response; do not fetch them.
      includeValues: false,
      includeMetadata: true
    })
    const payloads = result.matches
      // Ignore records that carry no serialized payload instead of failing the whole request.
      .filter(m => typeof m.metadata?.payload === 'string')
      .map(m => {
        const obj = JSON.parse(m.metadata?.payload as string)
        return { ...obj, url: m.id }
      })
    res.json(payloads)
  } catch (error) {
    console.error('Query failed:', error)
    res.status(500).json({ error: 'Internal server error' })
  }
 })
 const FOX_NEWS_ORIGIN = 'https://www.foxnews.com'

 /**
  * Downloads `url` and returns the response body as text.
  * @throws Error when the server answers with a non-2xx status.
  */
 async function fetchText(url: string): Promise<string> {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with status ${response.status}`)
  }
  return response.text()
 }

 /**
  * Returns the absolute, de-duplicated article URLs linked from a listing page,
  * in document order. Video pages are skipped.
  */
 function collectArticleURLs(listingHtml: string): string[] {
  const $ = cheerio.load(listingHtml)
  const unique = new Set<string>()
  $('article a').each((_, element) => {
    const href = $(element).attr('href')
    // Only site-relative links are articles; `//host/...` is protocol-relative
    // and must not be glued onto the origin.
    if (!href || !href.startsWith('/') || href.startsWith('//')) {
      return
    }
    const url = `${FOX_NEWS_ORIGIN}${href}`
    if (!url.startsWith(`${FOX_NEWS_ORIGIN}/video/`)) {
      unique.add(url)
    }
  })
  return [...unique]
 }

 /**
  * Returns the serialized article fields from the first JSON-LD script that
  * carries all of them, or `undefined` when the page has none.
  * Only the first match is used so one URL never yields two records with the same id.
  */
 function extractArticle(articleHtml: string): string | undefined {
  const $ = cheerio.load(articleHtml)
  for (const element of $('script[type="application/ld+json"]').toArray()) {
    const raw = $(element).html()
    if (!raw) {
      continue
    }
    try {
      const { headline, articleBody, datePublished, dateModified, description } = JSON.parse(raw)
      if (headline && articleBody && datePublished && dateModified && description) {
        return JSON.stringify({ headline, articleBody, datePublished, dateModified, description })
      }
    } catch (error) {
      console.error('Failed to parse JSON:', error)
    }
  }
  return undefined
 }

 // Scheduled with the cron expression '0 0 * * *': re-scrape the listing and
 // index only the articles whose URL is not stored in the namespace yet.
 cron.schedule('0 0 * * *', async () => {
  // Nothing awaits this callback, so errors must be handled here rather than
  // surfacing as unhandled promise rejections.
  try {
    const articleURLs = collectArticleURLs(await fetchText(`${FOX_NEWS_ORIGIN}/world`))

    // The article URL doubles as the vector id; an empty match list means "not indexed yet".
    const newArticleURLs: string[] = []
    for (const url of articleURLs) {
      const result = await ns.query({ id: url, topK: 1 })
      if (result.matches.length === 0) {
        newArticleURLs.push(url)
      }
    }

    const objects: { url: string, json: string }[] = []
    for (const url of newArticleURLs) {
      console.log(`Fetching ${url}`)
      try {
        const json = extractArticle(await fetchText(url))
        if (json) {
          objects.push({ url, json })
        }
      } catch (error) {
        // A single unreachable article must not abort the whole refresh.
        console.error(`Skipping ${url}:`, error)
      }
    }

    // Most runs find nothing new; upserting an empty batch is rejected by the client.
    if (objects.length === 0) {
      console.log('No new articles to index.')
      return
    }

    const vectorEmbeddings = await embeddings.embedDocuments(objects.map(o => o.json))
    await ns.upsert(objects.map((o, i) => ({ id: o.url, values: vectorEmbeddings[i], metadata: { payload: o.json } })))
  } catch (error) {
    console.error('Scheduled article refresh failed:', error)
  }
 })
@@ -0,0 +1,14 @@
 import { OpenAIEmbeddings } from '@langchain/openai'
 import { Pinecone } from '@pinecone-database/pinecone'
 // Shared service clients, configured from the environment once at import time.
 const {
  OPENAI_API_KEY,
  PINECONE_API_KEY,
  PINECONE_INDEX,
  PINECONE_NAMESPACE
 } = process.env

 const EMBEDDING_MODEL = 'text-embedding-3-small'
 const EMBEDDING_DIMENSIONS = 1536

 /** OpenAI embeddings client used for both documents and queries. */
 export const embeddings = new OpenAIEmbeddings({
  apiKey: OPENAI_API_KEY,
  model: EMBEDDING_MODEL,
  dimensions: EMBEDDING_DIMENSIONS
 })

 /** Pinecone client for index-level operations. */
 export const pc = new Pinecone({ apiKey: PINECONE_API_KEY })

 /** Handle on the configured namespace of the configured index. */
 const articleIndex = pc.index(PINECONE_INDEX)
 export const ns = articleIndex.namespace(PINECONE_NAMESPACE)
@@ -0,0 +1,65 @@
 openapi: 3.1.0
 info:
  title: Fox News API for GPT
  description: This API allows the GPT to retrieve the latest Fox News articles stored in a vector database.
  version: 1.0.0
 servers:
  - url: https://foxgpt-backend.elliot-at-zuri.ch
    description: Server to query Fox News articles
 paths:
  /query:
    post:
      operationId: getArticles
      summary: Get newest news articles from Fox News.
      description: This endpoint takes a list of keywords and returns the top `k` most similar news articles.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                keywords:
                  type: array
                  items:
                    type: string
                  description: The keywords to search for relevant news articles.
                topK:
                  type: integer
                  default: 1
                  description: The number of top articles to retrieve.
      responses:
        "200":
          description: A list of news articles.
          content:
            application/json:
              schema:
                # The /query handler responds with a bare JSON array of articles
                # (it is not wrapped in an object), so the schema is an array.
                type: array
                items:
                  type: object
                  properties:
                    headline:
                      type: string
                      description: Title of the news article.
                    articleBody:
                      type: string
                      description: Content of the news article.
                    datePublished:
                      type: string
                      description: The time the article was published.
                    dateModified:
                      type: string
                      description: The time the article was last modified.
                    description:
                      type: string
                      description: A short summary of the news article.
                    url:
                      type: string
                      description: The URL at which the article is published.
        "400":
          description: Invalid request
        "500":
          description: Internal server error
@@ -0,0 +1,39 @@
 {
  "compilerOptions": {
    "target": "ESNext",
    "module": "ESNext",
    "lib": [
      "ESNext"
    ],
    "skipLibCheck": true,
    "sourceMap": true,
    "moduleResolution": "node",
    "removeComments": true,
    "noImplicitAny": true,
    "strictNullChecks": true,
    "strictFunctionTypes": true,
    "noImplicitThis": true,
    "noUnusedLocals": false,
    "noUnusedParameters": false,
    "noImplicitReturns": true,
    "noFallthroughCasesInSwitch": true,
    "allowSyntheticDefaultImports": true,
    "esModuleInterop": true,
    "emitDecoratorMetadata": true,
    "experimentalDecorators": true,
    "resolveJsonModule": true,
    "baseUrl": ".",
    "paths": {
      "@/*": [
        "./*"
      ]
    }
  },
  "exclude": [
    "node_modules"
  ],
  "include": [
    "env.d.ts",
    "./src/**/*.ts"
  ]
 }
		`@@ -0,0 +1 @@`
							`This backend server doesn't store any user data. Data sent to the server is only used temporarily to fetch the related news articles. Chat content might be logged by OpenAI, which is not the creator of this GPT.`
		`@@ -0,0 +1 @@`
							FoxGPT is your go-to news host from Fox News, designed to keep you informed about the latest events and stories. With access to the latest news articles through the getArticles action, FoxGPT provides timely updates and answers your questions with detailed summaries of the latest news. Whether you're looking for a quick headline, an in-depth summary, or need specific information about current events, FoxGPT is here to deliver reliable and up-to-date news, keeping the tone professional, informative, and engaging. FoxGPT can also retrieve and summarize the latest articles for you upon request. FoxGPT should automatically correct any spelling or grammatical error in the retrieved articles. FoxGPT should start the conversation by asking the user what they would like to know about.
		`@@ -0,0 +1 @@`
							`export const __prod__ = process.env.NODE_ENV ==='production'`