🚀 Built my first no-code RAG pipeline for my portfolio!

Powered by SambaNova + Pinecone + Velatir + n8n + Lovable 💡
Now my site has a live chatbot that answers questions about my journey.

👉 Excited to push this further!

#AI #RAG #NoCode #Portfolio

Very awesome — thanks for sharing! For anybody interested: we built it using E5-Mistral-7B-Instruct for embeddings with dimension handling (1024d → 1536d) and Llama 4 Maverick for chat with RAG tool integration.

First, we set up Supabase with the vector extension:

```sql
-- Enable vector extension
-- pgvector provides the VECTOR type + similarity operators; pg_trgm enables
-- trigram-based fuzzy text matching.
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Store document chunks with embeddings.
-- One row per chunk; deleting the parent user_files row cascades to its chunks.
CREATE TABLE file_chunks (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  file_id TEXT NOT NULL REFERENCES user_files(id) ON DELETE CASCADE,
  user_id TEXT NOT NULL, -- denormalized so per-user filtering needs no join
  chunk_index INTEGER NOT NULL, -- 0-based position of the chunk within the file
  content TEXT NOT NULL,
  embedding VECTOR(1536), -- SambaNova embeddings padded to 1536d
  metadata JSONB DEFAULT '{}',
  created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Indexes for fast vector similarity search (cosine distance).
-- NOTE(review): ivfflat clusters are derived from rows present when the index
-- is created — consider building/rebuilding this index after data is loaded.
CREATE INDEX ON file_chunks USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);

-- Full-text search index (used by the keyword/hybrid search paths).
CREATE INDEX ON file_chunks USING gin (to_tsvector('english', content));

-- Fuzzy text matching via trigrams.
CREATE INDEX ON file_chunks USING gin (content gin_trgm_ops);

-- Add RAG status to user files.
-- rag_status values observed in the processor: 'pending' | 'completed' | 'failed'
ALTER TABLE user_files ADD COLUMN rag_status TEXT DEFAULT 'pending';
ALTER TABLE user_files ADD COLUMN rag_processed_at TIMESTAMP WITH TIME ZONE;
ALTER TABLE user_files ADD COLUMN chunk_count INTEGER DEFAULT 0;
```

Then we built the RAG processor that handles PDF text extraction and embedding generation:

```ts
// lib/services/rag-processor.ts
import OpenAI from 'openai';
import { createClient, type SupabaseClient } from '@supabase/supabase-js';
import * as pdfjsLib from 'pdfjs-dist';

export class RAGProcessor {
  private supabase: SupabaseClient;
  private sambanova: OpenAI;

  constructor() {
    // Uses the service-role key — this class must only ever run server-side.
    this.supabase = createClient(
      process.env.NEXT_PUBLIC_SUPABASE_URL!,
      process.env.SUPABASE_SERVICE_ROLE_KEY!
    );

    // SambaNova exposes an OpenAI-compatible API, so we reuse the OpenAI SDK
    // with a different base URL.
    this.sambanova = new OpenAI({
      baseURL: 'https://api.sambanova.ai/v1',
      apiKey: process.env.SAMBANOVA_API_KEY!
    });
  }

  /**
   * Runs the full ingestion pipeline for one uploaded file:
   * fetch metadata → extract text → chunk → embed → store → mark status.
   * Failures are caught and recorded on the file row rather than rethrown.
   */
  async processFile(fileId: string): Promise<void> {
    console.log(`[RAG] Processing file ${fileId}`);

    try {
      // Get file info from the database; surface query errors instead of
      // silently treating them as "file not found".
      const { data: file, error: fileError } = await this.supabase
        .from('user_files')
        .select('*')
        .eq('id', fileId)
        .single();

      if (fileError) throw fileError;
      if (!file) throw new Error('File not found');

      // Extract text based on file type (only PDF is supported for now).
      let text: string;
      if (file.file_type === 'application/pdf') {
        text = await this.extractPDFText(file.file_url);
      } else {
        throw new Error(`Unsupported file type: ${file.file_type}`);
      }

      // Create overlapping chunks so answers spanning a boundary stay retrievable.
      const chunks = this.createChunks(text);
      console.log(`[RAG] Created ${chunks.length} chunks`);

      // Generate embeddings using SambaNova.
      const embeddings = await this.generateEmbeddings(chunks);

      // Store in the Supabase vector table.
      await this.storeChunks(fileId, file.user_id, chunks, embeddings);

      // Mark the file as fully processed.
      await this.updateFileStatus(fileId, 'completed', chunks.length);

    } catch (error: unknown) {
      // `catch` bindings are `unknown` under strict TS — narrow before reading .message.
      const message = error instanceof Error ? error.message : String(error);
      console.error(`[RAG] Error processing file ${fileId}:`, error);
      await this.updateFileStatus(fileId, 'failed', 0, message);
    }
  }

  /** Downloads a PDF and concatenates the text content of every page. */
  private async extractPDFText(fileUrl: string): Promise<string> {
    const response = await fetch(fileUrl);
    // fetch() does not reject on HTTP error statuses — check explicitly.
    if (!response.ok) {
      throw new Error(`Failed to download PDF (HTTP ${response.status})`);
    }
    const arrayBuffer = await response.arrayBuffer();
    const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;

    let fullText = '';
    // pdf.js pages are 1-indexed.
    for (let i = 1; i <= pdf.numPages; i++) {
      const page = await pdf.getPage(i);
      const textContent = await page.getTextContent();
      const pageText = textContent.items
        .filter(item => 'str' in item) // skip marked-content items with no text
        .map(item => item.str)
        .join(' ');
      fullText += pageText + '\n';
    }
    return fullText;
  }

  /**
   * Splits text into ~chunkSize-character chunks on sentence boundaries,
   * carrying roughly `overlap` characters (~overlap/10 words) of trailing
   * context into the next chunk.
   *
   * NOTE(review): splitting on /[.!?]+/ discards the original punctuation;
   * every sentence is re-terminated with '. ' below.
   */
  private createChunks(text: string, chunkSize: number = 1000, overlap: number = 200): string[] {
    const chunks: string[] = [];
    const sentences = text.split(/[.!?]+/).filter(s => s.trim().length > 0);

    let currentChunk = '';

    for (const sentence of sentences) {
      // Use the actual accumulated length instead of a separately-maintained
      // counter — the original `currentSize` counter drifted from the real
      // chunk length after the overlap reset.
      if (currentChunk.length + sentence.length > chunkSize && currentChunk.trim()) {
        chunks.push(currentChunk.trim());

        // Seed the next chunk with the tail of the current one for overlap.
        const words = currentChunk.split(' ');
        const overlapWords = words.slice(-Math.floor(overlap / 10));
        currentChunk = overlapWords.join(' ') + ' ';
      }

      currentChunk += sentence + '. ';
    }

    if (currentChunk.trim()) {
      chunks.push(currentChunk.trim());
    }

    return chunks;
  }

  /**
   * Embeds chunks in batches of 10 via SambaNova, then pads/truncates every
   * vector to exactly 1536 dimensions so it fits the VECTOR(1536) column.
   * Zero-padding does not change cosine similarity rankings.
   */
  private async generateEmbeddings(chunks: string[]): Promise<number[][]> {
    const embeddings: number[][] = [];
    const batchSize = 10;

    for (let i = 0; i < chunks.length; i += batchSize) {
      const batch = chunks.slice(i, i + batchSize);

      const response = await this.sambanova.embeddings.create({
        model: 'E5-Mistral-7B-Instruct',
        input: batch,
      });

      // Handle SambaNova embedding dimensions (e.g. 1024d → 1536d, 4096d → 1536d).
      const paddedEmbeddings = response.data.map(d => {
        let embedding = d.embedding;

        if (embedding.length > 1536) {
          // Truncate if larger (e.g., 4096d).
          embedding = embedding.slice(0, 1536);
        } else if (embedding.length < 1536) {
          // Pad with zeros if smaller (e.g., 1024d).
          embedding = [...embedding, ...new Array(1536 - embedding.length).fill(0)];
        }
        return embedding;
      });

      embeddings.push(...paddedEmbeddings);
    }

    console.log(`[RAG] Generated ${embeddings.length} embeddings (1536d)`);
    return embeddings;
  }

  /**
   * Bulk-inserts chunk rows into file_chunks.
   * @throws the Supabase error if the insert fails, or an Error on a
   *         chunk/embedding count mismatch.
   */
  private async storeChunks(
    fileId: string,
    userId: string,
    chunks: string[],
    embeddings: number[][]
  ): Promise<void> {
    if (chunks.length !== embeddings.length) {
      throw new Error(
        `Chunk/embedding count mismatch: ${chunks.length} vs ${embeddings.length}`
      );
    }

    const chunkData = chunks.map((content, index) => ({
      file_id: fileId,
      user_id: userId,
      chunk_index: index,
      content,
      embedding: `[${embeddings[index].join(',')}]`, // Supabase vector format
    }));

    const { error } = await this.supabase
      .from('file_chunks')
      .insert(chunkData);

    if (error) throw error;
  }

  /**
   * Persists processing status on the parent user_files row.
   * (Referenced but missing from the original excerpt; uses only the columns
   * the schema defines: rag_status, rag_processed_at, chunk_count.)
   */
  private async updateFileStatus(
    fileId: string,
    status: 'pending' | 'completed' | 'failed',
    chunkCount: number,
    errorMessage?: string
  ): Promise<void> {
    if (errorMessage) {
      // No error column exists in the schema shown, so log the detail instead.
      console.error(`[RAG] File ${fileId} failed: ${errorMessage}`);
    }

    const { error } = await this.supabase
      .from('user_files')
      .update({
        rag_status: status,
        rag_processed_at: new Date().toISOString(),
        chunk_count: chunkCount,
      })
      .eq('id', fileId);

    // Status bookkeeping is best-effort: log, don't throw, so a status-update
    // failure can't mask the original processing error.
    if (error) console.error(`[RAG] Failed to update status for ${fileId}:`, error);
  }
}
```

Then we implemented hybrid search with semantic + keyword matching. Our search service combines vector similarity with traditional keyword search:

```ts
// lib/services/rag-search.ts
export class RAGSearchService {
  private supabase: SupabaseClient;
  private sambanova: OpenAI;

  constructor() {
    // Service-role client — this service must only run server-side.
    this.supabase = createClient(
      process.env.NEXT_PUBLIC_SUPABASE_URL!,
      process.env.SUPABASE_SERVICE_ROLE_KEY!
    );

    // SambaNova's API is OpenAI-compatible, so the OpenAI SDK is reused.
    this.sambanova = new OpenAI({
      baseURL: 'https://api.sambanova.ai/v1',
      apiKey: process.env.SAMBANOVA_API_KEY!
    });
  }

  /**
   * Searches a user's document chunks.
   *
   * - 'semantic': vector similarity only
   * - 'keyword':  text search only
   * - 'hybrid':   both, combined via the hybrid_search_chunks RPC (default)
   *
   * Returns formatted chunks, a prompt-ready context string, and citations.
   * (semanticSearchChunks / keywordSearchChunks / formatChunks /
   * extractCitations are defined elsewhere in this service — omitted here.)
   */
  async search(params: {
    query: string;
    userId: string;
    maxResults?: number;
    searchType?: 'semantic' | 'keyword' | 'hybrid';
    fileIds?: string[];
  }) {
    const { query, userId, maxResults = 10, searchType = 'hybrid', fileIds } = params;

    let chunks: any[] = [];

    if (searchType === 'semantic' || searchType === 'hybrid') {
      // Generate the query embedding using SambaNova.
      const embeddingResponse = await this.sambanova.embeddings.create({
        model: 'E5-Mistral-7B-Instruct',
        input: query,
      });

      // BUGFIX: the original only padded short embeddings; an oversized
      // embedding (e.g. 4096d) was passed through unchanged and would not
      // match the VECTOR(1536) column. Mirror the ingest pipeline exactly.
      const queryEmbedding = this.fitEmbeddingTo1536(
        embeddingResponse.data[0].embedding
      );

      if (searchType === 'hybrid') {
        // Hybrid search: combine semantic + keyword scores.
        chunks = await this.hybridSearchChunks(
          queryEmbedding, query, userId, maxResults, fileIds
        );
      } else {
        // Pure semantic search.
        chunks = await this.semanticSearchChunks(
          queryEmbedding, userId, maxResults, fileIds
        );
      }
    } else {
      // Keyword-only search.
      chunks = await this.keywordSearchChunks(query, userId, maxResults, fileIds);
    }

    // Attach file info, build a context block, and extract citations.
    const formattedChunks = await this.formatChunks(chunks);
    const context = this.buildContext(formattedChunks, query);
    const citations = this.extractCitations(formattedChunks);

    return {
      chunks: formattedChunks,
      formattedContext: context,
      citations
    };
  }

  /**
   * Pads (with zeros) or truncates an embedding to exactly 1536 dimensions,
   * matching how document embeddings are stored at ingest time.
   */
  private fitEmbeddingTo1536(embedding: number[]): number[] {
    if (embedding.length > 1536) {
      return embedding.slice(0, 1536);
    }
    if (embedding.length < 1536) {
      return [...embedding, ...new Array(1536 - embedding.length).fill(0)];
    }
    return embedding;
  }

  /**
   * Delegates hybrid scoring to the hybrid_search_chunks Postgres RPC.
   * The embedding is serialized to pgvector's '[a,b,...]' text format.
   */
  private async hybridSearchChunks(
    queryEmbedding: number[],
    queryText: string,
    userId: string,
    maxResults: number,
    fileIds?: string[]
  ) {
    const { data, error } = await this.supabase.rpc('hybrid_search_chunks', {
      query_embedding: `[${queryEmbedding.join(',')}]`,
      query_text: queryText,
      user_id_param: userId,
      match_threshold: 0.5,
      match_count: maxResults,
      file_ids: fileIds || null
    });

    if (error) throw error;
    return data || [];
  }

  /**
   * Renders retrieved chunks into a prompt-ready context block, one
   * excerpt per chunk, labeled with its source filename.
   */
  private buildContext(chunks: any[], query?: string): string {
    if (!chunks.length) return '';

    let context = 'Based on the uploaded documents:\n\n';

    for (const chunk of chunks) {
      // With a query, show the most relevant snippet; otherwise a plain prefix.
      const excerpt = query ?
        this.extractRelevantExcerpt(chunk.content, query, 150) :
        chunk.content.substring(0, 200) + '...';

      context += `**From ${chunk.filename}:**\n${excerpt}\n\n`;
    }

    return context;
  }
}
```

Then we expose RAG as a tool that SambaNova models can call:

```ts
// lib/tools/rag-search-tool.ts
import { tool } from 'ai';
import { z } from 'zod';

/**
 * AI-SDK tool that lets the chat model search the user's uploaded documents.
 * Always returns an object with `success`, `context`, and `citations` so the
 * model sees a uniform shape on both success and failure.
 */
export const ragSearchTool = tool({
  description: 'Search through uploaded documents to find relevant information.',
  parameters: z.object({
    query: z.string().describe('The search query'),
    search_type: z.enum(['semantic', 'keyword', 'hybrid']).default('hybrid'),
    // Constrain to positive integers so a model can't request 0/negative/fractional counts.
    max_results: z.number().int().positive().default(10),
    file_ids: z.array(z.string()).optional().describe('Specific files to search'),
  }),
  execute: async ({ query, search_type, max_results, file_ids }) => {
    try {
      // Searches run on behalf of the signed-in user only.
      const user = await getUser();
      if (!user) {
        return {
          success: false,
          message: 'User authentication required',
          context: '',
          citations: []
        };
      }

      const results = await ragSearchService.search({
        query,
        userId: user.id,
        maxResults: max_results,
        searchType: search_type,
        fileIds: file_ids
      });

      if (results.chunks.length === 0) {
        return {
          success: false,
          message: 'The search query was too specific. Try more general terms.',
          context: '',
          citations: []
        };
      }

      return {
        success: true,
        context: results.formattedContext,
        citations: results.citations,
        chunks_found: results.chunks.length
      };

    } catch (error: unknown) {
      // `catch` bindings are `unknown` under strict TS — narrow before reading .message.
      const message = error instanceof Error ? error.message : String(error);
      return {
        success: false,
        error: message,   // kept for backward compatibility with existing consumers
        message,          // consistent with the other failure branches above
        context: '',
        citations: []
      };
    }
  }
});
```

If anybody is interested in more details, let me know.

1 Like