<?php
/********************************************************************
* FluxOracle Embeddings Core System
* Author: AiFlux (Тони Ангелчовски)
********************************************************************/
/**
 * Generates OpenAI text embeddings, stores them in the `embeddings` table and
 * performs a brute-force cosine-similarity search over the stored vectors.
 *
 * Failures are reported through log() and signalled to callers with
 * null / false / [] return values; public methods do not throw on API or
 * database errors that surface as Exception.
 */
class FluxOracleEmbeddings
{
    /** @var mixed Value returned by config.php; the 'openai_api_key' entry is read from it. */
    private $cfg;

    /** @var mixed Database handle/wrapper returned by fluxoracle_db(). */
    private $db;

    /**
     * Loads config.php from this directory and obtains the shared DB handle.
     */
    public function __construct()
    {
        $this->cfg = include __DIR__ . '/config.php';
        $this->db = fluxoracle_db();
    }

    /****************************************************************
     * GENERATE EMBEDDINGS VIA OPENAI
     ****************************************************************/

    /**
     * Requests an embedding vector for the given text from the OpenAI API.
     *
     * @param string $text Text to embed; it is truncated to at most 6000 bytes
     *                     without splitting a UTF-8 character.
     * @return array|null  The embedding (list of floats) or null when the text
     *                     is blank, the request fails, or the response carries
     *                     no embedding.
     */
    public function generateEmbedding($text)
    {
        $text = (string) $text;

        // Strict comparison: empty() would also reject the legitimate text "0".
        if (trim($text) === '') {
            return null;
        }

        // Cap the request size at 6000 bytes.
        $text = $this->truncate($text, 6000);

        $apiKey = $this->cfg['openai_api_key'] ?? '';
        if ($apiKey === '') {
            $this->log("[EMBEDDING] OpenAI API key is not configured");
            return null;
        }

        $body = json_encode([
            "model" => "text-embedding-3-small",
            "input" => $text,
            "encoding_format" => "float"
        ]);
        if ($body === false) {
            // Typically caused by invalid UTF-8 in the input.
            $this->log("[EMBEDDING] Failed to encode request: " . json_last_error_msg());
            return null;
        }

        $ch = curl_init("https://api.openai.com/v1/embeddings");
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HTTPHEADER => [
                "Content-Type: application/json",
                "Authorization: Bearer " . $apiKey
            ],
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_TIMEOUT => 30
        ]);
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $curlError = curl_error($ch);
        curl_close($ch);

        if ($response === false) {
            // Transport-level failure (DNS, TLS, timeout, ...): there is no HTTP body to show.
            $this->log("[EMBEDDING] cURL Error: " . $curlError);
            return null;
        }

        if ($httpCode !== 200) {
            $this->log("[EMBEDDING] OpenAI Error: $httpCode - " . $this->truncate((string) $response, 200));
            return null;
        }

        $data = json_decode($response, true);
        if (isset($data['data'][0]['embedding']) && is_array($data['data'][0]['embedding'])) {
            $this->log("[EMBEDDING] Generated embedding for text: " . $this->truncate($text, 50) . "...");
            return $data['data'][0]['embedding'];
        }

        $this->log("[EMBEDDING] No embedding in response");
        return null;
    }

    /****************************************************************
     * STORE EMBEDDING IN DATABASE
     ****************************************************************/

    /**
     * Generates an embedding for $content and upserts it into `embeddings`.
     *
     * @param string   $content    Text to embed.
     * @param string   $sourceType Stored in the `source` column.
     * @param mixed    $sourceId   Stored in the `xf_post_id` column.
     * @param array    $metadata   JSON-encoded into the `metadata` column.
     * @return bool True when the statement executed successfully, false otherwise.
     */
    public function storeEmbedding($content, $sourceType = 'forum_post', $sourceId = null, $metadata = [])
    {
        $embedding = $this->generateEmbedding($content);
        if (!$embedding) {
            $this->log("[EMBEDDING] Failed to generate embedding");
            return false;
        }

        // The vector is persisted in PHP serialize() format; semanticSearch() reads it back.
        $vectorBlob = serialize($embedding);
        $metadataJson = json_encode($metadata);

        try {
            $stmt = $this->pdo()->prepare("
                INSERT INTO embeddings (xf_post_id, vector, source, metadata)
                VALUES (?, ?, ?, ?)
                ON DUPLICATE KEY UPDATE vector = ?, source = ?, metadata = ?
            ");
            $success = $stmt->execute([
                $sourceId, $vectorBlob, $sourceType, $metadataJson,
                $vectorBlob, $sourceType, $metadataJson
            ]);

            if ($success) {
                $this->log("[EMBEDDING] Successfully stored embedding for $sourceType ID: $sourceId");
            } else {
                $this->log("[EMBEDDING] Failed to store embedding for $sourceType ID: $sourceId");
            }
            return $success;
        } catch (Exception $e) {
            $this->log("[EMBEDDING] DB Error: " . $e->getMessage());
            return false;
        }
    }

    /****************************************************************
     * SEMANTIC SEARCH - FIND SIMILAR CONTENT
     ****************************************************************/

    /**
     * Finds stored embeddings most similar to the query text.
     *
     * Only the 1000 most recent rows (by id) are compared.
     *
     * @param string $query         Text to search for.
     * @param int    $limit         Maximum number of results returned.
     * @param float  $minSimilarity Minimum cosine similarity for a row to qualify.
     * @return array List of ['post_id', 'similarity', 'source', 'metadata'] entries,
     *               ordered by similarity descending; empty on any failure.
     */
    public function semanticSearch($query, $limit = 5, $minSimilarity = 0.7)
    {
        $queryEmbedding = $this->generateEmbedding($query);
        if (!$queryEmbedding) {
            $this->log("[SEMANTIC] Failed to generate query embedding");
            return [];
        }

        // Load the candidate embeddings from the database.
        try {
            $stmt = $this->pdo()->prepare("
                SELECT id, xf_post_id, vector, source, metadata
                FROM embeddings
                ORDER BY id DESC
                LIMIT 1000 -- Ограничаваме за производителност
            ");
            $stmt->execute();
            $allEmbeddings = $stmt->fetchAll();
        } catch (Exception $e) {
            $this->log("[SEMANTIC] DB Error: " . $e->getMessage());
            return [];
        }

        $results = [];
        foreach ($allEmbeddings as $item) {
            // SECURITY: unserialize() on stored data can instantiate objects;
            // allowed_classes => false restricts it to scalars and arrays.
            $storedEmbedding = unserialize($item['vector'], ['allowed_classes' => false]);
            if (!is_array($storedEmbedding) || !$storedEmbedding) {
                continue;
            }

            $similarity = $this->cosineSimilarity($queryEmbedding, $storedEmbedding);
            if ($similarity >= $minSimilarity) {
                $results[] = [
                    'post_id' => $item['xf_post_id'],
                    'similarity' => $similarity,
                    'source' => $item['source'],
                    'metadata' => json_decode($item['metadata'] ?? '{}', true)
                ];
            }
        }

        // Highest similarity first.
        usort($results, function ($a, $b) {
            return $b['similarity'] <=> $a['similarity'];
        });

        return array_slice($results, 0, $limit);
    }

    /****************************************************************
     * COSINE SIMILARITY CALCULATION
     ****************************************************************/

    /**
     * Computes the cosine similarity of two equally sized numeric lists.
     *
     * @param array $vecA Zero-indexed list of numbers.
     * @param array $vecB Zero-indexed list of numbers.
     * @return float Similarity in [-1, 1]; 0.0 when the sizes differ or either
     *               vector has zero magnitude.
     */
    private function cosineSimilarity($vecA, $vecB)
    {
        $size = count($vecA);
        if ($size !== count($vecB)) {
            return 0.0;
        }

        $dotProduct = 0.0;
        $normA = 0.0;
        $normB = 0.0;
        for ($i = 0; $i < $size; $i++) {
            $dotProduct += $vecA[$i] * $vecB[$i];
            $normA += $vecA[$i] * $vecA[$i];
            $normB += $vecB[$i] * $vecB[$i];
        }

        if ($normA == 0 || $normB == 0) {
            return 0.0;
        }

        return $dotProduct / (sqrt($normA) * sqrt($normB));
    }

    /****************************************************************
     * GET RELATED CONTEXT FOR GPT
     ****************************************************************/

    /**
     * Builds a text block describing the posts most similar to the query.
     *
     * @param string $query      Text to search for.
     * @param int    $maxResults Maximum number of similar posts to include.
     * @return string The formatted context, or "" when nothing similar was found.
     */
    public function getSemanticContext($query, $maxResults = 3)
    {
        $similarPosts = $this->semanticSearch($query, $maxResults);
        if (empty($similarPosts)) {
            return "";
        }

        $context = "\n\n[СВЪРЗАНИ ДИСКУСИИ ОТ АРХИВА]";
        foreach ($similarPosts as $post) {
            $similarityPercent = round($post['similarity'] * 100, 1);

            // Resolve the text that represents this post in the context block.
            $postContent = $this->getPostContentForContext($post['post_id']);
            if ($postContent) {
                $context .= "\n\n🔍 **Сходство {$similarityPercent}%**:\n";
                $context .= "```\n" . $postContent . "\n```";
            }
        }

        return $context . "\n";
    }

    /****************************************************************
     * GET POST CONTENT FOR CONTEXT
     ****************************************************************/

    /**
     * Returns the text used to represent a post inside the semantic context.
     *
     * Placeholder: the real content could be fetched from the XenForo API, a
     * local cache table or pre-stored data. For now only the ID is reported.
     *
     * @param mixed $postId Value of the `xf_post_id` column.
     * @return string
     */
    private function getPostContentForContext($postId)
    {
        return "Пост #$postId от архива (сходно съдържание)";
    }

    /****************************************************************
     * BATCH PROCESSING FOR EXISTING CONTENT
     ****************************************************************/

    /**
     * Embeds and stores every item, pausing 0.5 s between items to avoid
     * overloading the API.
     *
     * @param iterable $contentItems Items with a 'content' key and optional
     *                               'source_type', 'source_id', 'metadata' keys.
     * @param int      $batchSize    Currently unused; kept for interface compatibility.
     * @return array ['processed' => int, 'failed' => int, 'total' => int]
     */
    public function processExistingContent($contentItems, $batchSize = 10)
    {
        $processed = 0;
        $failed = 0;
        $isFirst = true;

        foreach ($contentItems as $item) {
            // Throttle between requests only; no pause before the first or after the last item.
            if (!$isFirst) {
                usleep(500000); // 0.5 second
            }
            $isFirst = false;

            $success = $this->storeEmbedding(
                $item['content'] ?? '',
                $item['source_type'] ?? 'forum_post',
                $item['source_id'] ?? null,
                $item['metadata'] ?? []
            );

            if ($success) {
                $processed++;
            } else {
                $failed++;
            }
        }

        return [
            'processed' => $processed,
            'failed' => $failed,
            'total' => $processed + $failed
        ];
    }

    /****************************************************************
     * INTERNAL HELPERS
     ****************************************************************/

    /**
     * Resolves the PDO connection from the DB handle.
     *
     * Accepts a PDO instance, a wrapper with a getPdo() method, or a wrapper
     * with an accessible `pdo` property, so that every query in this class
     * reaches the connection the same way.
     *
     * @return mixed The PDO connection (presumably a PDO instance; the wrapper
     *               type is not visible in this file).
     */
    private function pdo()
    {
        if ($this->db instanceof PDO) {
            return $this->db;
        }
        if (is_object($this->db) && method_exists($this->db, 'getPdo')) {
            return $this->db->getPdo();
        }
        return $this->db->pdo;
    }

    /**
     * Truncates a string to at most $maxBytes bytes without splitting a UTF-8
     * character when the mbstring extension is available.
     *
     * @param string $text
     * @param int    $maxBytes
     * @return string
     */
    private function truncate($text, $maxBytes)
    {
        if (function_exists('mb_strcut')) {
            return mb_strcut($text, 0, $maxBytes, 'UTF-8');
        }
        return substr($text, 0, $maxBytes);
    }

    /****************************************************************
     * LOGGING
     ****************************************************************/

    /**
     * Writes a log line via fluxoracle_log() when that function exists,
     * otherwise appends it to embeddings-log.txt in this directory.
     *
     * @param string $message
     * @return void
     */
    private function log($message)
    {
        if (function_exists('fluxoracle_log')) {
            fluxoracle_log($message);
            return;
        }

        // Fallback: simple file logging; LOCK_EX keeps concurrent writes from interleaving.
        $logFile = __DIR__ . '/embeddings-log.txt';
        file_put_contents($logFile, date('[Y-m-d H:i:s] ') . $message . "\n", FILE_APPEND | LOCK_EX);
    }
}
/**
 * Returns the shared FluxOracleEmbeddings instance, creating it on first use.
 *
 * @return FluxOracleEmbeddings
 */
function fluxoracle_embeddings()
{
    static $instance;

    if (!isset($instance)) {
        $instance = new FluxOracleEmbeddings();
    }

    return $instance;
}
?>