~tsep-dev/tsep/tsep2

« back to all changes in this revision

Viewing changes to src/TSEP/Bundle/AdminBundle/Job/ProcessIndexingRequestJob.php

  • Committer: xaav
  • Date: 2011-09-27 01:31:36 UTC
  • Revision ID: git-v1:3c3f2e8d21ccd506f3cd12b2650591f6532368fb
First commit

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
<?php
 
2
 
 
3
namespace TSEP\Bundle\AdminBundle\Job;
 
4
 
 
5
use Symfony\Component\DependencyInjection\Container;
 
6
use Xaav\QueueBundle\JobQueue\Job\AbstractJob;
 
7
use TSEP\Component\Indexer\Engine\CrawlingEngine;
 
8
use TSEP\Component\Indexer\Engine\IndexingEngine;
 
9
use Xaav\QueueBundle\JobQueue\Job\JobInterface;
 
10
use TSEP\Bundle\SearchBundle\Entity\Profile;
 
11
 
 
12
/**
 * Queue job that crawls and indexes pages for a single search profile.
 *
 * The crawling engine is seeded with the profile's URL and regular
 * expression in init(); each call to process() crawls one page, parses it
 * with the indexing engine and persists the result through Doctrine.
 */
class ProcessIndexingRequestJob extends AbstractJob implements JobInterface
{
    /**
     * Profile whose URL and regular expression drive the crawl.
     *
     * @var Profile
     */
    protected $profile;

    /**
     * @var CrawlingEngine
     */
    protected $crawlingEngine;

    /**
     * @var IndexingEngine
     */
    protected $indexingEngine;

    /**
     * @var Container
     */
    protected $container;

    /**
     * @param Profile $profile Profile to be indexed by this job.
     */
    public function __construct(Profile $profile)
    {
        $this->profile = $profile;
    }

    /**
     * Stores the service container on the job.
     *
     * @param Container|null $container Container to store, or null to clear it.
     */
    public function setContainer(Container $container = null)
    {
        $this->container = $container;
    }

    /**
     * Creates and configures the crawling and indexing engines.
     *
     * NOTE(review): nothing in this class calls init(); presumably the parent
     * AbstractJob invokes it before process() -- confirm.
     */
    protected function init()
    {
        $this->indexingEngine = new IndexingEngine();
        $this->crawlingEngine = new CrawlingEngine();

        //TODO: Add this to the profile
        $this->crawlingEngine->setUserAgent('The Search Engine Project version 2.0');

        // Seed the crawler from the profile: the regular expression is handed
        // to the engine and the profile URL becomes the first queued URL.
        $this->crawlingEngine->setRegularExpression($this->profile->getRegex());
        $this->crawlingEngine->addQueuedURL($this->profile->getUrl());

        //TODO: Add custom stopwords
        $this->indexingEngine->setStopwords(array());
    }

    /**
     * Crawls one page, parses it and persists the parsed result.
     *
     * @param mixed $count Not used by this implementation.
     *
     * @return bool True when the crawler returned a page and it was persisted
     *              and flushed; false when the crawler returned nothing.
     */
    public function process($count)
    {
        //TODO: Save state between passes

        $crawled = $this->crawlingEngine->crawl();
        if (!$crawled) {
            // The crawler produced no page on this pass.
            return false;
        }

        $page = $this->indexingEngine->parse($crawled);

        // NOTE(review): get() is not defined in this class; presumably it is
        // inherited from AbstractJob and resolves a container service -- confirm.
        $em = $this->get('doctrine')->getEntityManager();
        $em->persist($page);
        $em->flush();

        return true;
    }
}
 
 
\ No newline at end of file