1
/***************************************************************************
2
* Copyright (C) 2008 by Jacob Kanev <j_kanev@arcor.de>, *
3
* Thomas Fischer <fischer@unix-ag.uni-kl.de> *
5
* This program is free software; you can redistribute it and/or modify *
6
* it under the terms of the GNU General Public License as published by *
7
* the Free Software Foundation; either version 2 of the License, or *
8
* (at your option) any later version. *
10
* This program is distributed in the hope that it will be useful, *
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13
* GNU General Public License for more details. *
15
* You should have received a copy of the GNU General Public License *
16
* along with this program; if not, write to the *
17
* Free Software Foundation, Inc., *
18
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19
***************************************************************************/
20
#ifndef KBIBTEXWEBQUERYCITESEERX_H
21
#define KBIBTEXWEBQUERYCITESEERX_H
24
#include <qstringlist.h>
28
#include <fileimporterbibtex.h>
38
@author Thomas Fischer <fischer@unix-ag.uni-kl.de>
41
class WebQueryCiteSeerXWidget : public WebQueryWidget
46
/// Constructs the CiteSeerX query widget.
/** @param parent QWidget pointer supplied by the caller (presumably the parent widget of this one -- confirm against the implementation).
@param name Optional C-string name; defaults to 0 (a null pointer) when omitted. */
WebQueryCiteSeerXWidget( QWidget *parent, const char *name = 0 );
50
/// Convenience typedef for a member pointer to a parsing function
52
// Forward declaration: the DataParser typedef that follows names this class
// before the class itself is defined further down in this header.
class WebQueryCiteSeerX;
53
typedef void ( WebQueryCiteSeerX::* DataParser )( const QString & );
55
/// Query the citeseer database.
56
/** This class is used for querying the CiteSeer database. CiteSeerX is still beta, so this class has to be adapted as soon as the CiteSeer people change their web interface. After entering the search term, CiteSeer returns a page with 10 links (one for each paper), and one link for the next 10 hits. This class uses a queue to schedule each reading job, and two parsing functions, one for the summary page and one for each paper result. The BibTeX fields abstract, title, author, year, journal, and pages are found.
57
@author Jacob Kanev <j_kanev@arcor.de> */
59
class WebQueryCiteSeerX : public WebQuery
72
/// Constructs the CiteSeerX web query.
/** @param parent QWidget pointer supplied by the caller (presumably used as the parent for GUI elements created by this query -- confirm against the implementation). */
WebQueryCiteSeerX( QWidget* parent );
75
/// Destructor.
/** Declared virtual, so deleting an instance through a base-class pointer runs this destructor. */
virtual ~WebQueryCiteSeerX();
77
/// Main function: start query.
83
/// Return disclaimer.
86
/// Return disclaimer URL.
87
QString disclaimerURL();
89
/// Return GUI element.
90
WebQueryWidget *widget();
93
/// Callback for cancelling.
98
/// Callback when the job is finished.
99
/** Reads the data from the job and hands it over to the currently set parser. */
100
void getData( KIO::Job *job );
104
/// Parses the main page and schedules single-paper reading jobs.
105
/** Function parses the summary page, and schedules one job for each paper link, and one job for the "Next 10" summary page. */
106
void parseSummaryPage( const QString &data );
108
/// Parses single-paper pages.
109
/** Function reads the "Abstract:" and the "@entrytype{" strings found in the html page. */
110
void parsePaperPage( const QString &data );
112
/// Execute next waiting job.
113
/** Takes the next query out of the queue, sets the appropriate parser, and schedules getData with the URL. */
116
/// Find single bibtex field in html page and add to entry.
117
/** Function uses the first collected text from the description (a regular expression), and adds it as type "type" to the "entry". */
118
void parseForSingleExpression( QString description, const QString &data, BibTeX::Entry *entry, BibTeX::EntryField::FieldType type );
120
/// The currently active parser.
121
DataParser m_currentParser;
123
/// The internet address of CiteSeerX.
124
QString m_citeSeerXServer;
126
/// Number of hits desired by user.
129
/// Number of hits read from summary pages.
132
/// List with waiting queries
133
std::deque<DataRequest> m_queryQueue;
135
/// Pointer to the Qt dialog.
136
WebQueryCiteSeerXWidget *m_widget;