4
from pysqlite2 import dbapi2 as sqlite3
9
import threading, Queue, time, datetime
11
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
12
from urlparse import urlparse
13
from os import curdir, sep, makedirs
16
global_app_data_dir = "./"
18
# --------------------------------
21
def log_error(messagetext):
22
"""Log an error message."""
23
print "Error: ", messagetext
26
def log_message(messagetext):
27
"""Log a general message."""
30
def verbose_print(verbose, messagetext):
31
"""Output a general message if verbose is turned on."""
35
# --------------------------------
39
def get_app_data_dir():
40
if sys.platform.startswith("darwin"):
41
path = os.path.expanduser("~/Library/Application Support/MySQL/DocLibrary/")
42
elif sys.platform.startswith("win"):
43
path = os.path.expandvars("${APPDATA}/MySQL/DocLibrary/")
45
path = os.path.expanduser("~/.mysql/DocLibrary/")
47
if not os.path.exists(path):
52
def get_lib_db_path():
53
path = os.path.join(get_app_data_dir(), "mysqldoclib.sqlite")
54
if os.path.exists(path):
56
return os.path.join(global_app_data_dir, "mysqldoclib.sqlite")
59
def get_webui_db_path():
60
path = os.path.join(get_app_data_dir(), "mysqldoclib_webui.sqlite")
61
if os.path.exists(path):
63
return os.path.join(global_app_data_dir, "mysqldoclib_webui.sqlite")
65
def get_user_db_path():
66
path = os.path.join(get_app_data_dir(), "mysqldoclib_usr.sqlite")
67
if not os.path.exists(path):
69
# Connect to the database
70
db_conn = sqlite3.connect(path)
77
CREATE TABLE IF NOT EXISTS page_rating (
78
id_page_rating INTEGER PRIMARY KEY AUTOINCREMENT,
79
id_manual_type INTEGER NOT NULL,
83
c.execute("CREATE INDEX IF NOT EXISTS idx_page_rating ON page_rating(title)")
86
CREATE TABLE IF NOT EXISTS page_view (
87
id_page_view INTEGER PRIMARY KEY AUTOINCREMENT,
91
c.execute("CREATE INDEX IF NOT EXISTS idx_page_view ON page_view(path)")
94
CREATE TABLE IF NOT EXISTS lib_search (
95
id_lib_search INTEGER PRIMARY KEY AUTOINCREMENT,
96
search_text TEXT NOT NULL,
99
c.execute("CREATE INDEX IF NOT EXISTS idx_lib_search ON lib_search(search_text)")
102
CREATE TABLE IF NOT EXISTS lib_status (
103
id_lib_file INTEGER PRIMARY KEY AUTOINCREMENT,
108
c.execute("INSERT INTO lib_status(id_lib, downloaded, is_selected) VALUES (1, 1, 1)")
114
log_error("Error while creating the usr database. %r" % e)
119
log_error("An error occurred while creating the usr database at %s. %r" % (path, e))
123
def get_module_installation_dir():
126
def read_file_content(filename, encoding="utf-8"):
127
"""Read the contents of a text file"""
129
# Open file in read mode using the correct encoding
130
f = codecs.open(filename, "r", encoding)
132
# Return the file contents
135
log_error("An error occurred reading from the file %s." % filename)
139
log_error("An error occurred opening the file %s." % filename)
141
def save_file_content(filename, content, encoding="utf-8"):
142
"""Read the contents of a text file"""
144
# Open file in read mode using the correct encoding
145
f = codecs.open(filename, "w", encoding)
147
# Return the file contents
150
log_error("An error occurred reading from the file %s." % filename)
154
log_error("An error occurred opening the file %s." % filename)
156
# --------------------------------
157
# Library creation functions
159
def execute_sql_script(db_conn, filename, encoding = "UTF-8"):
160
"""Executes a SQL script file"""
163
sql_commands = read_file_content(filename, encoding)
171
# Run the SQL Script to create the database
172
c.executescript(sql_commands)
179
log_error("An error occurred while executing the SQL script. %r" % e);
184
log_error("An error occurred aquiring a database cursor. %r" % e);
187
# --------------------------------
188
# Manual page caching functions
190
def html_remove_tags(data):
191
"""Removes all HTML tags from a given string"""
192
# other expression: '<[^>]*?>'
193
p = re.compile(r'<[^<]*?/?>')
194
return p.sub(' ', data)
196
def html_remove_extra_spaces(data):
197
"""Removes all extra spaces from a given string"""
198
p = re.compile(r'\s+')
199
return p.sub(' ', data)
201
def html_get_page_title(data):
202
"""Returns the chapter and title of a manual html page string"""
203
p = re.compile(r'\<title\>(?P<Chapter>(Chapter\s)?[A-Z]?[\d\.]+)\s*(?P<Title>.*)\<\/title\>')
204
match = p.search(data)
207
return match.group('Chapter'), match.group('Title')
209
p = re.compile(r'\<title\>(?P<Title>.*)\<\/title\>')
210
match = p.search(data)
212
return "", match.group('Title')
216
def html_apply_page_modifications(data):
217
"""Make required changes to the html"""
218
p = re.compile(r'\<\/title\>')
219
data = p.sub('</title><link rel="stylesheet" type="text/css" href="/webui/stylesheets/docs.css" />', data)
221
p = re.compile(r'\starget=\"_top\"')
222
data = p.sub('', data)
225
class ManualPageData():
226
"""Data class that holds information about a manual page"""
233
class ScanManualPageThread(threading.Thread):
235
manual_page_data_queue = 0
240
# get a filename from the queue, do not block
241
filename = self.filename_queue.get(False)
242
#full_filename = os.path.join(self.path, filename)
245
# Open HTML file as a utf-8 file
246
#html_string = read_file_content(full_filename)
250
html_file = self.lib_zip_file.open(filename)
251
html_string = unicode(html_file.read(), "utf-8")
255
# Make file modifications
256
#save_file_content(full_filename, html_apply_page_modifications(html_string))
258
# Add new page data object to the queue
259
manual_page_data = ManualPageData()
260
manual_page_data.filename = filename
261
manual_page_data.chapter, manual_page_data.title = html_get_page_title(html_string)
262
manual_page_data.content = html_remove_extra_spaces(html_remove_tags(html_string))
263
manual_page_data.html_content = html_apply_page_modifications(html_string)
265
self.manual_page_data_queue.put(manual_page_data)
267
log_error("An error processing the page. %r" % e)
273
def process_manual_page_data_queue(db_conn, lib_zip_file, path, id_manual, file_nr, file_count, manual_page_data_queue):
275
# Get database cursor
279
# Check if there are manual_page_data objects in the queue and if so, process them
282
# Fetch manual_page_data object from the queue if available, do not block
283
manual_page_data = manual_page_data_queue.get(False)
287
c.execute("INSERT OR REPLACE INTO web_object(path, content_type, content, allow_embedded_code_execution) VALUES(?, ?, ?, ?)",
288
["/" + path + "/" + os.path.basename(manual_page_data.filename), "text/html", manual_page_data.html_content, 0])
290
# Insert manual page and content
291
c.execute("INSERT INTO page(id_manual, id_web_object, title, chapter) VALUES (?, ?, ?, ?)",
292
[id_manual, c.lastrowid, manual_page_data.title, manual_page_data.chapter])
293
c.execute("INSERT INTO page_content(id_page, title, content) VALUES (?, ?, ?)",
294
[c.lastrowid, manual_page_data.title, manual_page_data.content])
297
if (files_processed % 100 == 0):
298
log_message("%d file(s) of %d processed..." % (file_nr + files_processed, file_count));
301
log_error("An error occurred while inserting the page values for %s. %r" % (manual_page_data.filename, e))
305
return files_processed
307
log_error("An error occurred aquiring a database cursor. %r" % e)
310
def cache_pages(db_conn, manual_ids):
312
# Get database cursor
315
# Get all available manuals versions
316
c.execute("""-- Select all manuals
317
SELECT m.id_manual, m.directory, m.description
319
ORDER BY m.id_manual""")
323
log_error("An error occurred while executing the SQL commands. %r" % e)
327
# Loop over all manuals and cache the contents of the file directory
328
for id_manual, directory, description in rows:
330
# if the number of manuals has been limited
332
# only include the given manual
333
if not str(id_manual) in manual_ids:
334
log_message("Skipping manual %s." % description)
337
zip_file = directory + ".zip"
339
# Locate the zip file, first in the user app dir
340
# zip_file_path = os.path.join(os.path.join(get_app_data_dir(), 'repository'), zip_file)
341
# if not os.path.exists(zip_file_path):
342
# then in the ./repository dir
343
zip_file_path = os.path.join(os.path.join('.', 'repository'), zip_file)
344
if not os.path.exists(zip_file_path):
345
log_error("The zip file %s cannot be found." % zip_file_path)
348
log_message("Processing %s ..." % zip_file_path)
350
lib_zip_file = zipfile.ZipFile(zip_file_path, 'r')
352
#path = os.path.join('./', directory)
353
#files = [file for file in os.listdir(path) if file.lower().endswith(".html")]
354
files = [file for file in lib_zip_file.namelist() if file.lower().endswith(".html")]
355
file_count = len(files)
358
log_message("Caching manual %s, processing %d file(s) ..." % (description, file_count))
360
# Generate synchronization objects
361
filename_queue = Queue.Queue()
362
manual_page_data_queue = Queue.Queue()
364
# Fill filename queue
365
for f in files: #[:1]:
366
filename_queue.put(f)
368
time_start = datetime.datetime.now()
373
thread = ScanManualPageThread()
374
thread.filename_queue = filename_queue
375
thread.manual_page_data_queue = manual_page_data_queue
377
thread.lib_zip_file = lib_zip_file
382
# Wait for threads to complete
384
# Process all objects in queue
385
file_nr += process_manual_page_data_queue(db_conn, lib_zip_file, directory, id_manual, file_nr, file_count, manual_page_data_queue)
387
# Check if there are still threads that are alive
388
for index, the_thread in enumerate(Pool):
389
if the_thread.isAlive():
395
# Process all objects still left in queue after the threads have all been closed
396
file_nr += process_manual_page_data_queue(db_conn, lib_zip_file, directory, id_manual, file_nr, file_count, manual_page_data_queue)
398
# Get database cursor
401
# Update manual to be installed
402
generation_date = datetime.datetime.now().strftime("%Y-%m-%d")
403
c.execute("UPDATE manual SET installed=1, generation_date=? WHERE id_manual=?", (generation_date, id_manual))
406
log_error("An error occurred while updating the manual entry. %r" % e)
412
time_duration = datetime.datetime.now() - time_start
414
log_message("%d file(s) of %d processed. Duration %d.%d seconds." % (file_nr, file_count,
415
time_duration.seconds, time_duration.microseconds))
418
# Add the images as web_objects
419
files = [file for file in lib_zip_file.namelist() if file.lower().endswith(".png")]
421
log_message("Processing %d image file(s) ..." % len(files))
423
for filename in files:
425
# Get database cursor
429
image_file = lib_zip_file.open(filename)
430
image_file_string = image_file.read()
431
if not image_file_string:
435
c.execute("INSERT OR REPLACE INTO web_object(path, content_type, content, allow_embedded_code_execution) VALUES(?, ?, ?, ?)",
436
["/" + directory + "/images/" + os.path.basename(filename), "image/png", sqlite3.Binary(image_file_string), 0])
439
log_error("An error occurred while inserting the image file %s. %r" % (filename, e))
441
log_error("An error occurred aquiring a database cursor. %r" % e)
445
log_error("An error occurred while executing the SQL commands. %r" % e)
450
log_error("An error occurred aquiring a database cursor. %r" % e)
453
def rebuild_lib(verbose, doclib_db_name, manual_ids):
455
# Check which database name to use
456
if not doclib_db_name or doclib_db_name == "":
457
doclib_db_name = get_lib_db_path()
459
# Connect to the database
460
db_conn = sqlite3.connect(doclib_db_name)
463
log_message("Creating the documentation library structure...")
465
if execute_sql_script(db_conn, './mysqldoclib.sql'):
466
log_message("Documentation library created successfully.")
468
cache_pages(db_conn, manual_ids)
470
log_message( "Documentation library cached has been filled.")
472
log_message("The documentation library structure has not been created.")
477
log_error("An error occurred while opening the database connection. %r" % e)
480
def rebuild_webui(verbose, webui_db_name):
482
# Check which database name to use
483
if not webui_db_name or webui_db_name == "":
484
webui_db_name = get_lib_db_path()
486
# Connect to the database
487
db_conn = sqlite3.connect(webui_db_name)
489
log_message("Creating the documentation library webui structure...")
491
execute_sql_script(db_conn, './mysqldoclib_webui.sql')
495
for path, dirs, files in os.walk("webui"):
497
if name.endswith(".html") or name.endswith(".wbp") or name.endswith(".css"):
498
# Open HTML file as a utf-8 file
499
html_string = read_file_content(os.path.join(path, name))
503
log_message("Path: %s, File: %s" % (path, name) )
505
c.execute("INSERT OR REPLACE INTO web_object(path, content_type, content, allow_embedded_code_execution) VALUES(?, ?, ?, ?)",
506
["/" + path + "/" + os.path.basename(name), "text/html", html_string, 1])
509
if name.endswith(".png"):
510
content_type = "image/png"
511
elif name.endswith(".gif"):
512
content_type = "image/gif"
513
elif name.endswith(".ico"):
514
content_type = "image/vnd.microsoft.icon"
516
img_file = open(os.path.join(path, name), "rb")
518
log_message("Path: %s, File: %s" % (path, name) )
520
c.execute("INSERT OR REPLACE INTO web_object(path, content_type, content, allow_embedded_code_execution) VALUES(?, ?, ?, ?)",
521
["/" + path + "/" + os.path.basename(name), content_type, sqlite3.Binary(img_file.read()), 0])
529
log_error("An error occurred while opening the database connection. %r" % e)
531
# --------------------------------
532
# HTTP server functions
535
# Connect to the database
536
db_conn = sqlite3.connect(get_lib_db_path())
537
# Attach webui and usr database
541
c.execute("ATTACH DATABASE ? AS webui", (get_webui_db_path(),))
542
c.execute("ATTACH DATABASE ? AS usr", (get_user_db_path(),))
546
log_error("Could not attach webui or usr database. %r" % e)
550
def build_search_result_page(search_string, manual_type):
551
# Escape search string
552
html_escape_table = {
560
search_string_html = "".join(html_escape_table.get(c,c) for c in search_string)
561
search_string = search_string.replace(";", "\\;").replace("'", "\\'")
565
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
566
<link rel="stylesheet" type="text/css" href="stylesheets/ui.css" />
567
<title>MySQL Workbench Documentation Library Search Result</title>
569
<body class="search">'''
572
# Connect to the database
573
db_conn = open_lib_db()
575
# Get database cursor
579
c.execute("SELECT hits FROM usr.lib_search WHERE search_text = ?", (search_string,))
582
c.execute("UPDATE usr.lib_search SET hits = hits + 1 WHERE search_text = ?", (search_string,))
584
c.execute("INSERT INTO usr.lib_search (search_text, hits) VALUES(?, 1)", (search_string,))
590
SELECT p.chapter, p.title, wpo.path, pr.rating, m.directory,
591
offsets(page_content) as fs_offsets, snippet(page_content) as snippet
593
JOIN page p ON p.id_page = pc.id_page
594
JOIN manual m ON m.id_manual = p.id_manual
595
JOIN web_object wpo ON p.id_web_object = wpo.id_web_object
596
LEFT OUTER JOIN page_rating pr ON p.title = pr.title
597
LEFT OUTER JOIN usr.page_rating pru ON p.title = pru.title
598
WHERE page_content MATCH '""" + search_string + "'"
599
if int(manual_type) > 0:
600
sql_select += " AND m.id_manual = " + str(manual_type) + " "
603
ORDER BY pru.rating DESC, substr(fs_offsets, 1, 1), pr.rating DESC
608
c.execute(sql_select)
612
html += "<h2>MySQL Document Manual Search</h2><hr/><p class='search_header'>"
613
html += "Search Result for <b>`%s`</b> returned " % search_string_html
616
html += "more than <b>50</b> matches.<br>Only the first 50 matches are displayed."
618
html += "<b>%d</b> matches." % len(rows)
622
for chapter, title, path, rating, directory, offsets, snippet in rows:
623
html += "<p class='search'><a href='" + path + "'>"
624
html += chapter + " <b>" + title + "</b></a></p>"
625
html += "<p class='search_snippet'>" + snippet + "</p><br>"
628
log_error("An error occurred while executing the SQL command. %r" % e)
629
html += "<br>An error occurred while executing the SQL command.<br>%r" % str(e)
636
log_error("An error occurred while opening the database connection. %r" % e)
643
class DocsLibHandler(BaseHTTPRequestHandler):
650
self.send_response(301)
651
self.send_header("Location", "/webui/index.wbp")
655
url_parsed = urlparse(url_full)
656
url = url_parsed.path
660
if url.endswith(".html"):
661
content_type = "text/html"
662
elif url.endswith(".css"):
663
content_type = "text/css"
664
elif url.endswith(".png"):
665
content_type = "image/png"
666
elif url.endswith(".gif"):
667
content_type = "image/gif"
668
elif url.endswith(".ico"):
669
content_type = "image/vnd.microsoft.icon"
670
elif url.endswith(".wbp"):
671
content_type = "text/html"
673
if len(content_type) > 0:
674
if url.endswith("search.wbp"):
675
log_message("Query: %s" % url_parsed.query)
677
d = dict([(k,v) for k,junk,v in [line.partition("=") for line in url_parsed.query.split("&")]])
679
search_string = d["search"].replace("+", " ").replace("%22", "\"")
680
manual_type = d["manual_type"].strip()
682
log_message("Search started, search_string: %s, manual_type: %d" % (search_string, int(manual_type)))
684
self.send_response(200)
685
self.send_header("Content-type", content_type)
687
self.wfile.write(build_search_result_page(search_string, int(manual_type)).encode("utf-8"))
690
# check for web object in database
691
# Get database cursor
693
c = self.server.db_conn.cursor()
697
if url.startswith("/webui"):
702
SELECT content_type, content, allow_embedded_code_execution
703
FROM """ + database + """web_object
704
WHERE path = ?""", [url])
709
for wo_content_type, wo_content, wo_allow_embedded_code_execution in rows:
710
self.send_response(200)
711
self.send_header("Content-type", wo_content_type)
713
if wo_content_type.startswith("text/"):
714
self.wfile.write(wo_content.encode("utf-8"))
716
self.wfile.write(wo_content)
719
c.execute("SELECT hits FROM usr.page_view WHERE path = ?", (url,))
722
c.execute("UPDATE usr.page_view SET hits = hits + 1 WHERE path = ?", (url,))
724
c.execute("INSERT INTO usr.page_view (path, hits) VALUES(?, 1)", (url,))
726
self.server.db_conn.commit()
729
f = open(curdir + sep + url)
730
self.send_response(200)
731
self.send_header("Content-type", content_type)
733
self.wfile.write(f.read())
736
self.send_error(404, "File Not Found: %s" % url)
737
log_error("File not found. %r" % e)
740
self.send_error(404, "An error occurred. %r" % e)
741
log_error("An error occurred while executing the SQL command. %r" % e)
746
log_error("An error occurred while opening the database connection. %r" % e)
751
self.send_error(404, "File Not Found: %s" % url)
755
ctype, pdict = cgi.parse_header(self.headers.getheader("content-type"))
756
if ctype == "text/plain":
757
log_message("Header Items: %s" % self.headers.items())
759
# Get submitted values
761
if self.headers.has_key('content-length'):
762
length = int( self.headers['content-length'] )
763
values = self.rfile.read(length)
765
# AppleWebKit uses & as separators between values
766
if self.headers.has_key('user-agent'):
767
if "AppleWebKit" in self.headers['user-agent']:
768
values = values.replace("&", "\n")
770
d = dict([(k,v) for k,junk,v in [line.partition("=") for line in values.split("\n")]])
772
search_string = d["search"].strip()
773
manual_type = d["manual_type"].strip()
775
log_message("Search started, search_string: %s, manual_type: %d" % (search_string, int(manual_type)))
777
self.send_response(301)
781
self.wfile.write(build_search_result_page(search_string, int(manual_type)).encode("utf-8"))
784
self.send_error(404, "Wrong content-type" % ctype)
786
self.wfile.write("<HTML>Wrong content-type<BR><BR>")
789
verbose_print(self.server.verbose, "An Exception was raised while processing the POST handler. %r" % e)
791
def log_message(self, message, *args):
792
verbose_print(self.server.verbose, message % args)
794
def log_request(self, code='-', size='-'):
795
verbose_print(self.server.verbose, '"%s" %s %s' % (self.requestline, str(code), str(size)))
797
def log_error(self, message, *args):
798
verbose_print(self.server.verbose, message % args)
800
def serve_docs(port = 8080, verbose = 1, datadir= "./", ready_event=None, bind=''):
801
global global_app_data_dir
802
global_app_data_dir = datadir
805
# Connect to the database
806
db_conn = open_lib_db()
808
server = HTTPServer((bind, port), DocsLibHandler)
809
server.verbose = verbose
810
server.db_conn = db_conn
812
verbose_print(verbose, "Started HTTP server on port %d." % port)
815
server.serve_forever()
819
log_error("An error occurred while opening the database connection. %r" % e)
821
except KeyboardInterrupt:
822
verbose_print(verbose, "Keyboard interrupt received, shutting down HTTP server.")
823
server.socket.close()
826
# --------------------------------
830
print """MySQL Document Library Standalone Application - mysqldoclib.py
831
Usage: wbdocs.py -h -pPort -v [build-lib | rebuild-lib | build-webui | rebuild-webui | serve-docs]
833
This applications serves and maintains a documentation library for MySQL products.
835
build-lib | rebuild-lib
836
These commands create the documentation library repository.
838
build-webui | rebuild-webui
839
These commands rebuild the webui repository that is used to server the web pages.
842
This argument launches a web server to server the documentation library"""
846
opts, args = getopt.getopt(sys.argv[1:], "hp:d:v", ["help", "port=", "db="])
847
except getopt.GetoptError, e:
848
print "Invalid option passed to module. ", str(e)
860
elif o in ("-h", "--help"):
863
elif o in ("-p", "--port"):
867
assert False, "The specified port must be a number."
868
elif o in ("-d", "--db"):
871
assert False, "Unhandled option."
874
if args[0] in ("build-lib", "rebuild-lib"):
875
rebuild_lib(verbose, db_name, args[1:])
876
if args[0] in ("build-webui", "rebuild-webui"):
877
rebuild_webui(verbose, db_name)
878
elif args[0] == "serve-docs":
879
serve_docs(port, verbose)
883
if __name__ == "__main__":
b'\\ No newline at end of file'