~widelands-dev/widelands-website/trunk

« back to all changes in this revision

Viewing changes to pybb/util.py

updated BeautifulSoup to a version compatible with Python 3

Show diffs side-by-side

added added

removed removed

Lines of Context:
2
2
import random
3
3
import traceback
4
4
import json
 
5
import re
5
6
 
6
 
from BeautifulSoup import BeautifulSoup
 
7
from bs4 import BeautifulSoup, NavigableString
7
8
from datetime import datetime
8
9
from django.shortcuts import render
9
10
from django.http import HttpResponse
11
12
from django.utils.translation import check_for_language
12
13
from django.utils.encoding import force_unicode
13
14
from django import forms
14
 
from django.template.defaultfilters import urlize as django_urlize
15
15
from django.core.paginator import Paginator, EmptyPage, InvalidPage
16
16
from django.conf import settings
17
17
from pybb import settings as pybb_settings
145
145
    return form
146
146
 
147
147
 
 
148
PLAIN_LINK_RE = re.compile(r'(http[s]?:\/\/[-a-zA-Z0-9@:%._\+~#=/?]+)')


def exclude_code_tag(bs4_string):
    """Filter callable for BeautifulSoup.find_all(string=...).

    Accept a text node only when it is not the direct child of a
    <code> tag and it contains a plain http(s) link matching
    PLAIN_LINK_RE.
    """
    if bs4_string.parent.name == 'code':
        return False
    return PLAIN_LINK_RE.search(bs4_string) is not None
 
156
 
 
157
 
148
158
def urlize(data):
    """Urlize plain text links in the HTML contents.

    Parse *data* as HTML, find every text node that contains a bare
    http(s) URL and is not inside a <code> tag (see exclude_code_tag),
    and wrap each URL found by PLAIN_LINK_RE in an <a> tag.  All other
    content is kept unchanged.

    Returns the resulting HTML as a string.
    """
    soup = BeautifulSoup(data, 'lxml')
    for found_string in soup.find_all(string=exclude_code_tag):
        new_content = []
        for string_or_tag in found_string.parent.contents:
            try:
                # re.split with a capturing group keeps the matched
                # URLs in the result, so we can rebuild the content
                # piece by piece.
                for string in PLAIN_LINK_RE.split(string_or_tag):
                    if string.startswith('http'):
                        # Wrap the bare URL in an anchor tag.
                        tag = soup.new_tag('a')
                        tag['href'] = string
                        tag.string = string
                        # NOTE(review): this writes a 'nofollow'
                        # attribute; the HTML standard expects
                        # rel="nofollow" — confirm the intent before
                        # changing the output markup.
                        tag['nofollow'] = 'true'
                        new_content.append(tag)
                    else:
                        # Plain text between URLs: re-wrap as a bs4
                        # string so it can live in .contents.
                        new_content.append(NavigableString(string))
            except TypeError:
                # string_or_tag is a Tag, not a string, so re.split()
                # cannot process it — keep it unchanged.  (Was a bare
                # `except:`, which also hid real errors.)
                new_content.append(string_or_tag)
        # Apply the new content in place of the old mixed content.
        found_string.parent.contents = new_content
    # str(), not unicode(): unicode() does not exist on Python 3 and
    # this code targets the Python-3-compatible bs4.
    return str(soup)
169
190