1
package freeguide.build.preparedata;
3
import freeguide.common.lib.general.StringHelper;
4
import freeguide.common.lib.grabber.HtmlHelper;
5
import freeguide.common.lib.grabber.HttpBrowser;
7
import org.w3c.dom.Document;
8
import org.w3c.dom.Element;
10
import org.xml.sax.Attributes;
11
import org.xml.sax.SAXException;
15
import java.text.MessageFormat;
17
import java.util.Arrays;
19
import java.util.TimeZone;
20
import java.util.TreeMap;
21
import java.util.regex.Matcher;
22
import java.util.regex.Pattern;
24
import javax.xml.parsers.DocumentBuilderFactory;
25
import javax.xml.transform.Transformer;
26
import javax.xml.transform.TransformerFactory;
27
import javax.xml.transform.dom.DOMSource;
28
import javax.xml.transform.stream.StreamResult;
31
* Prepares information about all Hallmark channel country sites and their languages, and writes it to an XML file.
33
* @author Alex Buloichik (mailto: alex73 at zaval.org)
35
public class PrepareHallmarkInfo
37
// Pattern for a country-specific site URL; group 1 captures the two-letter
// lowercase host prefix.
// NOTE(review): the dots are not escaped, so each one matches any character.
protected static final Pattern RE_CNTRY_URL =
38
Pattern.compile( "http://([a-z]{2}).hallmarkchannel.com" );
39
// Pattern for a LANG=... parameter; group 1 captures a run of uppercase
// letters, digits and underscores.
protected static final Pattern RE_LANG =
40
Pattern.compile( "LANG=([A-Z0-9_]+)" );
41
// URL of the USA site; getCntry( ) compares against it literally.
protected static final String STR_CNTRY_USA =
42
"http://www.hallmarkchannel.com";
43
// Page that main( ) loads and parses with HandlerCountries.
protected static final String URL_CHOOSE_COUNTRY =
44
"http://www.hallmarkchannel.com/chooseCountry.jsp";
45
// Relative path of the XML file that main( ) writes.
protected static final String SRC_INFO_FILE_PATH =
46
"src/resources/plugins/grabber/hallmark/info.xml";
47
// Charset name; NOTE(review): not referenced in the visible part of this file.
protected static final String UTF8_CHARSET = "UTF-8";
52
* @param args command-line arguments (they appear to be unused)
54
* @throws Exception propagated from page loading, DOM construction or XML output
56
// Entry point. Prints the available time zone IDs, reads the country list
// from URL_CHOOSE_COUNTRY, builds a DOM with one <country> element (and its
// <language> children) per country, and writes it to SRC_INFO_FILE_PATH.
// NOTE(review): this listing has lines missing (braces and some statements);
// the comments below describe only what is visible.
public static void main( final String[] args ) throws Exception
58
// Print every available time zone ID in sorted order.
String[] timezones = TimeZone.getAvailableIDs( );
59
Arrays.sort( timezones );
61
for( String tz : timezones )
63
System.out.println( tz );
66
// Load the country-selection page and collect option text -> option value
// pairs into countries.countries.
final HttpBrowser browser = new HttpBrowser( );
67
browser.loadURL( URL_CHOOSE_COUNTRY );
69
HandlerCountries countries = new HandlerCountries( );
70
browser.parse( countries );
71
System.out.println( );
74
DocumentBuilderFactory.newInstance( ).newDocumentBuilder( )
76
// Root element of the generated document.
final Element docHallmark = doc.createElement( "hallmark" );
77
doc.appendChild( docHallmark );
81
for( final Map.Entry<String, String> entry : countries.countries
84
String country = entry.getKey( );
85
String url = entry.getValue( );
89
"Country {0} ({1}/{2}) - {3}", country, i,
90
countries.countries.size( ), url ) );
92
// Derive the country id from the site URL.
final String id = getCntry( url );
93
System.out.println( "url = " + url + " cntry = " + id );
97
// NOTE(review): presumably reached when no id could be derived; the guarding
// condition is not visible in this listing.
System.out.println( "Error read url: " + url );
102
// One <country> element per site, carrying its id, display name and URL.
final Element docCountry = doc.createElement( "country" );
103
docCountry.setAttribute( "id", id );
104
docCountry.setAttribute( "country", country );
105
docCountry.setAttribute( "url", url );
107
// One <language> child per entry returned by getLanguages( ).
for( final Map.Entry<String, String> lang : getLanguages( url, id )
110
final Element docLanguage = doc.createElement( "language" );
111
docLanguage.setAttribute( "name", lang.getKey( ) );
112
docLanguage.setAttribute( "id", lang.getValue( ) );
113
docCountry.appendChild( docLanguage );
116
docHallmark.appendChild( docCountry );
120
// Serialize the DOM to the info file with indentation enabled.
final Transformer xformer =
121
TransformerFactory.newInstance( ).newTransformer( );
122
xformer.setOutputProperty( "indent", "yes" );
125
new DOMSource( doc ),
126
new StreamResult( new File( SRC_INFO_FILE_PATH ) ) );
130
// Derives a country id from a site URL. A URL handled by RE_CNTRY_URL yields
// its captured host prefix in upper case; otherwise the URL is compared with
// STR_CNTRY_USA.
// NOTE(review): the match call guarding group( 1 ) and the values returned by
// the STR_CNTRY_USA and fallback branches are on lines missing from this
// listing.
protected static String getCntry( final String url )
133
Matcher m = RE_CNTRY_URL.matcher( url );
137
// Group 1 is the two-letter host prefix, e.g. "uk" becomes "UK".
return m.group( 1 ).toUpperCase( );
141
// The USA site uses the "www" host, which the two-letter pattern cannot
// capture, so it is compared literally.
if( STR_CNTRY_USA.equals( url ) )
152
// Collects the languages offered by one country site. Builds the
// weekly-schedule page address from url and cntry, takes the name -> value
// pairs gathered by HandlerLanguages, and maps each name to the code that
// RE_LANG captures from the value.
// NOTE(review): the calls that load and parse the page, the match condition
// and the return statement are on lines missing from this listing.
protected static Map<String, String> getLanguages(
153
final String url, final String cntry ) throws Exception
155
final HttpBrowser browser = new HttpBrowser( );
157
url + "/framework.jsp?BODY=weekSchedCal.jsp&CNTRY=" + cntry );
159
HandlerLanguages h = new HandlerLanguages( );
162
Map<String, String> langs = h.getLanguages( );
163
// TreeMap keeps the result sorted by language name.
Map<String, String> result = new TreeMap<String, String>( );
165
for( final Map.Entry<String, String> entry : langs.entrySet( ) )
167
Matcher m = RE_LANG.matcher( entry.getValue( ) );
171
// Group 1 is the language code that follows "LANG=".
result.put( entry.getKey( ), m.group( 1 ) );
175
// NOTE(review): presumably reached when RE_LANG finds no code in the option
// value; the condition is not visible in this listing.
System.err.println( "Invalid language: " + entry.getValue( ) );
179
/*HallmarkParserSchedule parserTimeZone =
180
new HallmarkParserSchedule( null, null, cntry.equals( "US" ) );
181
browser.parse( parserTimeZone );*/
185
// Content handler that collects option text -> option value pairs from the
// <option> elements of the <select> whose name attribute is "CNTRY".
protected static class HandlerCountries extends HtmlHelper.DefaultContentHandler
187
// Collected results, sorted by option text (TreeMap).
Map<String, String> countries = new TreeMap<String, String>( );
188
// Gates option handling. NOTE(review): presumably true only while inside the
// CNTRY select; the assignments are on lines missing from this listing.
protected boolean process = false;
189
// Value attribute of the option being read; null when the option was
// rejected or has already been stored.
protected String currentOptionValue;
190
// Accumulates the text content of the current option.
protected StringBuffer currentText = new StringBuffer( );
195
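* @param uri namespace URI of the element
196
* @param localName local name of the element
197
* @param qName qualified name of the element; compared against "select" and "option"
198
* @param atts attributes of the element; its "name" and "value" attributes are read
200
* @throws SAXException per the handler contract; the visible code does not throw it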
202
// Handles an opening tag. Recognises the <select name="CNTRY"> element and,
// while process is set, begins a new <option>: its value attribute is
// remembered and the text buffer is cleared.
// NOTE(review): the statement executed when the CNTRY select is recognised is
// on a line missing from this listing.
public void startElement(
203
String uri, String localName, String qName, Attributes atts )
207
"select".equals( qName )
208
&& "CNTRY".equals( atts.getValue( "name" ) ) )
212
else if( process && "option".equals( qName ) )
214
currentOptionValue = atts.getValue( "value" );
215
currentText.setLength( 0 );
218
// An option is rejected (value reset to null) when its value is missing,
// passes the StringHelper.EMPTY_STRING check, or does not end with
// "hallmarkchannel.com".
( currentOptionValue == null )
219
|| StringHelper.EMPTY_STRING.equals(
221
|| !currentOptionValue.endsWith(
222
"hallmarkchannel.com" ) )
224
currentOptionValue = null;
232
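* @param uri namespace URI of the element
233
* @param localName local name of the element
234
* @param qName qualified name of the element; compared against "select" and "option"
236
* @throws SAXException per the handler contract; the visible code does not throw it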
238
// Handles a closing tag. A closing </option> whose value was retained stores
// the accumulated text -> value pair in countries and clears the value.
public void endElement( String uri, String localName, String qName )
241
// NOTE(review): the body of this branch is on a line missing from this
// listing; presumably it ends processing of the select.
if( "select".equals( qName ) )
246
process && "option".equals( qName )
247
&& ( currentOptionValue != null ) )
249
// NOTE(review): the option text is used as the key without trim( ), unlike
// HandlerLanguages.endElement. Confirm whether that is intended.
countries.put( currentText.toString( ), currentOptionValue );
250
currentOptionValue = null;
257
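* @param ch buffer holding the character data
258
* @param start index in ch of the first character to append
259
* @param length number of characters to append
261
* @throws SAXException per the handler contract; the visible code does not throw it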
263
// Appends the reported character data to currentText, but only while an
// option with a retained value is being read.
public void characters( char[] ch, int start, int length )
266
if( currentOptionValue != null )
268
currentText.append( ch, start, length );
273
// Content handler that collects data from the <option> elements of the
// <select> whose name attribute is "LANG".
protected static class HandlerLanguages extends HtmlHelper.DefaultContentHandler
275
// Collected results, sorted by key (TreeMap).
protected Map<String, String> languages =
276
new TreeMap<String, String>( );
277
// Gates option handling. NOTE(review): presumably true only while inside the
// LANG select; the assignments are on lines missing from this listing.
protected boolean process = false;
278
// Value attribute of the option being read; null when the option was
// rejected or has already been stored.
protected String currentOptionValue;
279
// Accumulates the text content of the current option.
protected StringBuffer currentText = new StringBuffer( );
284
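* @param uri namespace URI of the element
285
* @param localName local name of the element
286
* @param qName qualified name of the element; compared against "select" and "option"
287
* @param atts attributes of the element; its "name" and "value" attributes are read
289
* @throws SAXException per the handler contract; the visible code does not throw it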
291
// Handles an opening tag. Recognises the <select name="LANG"> element and,
// while process is set, begins a new <option>: its value attribute is
// remembered and the text buffer is cleared.
// NOTE(review): the statement executed when the LANG select is recognised is
// on a line missing from this listing.
public void startElement(
292
String uri, String localName, String qName, Attributes atts )
296
"select".equals( qName )
297
&& "LANG".equals( atts.getValue( "name" ) ) )
301
else if( process && "option".equals( qName ) )
303
currentOptionValue = atts.getValue( "value" );
304
currentText.setLength( 0 );
307
// An option is rejected (value reset to null) when its value is missing or
// equals StringHelper.EMPTY_STRING.
( currentOptionValue == null )
308
|| StringHelper.EMPTY_STRING.equals(
309
currentOptionValue ) )
311
currentOptionValue = null;
319
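* @param uri namespace URI of the element
320
* @param localName local name of the element
321
* @param qName qualified name of the element; compared against "select" and "option"
323
* @throws SAXException per the handler contract; the visible code does not throw it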
325
// Handles a closing tag. A closing </option> whose value was retained pairs
// the trimmed option text with that value and then clears the value.
public void endElement( String uri, String localName, String qName )
328
// NOTE(review): the body of this branch is on a line missing from this
// listing; presumably it ends processing of the select.
if( "select".equals( qName ) )
333
process && "option".equals( qName )
334
&& ( currentOptionValue != null ) )
337
// NOTE(review): these are the arguments of a call whose first line is missing
// from this listing; presumably languages.put( ... ).
currentText.toString( ).trim( ), currentOptionValue );
338
currentOptionValue = null;
345
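* @param ch buffer holding the character data
346
* @param start index in ch of the first character to append
347
* @param length number of characters to append
349
* @throws SAXException per the handler contract; the visible code does not throw it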
351
// Appends the reported character data to currentText, but only while an
// option with a retained value is being read.
public void characters( char[] ch, int start, int length )
354
if( currentOptionValue != null )
356
currentText.append( ch, start, length );
363
* @return DOCUMENT_ME!
365
public Map<String, String> getLanguages( )