aboutsummaryrefslogtreecommitdiff
blob: c3fda72e58c6527f0bd19e69f9788ff6bcad1c6e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# /etc/gorg/gorg.conf: Configuration file for Gorg

# Root dir, typically, your DocumentRoot 
# (f)cgi scripts find it in their environment but
# the stand-alone webserver and the search engine need it
root = "/home/neysx/gentoo.org/gentoo/xml/htdocs"

# Make webrick listen on given IP (IP only, no host name)
listen = 127.0.0.1

# Mount paths that are not under the root directory (used by stand-alone web server only)
# eg. to mount /cgi-bin which is usually not under the document root
# Note: Those directories will be handled by the stock FileHandler, ie. not by gorg
mount = /cgi-bin on /home/neysx/gentoo.org/gentoo/xml/cgi-bin
mount = /images on /home/neysx/gentoo.org/gentoo/xml/images

# Should gorg accept cookies and pass $param=$value to the xsl transform?
# Default is no (anything but 1 is no)
acceptCookies = 1

# Only read so many lines in xml files to identify stylesheets, use 0 to scan whole file 
headXSL = 12

# Default stylesheet, relative to root dir
defaultXSL = "/xsl/guide.xsl"

# Only used by fastCGI, auto exit after given number of requests (0 means no)
# The fcgi process manager will restart a new instance automatically
# NB: it will NOT exit before at least 1 full minute has elapsed even if you set a very low value
# If you want a really short-lived version, use the cgi instead
# mod_fcgid does its own process recycling and this feature will be obsoleted in a later version
autoKill = 5000

# Allow return of unprocessed xml file if passthru==(anything but 0) appears in URI params
# 0==No, anything else==Yes
passthru = 1

# Pass pathname of requested file in named parameter to the xsl transform
# [a-zA-Z]+ , anything else is ignored and no param is passed
# Default is "link"
linkParam = link

# Pass a param named httphost to the style sheet (== host value from HTTP header)
# 0 or nothing (default) disables this feature
# * will pass the value as received from the user agent or none (http/1.0)
# name alias1 alias2... will pass name when the value sent by the user agent
#                       matches exactly any of name alias1 alias2...
#                       if any alias is *, any value (even nil) will match and name will be passed
# When no value matches, the value received from the user agent is passed
#httphost = mysite www.mysite.org mysite.org alias.mysite.org

# Cache directory. Directory must exist and be writable by whoever runs the server (e.g. apache)
# It must also be writable by the user who runs the stand-alone web server, i.e. not the apache user
# if you want to use both web servers. You can even run both at the same time.
# Default is no cache
cacheDir = "/var/cache/gorg"

# Number of seconds after which a document is considered too old, 0=never
# Can be used to force a refresh or to stress-test the system
#cacheTTL = 86400 # 1 day
cacheTTL = 864000 # or 10 days
#cacheTTL = 600 # or 10 minutes....

# Use a tree of directories under cacheDir that matches the site tree
# Use when your system has problems coping with a huge single cache dir
# 0 means no tree (all files in cacheDir) and is the default
# If you use this, make sure you clean up the cache with gorg -C regularly
cacheTree = 1

# Max size of cache in megabytes
# Please note that cacheSize is used ONLY when cleaning up either
#    when cacheTree==0 and a clean-up is started based on cacheWash (see below)
# or when cacheTree!=0 and `gorg -C` is run
cacheSize = 250

# Max number of files in a given cache directory
# Please note that this limit is also enforced when cacheTree == 0
# in which case it means the max total number of files in the whole cache
maxFiles = 2000

# Support gzip http encoding (ie. mod_deflate)
# 0 means no compression *and* no support for gzip encoding.
# 1-9 gives compression level, 1 least compressed, 9 max compressed
# Cached pages use the same compression level
# Default is 2
zipLevel = 2

# Clean cache automatically and regularly when a store into the cache occurs.
# gorg cleans up if random(value) < 10.
# Set to 0 to disable and rely on gorg --clean-cache being run regularly
# Otherwise, a value<=10 means at every call,
# 100 means 10 percent of stores will also clean the cache
# 1000 means 10 permille (cacheSize will be checked only once every 100 stores)
# Note: gorg only tries to clean the dir it caches to, not the whole cache tree
# Use `gorg -C` or `gorg --clean-cache` to clean up whole cache
cacheWash = 0

# Level of logging that goes to syslog
# OFF, FATAL, ERROR, WARN, INFO, DEBUG = 0, 1, 2, 3, 4, 5
logLevel = 4

#
# Used only by stand-alone webserver
#

# Send hit stats to syslog/stderr/a file
accessLog = "syslog"

# Listen on port (must be >1023 to be run by non-root)
port = 8008

#
# Search engine parameters
#

# Connect string, only mysql is supported at the moment
dbConnect  = DBI:mysql:DB_NAME:HOST_NAME
dbUser     = USENAME
dbPassword = PASSWORD

# Document language can be guessed from the document itself with
# an XPath expression. It should return the language code.
# Only the first 5 characters will be used.
# For instance, to use a root element's lang attribute:
xpath_to_lang = /*[1]/@lang

# If no XPath is given or no lang is found, you can use the file path as a fallback:
# define a regexp to apply to the file path, $1 must yield the language
# For instance, the following one applied to '/doc/en/file.xml' returns 'en'
fpath_to_lang = ^/[^/]+/([^/]+)/.*xml$

# include/exclude directives will be processed in the order they appear below.
# First match will be used to either include or exclude the file.
# If no match is found, file is skipped
# Each directive should define one and only one regexp
# Beware, regexps are not shell globs: .xml means any character followed by xml anywhere in the file name
# .+\.xml$  means one or more characters followed by a dot and ending with xml
# Any file that can't be processed, e.g. because it is not well-formed, will not be indexed

exclude = ^/proj/en/gdp/tests/
exclude = /CVS/
exclude = ^/xsl/
exclude = /draft/
exclude = ^/doc/.+/handbook/2004
exclude = metadoc\.xml$
exclude = /inserts-.+\.xml$
exclude = ^/dyn/
exclude = herds/pkgList.xml
include = ^/.+\.xml$