Rem Rem $Header: rdbms/admin/dbmsxdbt.sql /main/11 2009/10/08 17:21:09 attran Exp $ Rem Rem dbmsxdbt.sql Rem Rem Copyright (c) 2002, 2009, Oracle and/or its affiliates. Rem All rights reserved. Rem Rem NAME Rem dbmsxdbt.sql - XDB conText index setup Rem Rem DESCRIPTION Rem Useful routines and scripts for conText indexes on XDB Rem Rem NOTES Rem Must be connected as SYS to run this script Rem Rem MODIFIED (MM/DD/YY) Rem attran 09/09/09 - 8915200 - IFILTER -> PolicyFilter Rem badeoti 03/19/09 - dbms_xdbz.get_username moved to dbms_xdbz0 Rem llsun 06/19/08 - bug 7137308: xdb.dbms_xdbt Rem petam 02/04/05 - fix for bug 4148632 Rem najain 06/05/03 - dont use XDB_RESINFO Rem najain 08/01/03 - 3071986: index more fields Rem smuralid 01/08/03 - dbms_xdbt: ctx security rewrite Rem sichandr 10/03/02 - getCharsetId: handle Oracle names too Rem rshaikh 08/08/02 - bug2456600: change IndexMemory to be Rem equal to MAX_INDEX_MEMORY Rem smuralid 01/28/02 - globalization support Rem smuralid 01/24/02 - Merged smuralid_repos_search Rem smuralid 01/23/02 - Created Rem Rem Rem Make sure that XDB can see these packages Rem GRANT EXECUTE ON ctxsys.ctx_ddl TO xdb; GRANT EXECUTE ON ctxsys.ctx_output TO xdb; GRANT ctxapp TO xdb; Rem Rem Package to simplify context index creation on XDB Rem CREATE OR REPLACE PACKAGE xdb.dbms_xdbt AUTHID CURRENT_USER IS ------------ -- OVERVIEW -- -- This package provides utilities for creating and managing conText -- indexes on the XDB repository. -- -- The preferred mode of operation is as follows -- (a) drop any existing preferences. dbms_xdbt.dropPreferences -- (b) re-create preferences for the index -- (dbms_xdbt.createPreferences) -- (c) Create the index -- (dbms_xdbt.createIndex) -- Verify that things have gone smoothly using -- "select * from ctx_user_index_errors" -- (d) Setup automatic sync'ing of the index -- (dbms_xdbt.configureAutoSync) -- (e) Sit back and relax -- -- The package spec contains a a list of package variables that -- describe the configuration settings. These are intended to -- cover some of the more basic customizations that installations -- might require, but is not intended to be a complete set. -- -- There are 2 ways to customize this package. -- (a) Use a PL/SQL procedure to set the appropriate package variables -- that control the relevant configurations, and then execute -- the package. Obviously, this only applies to the set of existing -- package variables -- (b) The more general approach is to modify (in place, or as a copy) -- this package to introduce the appropriate customizations -- -- For instance, if you need to change the amount of memory available -- for indexing, you could use option (a). -- -- NOTES: -- If you're using this package as is, please note the following -- (a) Make sure that the LOG_DIRECTORY parameter is set using -- ctx_adm.set_parameter -- Alternately, turn off rowid logging by setting the -- 'LogFile' package variable to the empty string. -- (b) Make sure that the MAX_INDEX_MEMORY parameter is at least -- 128M. Other change the package variable 'IndexMemory' -- appropriately -- ------------ TYPE varcharset IS TABLE OF VARCHAR2(100); ------------------------------------------------------------- -- CONSTANTS ------------------------------------------------------------- ---------------------- -- FILTERING OPTIONS -- -- The following constants describe the kinds of filtering we may want -- to do. -- USE_NULL_FILTER simply sends the document over to the charset converter -- USE_INSO_FILTER uses the IFILTER api of INSO to convert the document -- into HTML -- SKIP_DATA is used to completely ignore the document's contents for -- filtering. (The document metadata is indexed, however) ---------------------- USE_NULL_FILTER CONSTANT PLS_INTEGER := 1; USE_INSO_FILTER CONSTANT PLS_INTEGER := 2; SKIP_DATA CONSTANT PLS_INTEGER := 3; ---------------------- -- Sync options -- -- There are basically two mechanisms of automatic sync provided here. -- SYNC_BY_PENDING_COUNT indicates that when the number of entries in the -- pending queue reaches a threshold, it is time to sync up the index -- SYNC_BY_TIME indicates that the index should be synced up at regular -- intervals -- SYNC_BY_PENDING_COUNT_AND_TIME is a combination of both these strategies -- ---------------------- SYNC_BY_PENDING_COUNT CONSTANT PLS_INTEGER := 1; SYNC_BY_TIME CONSTANT PLS_INTEGER := 2; SYNC_BY_PENDING_COUNT_AND_TIME CONSTANT PLS_INTEGER := 3; SyncTimeOut NUMBER := NULL; ----------------------------------------------------------------------- -- CONFIGURATION SETTINGS -- -- This section contains the default settings for the index. This -- section should be changed as appropriate. -- ----------------------------------------------------------------------- -- The name of the context index IndexName CONSTANT VARCHAR2(32) := 'XDB$CI'; -- The default memory to be used for index creation and sync -- NOTE: This must be less than (or equal to) the MAX_INDEX_MEMORY -- parameter IndexMemory CONSTANT VARCHAR2(32) := '50M'; ------------------------------------------------------------- -- SYNC OPTIONS ------------------------------------------------------------- -- The following section describes the automatic sync policy. -- By default, the auto-sync policy (once it has been configured) -- is to sync based on the pending count. -- Should we sync up based on pending count, or time or both ? AutoSyncPolicy PLS_INTEGER := SYNC_BY_PENDING_COUNT; -- This parameter determines the maximum size of the pending queue -- before the index is sync-ed. Applies only when the sync policy -- is SYNC_BY_PENDING_COUNT or SYNC_BY_TIME_AND_PENDING_COUNT -- MaxPendingCount PLS_INTEGER := 2; -- This parameter determines the interval - in minutes - at -- which the "regular" sync should be performed on the index. -- Applies only to the SYNC_BY_TIME and the SYNC_BY_PENDING_COUNT_AND_TIME -- policies SyncInterval PLS_INTEGER := 60; -- -- This parameter determines how frequently - in minutes - the pending -- queue is polled. Applies only to the SYNC_BY_PENDING_COUNT -- (and the SYNC_BY_PENDING_COUNT_AND_TIME) policies -- CheckPendingCountInterval PLS_INTEGER := 10; ---------------------------------------------------- -- LOGGING OPTIONS -- -- Please ensure that the LOG_DIRECTORY parameter is -- already set if you need rowid logging ---------------------------------------------------- -- -- Logging options. This parameter determines the logfile used - for -- rowid logging during index creation etc. -- Set this parameter to NULL to avoid rowid-logging -- LogFile VARCHAR2(32) := ''; ------------------------------------- -- FILTER OPTIONS -- -- The following classes determine the filtering options based on the -- mime type of the document -- -- The skipFilter_Types list contains a list of regular expressions -- that describe mime types for which the document is *not* to be -- filtered/indexed. (The document metadata, however, is still indexed) -- Use this for document types that cannot really be indexed. Good -- examples of this class are images, audio files etc -- -- The NullFilter_Types list contains a list of regular expressions -- that describe mime types of documents for which no INSO filtering -- is required - these documents should be basically text formats -- and the only filtering required should be character set conversion -- (if needed) -- -- For any given document, the skipFilter_Types list is first scanned -- to determine if any regular expression in that list matches the -- document's content type. If it does, then the document content is -- not indexed. -- Failing this, the NullFilter_Types list is then scanned. If any -- regular expression in this list matches the document's content type, -- the document is sent through character-set conversion -- failing this, the document is filtered using the INSO filter (using the -- IFILTER interfaces) ------------------------------------- SkipFilter_Types varcharset := varcharset('image/%', 'audio/%', 'video/%', 'model/%'); NullFilter_Types varcharset := varcharset('%text/plain', '%text/html', '%text/xml'); ------------------------------------- -- STOPWORD Settings -- -- This list describes the set of stopwords over and above that -- specified by the CTXSYS.DEFAULT_STOPLIST stoplist -- ------------------------------------- StopWords varcharset := varcharset('0','1','2','3','4','5','6', '7','8','9', 'a','b','c','d','e','f','g','h','i', 'j','k','l','m','n','o','p','q','r', 's','t','u','v','w','x','y','z', 'A','B','C','D','E','F','G','H','I', 'J','K','L','M','N','O','P','Q','R', 'S','T','U','V','W','X','Y','Z' ); -------------------------------------- -- LEXER preferences -- -- This parameter determines if multi-language lexers can be -- used. -- Not supported currently, and this parameter should always be FALSE -------------------------------------- UseMultiLexer BOOLEAN := false; -------------------------------------- -- SECTION GROUP -- -- This parameter determines the sectioner to use. -- By default, this is an HTML section group. No zone sections have been -- created - (ie) WITHIN searches are not possible -- If the vast majority of documents are XML or XML-like, consider using -- the AUTO_SECTION_GROUP or the PATH_SECTION_GROUP or even a -- NULL_SECTION_GROUP -------------------------------------- SectionGroup VARCHAR2(100) := 'HTML_SECTION_GROUP'; -------------------------------------- -- PUBLIC INTERFACES -- -- The public APIs exposed by this package -------------------------------------- -- -- This procedure drops all preferences required by the context index -- PROCEDURE dropPreferences; -- -- This procedure creates all preferences required by the context index -- on the XDB repository. -- The set of preferences include Datastore, Storage, Filter, Lexer, -- SectionGroup, Stoplist and Wordlist preferences -- NOTE: This will raise exceptions if any of the preferences already -- exist -- PROCEDURE createPreferences; -- -- Creates the datastore preference -- Will raise an exception if the datastore already exists PROCEDURE createDatastorePref; -- -- Creates the storage preferences -- Will raise an exception if the preference already exists -- PROCEDURE createStoragePref; -- -- Creates the section group -- Will raise an exception if the preference already exists -- PROCEDURE createSectiongroupPref; -- -- Creates the filter preference -- Will raise an exception if the preference already exists -- PROCEDURE createFilterPref; -- -- Creates the lexer preference -- Will raise an exception if the preference already exists -- PROCEDURE createLexerPref; -- -- Creates the stoplist -- Will raise an exception if the preference already exists -- PROCEDURE createStoplistPref; -- -- Creates the wordlist -- Will raise an exception if the preference already exists -- PROCEDURE createWordlistPref; -- -- Creates the index -- This requires the above preferences to have already been created. -- (a) The LOG_DIRECTORY parameter must be set (to enable -- rowid logging during index creation) -- (b) Ensure that the memory size specified to index creation is less than -- the MAX_INDEX_MEMORY parameter -- PROCEDURE createIndex; -- -- Syncs up the index -- This can be used to explicitly sync up the index. -- The preferred mechanism is to set up automatic sync'ing with -- the "configureAutoSync" procedure -- PROCEDURE syncIndex(myIndexName VARCHAR2 := Indexname, myIndexMemory VARCHAR2 := IndexMemory); -- -- Set a suggested time limit on the SYNC operation, in minutes. -- SYNC_INDEX will process as many documents in the queue as possible -- within the time limit. -- The maxtime value of NULL is equivalent to CTX_DDL.MAXTIME_UNLIMITED. -- PROCEDURE setSyncTimeout(timeout IN INTEGER := NULL); -- -- Optimizes the index -- PROCEDURE optimizeIndex; -- -- Configures for automatic sync of the index -- NOTE: The system must be configured for job queues. Also, the -- number of job queue processes must be non-zero -- PROCEDURE configureAutoSync; -- -- Procedure used by dbms_job to automatically sync up the context -- index -- Don't use this directly -- PROCEDURE autoSyncJobByCount(myIndexName VARCHAR2, myMaxPendingCount NUMBER, myIndexMemory VARCHAR2); -- -- Procedure used by dbms_job to automatically sync up the context -- index -- Don't use this directly -- PROCEDURE autoSyncJobByTime(myIndexName VARCHAR2, myIndexMemory VARCHAR2); -- -- The user-datastore procedure -- Do *not* call this directly -- PROCEDURE xdb_datastore_proc(rid IN ROWID, outlob IN OUT NOCOPY CLOB); end dbms_xdbt; / show errors; CREATE OR REPLACE PACKAGE BODY xdb.dbms_xdbt AS ----------------------------------------------------------------------- -- CONFIGURATION SETTINGS -- -- This section contains the default settings for the index. This -- section should be changed as appropriate. -- ----------------------------------------------------------------------- -- -- The following are the default values for the policy_filter -- used to filter out HTML, from BLOB into CLOB. -- 'Policy-based' procedures do not make use of the Ctx index. -- PolicyFilterName VARCHAR2(32) := IndexName || '_POLICYFILTER'; BlobLanguage VARCHAR2(32) := NULL; BlobFormat VARCHAR2(32) := 'BINARY'; BlobCharset VARCHAR2(32) := NULL; -- -- The following are the default names for the various preferences -- DatastorePref VARCHAR2(32) := IndexName || '_DATASTORE'; AutoFilterPref VARCHAR2(32) := IndexName || '_AUTO_FILTER'; FilterPref VARCHAR2(32) := IndexName || '_FILTER'; SectionGroupPref VARCHAR2(32) := IndexName || '_SECTIONGROUP'; MultiLexerPref VARCHAR2(32) := IndexName || '_LEXER'; DefaultLexerPref VARCHAR2(32) := IndexName || '_DEFAULT_LEXER'; WordlistPref VARCHAR2(32) := IndexName || '_WORDLIST'; StoplistPref VARCHAR2(32) := IndexName || '_STOPLIST'; StoragePref VARCHAR2(32) := IndexName || '_STORAGE'; LanguageColumn VARCHAR2(32) := '"XMLDATA"."LANGUAGE"'; ----------------------------------------------------------------------- -- PRIVATE ROUTINES ----------------------------------------------------------------------- -- -- This function determines the filtering required by a document -- based on its mime type. -- FUNCTION getFilterOption(mimeType IN VARCHAR2) RETURN PLS_INTEGER; -- -- This function determines the internal character setid given an -- external character set name -- FUNCTION getCharsetId(extCSName IN VARCHAR2) RETURN NUMBER; -- This function determines if a sync is needed. FUNCTION checkSync(myIndexName VARCHAR2, myMaxPendingCount NUMBER) RETURN BOOLEAN; -- -- This function creates the index -- PROCEDURE createIndex IS sqlstr VARCHAR2(4000); BEGIN -- Turn rowid logging on if requested IF LogFile IS NOT NULL THEN -- Turn rowid logging on during a create index BEGIN ctxsys.ctx_output.end_log; EXCEPTION WHEN OTHERS THEN NULL; END; ctxsys.ctx_output.start_log(LogFile); ctxsys.ctx_output.add_event(ctxsys.ctx_output.EVENT_INDEX_PRINT_ROWID); END IF; -- Actually create the index sqlstr := 'create index ' || IndexName || ' on xdb.xdb$resource p(value(p))' || ' indextype is ctxsys.context' || ' parameters (''datastore ' || DatastorePref || ' storage ' || StoragePref || ' filter ' || FilterPref || ' section group ' || SectionGroupPref ; if UseMultiLexer = true THEN sqlstr := sqlstr || ' language column ' || LanguageColumn || ' lexer ' || MultiLexerPref; ELSE sqlstr := sqlstr || ' lexer ' || DefaultLexerPref; END IF; sqlstr := sqlstr || ' wordlist ' || WordlistPref || ' stoplist ' || StoplistPref || ' memory ' || IndexMemory || ''')'; execute immediate sqlstr; END createIndex; -- -- This function drops all preferences used by the context index on XDB -- PROCEDURE dropPreferences IS BEGIN -- datastore preference BEGIN ctx_ddl.drop_policy(policy_name => PolicyFilterName); EXCEPTION WHEN OTHERS THEN NULL; END; -- policy preference BEGIN ctx_ddl.drop_preference(DatastorePref); EXCEPTION WHEN OTHERS THEN NULL; END; -- filter preference BEGIN ctx_ddl.drop_preference(FilterPref); EXCEPTION WHEN OTHERS THEN NULL; END; BEGIN ctx_ddl.drop_preference(AutoFilterPref); EXCEPTION WHEN OTHERS THEN NULL; END; -- section group BEGIN ctx_ddl.drop_section_group(SectionGroupPref); EXCEPTION WHEN OTHERS THEN NULL; END; -- lexer preferences BEGIN ctx_ddl.drop_preference(MultiLexerPref); EXCEPTION WHEN OTHERS THEN NULL; END; BEGIN ctx_ddl.drop_preference(DefaultLexerPref); EXCEPTION WHEN OTHERS THEN NULL; END; -- wordlist preferences BEGIN ctx_ddl.drop_preference(WordlistPref); EXCEPTION WHEN OTHERS THEN NULL; END; -- stoplist preferences BEGIN ctx_ddl.drop_stoplist(StoplistPref); EXCEPTION WHEN OTHERS THEN NULL; END; -- storage preference BEGIN ctx_ddl.drop_preference(StoragePref); EXCEPTION WHEN OTHERS THEN NULL; END; END dropPreferences; -- -- This function creates all preferences required by the cntext index on XDB -- PROCEDURE createPreferences IS LexerPref VARCHAR2(32); BEGIN createDatastorePref; createFilterPref; createLexerPref; createStoplistPref; createWordlistPref; createStoragePref; createSectiongroupPref; IF UseMultiLexer = true THEN LexerPref := MultiLexerPref; ELSE LexerPref := DefaultLexerPref; END IF; ctx_ddl.create_policy(policy_name =>PolicyFilterName, filter =>AutoFilterPref); END createPreferences; -- -- This function creates the datastore preference. A USER datastore -- is used. -- PROCEDURE createDatastorePref IS BEGIN ctx_ddl.create_preference(DatastorePref, 'USER_DATASTORE'); ctx_ddl.set_attribute(DatastorePref, 'PROCEDURE', 'XDB_DATASTORE_PROC'); ctx_ddl.set_attribute(DatastorePref, 'OUTPUT_TYPE', 'CLOB'); END createDatastorePref; -- -- Creates the filter preference. -- A NULL filter is used - the user datastore procedure calls the IFILTER -- APIs to INSO when appropriate -- PROCEDURE createFilterPref IS BEGIN ctx_ddl.create_preference(FilterPref, 'NULL_FILTER'); ctx_ddl.create_preference(AutoFilterPref, 'AUTO_FILTER'); END createFilterPref; -- -- Creates the section group. -- PROCEDURE createSectiongroupPref IS BEGIN ctx_ddl.create_section_group(SectionGroupPref, SectionGroup); END createSectiongroupPref; -- -- Creates the lexer preferences -- Creates a default lexer with base letter transcriptions, and -- adds this as a sub-lexer of a multi-lexer -- Currently, the multi-lexer is not used. -- PROCEDURE createLexerPref IS BEGIN ctx_ddl.create_preference(DefaultLexerPref, 'BASIC_LEXER'); -- Turn on base letter translations ctx_ddl.set_attribute(DefaultLexerPref, 'base_letter', 'YES'); ctx_ddl.create_preference(MultiLexerPref, 'MULTI_LEXER'); ctx_ddl.add_sub_lexer(MultiLexerPref, 'DEFAULT', DefaultLexerPref); END createLexerPref; -- -- Creates the word-list preferences -- PROCEDURE createWordlistPref IS BEGIN ctx_ddl.create_preference(WordlistPref, 'BASIC_WORDLIST'); -- default values for stemming and fuzzy match ctx_ddl.set_attribute(WordlistPref, 'fuzzy_match', 'AUTO'); ctx_ddl.set_attribute(WordlistPref, 'stemmer', 'AUTO'); END createWordlistPref; -- -- Creates the stoplist. -- Imports the stopwords from CTXSYS.DEFAULT_STOPLIST -- And adds the stopwords specified in StopWords -- By default, NUMBERS are treated as a stopclass -- PROCEDURE createStoplistPref IS i PLS_INTEGER; wordset varcharset := varcharset(); BEGIN -- -- You can also create a MULTI_STOPLIST instead of a basic stoplist -- The problem then is to specify the appropriate language for the -- stopwords that are part of the default stoplist, and the stopwords -- that the user provides -- ctx_ddl.create_stoplist(StoplistPref, 'MULTI_STOPLIST'); -- IF UseMultiLexer = true THEN ctx_ddl.create_stoplist(StoplistPref, 'MULTI_STOPLIST'); ELSE ctx_ddl.create_stoplist(StoplistPref, 'BASIC_STOPLIST'); END IF; -- don't index any numbers ctx_ddl.add_stopclass(StoplistPref, 'NUMBERS'); -- extend the local stopwords array wordset.extend(4096); SELECT spw_word BULK COLLECT INTO wordset FROM ctx_stopwords WHERE spw_stoplist = 'DEFAULT_STOPLIST' and spw_owner = 'CTXSYS'; -- add all stopwords for the default stoplist to our stoplist FOR i in 1..wordset.COUNT LOOP ctx_ddl.add_stopword(StoplistPref, wordset(i)); END LOOP; -- Then add our own specific stop words FOR i in 1..StopWords.COUNT LOOP BEGIN ctx_ddl.add_stopword(StoplistPref, StopWords(i)); EXCEPTION WHEN OTHERS THEN NULL; END; END LOOP; END createStoplistPref; -- -- Storage preferences -- Determine if the $I table and the $X index require a special tablespace -- or any other properties -- Note that prefix and substr indexing are turned off -- PROCEDURE createStoragePref IS -- The tablespace of the tables/indexes that constitute the context index. IndexTableSpace VARCHAR2(32); ts_clause varchar2(100); BEGIN -- Get the tablespace of xdb$resource, that can be considered as the -- default tablespace. Only one row can be returned from the query. IndexTableSpace := xdb.dbms_xdb.getxdb_tablespace(); ts_clause := 'tablespace ' || IndexTableSpace; ctx_ddl.create_preference(StoragePref, 'BASIC_STORAGE'); ctx_ddl.set_attribute(StoragePref, 'I_TABLE_CLAUSE', ts_clause); ctx_ddl.set_attribute(StoragePref, 'K_TABLE_CLAUSE', ts_clause); ctx_ddl.set_attribute(StoragePref, 'R_TABLE_CLAUSE', ts_clause || ' lob(data) store as (cache)'); ctx_ddl.set_attribute(StoragePref, 'N_TABLE_CLAUSE', ts_clause); ctx_ddl.set_attribute(StoragePref, 'I_INDEX_CLAUSE', ts_clause || ' compress 2'); ctx_ddl.set_attribute(StoragePref, 'P_TABLE_CLAUSE', ts_clause); END createStoragePref; -- -- Determines the filtering to use for a document -- FUNCTION getFilterOption(mimeType IN VARCHAR2) RETURN PLS_INTEGER IS filterOption PLS_INTEGER := USE_INSO_FILTER; i PLS_INTEGER; BEGIN -- We have 3 choices. -- (a) Skip the data completely - this is true for resources that -- contain images, audios etc -- (b) No filtering required - this can be used when the data -- is a text type -- (c) Use INSO filter - for everything else FOR i IN 1..SkipFilter_Types.COUNT LOOP IF mimeType LIKE SkipFilter_Types(i) THEN filterOption := SKIP_DATA; EXIT; END IF; END LOOP; FOR i in 1..NullFilter_Types.COUNT LOOP IF mimeType LIKE NullFilter_Types(i) THEN filterOption := USE_NULL_FILTER; EXIT; END IF; END LOOP; RETURN filterOption; END getFilterOption; -- -- Determines the internal charset-id for an external charset name -- Currently hardcoded to 2 (represents UTF-8) -- FUNCTION getCharsetId(extCSName IN VARCHAR2) RETURN NUMBER IS intCSName varchar2(255); BEGIN intCSName := utl_gdk.charset_map(extCSName, utl_gdk.IANA_TO_ORACLE); IF intCSName is null THEN RETURN NLS_CHARSET_ID(extCSName); ELSE RETURN NLS_CHARSET_ID(intCSName); END IF; END getCharsetId; PROCEDURE annotate_doclob(author IN varchar2, dispname IN varchar2, -- resource's display name rescomment IN varchar2, -- resource creation comment owner IN varchar2, -- owner user name creator IN varchar2, -- creator user name lastmod IN varchar2, -- last modifier user name resextra IN clob, -- resource extra properties xmlref_lob IN clob, -- xmldata outlob IN OUT NOCOPY clob) IS begin -- Write the metadata fields into output lob if author is not null then dbms_lob.writeappend(outlob, length(author), author); dbms_lob.writeappend(outlob, 1, ' '); end if; if dispname is not null then dbms_lob.writeappend(outlob, length(dispname), dispname); dbms_lob.writeappend(outlob, 1, ' '); end if; if rescomment is not null then dbms_lob.writeappend(outlob, length(rescomment), rescomment); dbms_lob.writeappend(outlob, 1, ' '); end if; if owner is not null then dbms_lob.writeappend(outlob, length(owner), owner); dbms_lob.writeappend(outlob, 1, ' '); end if; if creator is not null then dbms_lob.writeappend(outlob, length(creator), creator); dbms_lob.writeappend(outlob, 1, ' '); end if; if lastmod is not null then dbms_lob.writeappend(outlob, length(lastmod), lastmod); dbms_lob.writeappend(outlob, 1, ' '); end if; -- check if resextra holds data if resextra is not null and dbms_lob.getlength(resextra) > 0 then dbms_lob.append(outlob, resextra); dbms_lob.writeappend(outlob, 1, ' '); end if; -- check if xmlref/xmllob holds data if xmlref_lob is not null and dbms_lob.getlength(xmlref_lob) > 0 then dbms_lob.append(outlob, xmlref_lob); end if; END annotate_doclob; -- -- The datastore procedure -- This reads the data from the underlying row -- Some of the metadata of the document are also indexed - notably, -- the author, the creation comment of the resource, the display name etc -- PROCEDURE xdb_datastore_proc(rid IN ROWID, outlob IN OUT NOCOPY CLOB) IS author varchar2(128); -- the author dispname varchar2(128); -- resource's display name rescomment varchar2(128); -- resource creation comment contype varchar2(80); -- mimetype of the resource xmllob blob; -- blob containing inline resource xmlref_lob clob; -- xmldata charset varchar2(128); -- character set of the resource resextra clob; -- extra properties about the resource owneridr raw(16); -- owner id in its raw form creatoridr raw(16); -- creator id in its raw form lastmodidr raw(16); -- last modifier id in its raw form owner varchar2(32); -- owner user name creator varchar2(32); -- creator user name lastmod varchar2(32); -- last modifier user name begin if (dbms_lob.getlength(outlob) <> 0) then raise_application_error(-20010, 'non-zero lob'); end if; -- Get the columns from the resource row SELECT e.xmldata.author, e.xmldata.dispname, e.xmldata.rescomment, e.xmldata.contype, CASE WHEN e.xmldata.xmlref IS NOT NULL THEN DEREF(e.xmldata.xmlref).getclobval() ELSE NULL END, e.xmldata.xmllob, e.xmldata.charset,e.xmldata.ownerid, e.xmldata.creatorid, e.xmldata.lastmodifierid, e.xmldata.resextra INTO author, dispname, rescomment, contype, xmlref_lob, xmllob, charset, owneridr, creatoridr, lastmodidr, resextra FROM xdb.xdb$resource e WHERE ROWID = rid; -- get the user names for the owner, creator and the last modifier xdb.dbms_xdbz0.get_username(owneridr, owner); xdb.dbms_xdbz0.get_username(creatoridr, creator); xdb.dbms_xdbz0.get_username(lastmodidr, lastmod); annotate_doclob(author, dispname, rescomment, owner, creator, lastmod, resextra, xmlref_lob, outlob); -- If resource data is stored in the blob, filter it IF xmllob IS NOT NULL AND dbms_lob.getlength(xmllob) > 0 THEN DECLARE filterOption PLS_INTEGER; -- what kind of filter should I use BEGIN filterOption := getFilterOption(conType); IF (filterOption = USE_NULL_FILTER) THEN DECLARE amount number := dbms_lob.getlength(xmllob); dest_offset number := dbms_lob.getlength(outlob) + 1; src_offset number := 1; blob_csid number; lang_context integer := 0; warning integer := 0; BEGIN blob_csid := getCharsetId(charset); dbms_lob.convertToClob(outlob, xmllob, amount, dest_offset, src_offset, blob_csid, lang_context, warning); END; ELSIF (filterOption = USE_INSO_FILTER) THEN -- BUG 8915200: IFILTER deprecated since 9i. -- ctx_doc.ifilter(xmllob, outlob); -- This solution was suggested by mfaisal. ctx_doc.policy_filter(policy_name=>PolicyFilterName, document =>xmllob, restab =>outlob, plaintext =>FALSE, -- yes we want HTML / XML language =>BlobLanguage, format =>BlobFormat, charset =>BlobCharset); annotate_doclob(author, dispname, rescomment, owner, creator, lastmod, resextra, xmlref_lob, outlob); END IF; END; END IF; END xdb_datastore_proc; -- -- Sync's up the index -- PROCEDURE syncIndex(myIndexName VARCHAR2, myIndexMemory VARCHAR2) IS BEGIN ctx_ddl.sync_index(myIndexName, myIndexMemory, maxtime=>SyncTimeOut); END syncIndex; -- -- Set time limit for Sync Index PROCEDURE setSyncTimeout(timeout IN INTEGER := NULL) IS BEGIN -- Don't care about weird / negative values ! SyncTimeOut := timeout; END setSyncTimeOut; -- -- Optimizes the index -- PROCEDURE optimizeIndex IS BEGIN null; -- ctx_ddl.optimize_index(IndexName, lvl, OptimizeTime, NULL, NULL); END optimizeIndex; -- -- This function determines if a sync is needed. -- This is really applicable ONLY when the sync policy is _BY_PENDING_COUNT -- This function looks at the pending queue, and determines if the -- queue is larger than the predefined threshold. -- NOTE: This function is only to be used by the autoSyncJob procedure -- FUNCTION checkSync(myIndexName VARCHAR2, myMaxPendingCount NUMBER) RETURN BOOLEAN IS doSync BOOLEAN := false; pendingCount NUMBER; BEGIN -- Check to see if there are many documents waiting to be indexed SELECT COUNT(*) INTO pendingCount FROM ctx_user_pending WHERE pnd_index_name = myIndexName; IF pendingCount > myMaxPendingCount THEN doSync := true; END IF; -- Is the index in a reasonable state to be sync'ed RETURN doSync; END checkSync; -- -- procedure used by dbms_jobs to automatically sync the index -- PROCEDURE autoSyncJobByCount(myIndexName VARCHAR2, myMaxPendingCount NUMBER, myIndexMemory VARCHAR2) IS BEGIN IF checkSync(myIndexname, myMaxPendingCount) = true THEN syncIndex(myIndexName, myIndexMemory); END IF; END autoSyncJobByCount; -- -- procedure used by dbms_jobs to automatically sync the index -- PROCEDURE autoSyncJobByTime(myIndexName VARCHAR2, myIndexmemory VARCHAR2) IS BEGIN syncIndex(myIndexName, myIndexMemory); END autoSyncJobByTime; -- -- Configures auto-sync -- The system must be configured for job queues -- PROCEDURE configureAutoSync IS job number; what varchar2(255); quote varchar2(1) := ''''; BEGIN IF (AutoSyncPolicy = SYNC_BY_TIME or AutoSyncPolicy = SYNC_BY_PENDING_COUNT_AND_TIME) THEN what := 'xdb.dbms_xdbt.autoSyncJobByTime(' || quote || IndexName || quote || ',' || quote || IndexMemory || quote || ');' ; dbms_job.submit(job, what, interval=>'SYSDATE+'|| SyncInterval/1440); END IF; IF (AutoSyncPolicy = SYNC_BY_PENDING_COUNT or AutoSyncPolicy = SYNC_BY_PENDING_COUNT_AND_TIME) THEN what := 'xdb.dbms_xdbt.autoSyncJobByCount(' || quote || IndexName || quote || ',' || MaxPendingCount || ',' || quote || IndexMemory || quote || ');'; dbms_job.submit(job, what, interval=>'SYSDATE+'|| CheckPendingCountInterval/1440); END IF; END configureAutoSync; END dbms_xdbt; / show errors; Rem Rem Create the "real" user-datastore procedure Rem Note: This is around only for backward compatibility. In 9iR2, the Rem user-datastore procedure needed to be owned by CTXSYS. In 10i, it needs Rem to be owned by the index owner. Changing the user-datastore-proc name Rem may require a rebuild of the index - which we don't really want, and Rem hence we create a procedure of the same name in XDB's schema Rem CREATE OR REPLACE PROCEDURE xdb.xdb_datastore_proc(rid IN ROWID, outlob IN OUT NOCOPY CLOB) AUTHID CURRENT_USER IS BEGIN xdb.dbms_xdbt.xdb_datastore_proc(rid, outlob); END; / show errors; Rem Remember to set these Rem exec ctxsys.ctx_adm.set_parameter('MAX_INDEX_MEMORY', '128M'); Rem exec ctxsys.ctx_adm.set_parameter('LOG_DIRECTORY', '/tmp');