/*----------------------------------------------------------------------------*\
 *  Copyright (c) 2002 CubeWerx Inc.  Licensed under the GNU LGPL.
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published
 *  by the Free Software Foundation, either version 2.1 of the License,
 *  or any later version.  This library is distributed in the hope that
 *  it will be useful, but WITHOUT ANY WARRANTY, without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *  See the GNU Lesser General Public License for more details, either
 *  in the "LICENSE.LGPL.txt" file distributed with this software or at
 *  web page "http://www.fsf.org/licenses/lgpl.html".
 *
 *  MODULE:   cw_xmltree.h
 *  PURPOSE:  Interface for accessing CW-XML tree structures
 *  HISTORY:
 *      DATE         PROGRAMMER       DESCRIPTION
 *      18-Sep-2002  Keith Pomakis    Initial creation as "xmlschemas.h"
 *      02-May-2003  Craig Bruce      Generalized
 *      20-Jun-2003  Craig Bruce      Repackaged in "cwxml" module
 *----------------------------------------------------------------------------
 *  $Id: cw_xmltree.h,v 1.17 2004/02/06 20:41:25 csbruce Exp $
\*----------------------------------------------------------------------------*/

/* NOTE(review): the four #include directives below carry no header names in
 * this copy of the file (their operands appear to have been lost).  They must
 * be restored before this header can be compiled; the declarations below
 * rely on FILE, bool, uint32, CWEXP, CW_STR_OBJ and HashTable being declared
 * by them. */
#include
#include
#include
#include

#ifndef _CW_XMLTREE_H
#define _CW_XMLTREE_H

#ifdef __cplusplus
extern "C" {
#endif

/*============================================================================*\
 *  STRUCTURE AND TYPE DEFINITIONS
\*============================================================================*/

/*----------------------------------------------------------------------------*\
 *  CW_XML_STR_TAB_ENT - string-table entry
\*----------------------------------------------------------------------------*/
typedef struct CW_XML_STR_TAB_ENT_ST {
    long index;          /* index of this entry in table */
    CW_STR_OBJ *strObj;  /* string object for string content */
    long prefixLength;   /* length of namespace prefix including the ':',
                            or -1 (presumably "no prefix" -- TODO confirm) */
} CW_XML_STR_TAB_ENT;

/*----------------------------------------------------------------------------*\
 *  CW_XML_STR_TAB - string table for element/attribute names, etc.
\*----------------------------------------------------------------------------*/
typedef struct {
    CW_XML_STR_TAB_ENT **entries;  /* string-table entries */
    long nEntries;                 /* number of string-table entries */
    long nAllocEntries;            /* number of entries allocated */
    long nEntriesFlushed;          /* number of entries written to bxml file */
    HashTable *nameHashTable;      /* hash table for primary name */
} CW_XML_STR_TAB;

/*----------------------------------------------------------------------------*\
 *  CW_XML_VALTYPE - value types for general / binary xml
 *
 *  The codes are not contiguous: 0xF5 and 0xF7 have no enumerator here.
\*----------------------------------------------------------------------------*/
typedef enum {
    CW_XML_VALTYPE_BOOL = 0xF0,    /* boolean value */
    CW_XML_VALTYPE_BYTE = 0xF1,    /* 'byte' numeric value */
    CW_XML_VALTYPE_INT16 = 0xF2,   /* 'short' numeric value */
    CW_XML_VALTYPE_UINT16 = 0xF3,  /* 'ushort' numeric value */
    CW_XML_VALTYPE_INT32 = 0xF4,   /* 'int' numeric value */
    CW_XML_VALTYPE_INT64 = 0xF6,   /* 'long' 64-bit numeric value */
    CW_XML_VALTYPE_FLOAT = 0xF8,   /* 'float' numeric value */
    CW_XML_VALTYPE_DOUBLE = 0xF9,  /* 'double' numeric value */
    CW_XML_VALTYPE_STRING = 0xFA,  /* character-string value */
    CW_XML_VALTYPE_ARRAY = 0xFB    /* array of scalar values */
} CW_XML_VALTYPE;

/*----------------------------------------------------------------------------*\
 *  CW_XML_BLOB_TEXT - general / binary textual representations for blob types
\*----------------------------------------------------------------------------*/
typedef enum {
    CW_XML_BLOB_NONE = 0x00,      /* no suitable text encoding */
    CW_XML_BLOB_HEXCODED = 0x01,  /* hexadecimal text encoding */
    CW_XML_BLOB_BASE64 = 0x02,    /* base64 text encoding */
    CW_XML_BLOB_NUM_LIST = 0x03   /* number list */
} CW_XML_BLOB_TEXT;

/*----------------------------------------------------------------------------*\
 *  CW_XML_NAMESPACE - namespace
indicator
\*----------------------------------------------------------------------------*/
typedef struct {
    int dummy;  /* placeholder member; no namespace data is defined here */
} CW_XML_NAMESPACE;

/*----------------------------------------------------------------------------*\
 *  CW_XML_NODE_TYPE - enumeration of possible node types
 *
 *  NOTE(review): the markup examples in several of the comments below were
 *  missing from this copy of the file and have been rewritten from the
 *  node-type names; confirm against the original source.
\*----------------------------------------------------------------------------*/
typedef enum {
    CW_XML_NODE_ELEMENT,          /* element open tag: <name>, <name/> */
    CW_XML_NODE_ELEMENT_FINISH,   /* low-level end of opening tag; not closer! */
    CW_XML_NODE_CLOSE,            /* </name>, closer for <name/> */
    CW_XML_NODE_ATTRIBUTE,        /* attribute node */
    CW_XML_NODE_TEXT,             /* text-content node */
    CW_XML_NODE_CDATA,            /* CDATA-section-content node */
    CW_XML_NODE_WHITESPACE,       /* whitespace-content node */
    CW_XML_NODE_BLOB,             /* blob-content node */
    CW_XML_NODE_ENTITY_REF,       /* entity-reference node */
    CW_XML_NODE_CHAR_ENTITY_REF,  /* char-entity-reference node */
    CW_XML_NODE_COMMENT,          /* comment node */
    CW_XML_NODE_XML_DECL,         /* xml-declaration node */
    CW_XML_NODE_BANG,             /* "<!" directive */
    CW_XML_NODE_BANG_BRACKET,     /* "<![" bracketed directive */
    CW_XML_NODE_PROC_INSTR,       /* "<?" processing instruction */
    CW_XML_NODE_ROOT,             /* root node of tree */
    CW_XML_NODE_EOF               /* end-of-file indicator */
} CW_XML_NODE_TYPE;

/*----------------------------------------------------------------------------*\
 *  CW_XML_NODE_NUMS - structure for numeric array/scalar
\*----------------------------------------------------------------------------*/
typedef struct {
    CW_XML_VALTYPE elemType;  /* type of numeric element values */
    long elemSize;            /* element size in bytes */
    void *array;              /* array of numeric values */
    long length;              /* numeric-array length in words */
    long allocSize;           /* allocated size of array in bytes */
} CW_XML_NODE_NUMS;

/*----------------------------------------------------------------------------*\
 *  CW_XML_NODE_EXTRA - 'extra' (infrequent) information for XML node
\*----------------------------------------------------------------------------*/
typedef struct {
    void *_private;  /* optional, opaque user-app-specific data */
} CW_XML_NODE_EXTRA;
/*----------------------------------------------------------------------------*\
 *  CW_XML_NODE - xml-node/token descriptor; DO NOT ACCESS FIELDS DIRECTLY!!
 *
 *  Node content (NAME, TEXT, COUNT, NUMS): The flags field indicates which
 *  content information is 'active' (since structures may be allocated
 *  without being active for efficiency of reuse).  The correspondence of
 *  flags to fields is: NAME=name/extName/xmlNamespace, TEXT=flatText,
 *  COUNT=count, NUMS=numArray.  All nodes may have the TEXT value
 *  set, which can be interpreted as the "textual content" of the node.
 *  The following descriptions are for the normal node and subtree scanning
 *  modes and some of the raw-tokens are slightly different as described
 *  in CwXmlScan_ReadRawToken().
 *
 *  CW_XML_NODE_ELEMENT - always has NAME set.  This also has the HAS_ATTRS
 *      and IS_EMPTY flags set to indicate exactly what kind of element
 *      it is.
 *
 *  CW_XML_NODE_ELEMENT_FINISH - has no content.  This node cannot be part
 *      of a node or a subtree (only shows up when scanning raw tokens).
 *      This node will always have the HAS_ATTRS and IS_EMPTY flags set
 *      appropriately.
 *
 *  CW_XML_NODE_CLOSE - always has NAME set and will have the IS_EMPTY
 *      flag set if the close tag was synthesized after an empty element.
 *      This node cannot be part of a subtree.
 *
 *  CW_XML_NODE_ATTRIBUTE - always has NAME.
 *
 *  CW_XML_NODE_TEXT - may have TEXT set and one of NAME, COUNT, or
 *      NUMS values that represent the source data.  Only up to one
 *      non-TEXT value may be set, which will be the authoritative
 *      value definition, unless none are set, in which case TEXT is
 *      the authoritative definition.  NAME represents a string-table
 *      reference, COUNT a simple integer value, and NUMS a numeric or
 *      boolean scalar or array.
 *
 *  CW_XML_NODE_CDATA - has same content as CW_XML_NODE_TEXT.
 *
 *  CW_XML_NODE_WHITESPACE - will have TEXT set to contain the whitespace
 *      string and COUNT set to indicate the number of completely blank
 *      lines represented.
 *
 *  CW_XML_NODE_BLOB - uses the NUMS array to store an array of bytes
 *      with COUNT set to a coded value for the appropriate textual
 *      representation for the blob: 0=none, 1=hexCoded, 2=base64,
 *      3=byteList.  These codes have the same numeric values as the
 *      members of the CW_XML_BLOB_TEXT enumeration.
 *
 *  CW_XML_NODE_ENTITY_REF - uses NAME for the entity name.  When
 *      retrieving text content, the entity will be replaced with a
 *      literal "&name;" (substituting for "name") if it cannot be
 *      expanded properly.
 *
 *  CW_XML_NODE_CHAR_ENTITY_REF - uses COUNT to give the character code.
 *      When retrieving text content, the character code #xA4 will be
 *      substituted if the given character is not representable.
 *
 *  CW_XML_NODE_COMMENT - uses TEXT to store the comment body.  The body
 *      may contain whitespace for formatting.  Also uses COUNT to store
 *      a coded value of a positioning hint: 0=indented, 1=startOfLine,
 *      2=endOfLine.
 *
 *  CW_XML_NODE_XML_DECL - uses TEXT to store the XML version and COUNT
 *      to store the 'standalone' indicator: 1=yes, 0=no, -1=notSet.
 *      The source-character-encoding indicator is stored in the
 *      CW_XML_SCAN object, and the strings stored in nodes are in the
 *      internal encoding format, which is presently "ISO-8859-1".
 *
 *  CW_XML_NODE_BANG - uses NAME to store the directive name and TEXT to
 *      store the directive body content.
 *
 *  CW_XML_NODE_BANG_BRACKET - same storage as CW_XML_NODE_BANG.
 *
 *  CW_XML_NODE_PROC_INSTR - same storage as CW_XML_NODE_BANG.
 *
 *  CW_XML_NODE_ROOT - stores no content.
 *
 *  CW_XML_NODE_EOF - stores no content.
\*----------------------------------------------------------------------------*/

/* Bit flags stored in CW_XML_NODE.flags.  The low bits describe the element
 * itself; the 0x00F0 bits say which content fields are active.  Bit 0x0008
 * is not assigned here. */
#define CW_XML_NODE_HAS_ATTRS 0x0001    /* element has attributes */
#define CW_XML_NODE_IS_EMPTY 0x0002     /* empty element content */
#define CW_XML_NODE_IS_SUBTREED 0x0004  /* subtree is included */
#define CW_XML_NODE_USES_NAME 0x0010    /* node uses 'name', etc. fields */
#define CW_XML_NODE_USES_TEXT 0x0020    /* node uses 'flatText' field */
#define CW_XML_NODE_USES_COUNT 0x0040   /* node uses 'count' field */
#define CW_XML_NODE_USES_NUMS 0x0080    /* node uses 'numArray' field */

typedef struct CW_XML_NODE_ST CW_XML_NODE;  /* forward definition */
typedef struct CW_XML_DOC_ST CW_XML_DOC;    /* forward definition */

struct CW_XML_NODE_ST {
    /** type & name **/
    CW_XML_NODE_TYPE nodeType;      /* type of node */
    uint32 flags;                   /* compact bool flags for node
                                       (CW_XML_NODE_* bits above) */
    CW_XML_STR_TAB_ENT *name;       /* string-table entry for internal name/str */
    CW_XML_STR_TAB_ENT *extName;    /* string-table entry for external name/str */
    CW_XML_NAMESPACE*xmlNamespace;  /* namespace ref for node */
    /** links **/
    CW_XML_DOC *document;           /* document-object reference */
    CW_XML_NODE *parent;            /* pointer to parent node */
    CW_XML_NODE *next;              /* pointer to next sibling node */
    CW_XML_NODE *prev;              /* pointer to previous sibling node */
    CW_XML_NODE *firstChild;        /* pointer to first child node */
    CW_XML_NODE *lastChild;         /* pointer to last child node */
    /** content & info **/
    CW_STR_OBJ *flatText;           /* flat plain text() for node, or NULL */
    long count;                     /* counter, reference index, coded value */
    CW_XML_NODE_NUMS *numArray;     /* numeric/array-content data, or NULL */
    CW_XML_NODE_EXTRA *extraInfo;   /* optional extra info for node, or NULL */
    long lineNum;                   /* text line of start of node */
    long lineByte;                  /* text line byte of start of node */
}; /*CW_XML_NODE*/

/*----------------------------------------------------------------------------*\
 *  CW_XML_DOC - document descriptor - DO NOT ACCESS FIELDS DIRECTLY!
\*----------------------------------------------------------------------------*/
struct CW_XML_DOC_ST {
    char *name;                      /* document name */
    CW_XML_STR_TAB *intStringTable;  /* internal string table for document */
    CW_XML_STR_TAB *extStringTable;  /* external string table for document */
    CW_XML_NODE *rootNode;           /* root node of document */
} /*CW_XML_DOC*/;

/*============================================================================*\
 *  DOCUMENT METHODS
\*============================================================================*/

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlDoc_Destroy() - destroy document object
 *  DESCRIPTION:
 *      Destroys an xml-document object.  Recursively destroys the attached
 *      node tree.
 *  ARGUMENTS:
 *      doc - document object, allowed to be NULL
 *  RETURNS:
 *      (nothing)
 *  ERRORS:
 *      (no errors are possible)
\*----------------------------------------------------------------------------*/
CWEXP void CwXmlDoc_Destroy( CW_XML_DOC *doc );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlDoc_GetRootNode() - locate root node of document
 *  DESCRIPTION:
 *      Returns a reference to the root node of a document.  This is a
 *      macro that reads the 'rootNode' field directly, so 'doc' is
 *      dereferenced unconditionally and must not be NULL.
 *  ARGUMENTS:
 *      doc - document object
 *  RETURNS:
 *      rootNode - pointer to root node
 *  ERRORS:
 *      (no errors are possible)
\*----------------------------------------------------------------------------*/
#define CwXmlDoc_GetRootNode(doc) ((doc)->rootNode)

/*============================================================================*\
 *  NODE METHODS
\*============================================================================*/

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetType() - get node type
 *  DESCRIPTION:
 *      Gets node type.
* ARGUMENTS: * node - xml-node object * RETURNS: * type - node type code value (XML_NODE_TYPE type) * ERRORS: * (no errors are possible) \*----------------------------------------------------------------------------*/ #define CwXmlNode_GetType(node) ((node)->nodeType) /*----------------------------------------------------------------------------*\ * NAME: * CwXmlNode_GetFlags() - get flags for a node * DESCRIPTION: * Returns the flags for a node. The flags have the following * meanings: * * FLAG MEANING * ---------------------- ----------- * CW_XML_NODE_HAS_ATTRS element has attributes * CW_XML_NODE_IS_EMPTY empty element content * CW_XML_NODE_USES_STR_TAB node uses 'stringTabRef' field * CW_XML_NODE_USES_TEXT node uses 'flatText' field * CW_XML_NODE_USES_COUNT node uses 'count' field * CW_XML_NODE_USES_NUMS node uses 'numArray' field * ARGUMENTS: * node - xml-node object * RETURNS: * flags - flag bits * ERRORS: * (no errors are possible) \*----------------------------------------------------------------------------*/ #define CwXmlNode_GetFlags(node) ((node)->flags) /*----------------------------------------------------------------------------*\ * NAME: * CwXmlNode_GetName() - get name of node * DESCRIPTION: * Returns the full internal name of the given node. The namespace- * prefix translation mechanism is applied to this name. If no name * is available, a NULL will be returned, but this is not an error. * ARGUMENTS: * node - xml-node object * RETURNS: * name - name of node, or NULL if it has no name * ERRORS: * (no errors are possible) \*----------------------------------------------------------------------------*/ #define CwXmlNode_GetName(node) \ (((node)->flags & CW_XML_NODE_USES_NAME) \ ? CwStrObj_Deref((node)->name->strObj) : NULL) #define CwXmlNode_GetNonNullName(node) \ ((node==NULL) ? "(null_node)" \ : (((node)->flags & CW_XML_NODE_USES_NAME) \ ? 
CwStrObj_Deref((node)->name->strObj) : "(null_name)")) /*----------------------------------------------------------------------------*\ * NAME: * CwXmlNode_GetBasename() - get basename for node * DESCRIPTION: * Returns the base name of the given node. The basename is * the internal tag name minus the xml-namespace prefix string. * For example, an element node with the name would have * "posList" as its basename. A value of NULL will be returned for * a node that has no name at all, but this does not indicate an error. * ARGUMENTS: * node - xml-node object * RETURNS: * basename - basename of node, or NULL if there is none * ERRORS: * (no errors are possible) \*----------------------------------------------------------------------------*/ CWEXP const char *CwXmlNode_GetBasename( const CW_XML_NODE *node ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlNode_GetNamePrefix() - get namespace prefix of qualified name * DESCRIPTION: * Returns the namespace prefix string of the name of the given node. * For example, an element node with the name would have * "gml" as its prefix. A value of NULL will be returned for a node * that either has no name at all or which has no prefix, but this * does not indicate an error. * ARGUMENTS: * node - xml-node object * prefixStrBuf - buffer to store prefix string into * prefixStrBufSize - byte size of 'prefixStrBuf' buffer * RETURNS: * prefix - prefix string in user buffer, or NULL if there is none * ERRORS: * (no errors are possible) \*----------------------------------------------------------------------------*/ CWEXP char *CwXmlNode_GetNamePrefix( const CW_XML_NODE *node, char *prefixStrBuf, long prefixStrBufSize); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlNode_GetStringBasename() - get basename of given QName string * DESCRIPTION: * Locates the basename (NCName) within the given string buffer. 
*      The basename is the stuff after the ':' or the whole string if
 *      there is no ':' prefix separator.
 *  ARGUMENTS:
 *      string - given string
 *  RETURNS:
 *      basename - pointer within same string buffer of basename
 *  ERRORS:
 *      (no errors are possible)
\*----------------------------------------------------------------------------*/
CWEXP const char *CwXmlNode_GetStringBasename( const char *string );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetExternalName() - get external name of node
 *  DESCRIPTION:
 *      Returns the full external name of the given node.  The 'external
 *      name' is the qualified name that was read from the source document
 *      with no namespace-prefix translation applied to it.  If no name
 *      is available, a NULL will be returned, but this is not an error.
 *      This is a macro that evaluates 'node' more than once and
 *      dereferences it unconditionally, so 'node' must not be NULL.
 *  ARGUMENTS:
 *      node - xml-node object
 *  RETURNS:
 *      extName - external name, or NULL if the node has no name
 *  ERRORS:
 *      (no errors are possible)
\*----------------------------------------------------------------------------*/
#define CwXmlNode_GetExternalName(node) \
    (((node)->flags & CW_XML_NODE_USES_NAME) \
        ? CwStrObj_Deref((node)->extName->strObj) : NULL)

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetPrototype() - get prototype string for node
 *  DESCRIPTION:
 *      Gets a representative prototype string for a node.  The string
 *      size used is 81 bytes.
 *  ARGUMENTS:
 *      node      - node object
 *      outString - buffer to store prototype string in, at least 81 bytes
 *  RETURNS:
 *      prototype - pointer to the given 'outString'
 *  ERRORS:
 *      (no errors are possible)
\*----------------------------------------------------------------------------*/
CWEXP const char *CwXmlNode_GetPrototype( const CW_XML_NODE *node,
    char *outString );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetParent()     - locate parent node to present node
 *      CwXmlNode_GetFirstChild() - locate first child node
 *      CwXmlNode_GetLastChild()  - locate last child node
 *      CwXmlNode_GetNext()       - locate next sibling node
 *      CwXmlNode_GetPrev()       - locate previous sibling node
 *  DESCRIPTION:
 *      Locates various nodes in a tree relative to the present node.
 *      A NULL is returned if no such node exists.  These are macros
 *      that read the link fields directly, so 'node' itself must not
 *      be NULL.
 *  ARGUMENTS:
 *      node - xml-node object
 *  RETURNS:
 *      newNode - node requested, or NULL if not available
 *  ERRORS:
 *      (no errors are possible)
\*----------------------------------------------------------------------------*/
#define CwXmlNode_GetParent(node) ((node)->parent)
#define CwXmlNode_GetFirstChild(node) ((node)->firstChild)
#define CwXmlNode_GetLastChild(node) ((node)->lastChild)
#define CwXmlNode_GetNext(node) ((node)->next)
#define CwXmlNode_GetPrev(node) ((node)->prev)

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_IsElement() - determine if node matches list of elem names
 *  DESCRIPTION:
 *      Determines if the given node matches a list of element names.
 *      The position of the match from 1 is returned or 0 if no match
 *      is found.  Element names are given as a '|'-separated list, or
 *      NULL means to match any element.
*  ARGUMENTS:
 *      node - xml-node object or NULL
 *      names - list of '|'-separated names to match, or NULL='any'
 *  RETURNS:
 *      matchPos - matching name position starting from 1, 0=not found, -1=err
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
\*----------------------------------------------------------------------------*/
CWEXP long CwXmlNode_IsElement( const CW_XML_NODE *node, const char *names );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetSubElement() - get first sub-element matching name(s)
 *  DESCRIPTION:
 *      Locates the first sub-element that matches the given element
 *      name(s).  The position of the match from 1 is returned or 0 if no
 *      match is found.  Element names are given as a '|'-separated list,
 *      or NULL means to match any element.
 *  ARGUMENTS:
 *      startNode - xml-node object or NULL
 *      names     - list of '|'-separated names to match, or NULL='any'
 *      outNode   - (out) located node, set to NULL if not found
 *  RETURNS:
 *      matchPos - matching name position starting from 1, 0=not found, -1=err
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
\*----------------------------------------------------------------------------*/
CWEXP long CwXmlNode_GetSubElement( const CW_XML_NODE *startNode,
    const char *names, const CW_XML_NODE **outNode );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetNextElement() - get next sibling element matching name(s)
 *  DESCRIPTION:
 *      Locates the first sibling element to the 'right' that matches
 *      the given element name(s).  The position of the match from 1 is
 *      returned or 0 if no match is found.  Element names are given as a
 *      '|'-separated list, or NULL means to match any element.
 *  ARGUMENTS:
 *      startNode - xml-node object or NULL
 *      names     - list of '|'-separated names to match, or NULL='any'
 *      outNode   - (out) located node, set to NULL if not found
 *  RETURNS:
 *      matchPos - matching name position starting from 1, 0=not found, -1=err
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
\*----------------------------------------------------------------------------*/
CWEXP long CwXmlNode_GetNextElement( const CW_XML_NODE *startNode,
    const char *names, const CW_XML_NODE **outNode );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetText() - get content string for node
 *  DESCRIPTION:
 *      Gets the text content of a node, or NULL if there is no suitable
 *      textual representation of its content.  Note particularly that
 *      the text content of an ELEMENT and ATTRIBUTE is defined as the
 *      concatenation of the content of certain types of subordinate nodes.
 *      The content of an element is the textual content of its subordinate
 *      subtree with all of the markups removed.  This method only
 *      works with elements if the whole subtree of the element is read.
*      The text content of each node type is defined as follows:
 *
 *      CW_XML_NODE      CONTENT
 *      --------------   ----
 *      ELEMENT          Concatenated content of children ELEMENT, TEXT, CDATA,
 *                       WHITESPACE, BLOB, ENTITY_REF, CHAR_ENTITY_REF nodes
 *      ELEMENT_FINISH   No content (can't appear in subtree)
 *      CLOSE            No content (can't appear in subtree)
 *      ATTRIBUTE        Concatenated content of all subordinate nodes
 *      TEXT             Direct content of node translated to text if necessary
 *      CDATA            Direct text content of node
 *      WHITESPACE       Direct whitespace text content of node
 *      BLOB             Blob content translated into text
 *      ENTITY_REF       Text expansion of entity-reference value, if available
 *      CHAR_ENTITY_REF  Character referred to
 *      COMMENT          Content of comment body
 *      XML_DECL         No content
 *      BANG             Text body of directive
 *      BANG_BRACKET     Text body of directive
 *      PROC_INSTR       Text body of directive
 *      ROOT             Concatenation of same child-node types as for ELEMENT
 *      EOF              No content
 *
 *  ARGUMENTS:
 *      node - xml-node object
 *      outText - (out) text content, or NULL if there is no content
 *  RETURNS:
 *      err - 0 on success or -1 on error
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - otherwise, no errors are possible
\*----------------------------------------------------------------------------*/
CWEXP int CwXmlNode_GetText( const CW_XML_NODE *node, const char **outText );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetRequiredText() - gets required text content of a node
 *  DESCRIPTION:
 *      Returns the CwXmlNode_GetText() of the given node.  Content is
 *      required to be present or an error is returned.
 *  ARGUMENTS:
 *      node - given node object
 *  RETURNS:
 *      text - text content of node, or NULL on error
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - no content found
\*----------------------------------------------------------------------------*/
CWEXP const char *CwXmlNode_GetRequiredText( const CW_XML_NODE *node );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetDouble() - gets a scalar 'double' from a node's content
 *  DESCRIPTION:
 *      Returns a 'double' number from the content of a node.  You would
 *      normally only use this on an element or attribute node.  The node
 *      must have content and the content must be a valid 'double' number.
 *  ARGUMENTS:
 *      node      - node to take content from
 *      outDouble - (out) result 'double' number
 *  RETURNS:
 *      err - 0 on success or -1 on error
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - invalid number
\*----------------------------------------------------------------------------*/
CWEXP int CwXmlNode_GetDouble( const CW_XML_NODE *node, double *outDouble );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetLong() - gets a scalar 'long' from a node's content
 *  DESCRIPTION:
 *      Returns a 'long' number from the content of a node.  You would
 *      normally only use this on an element or attribute node.  The node
 *      must have content and the content must be a valid 'long' number.
*  ARGUMENTS:
 *      node - node to take content from
 *      outLong - (out) result 'long' number
 *  RETURNS:
 *      err - 0 on success or -1 on error
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - invalid number
\*----------------------------------------------------------------------------*/
CWEXP int CwXmlNode_GetLong( const CW_XML_NODE *node, long *outLong );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetBool() - gets a scalar 'bool' from a node's content
 *  DESCRIPTION:
 *      Returns a 'bool' value from the content of a node.  You would
 *      normally only use this on an element or attribute node.  The node
 *      must have content and the content must be a valid 'bool' value.
 *  ARGUMENTS:
 *      node    - node to take content from
 *      outBool - (out) result 'bool' value
 *  RETURNS:
 *      err - 0 on success or -1 on error
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - invalid value
\*----------------------------------------------------------------------------*/
CWEXP int CwXmlNode_GetBool( const CW_XML_NODE *node, bool *outBool );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_GetArray() - get node content as a numeric/bool array
 *  DESCRIPTION:
 *      Returns the content of the given node as a numeric or boolean
 *      array.  The node given should normally be an ELEMENT or ATTRIBUTE.
 *      If successful, an array of the type you choose will be constructed
 *      and stored in the given node or a descendent and you can access
 *      the array by reference.  However, if you request to extract
 *      the array, it will be removed from the node and it will become
 *      your responsibility to free() it when you are done using it.
 *      You will also need to cast the return type to a non-'const' type.
 *      Extracting the array will be a destructive operation, so it should
 *      only be done when the content of the node is not needed for any
 *      future operations.
 *
 *      BXML format directly supports numeric arrays, so they can be
 *      processed efficiently under the right circumstances.  Textual-XML
 *      arrays will need to be parsed element-by-element.  If an array is
 *      already stored in a node and you request a new array of a different
 *      element type, the element type of the array will be changed and
 *      data might be lost if the new element type is not 'big' enough to
 *      hold all of the existing data.
 *
 *      NOTE(review): 'node' is declared 'const' although the description
 *      above says the array is stored in, or removed from, the node.
 *  ARGUMENTS:
 *      node           - xml-node object
 *      elementType    - element type to make array
 *      allowEmpty     - flag to allow the return of an empty array
 *      extractArray   - flag to permanently extract the array content
 *      outArrayLength - (out) length of the accessed array
 *  RETURNS:
 *      array - array of elements of requested type, or NULL on error
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - numeric-parsing errors
 *      - no content if (!allowEmpty)
\*----------------------------------------------------------------------------*/
CWEXP const void *CwXmlNode_GetArray( const CW_XML_NODE *node,
    CW_XML_VALTYPE elementType, bool allowEmpty, bool extractArray,
    long *outArrayLength );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlNode_Dump() - dump out the fields of an XML-node object
 *  DESCRIPTION:
 *      Dumps out the fields of an XML-node object.
* ARGUMENTS: * node - XML-node object * recursive - flag to recursively dump subtree * outfile - file stream to dump to, NULL=none * RETURNS: * (nothing) * extraInfo: * (no errors are possible) \*----------------------------------------------------------------------------*/ CWEXP void CwXmlNode_Dump( const CW_XML_NODE *node, bool recursive, FILE *outfile ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlNode_AugErrorMsg() - augment error-message stack with node info * DESCRIPTION: * Sets an error on the error-message stack that includes a prefix * message of your choosing and gives the XML stream name and line * number/position. In English, the trailing message says " detected * in XML stream..." and the default message prefix (selected with * code -1) is "Parse error". Substituted prefix messages should * have the same grammatical structure. * ARGUMENTS: * node - xml-node object * prefixMsgCode - prefix message code, -1 = default * RETURNS: * name - name of node, or NULL if it has no name * ERRORS: * (no errors are possible) \*----------------------------------------------------------------------------*/ #define CwXmlNode_AugErrorMsg(node,prefixMsgCode) \ { \ const char *prefixText; \ long usePrefixMsgCode = (prefixMsgCode); \ if (usePrefixMsgCode < 0) usePrefixMsgCode = CWERR_XML_PARSE_ERROR; \ prefixText = MsgGetFormat( (usePrefixMsgCode) ); \ ErrAugment( CWERR_FMT, "%s detected in XML stream \"%s\" on " \ "line %ld char pos %ld\n", prefixText, (node)->document->name, \ (node)->lineNum, (node)->lineByte ); \ } /*============================================================================*\ * DIRECT ATTRIBUTE ACCESS \*============================================================================*/ /*----------------------------------------------------------------------------*\ * NAME: * CwXmlAttr_GetNode() - locates a named attribute * DESCRIPTION: * Finds the identified attribute in a given element node. 
This is
 *      equivalent to doing an XPath search, but more efficient (and the
 *      attribute name has no '@' prefix).
 *  BUGS:
 *      Namespaces are not presently compared; matching is determined
 *      solely by the attribute basenames (NCNames).
 *  ARGUMENTS:
 *      elementNode - xml-element-node object
 *      attributeName - name of attribute to search for
 *  RETURNS:
 *      attributeNode - requested attribute node, or NULL if not found
 *  ERRORS:
 *      (no errors are possible)
\*----------------------------------------------------------------------------*/
CWEXP const CW_XML_NODE *CwXmlAttr_GetNode( const CW_XML_NODE *elementNode,
    const char *attributeName );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlAttr_GetRequiredNode() - locates a required named attribute
 *  DESCRIPTION:
 *      Finds the identified attribute in a given element node, which must
 *      be present.
 *  ARGUMENTS:
 *      elementNode   - xml-element-node object
 *      attributeName - name of attribute to search for
 *  RETURNS:
 *      attributeNode - requested attribute node, or NULL on error
 *  ERRORS:
 *      - attribute not found
\*----------------------------------------------------------------------------*/
CWEXP const CW_XML_NODE *CwXmlAttr_GetRequiredNode(
    const CW_XML_NODE *elementNode, const char *attributeName );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlAttr_GetText() - get text value of named attribute
 *  DESCRIPTION:
 *      Returns the text value of the named attribute or a given default
 *      if the attribute is not found.  If the attribute is found but has
 *      no content (attr=""), the empty string will be returned ("").
 *  ARGUMENTS:
 *      elementNode   - xml-element-node object
 *      attributeName - name of attribute to search for
 *      defaultValue  - value to return if attribute is not found
 *      outText       - (out) text value of attribute content, or NULL if none
 *  RETURNS:
 *      err - 0 on success or -1 on error
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - otherwise, no errors are possible
\*----------------------------------------------------------------------------*/
CWEXP int CwXmlAttr_GetText( const CW_XML_NODE *elementNode,
    const char *attributeName, const char *defaultValue,
    const char **outText );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlAttr_GetRequiredText() - get text value of required attribute
 *  DESCRIPTION:
 *      Returns the text value of the named attribute, which must be
 *      present.  If the attribute has no content (attr=""), the empty
 *      string will be returned ("").
 *  ARGUMENTS:
 *      elementNode   - xml-element-node object
 *      attributeName - name of attribute to search for
 *  RETURNS:
 *      text - text content of attribute, or NULL on error
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - attribute not found
\*----------------------------------------------------------------------------*/
CWEXP const char *CwXmlAttr_GetRequiredText( const CW_XML_NODE *elementNode,
    const char *attributeName );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlAttr_GetDouble() - gets a scalar 'double' from an attribute
 *  DESCRIPTION:
 *      Returns a 'double' number from a named attribute given its parent
 *      element node.  If the attribute is not present, the 'defaultValue'
 *      is returned.  If the attribute is present, then it must have
 *      content which is a valid single 'double' number.
 *  ARGUMENTS:
 *      elementNode   - element-node object
 *      attributeName - name of attribute to search for
 *      defaultValue  - default value to return if the attribute is not found
 *      outDouble     - (out) result 'double' number
 *  RETURNS:
 *      rc - 0 on success, -1 on error, or -2 if node not found (not fatal)
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - invalid number
\*----------------------------------------------------------------------------*/
CWEXP int CwXmlAttr_GetDouble( const CW_XML_NODE *elementNode,
    const char *attributeName, double defaultValue, double *outDouble);

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlAttr_GetRequiredDouble() - gets a 'double' from an attribute
 *  DESCRIPTION:
 *      Returns a 'double' number from a named attribute given its parent
 *      element node.  The attribute must be present and it must have
 *      content which is a valid single 'double' number.
 *  ARGUMENTS:
 *      elementNode   - element-node object
 *      attributeName - name of attribute to search for
 *      outDouble     - (out) result 'double' number
 *  RETURNS:
 *      err - 0 on success or -1 on error
 *  ERRORS:
 *      - malloc errors are possible if they are enabled
 *      - no content found
 *      - invalid number
\*----------------------------------------------------------------------------*/
CWEXP int CwXmlAttr_GetRequiredDouble( const CW_XML_NODE *elementNode,
    const char *attributeName, double *outDouble );

/*----------------------------------------------------------------------------*\
 *  NAME:
 *      CwXmlAttr_GetLong() - gets a scalar 'long' int from an attribute
 *  DESCRIPTION:
 *      Returns a 'long' number from a named attribute given its parent
 *      element node.  If the attribute is not present, the 'defaultValue'
 *      is returned.  If the attribute is present, then it must have
 *      content which is a valid single 'long' number.
* ARGUMENTS: * elementNode - element-node object * attributeName - name of attribute to search for * defaultValue - default value to return if no content found * outLong - (out) result 'long' number * RETURNS: * rc - 0 on success, -1 on error, or -2 if node not found (not fatal) * ERRORS: * - malloc errors are possible if they are enabled * - invalid number \*----------------------------------------------------------------------------*/ CWEXP int CwXmlAttr_GetLong( const CW_XML_NODE *elementNode, const char *attributeName, long defaultValue, long *outLong ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlAttr_GetRequiredLong() - gets a 'long' int from an attribute * DESCRIPTION: * Returns a 'long' number from a named attribute given its parent * element node. The attribute must be present and it must have * content which is a valid single 'long' number. * ARGUMENTS: * elementNode - element-node object * attributeName - name of attribute to search for * outLong - (out) result 'long' number * RETURNS: * err - 0 on success or -1 on error * ERRORS: * - malloc errors are possible if they are enabled * - no content found * - invalid number \*----------------------------------------------------------------------------*/ CWEXP int CwXmlAttr_GetRequiredLong( const CW_XML_NODE *elementNode, const char *attributeName, long *outLong ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlAttr_GetBool() - gets a scalar 'bool' value from an attribute * DESCRIPTION: * Returns a 'bool' value from a named attribute given its parent * element node. If the attribute is not present, the 'defaultValue' * is returned. If the attribute is present, then it must have * content which is a valid single 'bool' number. 
* ARGUMENTS: * elementNode - element-node object * attributeName - name of attribute to search for * defaultValue - default value to return if no content found * outBool - (out) result 'bool' value * RETURNS: * rc - 0 on success, -1 on error, or -2 if node not found (not fatal) * ERRORS: * - malloc errors are possible if they are enabled * - invalid boolean value \*----------------------------------------------------------------------------*/ CWEXP int CwXmlAttr_GetBool( const CW_XML_NODE *elementNode, const char *attributeName, bool defaultValue, bool *outBool ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlAttr_GetRequiredBool() - gets a 'bool' value from an attribute * DESCRIPTION: * Returns a 'bool' value from a named attribute given its parent * element node. The attribute must be present and it must have * content which is a valid single 'bool' value. * ARGUMENTS: * elementNode - element-node object * attributeName - name of attribute to search for * outBool - (out) result 'long' number * RETURNS: * err - 0 on success or -1 on error * ERRORS: * - malloc errors are possible if they are enabled * - no content found * - invalid boolean value \*----------------------------------------------------------------------------*/ CWEXP int CwXmlAttr_GetRequiredBool( const CW_XML_NODE *elementNode, const char *attributeName, bool *outBool ); /*============================================================================*\ * SIMPLIFIED XPATH PROCESSING \*============================================================================*/ /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetNodes() - resolve an XPath to a list of entity nodes * DESCRIPTION: * This is an "micro"-XPath implementation that resolves an XPath * to a set of tree nodes, returning the list of matching nodes. 
* Only the most basic features of XPath are implemented, and this is * intended to be used to interpret scanned XML subtrees in a simple * way for a pre-defined application. * * The nodes will be returned in the scan order of the XML document. * The caller must free the returned node list. The startNode * is allowed to be NULL, in which case, no nodes will be found. * The caller is responsible for free()ing the returned pointer table * of nodes (but not the nodes themselves). * * The XPaths have the form "step1/step2/step3". The paths are * relative to a start node and the steps can have the following forms: * * - "name" - selects child element(s) with tagname "name" * - "@name" - selects attribute "name" * - "." or "" - selects current node * - "*" - selects all child element nodes * * A simple attribute condition may also be included in an XPath. * The condition must be of the form '[@attribute="value"]'. * * Multiple XPaths may also be specified in a list separated by the * '|' character. The returned set will be the union of all nodes * selected by each subexpression. * * ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * maxNNodes - maximum number of nodes to include in result, or -1 * outNodes - (out) list of selected entity-node pointers, or NULL * RETURNS: * nNodes - number of nodes found, or -1 on error * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath \*----------------------------------------------------------------------------*/ CWEXP long CwXmlPath_GetNodes( const CW_XML_NODE *startNode, const char *xPath, long maxNNodes, const CW_XML_NODE ***outNodes ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetNode() - gets the first node of an XPath * DESCRIPTION: * Locates the first node identified by the given XPath. * See CwXmlPath_GetNodes() for a description of XPath processing. 
* ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * outNode - (out) located node or NULL if no node found * RETURNS: * err - 0 on success or -1 on error * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath \*----------------------------------------------------------------------------*/ CWEXP int CwXmlPath_GetNode( const CW_XML_NODE *startNode, const char *xPath, const CW_XML_NODE **outNode ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetRequiredNode() - gets the first node of an XPath * DESCRIPTION: * Locates the first node identified by the given XPath, or fails if * no node is found. See CwXmlPath_GetNodes() for a description of * XPath processing. * ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * RETURNS: * node - located node, or NULL on error * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath * - required node is not present \*----------------------------------------------------------------------------*/ CWEXP const CW_XML_NODE *CwXmlPath_GetRequiredNode(const CW_XML_NODE *startNode, const char *xPath ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetText() - gets the text of an XPath node * DESCRIPTION: * Returns the CwXmlNode_GetText() of the identified XPath node. * If more than one node match the path, the only the first matching * node is used. See CwXmlPath_GetNodes() for a description of XPath * processing. If the node is not found then the 'defaultValue' * will be returned for the text. If the node is present but has no * content, the NULL pointer will be returned for the text. 
* ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * defaultValue - default value to return on no node or no content * outText - (out) text content of node, or 'defaultValue' * RETURNS: * err - 0 on success or -1 on error * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath \*----------------------------------------------------------------------------*/ CWEXP int CwXmlPath_GetText( const CW_XML_NODE *startNode, const char *xPath, const char *defaultValue, const char **outText ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetRequiredText() - gets the text of an XPath node * DESCRIPTION: * Returns the CwXmlNode_GetText() of the identified XPath node. * If more than one node match the path, the only the first matching * node is used. The XPath must resolve to at least one node and the * content of the node cannot be empty. See CwXmlPath_LocateNodes() * for a description of XPath processing. * ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * RETURNS: * text - text content of node, or NULL on error * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath * - no content found \*----------------------------------------------------------------------------*/ CWEXP const char *CwXmlPath_GetRequiredText( const CW_XML_NODE *startNode, const char *xPath ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetTexts() - gets an array of text values for XPath nodes * DESCRIPTION: * Returns an array with one entry for the CwXmlNode_GetText() * of each identified XPath node. If any nodes are identified by * have no content, they will have a NULL pointer for their string * value in the array. The caller is responsible for free()ing the * returned pointer array (but not the string entries). 
* ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * outTexts - (out) array of string pointers * RETURNS: * nTexts - number of text values returned (including 0), or -1 on error * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath \*----------------------------------------------------------------------------*/ CWEXP long CwXmlPath_GetTexts( const CW_XML_NODE *startNode, const char *xPath, const char ***outTexts ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetDouble() - gets a scalar 'double' from the XPath content * DESCRIPTION: * Returns a 'double' number from the content of an XPath node. * If more than one node match the path, the only the first matching * node is used. If no nodes are found, then the 'defaultValue' * is returned. If the node is found, then it must have content * which must be a valid single 'double' number. * ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * defaultValue - default value to return if no content found * outDouble - (out) result 'double' number * RETURNS: * rc - 0 on success, -1 on error, or -2 if node not found (not fatal) * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath * - invalid number \*----------------------------------------------------------------------------*/ CWEXP int CwXmlPath_GetDouble( const CW_XML_NODE *startNode, const char *xPath, double defaultValue, double *outDouble ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetRequiredDouble() - gets a 'double' from the XPath content * DESCRIPTION: * Returns a 'double' number from the content of an XPath node. * If more than one node match the path, the only the first matching * node is used. The XPath must resolve to at least one node and the * content of the node cannot be empty. The content must be a valid * 'double' number. 
* ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * outDouble - (out) result 'double' number * RETURNS: * err - 0 on success or -1 on error * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath * - no content found * - invalid number \*----------------------------------------------------------------------------*/ CWEXP int CwXmlPath_GetRequiredDouble( const CW_XML_NODE *startNode, const char *xPath, double *outDouble ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetLong() - gets a scalar 'long' int from the XPath content * DESCRIPTION: * Returns a 'long' number from the content of an XPath node. If more * than one node matchs the path, the only the first matching node * is used. If no nodes match, then the 'defaultValue' is returned. * If the node if found, then it must have content which must be a * valid 'long' number. * ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * defaultValue - default value to return if no content found * outLong - (out) result 'long' number * RETURNS: * rc - 0 on success, -1 on error, or -2 if node not found (not fatal) * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath * - invalid number \*----------------------------------------------------------------------------*/ CWEXP int CwXmlPath_GetLong( const CW_XML_NODE *startNode, const char *xPath, long defaultValue, long *outLong ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetRequiredLong() - gets a 'long' int from the XPath content * DESCRIPTION: * Returns a 'long' number from the content of an XPath node. * If more than one node match the path, the only the first matching * node is used. The node must be found and it must have content. * The content must be a valid 'long' number. 
* ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * outLong - (out) result 'long' number * RETURNS: * err - 0 on success or -1 on error * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath * - no content found * - invalid number \*----------------------------------------------------------------------------*/ CWEXP int CwXmlPath_GetRequiredLong( const CW_XML_NODE *startNode, const char *xPath, long *outLong ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetBool() - gets a scalar 'bool' value from the XPath content * DESCRIPTION: * Returns a 'bool' value from the content of an XPath node. If more * than one node matches the path, only the first matching node * is used. If no nodes match, then the 'defaultValue' is returned. * If the node is found, then it must have content which must be a * valid boolean value. * ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * defaultValue - default value to return if no content is present * outBool - (out) result 'bool' number * RETURNS: * rc - 0 on success, -1 on error, or -2 if node not found (not fatal) * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath * - invalid boolean value \*----------------------------------------------------------------------------*/ CWEXP int CwXmlPath_GetBool( const CW_XML_NODE *startNode, const char *xPath, bool defaultValue, bool *outBool ); /*----------------------------------------------------------------------------*\ * NAME: * CwXmlPath_GetRequiredBool() - gets a 'bool' value from the XPath content * DESCRIPTION: * Returns a 'bool' value from the content of an XPath node. If more * than one node match the path, the only the first matching node * is used. The XPath must resolve to at least one node and the * content of the node cannot be empty. The content must be a valid * 'bool' value. 
* ARGUMENTS: * startNode - starting node of subtree * xPath - XPath expression * outBool - (out) result 'long' number * RETURNS: * err - 0 on success or -1 on error * ERRORS: * - malloc errors are possible if they are enabled * - malformed XPath * - no content found * - invalid boolean value \*----------------------------------------------------------------------------*/ CWEXP int CwXmlPath_GetRequiredBool( const CW_XML_NODE *startNode, const char *xPath, bool *outBool ); /*============================================================================*\ * STRING-TABLE METHODS \*============================================================================*/ /*----------------------------------------------------------------------------*\ * NAME: * CwXmlStrTabEnt_GetString() - get string value for string-table entry * DESCRIPTION: * Returns the string value for the given string-table entry. * ARGUMENTS: * entry - string-table entry object * RETURNS: * string - string value * ERRORS: * (no errors are possible) \*----------------------------------------------------------------------------*/ #define CwXmlStrTabEnt_GetString(entry) CwStrObj_Deref((entry)->strObj) /*----------------------------------------------------------------------------*\ * NAME: * CwXmlStrTabEnt_GetStrLen() - get string length of string-table entry * DESCRIPTION: * Returns the string length of the given string-table entry. * ARGUMENTS: * entry - string-table entry object * RETURNS: * length - string length * ERRORS: * (no errors are possible) \*----------------------------------------------------------------------------*/ #define CwXmlStrTabEnt_GetStrLen(entry) CwStrObj_GetLen((entry)->strObj) #ifdef __cplusplus }; #endif #endif /*----------------------------------------------------------------------------*\ * END OF MODULE: cw_xmltree.h \*----------------------------------------------------------------------------*/