From: Ivan Hernanez Date: Mon, 28 May 2012 21:54:55 +0000 (-0500) Subject: added yaml library to source control X-Git-Url: http://git.mmlx.us/?a=commitdiff_plain;h=3ab0e0fb984434f4006bd36af7a4e2a5aa79d5db;p=IvanGame.git added yaml library to source control --- diff --git a/Animation.cpp b/Animation.cpp index 902cbe9..0ea926e 100644 --- a/Animation.cpp +++ b/Animation.cpp @@ -10,7 +10,7 @@ #include "Animation.h" #include "fns.h" #include -#include "yaml-cpp/yaml.h" +#include using namespace std; Animation::Animation():mName() diff --git a/Level.cpp b/Level.cpp index 2a01403..588439b 100644 --- a/Level.cpp +++ b/Level.cpp @@ -46,15 +46,7 @@ void Level::DrawIMG(SDL_Surface *img, int x, int y, int w, int h, int x2, int y2 SDL_BlitSurface(img, &src, mScreen, &dest); } -int round(double x) -{ - return (int)(x + 0.5); -} - -void SDL_GL_RenderText(string s, - TTF_Font *font, - SDL_Color color, - SDL_Rect *location) +void SDL_GL_RenderText(string s, TTF_Font *font, SDL_Color color, SDL_Rect *location, bool unicode=false) { const char* text = s.c_str(); SDL_Surface *initial; @@ -63,8 +55,10 @@ void SDL_GL_RenderText(string s, int w,h; Texture texture; + Uint16 t[] = {'日','本','語'}; + /* Use SDL_TTF to render our text */ - initial = TTF_RenderText_Blended(font, text, color); + initial = unicode?TTF_RenderUNICODE_Blended(font, t, color):TTF_RenderText_Blended(font, text, color); /* Convert the rendered text to a known format */ w = initial->w; @@ -78,8 +72,7 @@ void SDL_GL_RenderText(string s, /* Tell GL about our new texture */ glGenTextures(1, &texture); glBindTexture(GL_TEXTURE_2D, texture); - glTexImage2D(GL_TEXTURE_2D, 0, 4, w, h, 0, GL_BGRA, - GL_UNSIGNED_BYTE, intermediary->pixels ); + glTexImage2D(GL_TEXTURE_2D, 0, 4, w, h, 0, GL_BGRA, GL_UNSIGNED_BYTE, intermediary->pixels ); /* GL_NEAREST looks horrible, if scaled... 
*/ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); @@ -143,9 +136,8 @@ void Level::drawScene() TTF_CloseFont(font); - // Load a font - TTF_Font *font; - font = TTF_OpenFont("FreeSans.ttf", 24); + // Load another font + font = TTF_OpenFont("Japanese.ttf", 24); if(font){ // Write text to surface @@ -153,7 +145,7 @@ void Level::drawScene() SDL_Color text_color = {0xFF, 0xFF, 0xFF}; string txt="日本語"; SDL_Rect rect = {0,100,100,100}; - SDL_GL_RenderText(txt, font, text_color, &rect); + SDL_GL_RenderText(txt, font, text_color, &rect, true); } else cerr << "TTF_OpenFont() Failed: " << TTF_GetError() << endl; diff --git a/game.vcxproj b/game.vcxproj index 6cc0485..e66f718 100644 --- a/game.vcxproj +++ b/game.vcxproj @@ -50,12 +50,12 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - C:\SDL;%(AdditionalIncludeDirectories) + C:\SDL;$(SolutionDir)\game\yaml-cpp\include;%(AdditionalIncludeDirectories) Console true - C:\SDL\lib;%(AdditionalLibraryDirectories) + C:\SDL\lib;$(SolutionDir)game\yaml-cpp\build\Debug;%(AdditionalLibraryDirectories) SDL_ttf.lib;libyaml-cppmdd.lib;SDL.lib;SDL_image.lib;SDLmain.lib;SDL_draw.lib;glu32.lib;opengl32.lib;%(AdditionalDependencies) false diff --git a/libyaml-cppmdd.pdb b/libyaml-cppmdd.pdb deleted file mode 100644 index 9a407a2..0000000 Binary files a/libyaml-cppmdd.pdb and /dev/null differ diff --git a/libyaml-cppmdd_Windows.pdb b/libyaml-cppmdd_Windows.pdb deleted file mode 100644 index 9a407a2..0000000 Binary files a/libyaml-cppmdd_Windows.pdb and /dev/null differ diff --git a/yaml-cpp/aliasmanager.h b/yaml-cpp/include/yaml-cpp/aliasmanager.h similarity index 100% rename from yaml-cpp/aliasmanager.h rename to yaml-cpp/include/yaml-cpp/aliasmanager.h diff --git a/yaml-cpp/anchor.h b/yaml-cpp/include/yaml-cpp/anchor.h similarity index 100% rename from yaml-cpp/anchor.h rename to yaml-cpp/include/yaml-cpp/anchor.h diff --git a/yaml-cpp/binary.h b/yaml-cpp/include/yaml-cpp/binary.h similarity index 100% 
rename from yaml-cpp/binary.h rename to yaml-cpp/include/yaml-cpp/binary.h diff --git a/yaml-cpp/contrib/anchordict.h b/yaml-cpp/include/yaml-cpp/contrib/anchordict.h similarity index 100% rename from yaml-cpp/contrib/anchordict.h rename to yaml-cpp/include/yaml-cpp/contrib/anchordict.h diff --git a/yaml-cpp/contrib/graphbuilder.h b/yaml-cpp/include/yaml-cpp/contrib/graphbuilder.h similarity index 100% rename from yaml-cpp/contrib/graphbuilder.h rename to yaml-cpp/include/yaml-cpp/contrib/graphbuilder.h diff --git a/yaml-cpp/conversion.h b/yaml-cpp/include/yaml-cpp/conversion.h similarity index 100% rename from yaml-cpp/conversion.h rename to yaml-cpp/include/yaml-cpp/conversion.h diff --git a/yaml-cpp/dll.h b/yaml-cpp/include/yaml-cpp/dll.h similarity index 100% rename from yaml-cpp/dll.h rename to yaml-cpp/include/yaml-cpp/dll.h diff --git a/yaml-cpp/emitfromevents.h b/yaml-cpp/include/yaml-cpp/emitfromevents.h similarity index 100% rename from yaml-cpp/emitfromevents.h rename to yaml-cpp/include/yaml-cpp/emitfromevents.h diff --git a/yaml-cpp/emitter.h b/yaml-cpp/include/yaml-cpp/emitter.h similarity index 100% rename from yaml-cpp/emitter.h rename to yaml-cpp/include/yaml-cpp/emitter.h diff --git a/yaml-cpp/emittermanip.h b/yaml-cpp/include/yaml-cpp/emittermanip.h similarity index 100% rename from yaml-cpp/emittermanip.h rename to yaml-cpp/include/yaml-cpp/emittermanip.h diff --git a/yaml-cpp/eventhandler.h b/yaml-cpp/include/yaml-cpp/eventhandler.h similarity index 100% rename from yaml-cpp/eventhandler.h rename to yaml-cpp/include/yaml-cpp/eventhandler.h diff --git a/yaml-cpp/exceptions.h b/yaml-cpp/include/yaml-cpp/exceptions.h similarity index 100% rename from yaml-cpp/exceptions.h rename to yaml-cpp/include/yaml-cpp/exceptions.h diff --git a/yaml-cpp/iterator.h b/yaml-cpp/include/yaml-cpp/iterator.h similarity index 100% rename from yaml-cpp/iterator.h rename to yaml-cpp/include/yaml-cpp/iterator.h diff --git a/yaml-cpp/ltnode.h 
b/yaml-cpp/include/yaml-cpp/ltnode.h similarity index 100% rename from yaml-cpp/ltnode.h rename to yaml-cpp/include/yaml-cpp/ltnode.h diff --git a/yaml-cpp/mark.h b/yaml-cpp/include/yaml-cpp/mark.h similarity index 100% rename from yaml-cpp/mark.h rename to yaml-cpp/include/yaml-cpp/mark.h diff --git a/yaml-cpp/node.h b/yaml-cpp/include/yaml-cpp/node.h similarity index 100% rename from yaml-cpp/node.h rename to yaml-cpp/include/yaml-cpp/node.h diff --git a/yaml-cpp/nodeimpl.h b/yaml-cpp/include/yaml-cpp/nodeimpl.h similarity index 100% rename from yaml-cpp/nodeimpl.h rename to yaml-cpp/include/yaml-cpp/nodeimpl.h diff --git a/yaml-cpp/nodereadimpl.h b/yaml-cpp/include/yaml-cpp/nodereadimpl.h similarity index 100% rename from yaml-cpp/nodereadimpl.h rename to yaml-cpp/include/yaml-cpp/nodereadimpl.h diff --git a/yaml-cpp/nodeutil.h b/yaml-cpp/include/yaml-cpp/nodeutil.h similarity index 100% rename from yaml-cpp/nodeutil.h rename to yaml-cpp/include/yaml-cpp/nodeutil.h diff --git a/yaml-cpp/noncopyable.h b/yaml-cpp/include/yaml-cpp/noncopyable.h similarity index 100% rename from yaml-cpp/noncopyable.h rename to yaml-cpp/include/yaml-cpp/noncopyable.h diff --git a/yaml-cpp/null.h b/yaml-cpp/include/yaml-cpp/null.h similarity index 100% rename from yaml-cpp/null.h rename to yaml-cpp/include/yaml-cpp/null.h diff --git a/yaml-cpp/ostream.h b/yaml-cpp/include/yaml-cpp/ostream.h similarity index 100% rename from yaml-cpp/ostream.h rename to yaml-cpp/include/yaml-cpp/ostream.h diff --git a/yaml-cpp/parser.h b/yaml-cpp/include/yaml-cpp/parser.h similarity index 100% rename from yaml-cpp/parser.h rename to yaml-cpp/include/yaml-cpp/parser.h diff --git a/yaml-cpp/stlemitter.h b/yaml-cpp/include/yaml-cpp/stlemitter.h similarity index 100% rename from yaml-cpp/stlemitter.h rename to yaml-cpp/include/yaml-cpp/stlemitter.h diff --git a/yaml-cpp/stlnode.h b/yaml-cpp/include/yaml-cpp/stlnode.h similarity index 100% rename from yaml-cpp/stlnode.h rename to 
yaml-cpp/include/yaml-cpp/stlnode.h diff --git a/yaml-cpp/traits.h b/yaml-cpp/include/yaml-cpp/traits.h similarity index 100% rename from yaml-cpp/traits.h rename to yaml-cpp/include/yaml-cpp/traits.h diff --git a/yaml-cpp/yaml.h b/yaml-cpp/include/yaml-cpp/yaml.h similarity index 100% rename from yaml-cpp/yaml.h rename to yaml-cpp/include/yaml-cpp/yaml.h diff --git a/yaml-cpp/install.txt b/yaml-cpp/install.txt new file mode 100644 index 0000000..9392362 --- /dev/null +++ b/yaml-cpp/install.txt @@ -0,0 +1,24 @@ +*** With CMake *** + +yaml-cpp uses CMake to support cross-platform building. In a UNIX-like system, the basic steps to build are: + +1. Download and install CMake (if you don't have root privileges, just install to a local directory, like ~/bin) + +2. From the source directory, run: + +mkdir build +cd build +cmake .. + +and then the usual + +make +make install + +3. To clean up, just remove the 'build' directory. + +*** Without CMake *** + +If you don't want to use CMake, just add all .cpp files to a makefile. yaml-cpp does not need any special build settings, so no 'configure' file is necessary. + +(Note: this is pretty tedious. It's sooo much easier to use CMake.) diff --git a/yaml-cpp/license.txt b/yaml-cpp/license.txt new file mode 100644 index 0000000..5bd9e1a --- /dev/null +++ b/yaml-cpp/license.txt @@ -0,0 +1,19 @@ +Copyright (c) 2008 Jesse Beder. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/yaml-cpp/src/aliasmanager.cpp b/yaml-cpp/src/aliasmanager.cpp new file mode 100644 index 0000000..ed4d3b5 --- /dev/null +++ b/yaml-cpp/src/aliasmanager.cpp @@ -0,0 +1,29 @@ +#include "yaml-cpp/aliasmanager.h" +#include "yaml-cpp/node.h" +#include +#include + +namespace YAML +{ + AliasManager::AliasManager(): m_curAnchor(0) + { + } + + void AliasManager::RegisterReference(const Node& node) + { + m_anchorByIdentity.insert(std::make_pair(&node, _CreateNewAnchor())); + } + + anchor_t AliasManager::LookupAnchor(const Node& node) const + { + AnchorByIdentity::const_iterator it = m_anchorByIdentity.find(&node); + if(it == m_anchorByIdentity.end()) + return 0; + return it->second; + } + + anchor_t AliasManager::_CreateNewAnchor() + { + return ++m_curAnchor; + } +} diff --git a/yaml-cpp/src/binary.cpp b/yaml-cpp/src/binary.cpp new file mode 100644 index 0000000..589eb08 --- /dev/null +++ b/yaml-cpp/src/binary.cpp @@ -0,0 +1,102 @@ +#include "yaml-cpp/binary.h" +#include "yaml-cpp/node.h" + +namespace YAML +{ + static const char encoding[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + std::string EncodeBase64(const unsigned char *data, std::size_t size) + { + const char PAD = '='; + + std::string ret; + ret.resize(4 * size / 3 + 3); + char *out = &ret[0]; + + std::size_t chunks = size / 3; + std::size_t remainder = size % 3; + + for(std::size_t i=0;i> 2]; + *out++ = encoding[((data[0] & 0x3) << 4) | (data[1] >> 4)]; + *out++ = encoding[((data[1] & 0xf) << 2) | (data[2] >> 
6)]; + *out++ = encoding[data[2] & 0x3f]; + } + + switch(remainder) { + case 0: + break; + case 1: + *out++ = encoding[data[0] >> 2]; + *out++ = encoding[((data[0] & 0x3) << 4)]; + *out++ = PAD; + *out++ = PAD; + break; + case 2: + *out++ = encoding[data[0] >> 2]; + *out++ = encoding[((data[0] & 0x3) << 4) | (data[1] >> 4)]; + *out++ = encoding[((data[1] & 0xf) << 2)]; + *out++ = PAD; + break; + } + + ret.resize(out - &ret[0]); + return ret; + } + + static const unsigned char decoding[] = { + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255, 0,255,255, + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255, + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + }; + + std::vector DecodeBase64(const std::string& input) + { + typedef std::vector ret_type; + if(input.empty()) + return ret_type(); + + ret_type ret(3 * input.size() / 4 + 1); + unsigned char *out = &ret[0]; + + unsigned value = 0; + for(std::size_t i=0;i(input[i])]; + if(d == 255) + return ret_type(); + + value = (value << 6) | d; + if(i % 4 == 3) { + *out++ = value >> 16; + if(i > 0 && input[i - 1] != '=') + *out++ = value >> 8; + 
if(input[i] != '=') + *out++ = value; + } + } + + ret.resize(out - &ret[0]); + return ret; + } + + void operator >> (const Node& node, Binary& binary) + { + std::string scalar; + node.GetScalar(scalar); + std::vector data = DecodeBase64(scalar); + binary.swap(data); + } +} diff --git a/yaml-cpp/src/collectionstack.h b/yaml-cpp/src/collectionstack.h new file mode 100644 index 0000000..4a986bc --- /dev/null +++ b/yaml-cpp/src/collectionstack.h @@ -0,0 +1,35 @@ +#ifndef COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include +#include + +namespace YAML +{ + struct CollectionType { + enum value { None, BlockMap, BlockSeq, FlowMap, FlowSeq, CompactMap }; + }; + + class CollectionStack + { + public: + CollectionType::value GetCurCollectionType() const { + if(collectionStack.empty()) + return CollectionType::None; + return collectionStack.top(); + } + + void PushCollectionType(CollectionType::value type) { collectionStack.push(type); } + void PopCollectionType(CollectionType::value type) { assert(type == GetCurCollectionType()); collectionStack.pop(); } + + private: + std::stack collectionStack; + }; +} + +#endif // COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/contrib/graphbuilder.cpp b/yaml-cpp/src/contrib/graphbuilder.cpp new file mode 100644 index 0000000..ab5159c --- /dev/null +++ b/yaml-cpp/src/contrib/graphbuilder.cpp @@ -0,0 +1,16 @@ +#include "yaml-cpp/parser.h" +#include "yaml-cpp/contrib/graphbuilder.h" +#include "graphbuilderadapter.h" + +namespace YAML +{ + void *BuildGraphOfNextDocument(Parser& parser, GraphBuilderInterface& graphBuilder) + { + GraphBuilderAdapter eventHandler(graphBuilder); + if (parser.HandleNextDocument(eventHandler)) { + return 
eventHandler.RootNode(); + } else { + return NULL; + } + } +} diff --git a/yaml-cpp/src/contrib/graphbuilderadapter.cpp b/yaml-cpp/src/contrib/graphbuilderadapter.cpp new file mode 100644 index 0000000..557e97c --- /dev/null +++ b/yaml-cpp/src/contrib/graphbuilderadapter.cpp @@ -0,0 +1,96 @@ +#include "graphbuilderadapter.h" + +namespace YAML +{ + int GraphBuilderAdapter::ContainerFrame::sequenceMarker; + + void GraphBuilderAdapter::OnNull(const Mark& mark, anchor_t anchor) + { + void *pParent = GetCurrentParent(); + void *pNode = m_builder.NewNull(mark, pParent); + RegisterAnchor(anchor, pNode); + + DispositionNode(pNode); + } + + void GraphBuilderAdapter::OnAlias(const Mark& mark, anchor_t anchor) + { + void *pReffedNode = m_anchors.Get(anchor); + DispositionNode(m_builder.AnchorReference(mark, pReffedNode)); + } + + void GraphBuilderAdapter::OnScalar(const Mark& mark, const std::string& tag, anchor_t anchor, const std::string& value) + { + void *pParent = GetCurrentParent(); + void *pNode = m_builder.NewScalar(mark, tag, pParent, value); + RegisterAnchor(anchor, pNode); + + DispositionNode(pNode); + } + + void GraphBuilderAdapter::OnSequenceStart(const Mark& mark, const std::string& tag, anchor_t anchor) + { + void *pNode = m_builder.NewSequence(mark, tag, GetCurrentParent()); + m_containers.push(ContainerFrame(pNode)); + RegisterAnchor(anchor, pNode); + } + + void GraphBuilderAdapter::OnSequenceEnd() + { + void *pSequence = m_containers.top().pContainer; + m_containers.pop(); + + DispositionNode(pSequence); + } + + void GraphBuilderAdapter::OnMapStart(const Mark& mark, const std::string& tag, anchor_t anchor) + { + void *pNode = m_builder.NewMap(mark, tag, GetCurrentParent()); + m_containers.push(ContainerFrame(pNode, m_pKeyNode)); + m_pKeyNode = NULL; + RegisterAnchor(anchor, pNode); + } + + void GraphBuilderAdapter::OnMapEnd() + { + void *pMap = m_containers.top().pContainer; + m_pKeyNode = m_containers.top().pPrevKeyNode; + m_containers.pop(); + 
DispositionNode(pMap); + } + + void *GraphBuilderAdapter::GetCurrentParent() const + { + if (m_containers.empty()) { + return NULL; + } + return m_containers.top().pContainer; + } + + void GraphBuilderAdapter::RegisterAnchor(anchor_t anchor, void *pNode) + { + if (anchor) { + m_anchors.Register(anchor, pNode); + } + } + + void GraphBuilderAdapter::DispositionNode(void *pNode) + { + if (m_containers.empty()) { + m_pRootNode = pNode; + return; + } + + void *pContainer = m_containers.top().pContainer; + if (m_containers.top().isMap()) { + if (m_pKeyNode) { + m_builder.AssignInMap(pContainer, m_pKeyNode, pNode); + m_pKeyNode = NULL; + } else { + m_pKeyNode = pNode; + } + } else { + m_builder.AppendToSequence(pContainer, pNode); + } + } +} diff --git a/yaml-cpp/src/contrib/graphbuilderadapter.h b/yaml-cpp/src/contrib/graphbuilderadapter.h new file mode 100644 index 0000000..3ef8ab6 --- /dev/null +++ b/yaml-cpp/src/contrib/graphbuilderadapter.h @@ -0,0 +1,73 @@ +#ifndef GRAPHBUILDERADAPTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define GRAPHBUILDERADAPTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + +#include +#include +#include +#include "yaml-cpp/eventhandler.h" +#include "yaml-cpp/contrib/anchordict.h" +#include "yaml-cpp/contrib/graphbuilder.h" + +namespace YAML +{ + class GraphBuilderAdapter : public EventHandler + { + public: + GraphBuilderAdapter(GraphBuilderInterface& builder) + : m_builder(builder), m_pRootNode(NULL), m_pKeyNode(NULL) + { + } + + virtual void OnDocumentStart(const Mark& mark) {(void)mark;} + virtual void OnDocumentEnd() {} + + virtual void OnNull(const Mark& mark, anchor_t anchor); + virtual void OnAlias(const Mark& mark, anchor_t anchor); + virtual void OnScalar(const Mark& mark, const std::string& tag, anchor_t anchor, const std::string& value); + + virtual void 
OnSequenceStart(const Mark& mark, const std::string& tag, anchor_t anchor); + virtual void OnSequenceEnd(); + + virtual void OnMapStart(const Mark& mark, const std::string& tag, anchor_t anchor); + virtual void OnMapEnd(); + + void *RootNode() const {return m_pRootNode;} + + private: + struct ContainerFrame + { + ContainerFrame(void *pSequence) + : pContainer(pSequence), pPrevKeyNode(&sequenceMarker) + {} + ContainerFrame(void *pMap, void* pPrevKeyNode) + : pContainer(pMap), pPrevKeyNode(pPrevKeyNode) + {} + + void *pContainer; + void *pPrevKeyNode; + + bool isMap() const {return pPrevKeyNode != &sequenceMarker;} + + private: + static int sequenceMarker; + }; + typedef std::stack ContainerStack; + typedef AnchorDict AnchorMap; + + GraphBuilderInterface& m_builder; + ContainerStack m_containers; + AnchorMap m_anchors; + void *m_pRootNode; + void *m_pKeyNode; + + void *GetCurrentParent() const; + void RegisterAnchor(anchor_t anchor, void *pNode); + void DispositionNode(void *pNode); + }; +} + +#endif // GRAPHBUILDERADAPTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/conversion.cpp b/yaml-cpp/src/conversion.cpp new file mode 100644 index 0000000..f81e1a0 --- /dev/null +++ b/yaml-cpp/src/conversion.cpp @@ -0,0 +1,89 @@ +#include "yaml-cpp/conversion.h" +#include + +//////////////////////////////////////////////////////////////// +// Specializations for converting a string to specific types + +namespace +{ + // we're not gonna mess with the mess that is all the isupper/etc. functions + bool IsLower(char ch) { return 'a' <= ch && ch <= 'z'; } + bool IsUpper(char ch) { return 'A' <= ch && ch <= 'Z'; } + char ToLower(char ch) { return IsUpper(ch) ? 
ch + 'a' - 'A' : ch; } + + std::string tolower(const std::string& str) + { + std::string s(str); + std::transform(s.begin(), s.end(), s.begin(), ToLower); + return s; + } + + template + bool IsEntirely(const std::string& str, T func) + { + for(std::size_t i=0;i::const_iterator it = tags.find(handle); + if(it == tags.end()) { + if(handle == "!!") + return "tag:yaml.org,2002:"; + return handle; + } + + return it->second; + } +} diff --git a/yaml-cpp/src/directives.h b/yaml-cpp/src/directives.h new file mode 100644 index 0000000..a3308f7 --- /dev/null +++ b/yaml-cpp/src/directives.h @@ -0,0 +1,29 @@ +#ifndef DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include +#include + +namespace YAML +{ + struct Version { + bool isDefault; + int major, minor; + }; + + struct Directives { + Directives(); + + const std::string TranslateTagHandle(const std::string& handle) const; + + Version version; + std::map tags; + }; +} + +#endif // DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/emitfromevents.cpp b/yaml-cpp/src/emitfromevents.cpp new file mode 100644 index 0000000..49fc10b --- /dev/null +++ b/yaml-cpp/src/emitfromevents.cpp @@ -0,0 +1,105 @@ +#include "yaml-cpp/emitfromevents.h" +#include "yaml-cpp/emitter.h" +#include "yaml-cpp/null.h" +#include +#include + +namespace { + std::string ToString(YAML::anchor_t anchor) { + std::stringstream stream; + stream << anchor; + return stream.str(); + } +} + +namespace YAML +{ + EmitFromEvents::EmitFromEvents(Emitter& emitter): m_emitter(emitter) + { + } + + void EmitFromEvents::OnDocumentStart(const Mark&) + { + } + + void EmitFromEvents::OnDocumentEnd() + { + } + + void EmitFromEvents::OnNull(const Mark&, anchor_t anchor) + { + BeginNode(); + EmitProps("", 
anchor); + m_emitter << Null; + } + + void EmitFromEvents::OnAlias(const Mark&, anchor_t anchor) + { + BeginNode(); + m_emitter << Alias(ToString(anchor)); + } + + void EmitFromEvents::OnScalar(const Mark&, const std::string& tag, anchor_t anchor, const std::string& value) + { + BeginNode(); + EmitProps(tag, anchor); + m_emitter << value; + } + + void EmitFromEvents::OnSequenceStart(const Mark&, const std::string& tag, anchor_t anchor) + { + BeginNode(); + EmitProps(tag, anchor); + m_emitter << BeginSeq; + m_stateStack.push(State::WaitingForSequenceEntry); + } + + void EmitFromEvents::OnSequenceEnd() + { + m_emitter << EndSeq; + assert(m_stateStack.top() == State::WaitingForSequenceEntry); + m_stateStack.pop(); + } + + void EmitFromEvents::OnMapStart(const Mark&, const std::string& tag, anchor_t anchor) + { + BeginNode(); + EmitProps(tag, anchor); + m_emitter << BeginMap; + m_stateStack.push(State::WaitingForKey); + } + + void EmitFromEvents::OnMapEnd() + { + m_emitter << EndMap; + assert(m_stateStack.top() == State::WaitingForKey); + m_stateStack.pop(); + } + + void EmitFromEvents::BeginNode() + { + if(m_stateStack.empty()) + return; + + switch(m_stateStack.top()) { + case State::WaitingForKey: + m_emitter << Key; + m_stateStack.top() = State::WaitingForValue; + break; + case State::WaitingForValue: + m_emitter << Value; + m_stateStack.top() = State::WaitingForKey; + break; + default: + break; + } + } + + void EmitFromEvents::EmitProps(const std::string& tag, anchor_t anchor) + { + if(!tag.empty() && tag != "?") + m_emitter << VerbatimTag(tag); + if(anchor) + m_emitter << Anchor(ToString(anchor)); + } +} diff --git a/yaml-cpp/src/emitter.cpp b/yaml-cpp/src/emitter.cpp new file mode 100644 index 0000000..91f48da --- /dev/null +++ b/yaml-cpp/src/emitter.cpp @@ -0,0 +1,882 @@ +#include "yaml-cpp/emitter.h" +#include "emitterstate.h" +#include "emitterutils.h" +#include "indentation.h" +#include "yaml-cpp/exceptions.h" +#include + +namespace YAML +{ + 
Emitter::Emitter(): m_pState(new EmitterState) + { + } + + Emitter::~Emitter() + { + } + + const char *Emitter::c_str() const + { + return m_stream.str(); + } + + unsigned Emitter::size() const + { + return m_stream.pos(); + } + + // state checking + bool Emitter::good() const + { + return m_pState->good(); + } + + const std::string Emitter::GetLastError() const + { + return m_pState->GetLastError(); + } + + // global setters + bool Emitter::SetOutputCharset(EMITTER_MANIP value) + { + return m_pState->SetOutputCharset(value, GLOBAL); + } + + bool Emitter::SetStringFormat(EMITTER_MANIP value) + { + return m_pState->SetStringFormat(value, GLOBAL); + } + + bool Emitter::SetBoolFormat(EMITTER_MANIP value) + { + bool ok = false; + if(m_pState->SetBoolFormat(value, GLOBAL)) + ok = true; + if(m_pState->SetBoolCaseFormat(value, GLOBAL)) + ok = true; + if(m_pState->SetBoolLengthFormat(value, GLOBAL)) + ok = true; + return ok; + } + + bool Emitter::SetIntBase(EMITTER_MANIP value) + { + return m_pState->SetIntFormat(value, GLOBAL); + } + + bool Emitter::SetSeqFormat(EMITTER_MANIP value) + { + return m_pState->SetFlowType(GT_SEQ, value, GLOBAL); + } + + bool Emitter::SetMapFormat(EMITTER_MANIP value) + { + bool ok = false; + if(m_pState->SetFlowType(GT_MAP, value, GLOBAL)) + ok = true; + if(m_pState->SetMapKeyFormat(value, GLOBAL)) + ok = true; + return ok; + } + + bool Emitter::SetIndent(unsigned n) + { + return m_pState->SetIndent(n, GLOBAL); + } + + bool Emitter::SetPreCommentIndent(unsigned n) + { + return m_pState->SetPreCommentIndent(n, GLOBAL); + } + + bool Emitter::SetPostCommentIndent(unsigned n) + { + return m_pState->SetPostCommentIndent(n, GLOBAL); + } + + bool Emitter::SetFloatPrecision(unsigned n) + { + return m_pState->SetFloatPrecision(n, GLOBAL); + } + + bool Emitter::SetDoublePrecision(unsigned n) + { + return m_pState->SetDoublePrecision(n, GLOBAL); + } + + // SetLocalValue + // . 
Either start/end a group, or set a modifier locally + Emitter& Emitter::SetLocalValue(EMITTER_MANIP value) + { + if(!good()) + return *this; + + switch(value) { + case BeginDoc: + EmitBeginDoc(); + break; + case EndDoc: + EmitEndDoc(); + break; + case BeginSeq: + EmitBeginSeq(); + break; + case EndSeq: + EmitEndSeq(); + break; + case BeginMap: + EmitBeginMap(); + break; + case EndMap: + EmitEndMap(); + break; + case Key: + EmitKey(); + break; + case Value: + EmitValue(); + break; + case TagByKind: + EmitKindTag(); + break; + case Newline: + EmitNewline(); + break; + default: + m_pState->SetLocalValue(value); + break; + } + return *this; + } + + Emitter& Emitter::SetLocalIndent(const _Indent& indent) + { + m_pState->SetIndent(indent.value, LOCAL); + return *this; + } + + Emitter& Emitter::SetLocalPrecision(const _Precision& precision) + { + if(precision.floatPrecision >= 0) + m_pState->SetFloatPrecision(precision.floatPrecision, LOCAL); + if(precision.doublePrecision >= 0) + m_pState->SetDoublePrecision(precision.doublePrecision, LOCAL); + return *this; + } + + // GotoNextPreAtomicState + // . 
Runs the state machine, emitting if necessary, and returns 'true' if done (i.e., ready to emit an atom) + bool Emitter::GotoNextPreAtomicState() + { + if(!good()) + return true; + + unsigned curIndent = m_pState->GetCurIndent(); + + EMITTER_STATE curState = m_pState->GetCurState(); + switch(curState) { + // document-level + case ES_WAITING_FOR_DOC: + m_pState->SwitchState(ES_WRITING_DOC); + return true; + case ES_WRITING_DOC: + return true; + case ES_DONE_WITH_DOC: + EmitBeginDoc(); + return false; + + // block sequence + case ES_WAITING_FOR_BLOCK_SEQ_ENTRY: + m_stream << IndentTo(curIndent) << "-"; + m_pState->RequireSoftSeparation(); + m_pState->SwitchState(ES_WRITING_BLOCK_SEQ_ENTRY); + return true; + case ES_WRITING_BLOCK_SEQ_ENTRY: + return true; + case ES_DONE_WITH_BLOCK_SEQ_ENTRY: + m_stream << '\n'; + m_pState->SwitchState(ES_WAITING_FOR_BLOCK_SEQ_ENTRY); + return false; + + // flow sequence + case ES_WAITING_FOR_FLOW_SEQ_ENTRY: + m_pState->SwitchState(ES_WRITING_FLOW_SEQ_ENTRY); + return true; + case ES_WRITING_FLOW_SEQ_ENTRY: + return true; + case ES_DONE_WITH_FLOW_SEQ_ENTRY: + EmitSeparationIfNecessary(); + m_stream << ','; + m_pState->RequireSoftSeparation(); + m_pState->SwitchState(ES_WAITING_FOR_FLOW_SEQ_ENTRY); + return false; + + // block map + case ES_WAITING_FOR_BLOCK_MAP_ENTRY: + m_pState->SetError(ErrorMsg::EXPECTED_KEY_TOKEN); + return true; + case ES_WAITING_FOR_BLOCK_MAP_KEY: + if(m_pState->CurrentlyInLongKey()) { + m_stream << IndentTo(curIndent) << '?'; + m_pState->RequireSoftSeparation(); + } + m_pState->SwitchState(ES_WRITING_BLOCK_MAP_KEY); + return true; + case ES_WRITING_BLOCK_MAP_KEY: + return true; + case ES_DONE_WITH_BLOCK_MAP_KEY: + m_pState->SetError(ErrorMsg::EXPECTED_VALUE_TOKEN); + return true; + case ES_WAITING_FOR_BLOCK_MAP_VALUE: + m_pState->SwitchState(ES_WRITING_BLOCK_MAP_VALUE); + return true; + case ES_WRITING_BLOCK_MAP_VALUE: + return true; + case ES_DONE_WITH_BLOCK_MAP_VALUE: + 
m_pState->SetError(ErrorMsg::EXPECTED_KEY_TOKEN); + return true; + + // flow map + case ES_WAITING_FOR_FLOW_MAP_ENTRY: + m_pState->SetError(ErrorMsg::EXPECTED_KEY_TOKEN); + return true; + case ES_WAITING_FOR_FLOW_MAP_KEY: + EmitSeparationIfNecessary(); + m_pState->SwitchState(ES_WRITING_FLOW_MAP_KEY); + if(m_pState->CurrentlyInLongKey()) { + m_stream << '?'; + m_pState->RequireSoftSeparation(); + } + return true; + case ES_WRITING_FLOW_MAP_KEY: + return true; + case ES_DONE_WITH_FLOW_MAP_KEY: + m_pState->SetError(ErrorMsg::EXPECTED_VALUE_TOKEN); + return true; + case ES_WAITING_FOR_FLOW_MAP_VALUE: + EmitSeparationIfNecessary(); + m_stream << ':'; + m_pState->RequireSoftSeparation(); + m_pState->SwitchState(ES_WRITING_FLOW_MAP_VALUE); + return true; + case ES_WRITING_FLOW_MAP_VALUE: + return true; + case ES_DONE_WITH_FLOW_MAP_VALUE: + m_pState->SetError(ErrorMsg::EXPECTED_KEY_TOKEN); + return true; + default: + assert(false); + } + + assert(false); + return true; + } + + // PreAtomicWrite + // . Depending on the emitter state, write to the stream to get it + // in position to do an atomic write (e.g., scalar, sequence, or map) + void Emitter::PreAtomicWrite() + { + if(!good()) + return; + + while(!GotoNextPreAtomicState()) + ; + } + + // PostAtomicWrite + // . 
Clean up + void Emitter::PostAtomicWrite() + { + if(!good()) + return; + + EMITTER_STATE curState = m_pState->GetCurState(); + switch(curState) { + // document-level + case ES_WRITING_DOC: + m_pState->SwitchState(ES_DONE_WITH_DOC); + break; + + // block seq + case ES_WRITING_BLOCK_SEQ_ENTRY: + m_pState->SwitchState(ES_DONE_WITH_BLOCK_SEQ_ENTRY); + break; + + // flow seq + case ES_WRITING_FLOW_SEQ_ENTRY: + m_pState->SwitchState(ES_DONE_WITH_FLOW_SEQ_ENTRY); + break; + + // block map + case ES_WRITING_BLOCK_MAP_KEY: + if(!m_pState->CurrentlyInLongKey()) { + m_stream << ':'; + m_pState->RequireSoftSeparation(); + } + m_pState->SwitchState(ES_DONE_WITH_BLOCK_MAP_KEY); + break; + case ES_WRITING_BLOCK_MAP_VALUE: + m_pState->SwitchState(ES_DONE_WITH_BLOCK_MAP_VALUE); + break; + + // flow map + case ES_WRITING_FLOW_MAP_KEY: + m_pState->SwitchState(ES_DONE_WITH_FLOW_MAP_KEY); + break; + case ES_WRITING_FLOW_MAP_VALUE: + m_pState->SwitchState(ES_DONE_WITH_FLOW_MAP_VALUE); + break; + default: + assert(false); + }; + + m_pState->ClearModifiedSettings(); + } + + // EmitSeparationIfNecessary + void Emitter::EmitSeparationIfNecessary() + { + if(!good()) + return; + + if(m_pState->RequiresSoftSeparation()) + m_stream << ' '; + else if(m_pState->RequiresHardSeparation()) + m_stream << '\n'; + m_pState->UnsetSeparation(); + } + + // EmitBeginDoc + void Emitter::EmitBeginDoc() + { + if(!good()) + return; + + EMITTER_STATE curState = m_pState->GetCurState(); + if(curState != ES_WAITING_FOR_DOC && curState != ES_WRITING_DOC && curState != ES_DONE_WITH_DOC) { + m_pState->SetError("Unexpected begin document"); + return; + } + + if(curState == ES_WRITING_DOC || curState == ES_DONE_WITH_DOC) + m_stream << '\n'; + m_stream << "---\n"; + + m_pState->UnsetSeparation(); + m_pState->SwitchState(ES_WAITING_FOR_DOC); + } + + // EmitEndDoc + void Emitter::EmitEndDoc() + { + if(!good()) + return; + + + EMITTER_STATE curState = m_pState->GetCurState(); + if(curState != ES_WAITING_FOR_DOC && 
curState != ES_WRITING_DOC && curState != ES_DONE_WITH_DOC) { + m_pState->SetError("Unexpected end document"); + return; + } + + if(curState == ES_WRITING_DOC || curState == ES_DONE_WITH_DOC) + m_stream << '\n'; + m_stream << "...\n"; + + m_pState->UnsetSeparation(); + m_pState->SwitchState(ES_WAITING_FOR_DOC); + } + + // EmitBeginSeq + void Emitter::EmitBeginSeq() + { + if(!good()) + return; + + // must have a long key if we're emitting a sequence + m_pState->StartLongKey(); + + PreAtomicWrite(); + + EMITTER_STATE curState = m_pState->GetCurState(); + EMITTER_MANIP flowType = m_pState->GetFlowType(GT_SEQ); + if(flowType == Block) { + if(curState == ES_WRITING_BLOCK_SEQ_ENTRY || + curState == ES_WRITING_BLOCK_MAP_KEY || curState == ES_WRITING_BLOCK_MAP_VALUE || + curState == ES_WRITING_DOC + ) { + if(m_pState->RequiresHardSeparation() || curState != ES_WRITING_DOC) { + m_stream << "\n"; + m_pState->UnsetSeparation(); + } + } + m_pState->PushState(ES_WAITING_FOR_BLOCK_SEQ_ENTRY); + } else if(flowType == Flow) { + EmitSeparationIfNecessary(); + m_stream << "["; + m_pState->PushState(ES_WAITING_FOR_FLOW_SEQ_ENTRY); + } else + assert(false); + + m_pState->BeginGroup(GT_SEQ); + } + + // EmitEndSeq + void Emitter::EmitEndSeq() + { + if(!good()) + return; + + if(m_pState->GetCurGroupType() != GT_SEQ) + return m_pState->SetError(ErrorMsg::UNEXPECTED_END_SEQ); + + EMITTER_STATE curState = m_pState->GetCurState(); + FLOW_TYPE flowType = m_pState->GetCurGroupFlowType(); + if(flowType == FT_BLOCK) { + // Note: block sequences are *not* allowed to be empty, but we convert it + // to a flow sequence if it is + assert(curState == ES_DONE_WITH_BLOCK_SEQ_ENTRY || curState == ES_WAITING_FOR_BLOCK_SEQ_ENTRY); + if(curState == ES_WAITING_FOR_BLOCK_SEQ_ENTRY) { + // Note: only one of these will actually output anything for a given situation + EmitSeparationIfNecessary(); + unsigned curIndent = m_pState->GetCurIndent(); + m_stream << IndentTo(curIndent); + + m_stream << "[]"; + } + } 
else if(flowType == FT_FLOW) { + // Note: flow sequences are allowed to be empty + assert(curState == ES_DONE_WITH_FLOW_SEQ_ENTRY || curState == ES_WAITING_FOR_FLOW_SEQ_ENTRY); + m_stream << "]"; + } else + assert(false); + + m_pState->PopState(); + m_pState->EndGroup(GT_SEQ); + + PostAtomicWrite(); + } + + // EmitBeginMap + void Emitter::EmitBeginMap() + { + if(!good()) + return; + + // must have a long key if we're emitting a map + m_pState->StartLongKey(); + + PreAtomicWrite(); + + EMITTER_STATE curState = m_pState->GetCurState(); + EMITTER_MANIP flowType = m_pState->GetFlowType(GT_MAP); + if(flowType == Block) { + if(curState == ES_WRITING_BLOCK_SEQ_ENTRY || + curState == ES_WRITING_BLOCK_MAP_KEY || curState == ES_WRITING_BLOCK_MAP_VALUE || + curState == ES_WRITING_DOC + ) { + if(m_pState->RequiresHardSeparation() || (curState != ES_WRITING_DOC && curState != ES_WRITING_BLOCK_SEQ_ENTRY)) { + m_stream << "\n"; + m_pState->UnsetSeparation(); + } + } + m_pState->PushState(ES_WAITING_FOR_BLOCK_MAP_ENTRY); + } else if(flowType == Flow) { + EmitSeparationIfNecessary(); + m_stream << "{"; + m_pState->PushState(ES_WAITING_FOR_FLOW_MAP_ENTRY); + } else + assert(false); + + m_pState->BeginGroup(GT_MAP); + } + + // EmitEndMap + void Emitter::EmitEndMap() + { + if(!good()) + return; + + if(m_pState->GetCurGroupType() != GT_MAP) + return m_pState->SetError(ErrorMsg::UNEXPECTED_END_MAP); + + EMITTER_STATE curState = m_pState->GetCurState(); + FLOW_TYPE flowType = m_pState->GetCurGroupFlowType(); + if(flowType == FT_BLOCK) { + // Note: block sequences are *not* allowed to be empty, but we convert it + // to a flow sequence if it is + assert(curState == ES_DONE_WITH_BLOCK_MAP_VALUE || curState == ES_WAITING_FOR_BLOCK_MAP_ENTRY); + if(curState == ES_WAITING_FOR_BLOCK_MAP_ENTRY) { + // Note: only one of these will actually output anything for a given situation + EmitSeparationIfNecessary(); + unsigned curIndent = m_pState->GetCurIndent(); + m_stream << IndentTo(curIndent); + 
m_stream << "{}"; + } + } else if(flowType == FT_FLOW) { + // Note: flow maps are allowed to be empty + assert(curState == ES_DONE_WITH_FLOW_MAP_VALUE || curState == ES_WAITING_FOR_FLOW_MAP_ENTRY); + EmitSeparationIfNecessary(); + m_stream << "}"; + } else + assert(false); + + m_pState->PopState(); + m_pState->EndGroup(GT_MAP); + + PostAtomicWrite(); + } + + // EmitKey + void Emitter::EmitKey() + { + if(!good()) + return; + + EMITTER_STATE curState = m_pState->GetCurState(); + FLOW_TYPE flowType = m_pState->GetCurGroupFlowType(); + if(curState != ES_WAITING_FOR_BLOCK_MAP_ENTRY && curState != ES_DONE_WITH_BLOCK_MAP_VALUE + && curState != ES_WAITING_FOR_FLOW_MAP_ENTRY && curState != ES_DONE_WITH_FLOW_MAP_VALUE) + return m_pState->SetError(ErrorMsg::UNEXPECTED_KEY_TOKEN); + + if(flowType == FT_BLOCK) { + if(curState == ES_DONE_WITH_BLOCK_MAP_VALUE) + m_stream << '\n'; + unsigned curIndent = m_pState->GetCurIndent(); + m_stream << IndentTo(curIndent); + m_pState->UnsetSeparation(); + m_pState->SwitchState(ES_WAITING_FOR_BLOCK_MAP_KEY); + } else if(flowType == FT_FLOW) { + EmitSeparationIfNecessary(); + if(curState == ES_DONE_WITH_FLOW_MAP_VALUE) { + m_stream << ','; + m_pState->RequireSoftSeparation(); + } + m_pState->SwitchState(ES_WAITING_FOR_FLOW_MAP_KEY); + } else + assert(false); + + if(m_pState->GetMapKeyFormat() == LongKey) + m_pState->StartLongKey(); + else if(m_pState->GetMapKeyFormat() == Auto) + m_pState->StartSimpleKey(); + else + assert(false); + } + + // EmitValue + void Emitter::EmitValue() + { + if(!good()) + return; + + EMITTER_STATE curState = m_pState->GetCurState(); + FLOW_TYPE flowType = m_pState->GetCurGroupFlowType(); + if(curState != ES_DONE_WITH_BLOCK_MAP_KEY && curState != ES_DONE_WITH_FLOW_MAP_KEY) + return m_pState->SetError(ErrorMsg::UNEXPECTED_VALUE_TOKEN); + + if(flowType == FT_BLOCK) { + if(m_pState->CurrentlyInLongKey()) { + m_stream << '\n'; + m_stream << IndentTo(m_pState->GetCurIndent()); + m_stream << ':'; + 
m_pState->RequireSoftSeparation(); + } + m_pState->SwitchState(ES_WAITING_FOR_BLOCK_MAP_VALUE); + } else if(flowType == FT_FLOW) { + m_pState->SwitchState(ES_WAITING_FOR_FLOW_MAP_VALUE); + } else + assert(false); + } + + // EmitNewline + void Emitter::EmitNewline() + { + if(!good()) + return; + + if(CanEmitNewline()) { + m_stream << '\n'; + m_pState->UnsetSeparation(); + } + } + + bool Emitter::CanEmitNewline() const + { + FLOW_TYPE flowType = m_pState->GetCurGroupFlowType(); + if(flowType == FT_BLOCK && m_pState->CurrentlyInLongKey()) + return true; + + EMITTER_STATE curState = m_pState->GetCurState(); + return curState != ES_DONE_WITH_BLOCK_MAP_KEY && curState != ES_WAITING_FOR_BLOCK_MAP_VALUE && curState != ES_WRITING_BLOCK_MAP_VALUE; + } + + // ******************************************************************************************* + // overloads of Write + + Emitter& Emitter::Write(const std::string& str) + { + if(!good()) + return *this; + + // literal scalars must use long keys + if(m_pState->GetStringFormat() == Literal && m_pState->GetCurGroupFlowType() != FT_FLOW) + m_pState->StartLongKey(); + + PreAtomicWrite(); + EmitSeparationIfNecessary(); + + bool escapeNonAscii = m_pState->GetOutputCharset() == EscapeNonAscii; + EMITTER_MANIP strFmt = m_pState->GetStringFormat(); + FLOW_TYPE flowType = m_pState->GetCurGroupFlowType(); + unsigned curIndent = m_pState->GetCurIndent(); + + switch(strFmt) { + case Auto: + Utils::WriteString(m_stream, str, flowType == FT_FLOW, escapeNonAscii); + break; + case SingleQuoted: + if(!Utils::WriteSingleQuotedString(m_stream, str)) { + m_pState->SetError(ErrorMsg::SINGLE_QUOTED_CHAR); + return *this; + } + break; + case DoubleQuoted: + Utils::WriteDoubleQuotedString(m_stream, str, escapeNonAscii); + break; + case Literal: + if(flowType == FT_FLOW) + Utils::WriteString(m_stream, str, flowType == FT_FLOW, escapeNonAscii); + else + Utils::WriteLiteralString(m_stream, str, curIndent + m_pState->GetIndent()); + break; + default: 
+ assert(false); + } + + PostAtomicWrite(); + return *this; + } + + void Emitter::PreWriteIntegralType(std::stringstream& str) + { + PreAtomicWrite(); + EmitSeparationIfNecessary(); + + EMITTER_MANIP intFmt = m_pState->GetIntFormat(); + switch(intFmt) { + case Dec: + str << std::dec; + break; + case Hex: + str << "0x"; + str << std::hex; + break; + case Oct: + str << "0"; + str << std::oct; + break; + default: + assert(false); + } + } + + void Emitter::PreWriteStreamable(std::stringstream&) + { + PreAtomicWrite(); + EmitSeparationIfNecessary(); + } + + unsigned Emitter::GetFloatPrecision() const + { + return m_pState->GetFloatPrecision(); + } + + unsigned Emitter::GetDoublePrecision() const + { + return m_pState->GetDoublePrecision(); + } + + void Emitter::PostWriteIntegralType(const std::stringstream& str) + { + m_stream << str.str(); + PostAtomicWrite(); + } + + void Emitter::PostWriteStreamable(const std::stringstream& str) + { + m_stream << str.str(); + PostAtomicWrite(); + } + + const char *Emitter::ComputeFullBoolName(bool b) const + { + const EMITTER_MANIP mainFmt = (m_pState->GetBoolLengthFormat() == ShortBool ? YesNoBool : m_pState->GetBoolFormat()); + const EMITTER_MANIP caseFmt = m_pState->GetBoolCaseFormat(); + switch(mainFmt) { + case YesNoBool: + switch(caseFmt) { + case UpperCase: return b ? "YES" : "NO"; + case CamelCase: return b ? "Yes" : "No"; + case LowerCase: return b ? "yes" : "no"; + default: break; + } + break; + case OnOffBool: + switch(caseFmt) { + case UpperCase: return b ? "ON" : "OFF"; + case CamelCase: return b ? "On" : "Off"; + case LowerCase: return b ? "on" : "off"; + default: break; + } + break; + case TrueFalseBool: + switch(caseFmt) { + case UpperCase: return b ? "TRUE" : "FALSE"; + case CamelCase: return b ? "True" : "False"; + case LowerCase: return b ? "true" : "false"; + default: break; + } + break; + default: + break; + } + return b ? 
"y" : "n"; // should never get here, but it can't hurt to give these answers + } + + Emitter& Emitter::Write(bool b) + { + if(!good()) + return *this; + + PreAtomicWrite(); + EmitSeparationIfNecessary(); + + const char *name = ComputeFullBoolName(b); + if(m_pState->GetBoolLengthFormat() == ShortBool) + m_stream << name[0]; + else + m_stream << name; + + PostAtomicWrite(); + return *this; + } + + Emitter& Emitter::Write(char ch) + { + if(!good()) + return *this; + + PreAtomicWrite(); + EmitSeparationIfNecessary(); + + Utils::WriteChar(m_stream, ch); + + PostAtomicWrite(); + return *this; + } + + Emitter& Emitter::Write(const _Alias& alias) + { + if(!good()) + return *this; + + PreAtomicWrite(); + EmitSeparationIfNecessary(); + if(!Utils::WriteAlias(m_stream, alias.content)) { + m_pState->SetError(ErrorMsg::INVALID_ALIAS); + return *this; + } + PostAtomicWrite(); + return *this; + } + + Emitter& Emitter::Write(const _Anchor& anchor) + { + if(!good()) + return *this; + + PreAtomicWrite(); + EmitSeparationIfNecessary(); + if(!Utils::WriteAnchor(m_stream, anchor.content)) { + m_pState->SetError(ErrorMsg::INVALID_ANCHOR); + return *this; + } + m_pState->RequireHardSeparation(); + // Note: no PostAtomicWrite() because we need another value for this node + return *this; + } + + Emitter& Emitter::Write(const _Tag& tag) + { + if(!good()) + return *this; + + PreAtomicWrite(); + EmitSeparationIfNecessary(); + + bool success = false; + if(tag.type == _Tag::Type::Verbatim) + success = Utils::WriteTag(m_stream, tag.content, true); + else if(tag.type == _Tag::Type::PrimaryHandle) + success = Utils::WriteTag(m_stream, tag.content, false); + else + success = Utils::WriteTagWithPrefix(m_stream, tag.prefix, tag.content); + + if(!success) { + m_pState->SetError(ErrorMsg::INVALID_TAG); + return *this; + } + + m_pState->RequireHardSeparation(); + // Note: no PostAtomicWrite() because we need another value for this node + return *this; + } + + void Emitter::EmitKindTag() + { + 
Write(LocalTag("")); + } + + Emitter& Emitter::Write(const _Comment& comment) + { + if(!good()) + return *this; + + if(m_stream.col() > 0) + m_stream << Indentation(m_pState->GetPreCommentIndent()); + Utils::WriteComment(m_stream, comment.content, m_pState->GetPostCommentIndent()); + m_pState->RequireHardSeparation(); + m_pState->ForceHardSeparation(); + + return *this; + } + + Emitter& Emitter::Write(const _Null& /*null*/) + { + if(!good()) + return *this; + + PreAtomicWrite(); + EmitSeparationIfNecessary(); + m_stream << "~"; + PostAtomicWrite(); + return *this; + } + + Emitter& Emitter::Write(const Binary& binary) + { + Write(SecondaryTag("binary")); + + if(!good()) + return *this; + + PreAtomicWrite(); + EmitSeparationIfNecessary(); + Utils::WriteBinary(m_stream, binary); + PostAtomicWrite(); + return *this; + } +} + diff --git a/yaml-cpp/src/emitterstate.cpp b/yaml-cpp/src/emitterstate.cpp new file mode 100644 index 0000000..562e82c --- /dev/null +++ b/yaml-cpp/src/emitterstate.cpp @@ -0,0 +1,284 @@ +#include "emitterstate.h" +#include "yaml-cpp/exceptions.h" +#include + +namespace YAML +{ + EmitterState::EmitterState(): m_isGood(true), m_curIndent(0), m_requiresSoftSeparation(false), m_requiresHardSeparation(false) + { + // start up + m_stateStack.push(ES_WAITING_FOR_DOC); + + // set default global manipulators + m_charset.set(EmitNonAscii); + m_strFmt.set(Auto); + m_boolFmt.set(TrueFalseBool); + m_boolLengthFmt.set(LongBool); + m_boolCaseFmt.set(LowerCase); + m_intFmt.set(Dec); + m_indent.set(2); + m_preCommentIndent.set(2); + m_postCommentIndent.set(1); + m_seqFmt.set(Block); + m_mapFmt.set(Block); + m_mapKeyFmt.set(Auto); + m_floatPrecision.set(6); + m_doublePrecision.set(15); + } + + EmitterState::~EmitterState() + { + } + + // SetLocalValue + // . We blindly tries to set all possible formatters to this value + // . 
Only the ones that make sense will be accepted + void EmitterState::SetLocalValue(EMITTER_MANIP value) + { + SetOutputCharset(value, LOCAL); + SetStringFormat(value, LOCAL); + SetBoolFormat(value, LOCAL); + SetBoolCaseFormat(value, LOCAL); + SetBoolLengthFormat(value, LOCAL); + SetIntFormat(value, LOCAL); + SetFlowType(GT_SEQ, value, LOCAL); + SetFlowType(GT_MAP, value, LOCAL); + SetMapKeyFormat(value, LOCAL); + } + + void EmitterState::BeginGroup(GROUP_TYPE type) + { + unsigned lastIndent = (m_groups.empty() ? 0 : m_groups.top().indent); + m_curIndent += lastIndent; + + std::auto_ptr pGroup(new Group(type)); + + // transfer settings (which last until this group is done) + pGroup->modifiedSettings = m_modifiedSettings; + + // set up group + pGroup->flow = GetFlowType(type); + pGroup->indent = GetIndent(); + pGroup->usingLongKey = (GetMapKeyFormat() == LongKey ? true : false); + + m_groups.push(pGroup); + } + + void EmitterState::EndGroup(GROUP_TYPE type) + { + if(m_groups.empty()) + return SetError(ErrorMsg::UNMATCHED_GROUP_TAG); + + // get rid of the current group + { + std::auto_ptr pFinishedGroup = m_groups.pop(); + if(pFinishedGroup->type != type) + return SetError(ErrorMsg::UNMATCHED_GROUP_TAG); + } + + // reset old settings + unsigned lastIndent = (m_groups.empty() ? 0 : m_groups.top().indent); + assert(m_curIndent >= lastIndent); + m_curIndent -= lastIndent; + + // some global settings that we changed may have been overridden + // by a local setting we just popped, so we need to restore them + m_globalModifiedSettings.restore(); + } + + GROUP_TYPE EmitterState::GetCurGroupType() const + { + if(m_groups.empty()) + return GT_NONE; + + return m_groups.top().type; + } + + FLOW_TYPE EmitterState::GetCurGroupFlowType() const + { + if(m_groups.empty()) + return FT_NONE; + + return (m_groups.top().flow == Flow ? 
FT_FLOW : FT_BLOCK); + } + + bool EmitterState::CurrentlyInLongKey() + { + if(m_groups.empty()) + return false; + return m_groups.top().usingLongKey; + } + + void EmitterState::StartLongKey() + { + if(!m_groups.empty()) + m_groups.top().usingLongKey = true; + } + + void EmitterState::StartSimpleKey() + { + if(!m_groups.empty()) + m_groups.top().usingLongKey = false; + } + + void EmitterState::ClearModifiedSettings() + { + m_modifiedSettings.clear(); + } + + bool EmitterState::SetOutputCharset(EMITTER_MANIP value, FMT_SCOPE scope) + { + switch(value) { + case EmitNonAscii: + case EscapeNonAscii: + _Set(m_charset, value, scope); + return true; + default: + return false; + } + } + + bool EmitterState::SetStringFormat(EMITTER_MANIP value, FMT_SCOPE scope) + { + switch(value) { + case Auto: + case SingleQuoted: + case DoubleQuoted: + case Literal: + _Set(m_strFmt, value, scope); + return true; + default: + return false; + } + } + + bool EmitterState::SetBoolFormat(EMITTER_MANIP value, FMT_SCOPE scope) + { + switch(value) { + case OnOffBool: + case TrueFalseBool: + case YesNoBool: + _Set(m_boolFmt, value, scope); + return true; + default: + return false; + } + } + + bool EmitterState::SetBoolLengthFormat(EMITTER_MANIP value, FMT_SCOPE scope) + { + switch(value) { + case LongBool: + case ShortBool: + _Set(m_boolLengthFmt, value, scope); + return true; + default: + return false; + } + } + + bool EmitterState::SetBoolCaseFormat(EMITTER_MANIP value, FMT_SCOPE scope) + { + switch(value) { + case UpperCase: + case LowerCase: + case CamelCase: + _Set(m_boolCaseFmt, value, scope); + return true; + default: + return false; + } + } + + bool EmitterState::SetIntFormat(EMITTER_MANIP value, FMT_SCOPE scope) + { + switch(value) { + case Dec: + case Hex: + case Oct: + _Set(m_intFmt, value, scope); + return true; + default: + return false; + } + } + + bool EmitterState::SetIndent(unsigned value, FMT_SCOPE scope) + { + if(value == 0) + return false; + + _Set(m_indent, value, scope); + 
return true; + } + + bool EmitterState::SetPreCommentIndent(unsigned value, FMT_SCOPE scope) + { + if(value == 0) + return false; + + _Set(m_preCommentIndent, value, scope); + return true; + } + + bool EmitterState::SetPostCommentIndent(unsigned value, FMT_SCOPE scope) + { + if(value == 0) + return false; + + _Set(m_postCommentIndent, value, scope); + return true; + } + + bool EmitterState::SetFlowType(GROUP_TYPE groupType, EMITTER_MANIP value, FMT_SCOPE scope) + { + switch(value) { + case Block: + case Flow: + _Set(groupType == GT_SEQ ? m_seqFmt : m_mapFmt, value, scope); + return true; + default: + return false; + } + } + + EMITTER_MANIP EmitterState::GetFlowType(GROUP_TYPE groupType) const + { + // force flow style if we're currently in a flow + FLOW_TYPE flowType = GetCurGroupFlowType(); + if(flowType == FT_FLOW) + return Flow; + + // otherwise, go with what's asked of use + return (groupType == GT_SEQ ? m_seqFmt.get() : m_mapFmt.get()); + } + + bool EmitterState::SetMapKeyFormat(EMITTER_MANIP value, FMT_SCOPE scope) + { + switch(value) { + case Auto: + case LongKey: + _Set(m_mapKeyFmt, value, scope); + return true; + default: + return false; + } + } + + bool EmitterState::SetFloatPrecision(int value, FMT_SCOPE scope) + { + if(value < 0 || value > std::numeric_limits::digits10) + return false; + _Set(m_floatPrecision, value, scope); + return true; + } + + bool EmitterState::SetDoublePrecision(int value, FMT_SCOPE scope) + { + if(value < 0 || value > std::numeric_limits::digits10) + return false; + _Set(m_doublePrecision, value, scope); + return true; + } +} + diff --git a/yaml-cpp/src/emitterstate.h b/yaml-cpp/src/emitterstate.h new file mode 100644 index 0000000..5698e32 --- /dev/null +++ b/yaml-cpp/src/emitterstate.h @@ -0,0 +1,217 @@ +#ifndef EMITTERSTATE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define EMITTERSTATE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 
4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include "ptr_stack.h" +#include "setting.h" +#include "yaml-cpp/emittermanip.h" +#include +#include +#include +#include + +namespace YAML +{ + enum FMT_SCOPE { + LOCAL, + GLOBAL + }; + + enum GROUP_TYPE { + GT_NONE, + GT_SEQ, + GT_MAP + }; + + enum FLOW_TYPE { + FT_NONE, + FT_FLOW, + FT_BLOCK + }; + + enum NODE_STATE { + NS_START, + NS_READY_FOR_ATOM, + NS_END + }; + + enum EMITTER_STATE { + ES_WAITING_FOR_DOC, + ES_WRITING_DOC, + ES_DONE_WITH_DOC, + + // block seq + ES_WAITING_FOR_BLOCK_SEQ_ENTRY, + ES_WRITING_BLOCK_SEQ_ENTRY, + ES_DONE_WITH_BLOCK_SEQ_ENTRY, + + // flow seq + ES_WAITING_FOR_FLOW_SEQ_ENTRY, + ES_WRITING_FLOW_SEQ_ENTRY, + ES_DONE_WITH_FLOW_SEQ_ENTRY, + + // block map + ES_WAITING_FOR_BLOCK_MAP_ENTRY, + ES_WAITING_FOR_BLOCK_MAP_KEY, + ES_WRITING_BLOCK_MAP_KEY, + ES_DONE_WITH_BLOCK_MAP_KEY, + ES_WAITING_FOR_BLOCK_MAP_VALUE, + ES_WRITING_BLOCK_MAP_VALUE, + ES_DONE_WITH_BLOCK_MAP_VALUE, + + // flow map + ES_WAITING_FOR_FLOW_MAP_ENTRY, + ES_WAITING_FOR_FLOW_MAP_KEY, + ES_WRITING_FLOW_MAP_KEY, + ES_DONE_WITH_FLOW_MAP_KEY, + ES_WAITING_FOR_FLOW_MAP_VALUE, + ES_WRITING_FLOW_MAP_VALUE, + ES_DONE_WITH_FLOW_MAP_VALUE + }; + + class EmitterState + { + public: + EmitterState(); + ~EmitterState(); + + // basic state checking + bool good() const { return m_isGood; } + const std::string GetLastError() const { return m_lastError; } + void SetError(const std::string& error) { m_isGood = false; m_lastError = error; } + + // main state of the machine + EMITTER_STATE GetCurState() const { return m_stateStack.top(); } + void SwitchState(EMITTER_STATE state) { PopState(); PushState(state); } + void PushState(EMITTER_STATE state) { m_stateStack.push(state); } + void PopState() { m_stateStack.pop(); } + + void SetLocalValue(EMITTER_MANIP value); + + // group handling + void BeginGroup(GROUP_TYPE type); + void EndGroup(GROUP_TYPE type); + + GROUP_TYPE GetCurGroupType() const; + FLOW_TYPE 
GetCurGroupFlowType() const; + int GetCurIndent() const { return m_curIndent; } + + bool CurrentlyInLongKey(); + void StartLongKey(); + void StartSimpleKey(); + + bool RequiresSoftSeparation() const { return m_requiresSoftSeparation; } + bool RequiresHardSeparation() const { return m_requiresHardSeparation; } + void RequireSoftSeparation() { m_requiresSoftSeparation = true; } + void RequireHardSeparation() { m_requiresSoftSeparation = true; m_requiresHardSeparation = true; } + void ForceHardSeparation() { m_requiresSoftSeparation = false; } + void UnsetSeparation() { m_requiresSoftSeparation = false; m_requiresHardSeparation = false; } + + void ClearModifiedSettings(); + + // formatters + bool SetOutputCharset(EMITTER_MANIP value, FMT_SCOPE scope); + EMITTER_MANIP GetOutputCharset() const { return m_charset.get(); } + + bool SetStringFormat(EMITTER_MANIP value, FMT_SCOPE scope); + EMITTER_MANIP GetStringFormat() const { return m_strFmt.get(); } + + bool SetBoolFormat(EMITTER_MANIP value, FMT_SCOPE scope); + EMITTER_MANIP GetBoolFormat() const { return m_boolFmt.get(); } + + bool SetBoolLengthFormat(EMITTER_MANIP value, FMT_SCOPE scope); + EMITTER_MANIP GetBoolLengthFormat() const { return m_boolLengthFmt.get(); } + + bool SetBoolCaseFormat(EMITTER_MANIP value, FMT_SCOPE scope); + EMITTER_MANIP GetBoolCaseFormat() const { return m_boolCaseFmt.get(); } + + bool SetIntFormat(EMITTER_MANIP value, FMT_SCOPE scope); + EMITTER_MANIP GetIntFormat() const { return m_intFmt.get(); } + + bool SetIndent(unsigned value, FMT_SCOPE scope); + int GetIndent() const { return m_indent.get(); } + + bool SetPreCommentIndent(unsigned value, FMT_SCOPE scope); + int GetPreCommentIndent() const { return m_preCommentIndent.get(); } + bool SetPostCommentIndent(unsigned value, FMT_SCOPE scope); + int GetPostCommentIndent() const { return m_postCommentIndent.get(); } + + bool SetFlowType(GROUP_TYPE groupType, EMITTER_MANIP value, FMT_SCOPE scope); + EMITTER_MANIP GetFlowType(GROUP_TYPE 
groupType) const; + + bool SetMapKeyFormat(EMITTER_MANIP value, FMT_SCOPE scope); + EMITTER_MANIP GetMapKeyFormat() const { return m_mapKeyFmt.get(); } + + bool SetFloatPrecision(int value, FMT_SCOPE scope); + unsigned GetFloatPrecision() const { return m_floatPrecision.get(); } + bool SetDoublePrecision(int value, FMT_SCOPE scope); + unsigned GetDoublePrecision() const { return m_doublePrecision.get(); } + + private: + template + void _Set(Setting& fmt, T value, FMT_SCOPE scope); + + private: + // basic state ok? + bool m_isGood; + std::string m_lastError; + + // other state + std::stack m_stateStack; + + Setting m_charset; + Setting m_strFmt; + Setting m_boolFmt; + Setting m_boolLengthFmt; + Setting m_boolCaseFmt; + Setting m_intFmt; + Setting m_indent; + Setting m_preCommentIndent, m_postCommentIndent; + Setting m_seqFmt; + Setting m_mapFmt; + Setting m_mapKeyFmt; + Setting m_floatPrecision; + Setting m_doublePrecision; + + SettingChanges m_modifiedSettings; + SettingChanges m_globalModifiedSettings; + + struct Group { + Group(GROUP_TYPE type_): type(type_), usingLongKey(false), indent(0) {} + + GROUP_TYPE type; + EMITTER_MANIP flow; + bool usingLongKey; + int indent; + + SettingChanges modifiedSettings; + }; + + ptr_stack m_groups; + unsigned m_curIndent; + bool m_requiresSoftSeparation; + bool m_requiresHardSeparation; + }; + + template + void EmitterState::_Set(Setting& fmt, T value, FMT_SCOPE scope) { + switch(scope) { + case LOCAL: + m_modifiedSettings.push(fmt.set(value)); + break; + case GLOBAL: + fmt.set(value); + m_globalModifiedSettings.push(fmt.set(value)); // this pushes an identity set, so when we restore, + // it restores to the value here, and not the previous one + break; + default: + assert(false); + } + } +} + +#endif // EMITTERSTATE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/emitterutils.cpp b/yaml-cpp/src/emitterutils.cpp new file mode 100644 index 0000000..3d184d6 --- /dev/null +++ b/yaml-cpp/src/emitterutils.cpp @@ 
-0,0 +1,378 @@ +#include "emitterutils.h" +#include "exp.h" +#include "indentation.h" +#include "yaml-cpp/binary.h" +#include "yaml-cpp/exceptions.h" +#include "stringsource.h" +#include +#include + +namespace YAML +{ + namespace Utils + { + namespace { + enum {REPLACEMENT_CHARACTER = 0xFFFD}; + + // IsAnchorChar + // . Returns true if the code point ch may appear in an anchor/alias name + // . Rejects flow indicators, white space, line breaks, the byte order mark, + // C0/C1 controls (other than 0x85), surrogates, noncharacters and values above 0x10FFFF + bool IsAnchorChar(int ch) { // test for ns-anchor-char + switch (ch) { + case ',': case '[': case ']': case '{': case '}': // c-flow-indicator + case ' ': case '\t': // s-white + case 0xFEFF: // c-byte-order-mark + case 0xA: case 0xD: // b-char + return false; + case 0x85: + return true; + } + + if (ch < 0x20) + return false; + + // printable ASCII runs through '~' (0x7E) inclusive; only 0x7F..0x9F are rejected below + if (ch <= 0x7E) + return true; + + if (ch < 0xA0) + return false; + if (ch >= 0xD800 && ch <= 0xDFFF) + return false; + if ((ch & 0xFFFE) == 0xFFFE) + return false; + if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) + return false; + if (ch > 0x10FFFF) + return false; + + return true; + } + + int Utf8BytesIndicated(char ch) { + int byteVal = static_cast(ch); + switch (byteVal >> 4) { + case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: + return 1; + case 12: case 13: + return 2; + case 14: + return 3; + case 15: + return 4; + default: + return -1; + } + } + + bool IsTrailingByte(char ch) { + return (ch & 0xC0) == 0x80; + } + + bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) { + if (first == last) + return false; + + int nBytes = Utf8BytesIndicated(*first); + if (nBytes < 1) { + // Bad lead byte + ++first; + codePoint = REPLACEMENT_CHARACTER; + return true; + } + + if (nBytes == 1) { + codePoint = *first++; + return true; + } + + // Gather bits from trailing bytes + codePoint = static_cast(*first) & ~(0xFF << (7 - nBytes)); + ++first; + --nBytes; + for (; nBytes > 0; ++first, --nBytes) { + if ((first == last) || !IsTrailingByte(*first)) { + codePoint = REPLACEMENT_CHARACTER; + break; + } + codePoint <<= 6; + codePoint |= *first & 0x3F; + } + + // Check for illegal code points +
if (codePoint > 0x10FFFF) + codePoint = REPLACEMENT_CHARACTER; + else if (codePoint >= 0xD800 && codePoint <= 0xDFFF) + codePoint = REPLACEMENT_CHARACTER; + else if ((codePoint & 0xFFFE) == 0xFFFE) + codePoint = REPLACEMENT_CHARACTER; + else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF) + codePoint = REPLACEMENT_CHARACTER; + return true; + } + + // WriteCodePoint + // . Writes codePoint to out encoded as UTF-8 (one to four bytes) + // . Values outside [0, 0x10FFFF] are written as REPLACEMENT_CHARACTER + // . The range limits are inclusive (0x7F, 0x7FF, 0xFFFF) so that no value is + // emitted in a longer (overlong, invalid) form than it needs + void WriteCodePoint(ostream& out, int codePoint) { + if (codePoint < 0 || codePoint > 0x10FFFF) { + codePoint = REPLACEMENT_CHARACTER; + } + if (codePoint <= 0x7F) { + // one byte: 0xxxxxxx + out << static_cast<char>(codePoint); + } else if (codePoint <= 0x7FF) { + // two bytes: 110xxxxx 10xxxxxx + out << static_cast<char>(0xC0 | (codePoint >> 6)) + << static_cast<char>(0x80 | (codePoint & 0x3F)); + } else if (codePoint <= 0xFFFF) { + // three bytes: 1110xxxx 10xxxxxx 10xxxxxx + out << static_cast<char>(0xE0 | (codePoint >> 12)) + << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)) + << static_cast<char>(0x80 | (codePoint & 0x3F)); + } else { + // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + out << static_cast<char>(0xF0 | (codePoint >> 18)) + << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F)) + << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)) + << static_cast<char>(0x80 | (codePoint & 0x3F)); + } + } + + bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) { + if(str.empty()) + return false; + + // first check the start + const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar()); + if(!start.Matches(str)) + return false; + + // and check the end for plain whitespace (which can't be faithfully kept in a plain scalar) + if(!str.empty() && *str.rbegin() == ' ') + return false; + + // then check until something is disallowed + const RegEx& disallowed = (inFlow ?
Exp::EndScalarInFlow() : Exp::EndScalar()) + || (Exp::BlankOrBreak() + Exp::Comment()) + || Exp::NotPrintable() + || Exp::Utf8_ByteOrderMark() + || Exp::Break() + || Exp::Tab(); + StringCharSource buffer(str.c_str(), str.size()); + while(buffer) { + if(disallowed.Matches(buffer)) + return false; + if(allowOnlyAscii && (0x7F < static_cast(buffer[0]))) + return false; + ++buffer; + } + + return true; + } + + void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) { + static const char hexDigits[] = "0123456789abcdef"; + + char escSeq[] = "\\U00000000"; + int digits = 8; + if (codePoint < 0xFF) { + escSeq[1] = 'x'; + digits = 2; + } else if (codePoint < 0xFFFF) { + escSeq[1] = 'u'; + digits = 4; + } + + // Write digits into the escape sequence + int i = 2; + for (; digits > 0; --digits, ++i) { + escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF]; + } + + escSeq[i] = 0; // terminate with NUL character + out << escSeq; + } + + bool WriteAliasName(ostream& out, const std::string& str) { + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if (!IsAnchorChar(codePoint)) + return false; + + WriteCodePoint(out, codePoint); + } + return true; + } + } + + bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii) + { + if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) { + out << str; + return true; + } else + return WriteDoubleQuotedString(out, str, escapeNonAscii); + } + + bool WriteSingleQuotedString(ostream& out, const std::string& str) + { + out << "'"; + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if (codePoint == '\n') + return false; // We can't handle a new line and the attendant indentation yet + + if (codePoint == '\'') + out << "''"; + else + WriteCodePoint(out, codePoint); + } + out << "'"; + return true; + } + + bool 
WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii) + { + out << "\""; + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if (codePoint == '\"') + out << "\\\""; + else if (codePoint == '\\') + out << "\\\\"; + else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0)) // Control characters and non-breaking space + WriteDoubleQuoteEscapeSequence(out, codePoint); + else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2) + WriteDoubleQuoteEscapeSequence(out, codePoint); + else if (escapeNonAscii && codePoint > 0x7E) + WriteDoubleQuoteEscapeSequence(out, codePoint); + else + WriteCodePoint(out, codePoint); + } + out << "\""; + return true; + } + + bool WriteLiteralString(ostream& out, const std::string& str, int indent) + { + out << "|\n"; + out << IndentTo(indent); + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if (codePoint == '\n') + out << "\n" << IndentTo(indent); + else + WriteCodePoint(out, codePoint); + } + return true; + } + + // WriteChar + // . Writes a single char as a scalar: ASCII letters are written bare, + // everything else is written inside double quotes + // . '"' and '\\' are escaped so the quoted scalar stays well-formed + // . Always returns true + bool WriteChar(ostream& out, char ch) + { + if(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) + out << ch; + else if(ch == '\"') // must come before the printable range below, which would emit """ + out << "\"\\\"\""; + else if(ch == '\\') // likewise: an unescaped backslash would swallow the closing quote + out << "\"\\\\\""; + else if(0x20 <= ch && ch <= 0x7e) + out << "\"" << ch << "\""; + else if(ch == '\t') + out << "\"\\t\""; + else if(ch == '\n') + out << "\"\\n\""; + else if(ch == '\b') + out << "\"\\b\""; + else { + out << "\""; + WriteDoubleQuoteEscapeSequence(out, ch); + out << "\""; + } + return true; + } + + bool WriteComment(ostream& out, const std::string& str, int postCommentIndent) + { + const unsigned curIndent = out.col(); + out << "#" << Indentation(postCommentIndent); + int codePoint; + for(std::string::const_iterator i = str.begin(); + GetNextCodePointAndAdvance(codePoint, i, str.end()); + ) + { + if(codePoint == '\n') + out << "\n" << IndentTo(curIndent) << "#" <<
Indentation(postCommentIndent); + else + WriteCodePoint(out, codePoint); + } + return true; + } + + bool WriteAlias(ostream& out, const std::string& str) + { + out << "*"; + return WriteAliasName(out, str); + } + + bool WriteAnchor(ostream& out, const std::string& str) + { + out << "&"; + return WriteAliasName(out, str); + } + + bool WriteTag(ostream& out, const std::string& str, bool verbatim) + { + out << (verbatim ? "!<" : "!"); + StringCharSource buffer(str.c_str(), str.size()); + const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag(); + while(buffer) { + int n = reValid.Match(buffer); + if(n <= 0) + return false; + + while(--n >= 0) { + out << buffer[0]; + ++buffer; + } + } + if (verbatim) + out << ">"; + return true; + } + + bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag) + { + out << "!"; + StringCharSource prefixBuffer(prefix.c_str(), prefix.size()); + while(prefixBuffer) { + int n = Exp::URI().Match(prefixBuffer); + if(n <= 0) + return false; + + while(--n >= 0) { + out << prefixBuffer[0]; + ++prefixBuffer; + } + } + + out << "!"; + StringCharSource tagBuffer(tag.c_str(), tag.size()); + while(tagBuffer) { + int n = Exp::Tag().Match(tagBuffer); + if(n <= 0) + return false; + + while(--n >= 0) { + out << tagBuffer[0]; + ++tagBuffer; + } + } + return true; + } + + bool WriteBinary(ostream& out, const Binary& binary) + { + WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()), false); + return true; + } + } +} + diff --git a/yaml-cpp/src/emitterutils.h b/yaml-cpp/src/emitterutils.h new file mode 100644 index 0000000..0e270d6 --- /dev/null +++ b/yaml-cpp/src/emitterutils.h @@ -0,0 +1,32 @@ +#ifndef EMITTERUTILS_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define EMITTERUTILS_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include 
"yaml-cpp/ostream.h" +#include + +namespace YAML +{ + class Binary; + + namespace Utils + { + bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii); + bool WriteSingleQuotedString(ostream& out, const std::string& str); + bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii); + bool WriteLiteralString(ostream& out, const std::string& str, int indent); + bool WriteChar(ostream& out, char ch); + bool WriteComment(ostream& out, const std::string& str, int postCommentIndent); + bool WriteAlias(ostream& out, const std::string& str); + bool WriteAnchor(ostream& out, const std::string& str); + bool WriteTag(ostream& out, const std::string& str, bool verbatim); + bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag); + bool WriteBinary(ostream& out, const Binary& binary); + } +} + +#endif // EMITTERUTILS_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/exp.cpp b/yaml-cpp/src/exp.cpp new file mode 100644 index 0000000..7bc5454 --- /dev/null +++ b/yaml-cpp/src/exp.cpp @@ -0,0 +1,113 @@ +#include "exp.h" +#include "yaml-cpp/exceptions.h" +#include + +namespace YAML +{ + namespace Exp + { + unsigned ParseHex(const std::string& str, const Mark& mark) + { + unsigned value = 0; + for(std::size_t i=0;i(ch)); + } + + // Escape + // . Translates the next 'codeLength' characters into a hex number and returns the result. + // . Throws if it's not actually hex. 
+ std::string Escape(Stream& in, int codeLength) + { + // grab string + std::string str; + for(int i=0;i= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) { + std::stringstream msg; + msg << ErrorMsg::INVALID_UNICODE << value; + throw ParserException(in.mark(), msg.str()); + } + + // now break it up into chars + if(value <= 0x7F) + return Str(value); + else if(value <= 0x7FF) + return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F)); + else if(value <= 0xFFFF) + return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F)); + else + return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) + + Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F)); + } + + // Escape + // . Escapes the sequence starting 'in' (it must begin with a '\' or single quote) + // and returns the result. + // . Throws if it's an unknown escape character. + std::string Escape(Stream& in) + { + // eat slash + char escape = in.get(); + + // switch on escape character + char ch = in.get(); + + // first do single quote, since it's easier + if(escape == '\'' && ch == '\'') + return "\'"; + + // now do the slash (we're not gonna check if it's a slash - you better pass one!) 
+ switch(ch) { + case '0': return std::string(1, '\x00'); + case 'a': return "\x07"; + case 'b': return "\x08"; + case 't': + case '\t': return "\x09"; + case 'n': return "\x0A"; + case 'v': return "\x0B"; + case 'f': return "\x0C"; + case 'r': return "\x0D"; + case 'e': return "\x1B"; + case ' ': return "\x20"; + case '\"': return "\""; + case '\'': return "\'"; + case '\\': return "\\"; + case '/': return "/"; + case 'N': return "\x85"; + case '_': return "\xA0"; + case 'L': return "\xE2\x80\xA8"; // LS (#x2028) + case 'P': return "\xE2\x80\xA9"; // PS (#x2029) + case 'x': return Escape(in, 2); + case 'u': return Escape(in, 4); + case 'U': return Escape(in, 8); + } + + std::stringstream msg; + throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch); + } + } +} diff --git a/yaml-cpp/src/exp.h b/yaml-cpp/src/exp.h new file mode 100644 index 0000000..3e12aba --- /dev/null +++ b/yaml-cpp/src/exp.h @@ -0,0 +1,196 @@ +#ifndef EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include "regex.h" +#include +#include +#include "stream.h" + +namespace YAML +{ + //////////////////////////////////////////////////////////////////////////////// + // Here we store a bunch of expressions for matching different parts of the file. 
+ + namespace Exp + { + // misc + inline const RegEx& Space() { + static const RegEx e = RegEx(' '); + return e; + } + inline const RegEx& Tab() { + static const RegEx e = RegEx('\t'); + return e; + } + inline const RegEx& Blank() { + static const RegEx e = Space() || Tab(); + return e; + } + inline const RegEx& Break() { + static const RegEx e = RegEx('\n') || RegEx("\r\n"); + return e; + } + inline const RegEx& BlankOrBreak() { + static const RegEx e = Blank() || Break(); + return e; + } + inline const RegEx& Digit() { + static const RegEx e = RegEx('0', '9'); + return e; + } + inline const RegEx& Alpha() { + static const RegEx e = RegEx('a', 'z') || RegEx('A', 'Z'); + return e; + } + inline const RegEx& AlphaNumeric() { + static const RegEx e = Alpha() || Digit(); + return e; + } + inline const RegEx& Word() { + static const RegEx e = AlphaNumeric() || RegEx('-'); + return e; + } + inline const RegEx& Hex() { + static const RegEx e = Digit() || RegEx('A', 'F') || RegEx('a', 'f'); + return e; + } + // Valid Unicode code points that are not part of c-printable (YAML 1.2, sec. 
5.1)
+        inline const RegEx& NotPrintable() {
+            static const RegEx e = RegEx(0) ||
+                RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) ||
+                RegEx(0x0E, 0x1F) ||
+                (RegEx('\xC2') + (RegEx('\x80', '\x84') || RegEx('\x86', '\x9F')));
+            return e;
+        }
+        inline const RegEx& Utf8_ByteOrderMark() {
+            static const RegEx e = RegEx("\xEF\xBB\xBF");
+            return e;
+        }
+
+        // actual tags
+
+        inline const RegEx& DocStart() {
+            static const RegEx e = RegEx("---") + (BlankOrBreak() || RegEx());
+            return e;
+        }
+        inline const RegEx& DocEnd() {
+            static const RegEx e = RegEx("...") + (BlankOrBreak() || RegEx());
+            return e;
+        }
+        inline const RegEx& DocIndicator() {
+            static const RegEx e = DocStart() || DocEnd();
+            return e;
+        }
+        inline const RegEx& BlockEntry() {
+            static const RegEx e = RegEx('-') + (BlankOrBreak() || RegEx());
+            return e;
+        }
+        inline const RegEx& Key() {
+            static const RegEx e = RegEx('?');
+            return e;
+        }
+        inline const RegEx& KeyInFlow() {
+            static const RegEx e = RegEx('?') + BlankOrBreak();
+            return e;
+        }
+        inline const RegEx& Value() {
+            static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx());
+            return e;
+        }
+        inline const RegEx& ValueInFlow() {
+            static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx(",}", REGEX_OR));
+            return e;
+        }
+        inline const RegEx& ValueInJSONFlow() {
+            static const RegEx e = RegEx(':');
+            return e;
+        }
+        // Matches the '#' that starts a comment.
+        // Returns a reference to the function-local static, like every other
+        // expression in this namespace, instead of handing back a copy on each call.
+        inline const RegEx& Comment() {
+            static const RegEx e = RegEx('#');
+            return e;
+        }
+        inline const RegEx& Anchor() {
+            static const RegEx e = !(RegEx("[]{},", REGEX_OR) || BlankOrBreak());
+            return e;
+        }
+        inline const RegEx& AnchorEnd() {
+            static const RegEx e = RegEx("?:,]}%@`", REGEX_OR) || BlankOrBreak();
+            return e;
+        }
+        inline const RegEx& URI() {
+            static const RegEx e = Word() || RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) || (RegEx('%') + Hex() + Hex());
+            return e;
+        }
+        inline const RegEx& Tag() {
+            static const RegEx e = Word() || RegEx("#;/?:@&=+$_.~*'", REGEX_OR) || (RegEx('%') + Hex() +
Hex()); + return e; + } + + // Plain scalar rules: + // . Cannot start with a blank. + // . Can never start with any of , [ ] { } # & * ! | > \' \" % @ ` + // . In the block context - ? : must be not be followed with a space. + // . In the flow context ? is illegal and : and - must not be followed with a space. + inline const RegEx& PlainScalar() { + static const RegEx e = !(BlankOrBreak() || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + (BlankOrBreak() || RegEx()))); + return e; + } + inline const RegEx& PlainScalarInFlow() { + static const RegEx e = !(BlankOrBreak() || RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-:", REGEX_OR) + Blank())); + return e; + } + inline const RegEx& EndScalar() { + static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx()); + return e; + } + inline const RegEx& EndScalarInFlow() { + static const RegEx e = (RegEx(':') + (BlankOrBreak() || RegEx() || RegEx(",]}", REGEX_OR))) || RegEx(",?[]{}", REGEX_OR); + return e; + } + + inline const RegEx& EscSingleQuote() { + static const RegEx e = RegEx("\'\'"); + return e; + } + inline const RegEx& EscBreak() { + static const RegEx e = RegEx('\\') + Break(); + return e; + } + + inline const RegEx& ChompIndicator() { + static const RegEx e = RegEx("+-", REGEX_OR); + return e; + } + inline const RegEx& Chomp() { + static const RegEx e = (ChompIndicator() + Digit()) || (Digit() + ChompIndicator()) || ChompIndicator() || Digit(); + return e; + } + + // and some functions + std::string Escape(Stream& in); + } + + namespace Keys + { + const char Directive = '%'; + const char FlowSeqStart = '['; + const char FlowSeqEnd = ']'; + const char FlowMapStart = '{'; + const char FlowMapEnd = '}'; + const char FlowEntry = ','; + const char Alias = '*'; + const char Anchor = '&'; + const char Tag = '!'; + const char LiteralScalar = '|'; + const char FoldedScalar = '>'; + const char VerbatimTagStart = '<'; + const char VerbatimTagEnd = '>'; + } +} + +#endif // 
EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/indentation.h b/yaml-cpp/src/indentation.h new file mode 100644 index 0000000..25f684f --- /dev/null +++ b/yaml-cpp/src/indentation.h @@ -0,0 +1,38 @@ +#ifndef INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include "yaml-cpp/ostream.h" +#include + +namespace YAML +{ + struct Indentation { + Indentation(unsigned n_): n(n_) {} + unsigned n; + }; + + inline ostream& operator << (ostream& out, const Indentation& indent) { + for(unsigned i=0;i pData): m_pData(pData) + { + } + + Iterator::Iterator(const Iterator& rhs): m_pData(new IterPriv(*rhs.m_pData)) + { + } + + Iterator& Iterator::operator = (const Iterator& rhs) + { + if(this == &rhs) + return *this; + + m_pData.reset(new IterPriv(*rhs.m_pData)); + return *this; + } + + Iterator::~Iterator() + { + } + + Iterator& Iterator::operator ++ () + { + if(m_pData->type == IterPriv::IT_SEQ) + ++m_pData->seqIter; + else if(m_pData->type == IterPriv::IT_MAP) + ++m_pData->mapIter; + + return *this; + } + + Iterator Iterator::operator ++ (int) + { + Iterator temp = *this; + + if(m_pData->type == IterPriv::IT_SEQ) + ++m_pData->seqIter; + else if(m_pData->type == IterPriv::IT_MAP) + ++m_pData->mapIter; + + return temp; + } + + const Node& Iterator::operator * () const + { + if(m_pData->type == IterPriv::IT_SEQ) + return **m_pData->seqIter; + + throw BadDereference(); + } + + const Node *Iterator::operator -> () const + { + if(m_pData->type == IterPriv::IT_SEQ) + return *m_pData->seqIter; + + throw BadDereference(); + } + + const Node& Iterator::first() const + { + if(m_pData->type == IterPriv::IT_MAP) + return *m_pData->mapIter->first; + + throw BadDereference(); + } + + const Node& Iterator::second() const + { + 
if(m_pData->type == IterPriv::IT_MAP) + return *m_pData->mapIter->second; + + throw BadDereference(); + } + + bool operator == (const Iterator& it, const Iterator& jt) + { + if(it.m_pData->type != jt.m_pData->type) + return false; + + if(it.m_pData->type == IterPriv::IT_SEQ) + return it.m_pData->seqIter == jt.m_pData->seqIter; + else if(it.m_pData->type == IterPriv::IT_MAP) + return it.m_pData->mapIter == jt.m_pData->mapIter; + + return true; + } + + bool operator != (const Iterator& it, const Iterator& jt) + { + return !(it == jt); + } +} diff --git a/yaml-cpp/src/iterpriv.h b/yaml-cpp/src/iterpriv.h new file mode 100644 index 0000000..c511e8a --- /dev/null +++ b/yaml-cpp/src/iterpriv.h @@ -0,0 +1,33 @@ +#ifndef ITERPRIV_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define ITERPRIV_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include "yaml-cpp/ltnode.h" +#include +#include + +namespace YAML +{ + class Node; + + // IterPriv + // . The implementation for iterators - essentially a union of sequence and map iterators. 
+ struct IterPriv + { + IterPriv(): type(IT_NONE) {} + IterPriv(std::vector ::const_iterator it): type(IT_SEQ), seqIter(it) {} + IterPriv(std::map ::const_iterator it): type(IT_MAP), mapIter(it) {} + + enum ITER_TYPE { IT_NONE, IT_SEQ, IT_MAP }; + ITER_TYPE type; + + std::vector ::const_iterator seqIter; + std::map ::const_iterator mapIter; + }; +} + +#endif // ITERPRIV_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/node.cpp b/yaml-cpp/src/node.cpp new file mode 100644 index 0000000..360b4ad --- /dev/null +++ b/yaml-cpp/src/node.cpp @@ -0,0 +1,269 @@ +#include "yaml-cpp/node.h" +#include "yaml-cpp/aliasmanager.h" +#include "yaml-cpp/emitfromevents.h" +#include "yaml-cpp/emitter.h" +#include "yaml-cpp/eventhandler.h" +#include "iterpriv.h" +#include "nodebuilder.h" +#include "nodeownership.h" +#include "scanner.h" +#include "tag.h" +#include "token.h" +#include +#include + +namespace YAML +{ + bool ltnode::operator()(const Node *pNode1, const Node *pNode2) const { + return *pNode1 < *pNode2; + } + + Node::Node(): m_pOwnership(new NodeOwnership), m_type(NodeType::Null) + { + } + + Node::Node(NodeOwnership& owner): m_pOwnership(new NodeOwnership(&owner)), m_type(NodeType::Null) + { + } + + Node::~Node() + { + Clear(); + } + + void Node::Clear() + { + m_pOwnership.reset(new NodeOwnership); + m_type = NodeType::Null; + m_tag.clear(); + m_scalarData.clear(); + m_seqData.clear(); + m_mapData.clear(); + } + + bool Node::IsAliased() const + { + return m_pOwnership->IsAliased(*this); + } + + Node& Node::CreateNode() + { + return m_pOwnership->Create(); + } + + std::auto_ptr Node::Clone() const + { + std::auto_ptr pNode(new Node); + NodeBuilder nodeBuilder(*pNode); + EmitEvents(nodeBuilder); + return pNode; + } + + void Node::EmitEvents(EventHandler& eventHandler) const + { + eventHandler.OnDocumentStart(m_mark); + AliasManager am; + EmitEvents(am, eventHandler); + eventHandler.OnDocumentEnd(); + } + + void Node::EmitEvents(AliasManager& am, EventHandler& 
eventHandler) const + { + anchor_t anchor = NullAnchor; + if(IsAliased()) { + anchor = am.LookupAnchor(*this); + if(anchor) { + eventHandler.OnAlias(m_mark, anchor); + return; + } + + am.RegisterReference(*this); + anchor = am.LookupAnchor(*this); + } + + switch(m_type) { + case NodeType::Null: + eventHandler.OnNull(m_mark, anchor); + break; + case NodeType::Scalar: + eventHandler.OnScalar(m_mark, m_tag, anchor, m_scalarData); + break; + case NodeType::Sequence: + eventHandler.OnSequenceStart(m_mark, m_tag, anchor); + for(std::size_t i=0;iEmitEvents(am, eventHandler); + eventHandler.OnSequenceEnd(); + break; + case NodeType::Map: + eventHandler.OnMapStart(m_mark, m_tag, anchor); + for(node_map::const_iterator it=m_mapData.begin();it!=m_mapData.end();++it) { + it->first->EmitEvents(am, eventHandler); + it->second->EmitEvents(am, eventHandler); + } + eventHandler.OnMapEnd(); + break; + } + } + + void Node::Init(NodeType::value type, const Mark& mark, const std::string& tag) + { + Clear(); + m_mark = mark; + m_type = type; + m_tag = tag; + } + + void Node::MarkAsAliased() + { + m_pOwnership->MarkAsAliased(*this); + } + + void Node::SetScalarData(const std::string& data) + { + assert(m_type == NodeType::Scalar); // TODO: throw? + m_scalarData = data; + } + + void Node::Append(Node& node) + { + assert(m_type == NodeType::Sequence); // TODO: throw? + m_seqData.push_back(&node); + } + + void Node::Insert(Node& key, Node& value) + { + assert(m_type == NodeType::Map); // TODO: throw? + m_mapData[&key] = &value; + } + + // begin + // Returns an iterator to the beginning of this (sequence or map). + Iterator Node::begin() const + { + switch(m_type) { + case NodeType::Null: + case NodeType::Scalar: + return Iterator(); + case NodeType::Sequence: + return Iterator(std::auto_ptr(new IterPriv(m_seqData.begin()))); + case NodeType::Map: + return Iterator(std::auto_ptr(new IterPriv(m_mapData.begin()))); + } + + assert(false); + return Iterator(); + } + + // end + // . 
Returns an iterator to the end of this (sequence or map).
+    Iterator Node::end() const
+    {
+        switch(m_type) {
+            case NodeType::Null:
+            case NodeType::Scalar:
+                return Iterator();
+            case NodeType::Sequence:
+                return Iterator(std::auto_ptr<IterPriv>(new IterPriv(m_seqData.end())));
+            case NodeType::Map:
+                return Iterator(std::auto_ptr<IterPriv>(new IterPriv(m_mapData.end())));
+        }
+
+        assert(false);
+        return Iterator();
+    }
+
+    // size
+    // . Returns the size of a sequence or map node
+    // . Otherwise, returns zero.
+    std::size_t Node::size() const
+    {
+        switch(m_type) {
+            case NodeType::Null:
+            case NodeType::Scalar:
+                return 0;
+            case NodeType::Sequence:
+                return m_seqData.size();
+            case NodeType::Map:
+                return m_mapData.size();
+        }
+
+        assert(false);
+        return 0;
+    }
+
+    // FindAtIndex
+    // . Returns the i-th element when this node is a sequence and 'i' is in range.
+    // . Otherwise returns 0 - the result non-sequence nodes already get - rather
+    //   than indexing past the end of the sequence.
+    const Node *Node::FindAtIndex(std::size_t i) const
+    {
+        if(m_type == NodeType::Sequence && i < m_seqData.size())
+            return m_seqData[i];
+        return 0;
+    }
+
+    // GetScalar
+    // . Null nodes yield "~", scalar nodes yield their stored data; both return true.
+    // . Sequences and maps leave 's' untouched and return false.
+    bool Node::GetScalar(std::string& s) const
+    {
+        switch(m_type) {
+            case NodeType::Null:
+                s = "~";
+                return true;
+            case NodeType::Scalar:
+                s = m_scalarData;
+                return true;
+            case NodeType::Sequence:
+            case NodeType::Map:
+                return false;
+        }
+
+        assert(false);
+        return false;
+    }
+
+    // Emits 'node' by replaying its events into an EmitFromEvents wrapped around 'out'.
+    Emitter& operator << (Emitter& out, const Node& node)
+    {
+        EmitFromEvents emitFromEvents(out);
+        node.EmitEvents(emitFromEvents);
+        return out;
+    }
+
+    int Node::Compare(const Node& rhs) const
+    {
+        if(m_type != rhs.m_type)
+            return rhs.m_type - m_type;
+
+        switch(m_type) {
+            case NodeType::Null:
+                return 0;
+            case NodeType::Scalar:
+                return m_scalarData.compare(rhs.m_scalarData);
+            case NodeType::Sequence:
+                if(m_seqData.size() < rhs.m_seqData.size())
+                    return 1;
+                else if(m_seqData.size() > rhs.m_seqData.size())
+                    return -1;
+                for(std::size_t i=0;iCompare(*rhs.m_seqData[i]))
+                        return cmp;
+                return 0;
+            case NodeType::Map:
+                if(m_mapData.size() < rhs.m_mapData.size())
+                    return 1;
+                else if(m_mapData.size() > rhs.m_mapData.size())
+                    return -1;
+                node_map::const_iterator it = m_mapData.begin();
+
node_map::const_iterator jt = rhs.m_mapData.begin(); + for(;it!=m_mapData.end() && jt!=rhs.m_mapData.end();it++, jt++) { + if(int cmp = it->first->Compare(*jt->first)) + return cmp; + if(int cmp = it->second->Compare(*jt->second)) + return cmp; + } + return 0; + } + + assert(false); + return 0; + } + + bool operator < (const Node& n1, const Node& n2) + { + return n1.Compare(n2) < 0; + } +} diff --git a/yaml-cpp/src/nodebuilder.cpp b/yaml-cpp/src/nodebuilder.cpp new file mode 100644 index 0000000..13a7032 --- /dev/null +++ b/yaml-cpp/src/nodebuilder.cpp @@ -0,0 +1,145 @@ +#include "nodebuilder.h" +#include "yaml-cpp/mark.h" +#include "yaml-cpp/node.h" +#include + +namespace YAML +{ + NodeBuilder::NodeBuilder(Node& root): m_root(root), m_initializedRoot(false), m_finished(false) + { + m_root.Clear(); + m_anchors.push_back(0); // since the anchors start at 1 + } + + NodeBuilder::~NodeBuilder() + { + } + + void NodeBuilder::OnDocumentStart(const Mark&) + { + } + + void NodeBuilder::OnDocumentEnd() + { + assert(m_finished); + } + + void NodeBuilder::OnNull(const Mark& mark, anchor_t anchor) + { + Node& node = Push(anchor); + node.Init(NodeType::Null, mark, ""); + Pop(); + } + + void NodeBuilder::OnAlias(const Mark& /*mark*/, anchor_t anchor) + { + Node& node = *m_anchors[anchor]; + Insert(node); + node.MarkAsAliased(); + } + + void NodeBuilder::OnScalar(const Mark& mark, const std::string& tag, anchor_t anchor, const std::string& value) + { + Node& node = Push(anchor); + node.Init(NodeType::Scalar, mark, tag); + node.SetScalarData(value); + Pop(); + } + + void NodeBuilder::OnSequenceStart(const Mark& mark, const std::string& tag, anchor_t anchor) + { + Node& node = Push(anchor); + node.Init(NodeType::Sequence, mark, tag); + } + + void NodeBuilder::OnSequenceEnd() + { + Pop(); + } + + void NodeBuilder::OnMapStart(const Mark& mark, const std::string& tag, anchor_t anchor) + { + Node& node = Push(anchor); + node.Init(NodeType::Map, mark, tag); + m_didPushKey.push(false); + 
} + + void NodeBuilder::OnMapEnd() + { + m_didPushKey.pop(); + Pop(); + } + + Node& NodeBuilder::Push(anchor_t anchor) + { + Node& node = Push(); + RegisterAnchor(anchor, node); + return node; + } + + Node& NodeBuilder::Push() + { + if(!m_initializedRoot) { + m_initializedRoot = true; + return m_root; + } + + Node& node = m_root.CreateNode(); + m_stack.push(&node); + return node; + } + + Node& NodeBuilder::Top() + { + return m_stack.empty() ? m_root : *m_stack.top(); + } + + void NodeBuilder::Pop() + { + assert(!m_finished); + if(m_stack.empty()) { + m_finished = true; + return; + } + + Node& node = *m_stack.top(); + m_stack.pop(); + Insert(node); + } + + void NodeBuilder::Insert(Node& node) + { + Node& curTop = Top(); + switch(curTop.Type()) { + case NodeType::Null: + case NodeType::Scalar: + assert(false); + break; + case NodeType::Sequence: + curTop.Append(node); + break; + case NodeType::Map: + assert(!m_didPushKey.empty()); + if(m_didPushKey.top()) { + assert(!m_pendingKeys.empty()); + + Node& key = *m_pendingKeys.top(); + m_pendingKeys.pop(); + curTop.Insert(key, node); + m_didPushKey.top() = false; + } else { + m_pendingKeys.push(&node); + m_didPushKey.top() = true; + } + break; + } + } + + void NodeBuilder::RegisterAnchor(anchor_t anchor, Node& node) + { + if(anchor) { + assert(anchor == m_anchors.size()); + m_anchors.push_back(&node); + } + } +} diff --git a/yaml-cpp/src/nodebuilder.h b/yaml-cpp/src/nodebuilder.h new file mode 100644 index 0000000..9c1d16a --- /dev/null +++ b/yaml-cpp/src/nodebuilder.h @@ -0,0 +1,61 @@ +#ifndef NODEBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define NODEBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + +#include "yaml-cpp/eventhandler.h" +#include +#include +#include +#include + +namespace YAML +{ + class Node; + + class NodeBuilder: public 
EventHandler + { + public: + explicit NodeBuilder(Node& root); + virtual ~NodeBuilder(); + + virtual void OnDocumentStart(const Mark& mark); + virtual void OnDocumentEnd(); + + virtual void OnNull(const Mark& mark, anchor_t anchor); + virtual void OnAlias(const Mark& mark, anchor_t anchor); + virtual void OnScalar(const Mark& mark, const std::string& tag, anchor_t anchor, const std::string& value); + + virtual void OnSequenceStart(const Mark& mark, const std::string& tag, anchor_t anchor); + virtual void OnSequenceEnd(); + + virtual void OnMapStart(const Mark& mark, const std::string& tag, anchor_t anchor); + virtual void OnMapEnd(); + + private: + Node& Push(anchor_t anchor); + Node& Push(); + Node& Top(); + void Pop(); + + void Insert(Node& node); + void RegisterAnchor(anchor_t anchor, Node& node); + + private: + Node& m_root; + bool m_initializedRoot; + bool m_finished; + + std::stack m_stack; + std::stack m_pendingKeys; + std::stack m_didPushKey; + + typedef std::vector Anchors; + Anchors m_anchors; + }; +} + +#endif // NODEBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + diff --git a/yaml-cpp/src/nodeownership.cpp b/yaml-cpp/src/nodeownership.cpp new file mode 100644 index 0000000..118edbc --- /dev/null +++ b/yaml-cpp/src/nodeownership.cpp @@ -0,0 +1,31 @@ +#include "nodeownership.h" +#include "yaml-cpp/node.h" + +namespace YAML +{ + NodeOwnership::NodeOwnership(NodeOwnership *pOwner): m_pOwner(pOwner) + { + if(!m_pOwner) + m_pOwner = this; + } + + NodeOwnership::~NodeOwnership() + { + } + + Node& NodeOwnership::_Create() + { + m_nodes.push_back(std::auto_ptr(new Node)); + return m_nodes.back(); + } + + void NodeOwnership::_MarkAsAliased(const Node& node) + { + m_aliasedNodes.insert(&node); + } + + bool NodeOwnership::_IsAliased(const Node& node) const + { + return m_aliasedNodes.count(&node) > 0; + } +} diff --git a/yaml-cpp/src/nodeownership.h b/yaml-cpp/src/nodeownership.h new file mode 100644 index 0000000..6987081 --- /dev/null +++ 
b/yaml-cpp/src/nodeownership.h @@ -0,0 +1,39 @@ +#ifndef NODE_OWNERSHIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define NODE_OWNERSHIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include "yaml-cpp/noncopyable.h" +#include "ptr_vector.h" +#include + +namespace YAML +{ + class Node; + + class NodeOwnership: private noncopyable + { + public: + explicit NodeOwnership(NodeOwnership *pOwner = 0); + ~NodeOwnership(); + + Node& Create() { return m_pOwner->_Create(); } + void MarkAsAliased(const Node& node) { m_pOwner->_MarkAsAliased(node); } + bool IsAliased(const Node& node) const { return m_pOwner->_IsAliased(node); } + + private: + Node& _Create(); + void _MarkAsAliased(const Node& node); + bool _IsAliased(const Node& node) const; + + private: + ptr_vector m_nodes; + std::set m_aliasedNodes; + NodeOwnership *m_pOwner; + }; +} + +#endif // NODE_OWNERSHIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/null.cpp b/yaml-cpp/src/null.cpp new file mode 100644 index 0000000..08fa9aa --- /dev/null +++ b/yaml-cpp/src/null.cpp @@ -0,0 +1,12 @@ +#include "yaml-cpp/null.h" +#include "yaml-cpp/node.h" + +namespace YAML +{ + _Null Null; + + bool IsNull(const Node& node) + { + return node.Read(Null); + } +} diff --git a/yaml-cpp/src/ostream.cpp b/yaml-cpp/src/ostream.cpp new file mode 100644 index 0000000..a7f1e14 --- /dev/null +++ b/yaml-cpp/src/ostream.cpp @@ -0,0 +1,63 @@ +#include "yaml-cpp/ostream.h" +#include + +namespace YAML +{ + ostream::ostream(): m_buffer(0), m_pos(0), m_size(0), m_row(0), m_col(0) + { + reserve(1024); + } + + ostream::~ostream() + { + delete [] m_buffer; + } + + void ostream::reserve(unsigned size) + { + if(size <= m_size) + return; + + char *newBuffer = new char[size]; + std::memset(newBuffer, 0, size * sizeof(char)); + std::memcpy(newBuffer, m_buffer, m_size 
* sizeof(char)); + delete [] m_buffer; + m_buffer = newBuffer; + m_size = size; + } + + void ostream::put(char ch) + { + if(m_pos >= m_size - 1) // an extra space for the NULL terminator + reserve(m_size * 2); + + m_buffer[m_pos] = ch; + m_pos++; + + if(ch == '\n') { + m_row++; + m_col = 0; + } else + m_col++; + } + + ostream& operator << (ostream& out, const char *str) + { + std::size_t length = std::strlen(str); + for(std::size_t i=0;i +#include + +namespace YAML +{ + Parser::Parser() + { + } + + Parser::Parser(std::istream& in) + { + Load(in); + } + + Parser::~Parser() + { + } + + Parser::operator bool() const + { + return m_pScanner.get() && !m_pScanner->empty(); + } + + void Parser::Load(std::istream& in) + { + m_pScanner.reset(new Scanner(in)); + m_pDirectives.reset(new Directives); + } + + // HandleNextDocument + // . Handles the next document + // . Throws a ParserException on error. + // . Returns false if there are no more documents + bool Parser::HandleNextDocument(EventHandler& eventHandler) + { + if(!m_pScanner.get()) + return false; + + ParseDirectives(); + if(m_pScanner->empty()) + return false; + + SingleDocParser sdp(*m_pScanner, *m_pDirectives); + sdp.HandleDocument(eventHandler); + return true; + } + + // GetNextDocument + // . Reads the next document in the queue (of tokens). + // . Throws a ParserException on error. + bool Parser::GetNextDocument(Node& document) + { + NodeBuilder builder(document); + return HandleNextDocument(builder); + } + + // ParseDirectives + // . Reads any directives that are next in the queue. 
+    void Parser::ParseDirectives()
+    {
+        bool readDirective = false;
+
+        while(!m_pScanner->empty()) {
+            Token& token = m_pScanner->peek();
+            if(token.type != Token::DIRECTIVE)
+                break;
+
+            // we keep the directives from the last document if none are specified;
+            // but if any directives are specified, then we reset them
+            // (once, before the first directive of this document is handled)
+            if(!readDirective)
+                m_pDirectives.reset(new Directives);
+
+            readDirective = true;
+            HandleDirective(token);
+            m_pScanner->pop();
+        }
+    }
+
+    // HandleDirective
+    // . Dispatches on the directive name: "YAML" and "TAG" are handled,
+    //   any other name is ignored.
+    void Parser::HandleDirective(const Token& token)
+    {
+        if(token.value == "YAML")
+            HandleYamlDirective(token);
+        else if(token.value == "TAG")
+            HandleTagDirective(token);
+    }
+
+    // HandleYamlDirective
+    // . Should be of the form 'major.minor' (like a version number)
+    // . Throws a ParserException if the directive does not have exactly one parameter,
+    //   if a YAML directive was already given, if the parameter is not two numbers
+    //   separated by a single '.', or if the major version is greater than 1.
+    void Parser::HandleYamlDirective(const Token& token)
+    {
+        if(token.params.size() != 1)
+            throw ParserException(token.mark, ErrorMsg::YAML_DIRECTIVE_ARGS);
+
+        if(!m_pDirectives->version.isDefault)
+            throw ParserException(token.mark, ErrorMsg::REPEATED_YAML_DIRECTIVE);
+
+        std::stringstream str(token.params[0]);
+        str >> m_pDirectives->version.major;
+        // the one character between the two numbers has to be the '.' separator;
+        // merely skipping it would accept things like "1x2" as version 1.2
+        const bool hasSeparator = (str.get() == '.');
+        str >> m_pDirectives->version.minor;
+        if(!str || !hasSeparator || str.peek() != EOF)
+            throw ParserException(token.mark, std::string(ErrorMsg::YAML_VERSION) + token.params[0]);
+
+        if(m_pDirectives->version.major > 1)
+            throw ParserException(token.mark, ErrorMsg::YAML_MAJOR_VERSION);
+
+        m_pDirectives->version.isDefault = false;
+        // TODO: warning on major == 1, minor > 2?
+    }
+
+    // HandleTagDirective
+    // . Should be of the form 'handle prefix', where 'handle' is converted to 'prefix' in the file.
+ void Parser::HandleTagDirective(const Token& token) + { + if(token.params.size() != 2) + throw ParserException(token.mark, ErrorMsg::TAG_DIRECTIVE_ARGS); + + const std::string& handle = token.params[0]; + const std::string& prefix = token.params[1]; + if(m_pDirectives->tags.find(handle) != m_pDirectives->tags.end()) + throw ParserException(token.mark, ErrorMsg::REPEATED_TAG_DIRECTIVE); + + m_pDirectives->tags[handle] = prefix; + } + + void Parser::PrintTokens(std::ostream& out) + { + if(!m_pScanner.get()) + return; + + while(1) { + if(m_pScanner->empty()) + break; + + out << m_pScanner->peek() << "\n"; + m_pScanner->pop(); + } + } +} diff --git a/yaml-cpp/src/ptr_stack.h b/yaml-cpp/src/ptr_stack.h new file mode 100644 index 0000000..bf454fb --- /dev/null +++ b/yaml-cpp/src/ptr_stack.h @@ -0,0 +1,46 @@ +#ifndef PTR_STACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define PTR_STACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + +#include "yaml-cpp/noncopyable.h" +#include +#include +#include +#include + +template +class ptr_stack: private YAML::noncopyable +{ +public: + ptr_stack() {} + ~ptr_stack() { clear(); } + + void clear() { + for(unsigned i=0;i t) { + m_data.push_back(NULL); + m_data.back() = t.release(); + } + // pop removes the top pointer and hands it to the caller inside the returned auto_ptr + // NOTE(review): neither pop() nor top() checks for an empty stack + std::auto_ptr pop() { + std::auto_ptr t(m_data.back()); + m_data.pop_back(); + return t; + } + T& top() { return *m_data.back(); } + const T& top() const { return *m_data.back(); } + +private: + std::vector m_data; +}; + +#endif // PTR_STACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/ptr_vector.h b/yaml-cpp/src/ptr_vector.h new file mode 100644 index 0000000..7b936cb --- /dev/null +++ b/yaml-cpp/src/ptr_vector.h @@ -0,0 +1,47 @@ +#ifndef PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if
defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + +#include "yaml-cpp/noncopyable.h" +#include +#include +#include +#include + +namespace YAML { + + template + class ptr_vector: private YAML::noncopyable + { + public: + ptr_vector() {} + ~ptr_vector() { clear(); } + + void clear() { + for(unsigned i=0;i t) { + m_data.push_back(NULL); + m_data.back() = t.release(); + } + // NOTE(review): operator[] and back() dereference without any bounds or emptiness check + T& operator[](std::size_t i) { return *m_data[i]; } + const T& operator[](std::size_t i) const { return *m_data[i]; } + + T& back() { return *m_data.back(); } + const T& back() const { return *m_data.back(); } + + private: + std::vector m_data; + }; +} + +#endif // PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/regex.cpp b/yaml-cpp/src/regex.cpp new file mode 100644 index 0000000..b35b1f4 --- /dev/null +++ b/yaml-cpp/src/regex.cpp @@ -0,0 +1,60 @@ +#include "regex.h" + +namespace YAML +{ + // constructors + // NOTE(review): each constructor initializes only the members it names; m_a and/or m_z are otherwise left unset + RegEx::RegEx(): m_op(REGEX_EMPTY) + { + } + + RegEx::RegEx(REGEX_OP op): m_op(op) + { + } + + RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_a(ch) + { + } + + RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_a(a), m_z(z) + { + } + + RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op) + { + for(std::size_t i=0;i= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include +#include + +namespace YAML +{ + class Stream; + + enum REGEX_OP { REGEX_EMPTY, REGEX_MATCH, REGEX_RANGE, REGEX_OR, REGEX_AND, REGEX_NOT, REGEX_SEQ }; + + // simplified regular expressions + // . Only straightforward matches (no repeated characters) + // . Only matches from start of string + class RegEx + { + public: + RegEx(); + RegEx(char ch); + RegEx(char a, char z); + RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ); + ~RegEx() {} + + friend RegEx operator !
(const RegEx& ex); + friend RegEx operator || (const RegEx& ex1, const RegEx& ex2); + friend RegEx operator && (const RegEx& ex1, const RegEx& ex2); + friend RegEx operator + (const RegEx& ex1, const RegEx& ex2); + + bool Matches(char ch) const; + bool Matches(const std::string& str) const; + bool Matches(const Stream& in) const; + template bool Matches(const Source& source) const; + + int Match(const std::string& str) const; + int Match(const Stream& in) const; + template int Match(const Source& source) const; + + private: + RegEx(REGEX_OP op); + + template bool IsValidSource(const Source& source) const; + template int MatchUnchecked(const Source& source) const; + + template int MatchOpEmpty(const Source& source) const; + template int MatchOpMatch(const Source& source) const; + template int MatchOpRange(const Source& source) const; + template int MatchOpOr(const Source& source) const; + template int MatchOpAnd(const Source& source) const; + template int MatchOpNot(const Source& source) const; + template int MatchOpSeq(const Source& source) const; + + private: + REGEX_OP m_op; + char m_a, m_z; + std::vector m_params; + }; +} + +#include "regeximpl.h" + +#endif // REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/regeximpl.h b/yaml-cpp/src/regeximpl.h new file mode 100644 index 0000000..d5c20d7 --- /dev/null +++ b/yaml-cpp/src/regeximpl.h @@ -0,0 +1,186 @@ +#ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include "stream.h" +#include "stringsource.h" +#include "streamcharsource.h" + +namespace YAML +{ + // query matches + inline bool RegEx::Matches(char ch) const { + std::string str; + str += ch; + return Matches(str); + } + + inline bool RegEx::Matches(const std::string& str) const { + return
Match(str) >= 0; + } + + inline bool RegEx::Matches(const Stream& in) const { + return Match(in) >= 0; + } + + template + inline bool RegEx::Matches(const Source& source) const { + return Match(source) >= 0; + } + + // Match + // . Matches the given string against this regular expression. + // . Returns the number of characters matched. + // . Returns -1 if no characters were matched (the reason for + // not returning zero is that we may have an empty regex + // which is ALWAYS successful at matching zero characters). + // . REMEMBER that we only match from the start of the buffer! + inline int RegEx::Match(const std::string& str) const + { + StringCharSource source(str.c_str(), str.size()); + return Match(source); + } + + inline int RegEx::Match(const Stream& in) const + { + StreamCharSource source(in); + return Match(source); + } + + template + inline bool RegEx::IsValidSource(const Source& source) const + { + return source; + } + + // for a string source, only the single-character ops (MATCH, RANGE) require a non-exhausted source + template<> + inline bool RegEx::IsValidSource(const StringCharSource&source) const + { + switch(m_op) { + case REGEX_MATCH: + case REGEX_RANGE: + return source; + default: + return true; + } + } + + template + inline int RegEx::Match(const Source& source) const + { + return IsValidSource(source) ? MatchUnchecked(source) : -1; + } + + // dispatches on m_op; an op value outside the enum falls through to -1 + template + inline int RegEx::MatchUnchecked(const Source& source) const + { + switch(m_op) { + case REGEX_EMPTY: + return MatchOpEmpty(source); + case REGEX_MATCH: + return MatchOpMatch(source); + case REGEX_RANGE: + return MatchOpRange(source); + case REGEX_OR: + return MatchOpOr(source); + case REGEX_AND: + return MatchOpAnd(source); + case REGEX_NOT: + return MatchOpNot(source); + case REGEX_SEQ: + return MatchOpSeq(source); + } + + return -1; + } + + ////////////////////////////////////////////////////////////////////////////// + // Operators + // Note: the convention MatchOp* is that we can assume IsValidSource(source).
+ // So we do all our checks *before* we call these functions + + // EmptyOperator + template + inline int RegEx::MatchOpEmpty(const Source& source) const { + return source[0] == Stream::eof() ? 0 : -1; + } + + template <> + inline int RegEx::MatchOpEmpty(const StringCharSource& source) const { + return !source ? 0 : -1; // the empty regex only is successful on the empty string + } + + // MatchOperator + template + inline int RegEx::MatchOpMatch(const Source& source) const { + if(source[0] != m_a) + return -1; + return 1; + } + + // RangeOperator + // . Matches one character in the inclusive range [m_a, m_z] + template + inline int RegEx::MatchOpRange(const Source& source) const { + if(m_a > source[0] || m_z < source[0]) + return -1; + return 1; + } + + // OrOperator + template + inline int RegEx::MatchOpOr(const Source& source) const { + for(std::size_t i=0;i= 0) + return n; + } + return -1; + } + + // AndOperator + // Note: 'AND' is a little funny, since we may be required to match things + // of different lengths. If we find a match, we return the length of + // the FIRST entry on the list. + template + inline int RegEx::MatchOpAnd(const Source& source) const { + int first = -1; + for(std::size_t i=0;i + inline int RegEx::MatchOpNot(const Source& source) const { + if(m_params.empty()) + return -1; + if(m_params[0].MatchUnchecked(source) >= 0) + return -1; + // the operand did not match, so the negation succeeds and reports a match of length 1 + return 1; + } + + // SeqOperator + template + inline int RegEx::MatchOpSeq(const Source& source) const { + int offset = 0; + for(std::size_t i=0;i +#include + +namespace YAML +{ + Scanner::Scanner(std::istream& in) + : INPUT(in), m_startedStream(false), m_endedStream(false), m_simpleKeyAllowed(false), m_canBeJSONFlow(false) + { + } + + Scanner::~Scanner() + { + } + + // empty + // . Returns true if there are no more tokens to be read + // . May scan further input first (calls EnsureTokensInQueue) + bool Scanner::empty() + { + EnsureTokensInQueue(); + return m_tokens.empty(); + } + + // pop + // . Simply removes the next token on the queue. + // . Does nothing if no tokens are left.
+ void Scanner::pop() + { + EnsureTokensInQueue(); + if(!m_tokens.empty()) + m_tokens.pop(); + } + + // peek + // . Returns (but does not remove) the next token on the queue. + // . The queue must not be empty (only checked by an assert). + Token& Scanner::peek() + { + EnsureTokensInQueue(); + assert(!m_tokens.empty()); // should we be asserting here? I mean, we should really just be checking + // if it's empty before peeking. + +#if 0 + static Token *pLast = 0; + if(pLast != &m_tokens.front()) + std::cerr << "peek: " << m_tokens.front() << "\n"; + pLast = &m_tokens.front(); +#endif + + return m_tokens.front(); + } + + // EnsureTokensInQueue + // . Scan until there's a valid token at the front of the queue, + // or we're sure the queue is empty. + void Scanner::EnsureTokensInQueue() + { + while(1) { + if(!m_tokens.empty()) { + Token& token = m_tokens.front(); + + // if this guy's valid, then we're done + if(token.status == Token::VALID) + return; + + // here's where we clean up the impossible tokens + if(token.status == Token::INVALID) { + m_tokens.pop(); + continue; + } + + // note: what's left are the unverified tokens + } + + // no token? maybe we've actually finished + if(m_endedStream) + return; + + // no? then scan... + ScanNextToken(); + } + } + + // ScanNextToken + // . The main scanning function; here we branch out and + // scan whatever the next token should be. + // . The checks below are tried in order and the first one that applies wins; + // if none applies a ParserException (UNKNOWN_TOKEN) is thrown. + void Scanner::ScanNextToken() + { + if(m_endedStream) + return; + + if(!m_startedStream) + return StartStream(); + + // get rid of whitespace, etc. (in between tokens it should be irrelevant) + ScanToNextToken(); + + // maybe need to end some blocks + PopIndentToHere(); + + // ***** + // And now branch based on the next few characters!
+ // ***** + + // end of stream + if(!INPUT) + return EndStream(); + + // directives and document markers are only recognized at column 0 + if(INPUT.column() == 0 && INPUT.peek() == Keys::Directive) + return ScanDirective(); + + // document token + if(INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) + return ScanDocStart(); + + if(INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) + return ScanDocEnd(); + + // flow start/end/entry + if(INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart) + return ScanFlowStart(); + + if(INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) + return ScanFlowEnd(); + + if(INPUT.peek() == Keys::FlowEntry) + return ScanFlowEntry(); + + // block/map stuff + if(Exp::BlockEntry().Matches(INPUT)) + return ScanBlockEntry(); + + if((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) + return ScanKey(); + + if(GetValueRegex().Matches(INPUT)) + return ScanValue(); + + // alias/anchor + if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) + return ScanAnchorOrAlias(); + + // tag + if(INPUT.peek() == Keys::Tag) + return ScanTag(); + + // special scalars + if(InBlockContext() && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar)) + return ScanBlockScalar(); + + if(INPUT.peek() == '\'' || INPUT.peek() == '\"') + return ScanQuotedScalar(); + + // plain scalars + if((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()).Matches(INPUT)) + return ScanPlainScalar(); + + // don't know what it is! + throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN); + } + + // ScanToNextToken + // . Eats input until we reach the next token-like thing.
+ void Scanner::ScanToNextToken() + { + while(1) { + // first eat whitespace + // (a tab seen in block context disables simple keys until the next line break) + while(INPUT && IsWhitespaceToBeEaten(INPUT.peek())) { + if(InBlockContext() && Exp::Tab().Matches(INPUT)) + m_simpleKeyAllowed = false; + INPUT.eat(1); + } + + // then eat a comment + if(Exp::Comment().Matches(INPUT)) { + // eat until line break + while(INPUT && !Exp::Break().Matches(INPUT)) + INPUT.eat(1); + } + + // if it's NOT a line break, then we're done! + if(!Exp::Break().Matches(INPUT)) + break; + + // otherwise, let's eat the line break and keep going + int n = Exp::Break().Match(INPUT); + INPUT.eat(n); + + // oh yeah, and let's get rid of that simple key + InvalidateSimpleKey(); + + // new line - we may be able to accept a simple key now + if(InBlockContext()) + m_simpleKeyAllowed = true; + } + } + + /////////////////////////////////////////////////////////////////////// + // Misc. helpers + + // IsWhitespaceToBeEaten + // . We can eat whitespace if it's a space or tab + // . Note: originally tabs in block context couldn't be eaten + // "where a simple key could be allowed + // (i.e., not at the beginning of a line, or following '-', '?', or ':')" + // I think this is wrong, since tabs can be non-content whitespace; it's just + // that they can't contribute to indentation, so once you've seen a tab in a + // line, you can't start a simple key + bool Scanner::IsWhitespaceToBeEaten(char ch) + { + if(ch == ' ') + return true; + + if(ch == '\t') + return true; + + return false; + } + + // GetValueRegex + // . Get the appropriate regex to check if it's a value token + // . Block context uses Exp::Value(); flow context uses Exp::ValueInJSONFlow() + // when m_canBeJSONFlow is set and Exp::ValueInFlow() otherwise. + const RegEx& Scanner::GetValueRegex() const + { + if(InBlockContext()) + return Exp::Value(); + + return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow(); + } + + // StartStream + // . Set the initial conditions for starting a stream.
+ void Scanner::StartStream() + { + m_startedStream = true; + m_simpleKeyAllowed = true; + std::auto_ptr pIndent(new IndentMarker(-1, IndentMarker::NONE)); + m_indentRefs.push_back(pIndent); + m_indents.push(&m_indentRefs.back()); + } + + // EndStream + // . Close out the stream, finish up, etc. + // . After this, m_endedStream is set and no further scanning happens. + void Scanner::EndStream() + { + // force newline + if(INPUT.column() > 0) + INPUT.ResetColumn(); + + PopAllIndents(); + PopAllSimpleKeys(); + + m_simpleKeyAllowed = false; + m_endedStream = true; + } + + // PushToken + // . Enqueues a token of the given type at the current input mark + // and returns a pointer to the queued token. + Token *Scanner::PushToken(Token::TYPE type) + { + m_tokens.push(Token(type, INPUT.mark())); + return &m_tokens.back(); + } + + // GetStartTokenFor + // . Maps SEQ/MAP indent types to the matching BLOCK_*_START token type. + // . Any other type trips assert(false) and, if execution continues, throws std::runtime_error. + Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const + { + switch(type) { + case IndentMarker::SEQ: return Token::BLOCK_SEQ_START; + case IndentMarker::MAP: return Token::BLOCK_MAP_START; + case IndentMarker::NONE: assert(false); break; + } + assert(false); + throw std::runtime_error("yaml-cpp: internal error, invalid indent type"); + } + + // PushIndentTo + // . Pushes an indentation onto the stack, and enqueues the + // proper token (sequence start or mapping start). + // . Returns the indent marker it generates (if any). + // . Returns 0 in flow context, or when the column is not a new indentation level. + Scanner::IndentMarker *Scanner::PushIndentTo(int column, IndentMarker::INDENT_TYPE type) + { + // are we in flow? + if(InFlowContext()) + return 0; + + std::auto_ptr pIndent(new IndentMarker(column, type)); + IndentMarker& indent = *pIndent; + const IndentMarker& lastIndent = *m_indents.top(); + + // is this actually an indentation? + // (an equal column only counts when a SEQ is being opened directly inside a MAP) + if(indent.column < lastIndent.column) + return 0; + if(indent.column == lastIndent.column && !(indent.type == IndentMarker::SEQ && lastIndent.type == IndentMarker::MAP)) + return 0; + + // push a start token + indent.pStartToken = PushToken(GetStartTokenFor(type)); + + // and then the indent + m_indents.push(&indent); + m_indentRefs.push_back(pIndent); + return &m_indentRefs.back(); + } + + // PopIndentToHere + // .
Pops indentations off the stack until we reach the current indentation level, + // and enqueues the proper token each time. + // . Then pops all invalid indentations off. + void Scanner::PopIndentToHere() + { + // are we in flow? + if(InFlowContext()) + return; + + // now pop away + while(!m_indents.empty()) { + const IndentMarker& indent = *m_indents.top(); + if(indent.column < INPUT.column()) + break; + // at the same column, only a SEQ that is not followed by another block entry gets popped + if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry().Matches(INPUT))) + break; + + PopIndent(); + } + + while(!m_indents.empty() && m_indents.top()->status == IndentMarker::INVALID) + PopIndent(); + } + + // PopAllIndents + // . Pops all indentations (except for the base empty one) off the stack, + // and enqueues the proper token each time. + void Scanner::PopAllIndents() + { + // are we in flow? + if(InFlowContext()) + return; + + // now pop away + while(!m_indents.empty()) { + const IndentMarker& indent = *m_indents.top(); + if(indent.type == IndentMarker::NONE) + break; + + PopIndent(); + } + } + + // PopIndent + // . Pops a single indent, pushing the proper token + // . A marker whose status is not VALID pushes no token; InvalidateSimpleKey() is called instead. + void Scanner::PopIndent() + { + const IndentMarker& indent = *m_indents.top(); + m_indents.pop(); + + if(indent.status != IndentMarker::VALID) { + InvalidateSimpleKey(); + return; + } + + if(indent.type == IndentMarker::SEQ) + m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark())); + else if(indent.type == IndentMarker::MAP) + m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark())); + } + + // GetTopIndent + // . Returns the column of the top indent marker, or 0 when the indent stack is empty. + int Scanner::GetTopIndent() const + { + if(m_indents.empty()) + return 0; + return m_indents.top()->column; + } + + // ThrowParserException + // . Throws a ParserException with the current token location + // (if available). + // . Does not parse any more tokens.
+ void Scanner::ThrowParserException(const std::string& msg) const + { + Mark mark = Mark::null(); + if(!m_tokens.empty()) { + const Token& token = m_tokens.front(); + mark = token.mark; + } + throw ParserException(mark, msg); + } +} + diff --git a/yaml-cpp/src/scanner.h b/yaml-cpp/src/scanner.h new file mode 100644 index 0000000..bc8dcbe --- /dev/null +++ b/yaml-cpp/src/scanner.h @@ -0,0 +1,132 @@ +#ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include +#include +#include +#include +#include +#include +#include "ptr_vector.h" +#include "stream.h" +#include "token.h" + +namespace YAML +{ + class Node; + class RegEx; + + class Scanner + { + public: + Scanner(std::istream& in); + ~Scanner(); + + // token queue management (hopefully this looks kinda stl-ish) + // note: these are non-const because each one may scan more input before answering + bool empty(); + void pop(); + Token& peek(); + + private: + struct IndentMarker { + enum INDENT_TYPE { MAP, SEQ, NONE }; + enum STATUS { VALID, INVALID, UNKNOWN }; + // a new marker starts out VALID with no start token attached + IndentMarker(int column_, INDENT_TYPE type_): column(column_), type(type_), status(VALID), pStartToken(0) {} + + int column; + INDENT_TYPE type; + STATUS status; + Token *pStartToken; + }; + + enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ }; + + private: + // scanning + void EnsureTokensInQueue(); + void ScanNextToken(); + void ScanToNextToken(); + void StartStream(); + void EndStream(); + Token *PushToken(Token::TYPE type); + + // flow context means at least one flow collection is open (m_flows is non-empty) + bool InFlowContext() const { return !m_flows.empty(); } + bool InBlockContext() const { return m_flows.empty(); } + int GetFlowLevel() const { return m_flows.size(); } + + Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const; + IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type); + void PopIndentToHere(); + void PopAllIndents(); + void PopIndent(); + int
GetTopIndent() const; + + // checking input + bool CanInsertPotentialSimpleKey() const; + bool ExistsActiveSimpleKey() const; + void InsertPotentialSimpleKey(); + void InvalidateSimpleKey(); + bool VerifySimpleKey(); + void PopAllSimpleKeys(); + + void ThrowParserException(const std::string& msg) const; + + bool IsWhitespaceToBeEaten(char ch); + const RegEx& GetValueRegex() const; + + struct SimpleKey { + SimpleKey(const Mark& mark_, int flowLevel_); + + void Validate(); + void Invalidate(); + + Mark mark; + int flowLevel; + IndentMarker *pIndent; + Token *pMapStart, *pKey; + }; + + // and the tokens + void ScanDirective(); + void ScanDocStart(); + void ScanDocEnd(); + void ScanBlockSeqStart(); + void ScanBlockMapSTart(); + void ScanBlockEnd(); + void ScanBlockEntry(); + void ScanFlowStart(); + void ScanFlowEnd(); + void ScanFlowEntry(); + void ScanKey(); + void ScanValue(); + void ScanAnchorOrAlias(); + void ScanTag(); + void ScanPlainScalar(); + void ScanQuotedScalar(); + void ScanBlockScalar(); + + private: + // the stream + Stream INPUT; + + // the output (tokens) + std::queue m_tokens; + + // state info + bool m_startedStream, m_endedStream; + bool m_simpleKeyAllowed; + bool m_canBeJSONFlow; + std::stack m_simpleKeys; + std::stack m_indents; + ptr_vector m_indentRefs; // for "garbage collection" + std::stack m_flows; + }; +} + +#endif // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + diff --git a/yaml-cpp/src/scanscalar.cpp b/yaml-cpp/src/scanscalar.cpp new file mode 100644 index 0000000..064c086 --- /dev/null +++ b/yaml-cpp/src/scanscalar.cpp @@ -0,0 +1,214 @@ +#include "scanscalar.h" +#include "scanner.h" +#include "exp.h" +#include "yaml-cpp/exceptions.h" +#include "token.h" + +namespace YAML +{ + // ScanScalar + // . This is where the scalar magic happens. + // + // . We do the scanning in three phases: + // 1. Scan until newline + // 2. Eat newline + // 3. Scan leading blanks. + // + // .
Depending on the parameters given, we store or stop + // at different places in the above flow. + std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) + { + bool foundNonEmptyLine = false; + bool pastOpeningBreak = (params.fold == FOLD_FLOW); + bool emptyLine = false, moreIndented = false; + int foldedNewlineCount = 0; + bool foldedNewlineStartedMoreIndented = false; + std::size_t lastEscapedChar = std::string::npos; + std::string scalar; + params.leadingSpaces = false; + + while(INPUT) { + // ******************************** + // Phase #1: scan until line ending + + std::size_t lastNonWhitespaceChar = scalar.size(); + bool escapedNewline = false; + while(!params.end.Matches(INPUT) && !Exp::Break().Matches(INPUT)) { + if(!INPUT) + break; + + // document indicator? + if(INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) { + if(params.onDocIndicator == BREAK) + break; + else if(params.onDocIndicator == THROW) + throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR); + } + + foundNonEmptyLine = true; + pastOpeningBreak = true; + + // escaped newline? (only if we're escaping on slash) + if(params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) { + // eat escape character and get out (but preserve trailing whitespace!) + INPUT.get(); + lastNonWhitespaceChar = scalar.size(); + lastEscapedChar = scalar.size(); + escapedNewline = true; + break; + } + + // escape this? + if(INPUT.peek() == params.escape) { + scalar += Exp::Escape(INPUT); + lastNonWhitespaceChar = scalar.size(); + lastEscapedChar = scalar.size(); + continue; + } + + // otherwise, just add the damn character + char ch = INPUT.get(); + scalar += ch; + if(ch != ' ' && ch != '\t') + lastNonWhitespaceChar = scalar.size(); + } + + // eof? if we're looking to eat something, then we throw + if(!INPUT) { + if(params.eatEnd) + throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR); + break; + } + + // doc indicator?
+ if(params.onDocIndicator == BREAK && INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) + break; + + // are we done via character match? + int n = params.end.Match(INPUT); + if(n >= 0) { + if(params.eatEnd) + INPUT.eat(n); + break; + } + + // do we remove trailing whitespace? + if(params.fold == FOLD_FLOW) + scalar.erase(lastNonWhitespaceChar); + + // ******************************** + // Phase #2: eat line ending + n = Exp::Break().Match(INPUT); + INPUT.eat(n); + + // ******************************** + // Phase #3: scan initial spaces + + // first the required indentation + while(INPUT.peek() == ' ' && (INPUT.column() < params.indent || (params.detectIndent && !foundNonEmptyLine))) + INPUT.eat(1); + + // update indent if we're auto-detecting + if(params.detectIndent && !foundNonEmptyLine) + params.indent = std::max(params.indent, INPUT.column()); + + // and then the rest of the whitespace + while(Exp::Blank().Matches(INPUT)) { + // we check for tabs that masquerade as indentation + if(INPUT.peek() == '\t'&& INPUT.column() < params.indent && params.onTabInIndentation == THROW) + throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION); + + if(!params.eatLeadingWhitespace) + break; + + INPUT.eat(1); + } + + // was this an empty line?
+ bool nextEmptyLine = Exp::Break().Matches(INPUT); + bool nextMoreIndented = Exp::Blank().Matches(INPUT); + if(params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine) + foldedNewlineStartedMoreIndented = moreIndented; + + // for block scalars, we always start with a newline, so we should ignore it (not fold or keep) + if(pastOpeningBreak) { + switch(params.fold) { + case DONT_FOLD: + scalar += "\n"; + break; + case FOLD_BLOCK: + if(!emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented && INPUT.column() >= params.indent) + scalar += " "; + else if(nextEmptyLine) + foldedNewlineCount++; + else + scalar += "\n"; + + if(!nextEmptyLine && foldedNewlineCount > 0) { + scalar += std::string(foldedNewlineCount - 1, '\n'); + // NOTE(review): the last operator below is a bitwise '|', not '||'; on bool operands the result is the same + if(foldedNewlineStartedMoreIndented || nextMoreIndented | !foundNonEmptyLine) + scalar += "\n"; + foldedNewlineCount = 0; + } + break; + case FOLD_FLOW: + if(nextEmptyLine) + scalar += "\n"; + else if(!emptyLine && !nextEmptyLine && !escapedNewline) + scalar += " "; + break; + } + } + + emptyLine = nextEmptyLine; + moreIndented = nextMoreIndented; + pastOpeningBreak = true; + + // are we done via indentation?
+ if(!emptyLine && INPUT.column() < params.indent) { + params.leadingSpaces = true; + break; + } + } + + // post-processing + // (in each step below, characters up to lastEscapedChar are protected from trimming) + if(params.trimTrailingSpaces) { + std::size_t pos = scalar.find_last_not_of(' '); + if(lastEscapedChar != std::string::npos) { + if(pos < lastEscapedChar || pos == std::string::npos) + pos = lastEscapedChar; + } + if(pos < scalar.size()) + scalar.erase(pos + 1); + } + + switch(params.chomp) { + case CLIP: { + std::size_t pos = scalar.find_last_not_of('\n'); + if(lastEscapedChar != std::string::npos) { + if(pos < lastEscapedChar || pos == std::string::npos) + pos = lastEscapedChar; + } + if(pos == std::string::npos) + scalar.erase(); + else if(pos + 1 < scalar.size()) + scalar.erase(pos + 2); + } break; + case STRIP: { + std::size_t pos = scalar.find_last_not_of('\n'); + if(lastEscapedChar != std::string::npos) { + if(pos < lastEscapedChar || pos == std::string::npos) + pos = lastEscapedChar; + } + if(pos == std::string::npos) + scalar.erase(); + else if(pos < scalar.size()) + scalar.erase(pos + 1); + } break; + default: + break; + } + + return scalar; + } +} diff --git a/yaml-cpp/src/scanscalar.h b/yaml-cpp/src/scanscalar.h new file mode 100644 index 0000000..c198cb1 --- /dev/null +++ b/yaml-cpp/src/scanscalar.h @@ -0,0 +1,45 @@ +#ifndef SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include +#include "regex.h" +#include "stream.h" + +namespace YAML +{ + enum CHOMP { STRIP = -1, CLIP, KEEP }; + enum ACTION { NONE, BREAK, THROW }; + enum FOLD { DONT_FOLD, FOLD_BLOCK, FOLD_FLOW }; + + struct ScanScalarParams { + ScanScalarParams(): eatEnd(false), indent(0), detectIndent(false), eatLeadingWhitespace(0), escape(0), fold(DONT_FOLD), + trimTrailingSpaces(0), chomp(CLIP), onDocIndicator(NONE),
onTabInIndentation(NONE), leadingSpaces(false) {} + + // input: + RegEx end; // what condition ends this scalar? + bool eatEnd; // should we eat that condition when we see it? + int indent; // what level of indentation should be eaten and ignored? + bool detectIndent; // should we try to autodetect the indent? + bool eatLeadingWhitespace; // should we continue eating this delicious indentation after 'indent' spaces? + char escape; // what character do we escape on (i.e., slash or single quote) (0 for none) + FOLD fold; // how do we fold line ends? + bool trimTrailingSpaces; // do we remove all trailing spaces (at the very end) + CHOMP chomp; // do we strip, clip, or keep trailing newlines (at the very end) + // Note: strip means kill all, clip means keep at most one, keep means keep all + ACTION onDocIndicator; // what do we do if we see a document indicator? + ACTION onTabInIndentation; // what do we do if we see a tab where we should be seeing indentation spaces + + // output: + bool leadingSpaces; // set by ScanScalar when it stops because a non-empty line is indented less than 'indent' + }; + + std::string ScanScalar(Stream& INPUT, ScanScalarParams& info); +} + +#endif // SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + diff --git a/yaml-cpp/src/scantag.cpp b/yaml-cpp/src/scantag.cpp new file mode 100644 index 0000000..b71cbcc --- /dev/null +++ b/yaml-cpp/src/scantag.cpp @@ -0,0 +1,84 @@ +#include "scanner.h" +#include "regex.h" +#include "exp.h" +#include "yaml-cpp/exceptions.h" + +namespace YAML +{ + // ScanVerbatimTag + // . Eats the start character, then accumulates characters matching Exp::URI() + // until Keys::VerbatimTagEnd is seen (which is eaten as well). + // . Throws ParserException (END_OF_VERBATIM_TAG) if the input ends or a + // non-URI character is hit before the end character. + const std::string ScanVerbatimTag(Stream& INPUT) + { + std::string tag; + + // eat the start character + INPUT.get(); + + while(INPUT) { + if(INPUT.peek() == Keys::VerbatimTagEnd) { + // eat the end character + INPUT.get(); + return tag; + } + + int n = Exp::URI().Match(INPUT); + if(n <= 0) + break; + + tag += INPUT.get(n); + } + + throw ParserException(INPUT.mark(), ErrorMsg::END_OF_VERBATIM_TAG); + } + + // ScanTagHandle + // . Reads characters matching Exp::Word() (and, once one fails to match, Exp::Tag()) + // up to, but not including, the next Keys::Tag character. + // . canBeHandle is cleared as soon as a non-word character is seen; reaching + // Keys::Tag after that throws ParserException (CHAR_IN_TAG_HANDLE). + const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle) + { + std::string tag; + canBeHandle = true; + Mark firstNonWordChar; + +
while(INPUT) { + if(INPUT.peek() == Keys::Tag) { + if(!canBeHandle) + throw ParserException(firstNonWordChar, ErrorMsg::CHAR_IN_TAG_HANDLE); + break; + } + + int n = 0; + if(canBeHandle) { + n = Exp::Word().Match(INPUT); + if(n <= 0) { + canBeHandle = false; + firstNonWordChar = INPUT.mark(); + } + } + + if(!canBeHandle) + n = Exp::Tag().Match(INPUT); + + if(n <= 0) + break; + + tag += INPUT.get(n); + } + + return tag; + } + + const std::string ScanTagSuffix(Stream& INPUT) + { + std::string tag; + + while(INPUT) { + int n = Exp::Tag().Match(INPUT); + if(n <= 0) + break; + + tag += INPUT.get(n); + } + + if(tag.empty()) + throw ParserException(INPUT.mark(), ErrorMsg::TAG_WITH_NO_SUFFIX); + + return tag; + } +} + diff --git a/yaml-cpp/src/scantag.h b/yaml-cpp/src/scantag.h new file mode 100644 index 0000000..38437c0 --- /dev/null +++ b/yaml-cpp/src/scantag.h @@ -0,0 +1,20 @@ +#ifndef SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include +#include "stream.h" + +namespace YAML +{ + const std::string ScanVerbatimTag(Stream& INPUT); + const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle); + const std::string ScanTagSuffix(Stream& INPUT); +} + +#endif // SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + diff --git a/yaml-cpp/src/scantoken.cpp b/yaml-cpp/src/scantoken.cpp new file mode 100644 index 0000000..06d9cd6 --- /dev/null +++ b/yaml-cpp/src/scantoken.cpp @@ -0,0 +1,439 @@ +#include "scanner.h" +#include "token.h" +#include "yaml-cpp/exceptions.h" +#include "exp.h" +#include "scanscalar.h" +#include "scantag.h" +#include "tag.h" +#include + +namespace YAML +{ + /////////////////////////////////////////////////////////////////////// + // Specialization for scanning specific tokens + + // Directive + // .
Note: no semantic checking is done here (that's for the parser to do) + void Scanner::ScanDirective() + { + std::string name; + std::vector params; + + // NOTE(review): 'name' and 'params' above are not used anywhere in this function; + // the directive name and parameters are stored directly in the token instead + // pop indents and simple keys + PopAllIndents(); + PopAllSimpleKeys(); + + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + // store pos and eat indicator + Token token(Token::DIRECTIVE, INPUT.mark()); + INPUT.eat(1); + + // read name + while(INPUT && !Exp::BlankOrBreak().Matches(INPUT)) + token.value += INPUT.get(); + + // read parameters + while(1) { + // first get rid of whitespace + while(Exp::Blank().Matches(INPUT)) + INPUT.eat(1); + + // break on newline or comment + if(!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT)) + break; + + // now read parameter + std::string param; + while(INPUT && !Exp::BlankOrBreak().Matches(INPUT)) + param += INPUT.get(); + + token.params.push_back(param); + } + + m_tokens.push(token); + } + + // DocStart + void Scanner::ScanDocStart() + { + PopAllIndents(); + PopAllSimpleKeys(); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(3); + m_tokens.push(Token(Token::DOC_START, mark)); + } + + // DocEnd + void Scanner::ScanDocEnd() + { + PopAllIndents(); + PopAllSimpleKeys(); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(3); + m_tokens.push(Token(Token::DOC_END, mark)); + } + + // FlowStart + void Scanner::ScanFlowStart() + { + // flows can be simple keys + InsertPotentialSimpleKey(); + m_simpleKeyAllowed = true; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + char ch = INPUT.get(); + FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP); + m_flows.push(flowType); + Token::TYPE type = (flowType == FLOW_SEQ ?
Token::FLOW_SEQ_START : Token::FLOW_MAP_START); + m_tokens.push(Token(type, mark)); + } + + // FlowEnd + // . Throws ParserException (FLOW_END) in block context or when the closing + // character does not match the innermost open flow collection. + void Scanner::ScanFlowEnd() + { + if(InBlockContext()) + throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END); + + // we might have a solo entry in the flow context + if(InFlowContext()) { + if(m_flows.top() == FLOW_MAP && VerifySimpleKey()) + m_tokens.push(Token(Token::VALUE, INPUT.mark())); + else if(m_flows.top() == FLOW_SEQ) + InvalidateSimpleKey(); + } + + m_simpleKeyAllowed = false; + m_canBeJSONFlow = true; + + // eat + Mark mark = INPUT.mark(); + char ch = INPUT.get(); + + // check that it matches the start + FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP); + if(m_flows.top() != flowType) + throw ParserException(mark, ErrorMsg::FLOW_END); + m_flows.pop(); + + // NOTE(review): flowType is used as a boolean here, so this depends on FLOW_SEQ + // being the non-zero FLOW_MARKER enumerator (ScanFlowStart compares with == FLOW_SEQ instead) + Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END); + m_tokens.push(Token(type, mark)); + } + + // FlowEntry + void Scanner::ScanFlowEntry() + { + // we might have a solo entry in the flow context + if(InFlowContext()) { + if(m_flows.top() == FLOW_MAP && VerifySimpleKey()) + m_tokens.push(Token(Token::VALUE, INPUT.mark())); + else if(m_flows.top() == FLOW_SEQ) + InvalidateSimpleKey(); + } + + m_simpleKeyAllowed = true; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(1); + m_tokens.push(Token(Token::FLOW_ENTRY, mark)); + } + + // BlockEntry + void Scanner::ScanBlockEntry() + { + // we better be in the block context! + if(InFlowContext()) + throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY); + + // can we put it here?
+ if(!m_simpleKeyAllowed) + throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY); + + PushIndentTo(INPUT.column(), IndentMarker::SEQ); + m_simpleKeyAllowed = true; + m_canBeJSONFlow = false; + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(1); + m_tokens.push(Token(Token::BLOCK_ENTRY, mark)); + } + + // Key + void Scanner::ScanKey() + { + // handle keys differently in the block context (and manage indents) + if(InBlockContext()) { + if(!m_simpleKeyAllowed) + throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY); + + PushIndentTo(INPUT.column(), IndentMarker::MAP); + } + + // can only put a simple key here if we're in block context + m_simpleKeyAllowed = InBlockContext(); + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(1); + m_tokens.push(Token(Token::KEY, mark)); + } + + // Value + void Scanner::ScanValue() + { + // and check that simple key + bool isSimpleKey = VerifySimpleKey(); + m_canBeJSONFlow = false; + + if(isSimpleKey) { + // can't follow a simple key with another simple key (dunno why, though - it seems fine) + m_simpleKeyAllowed = false; + } else { + // handle values differently in the block context (and manage indents) + if(InBlockContext()) { + if(!m_simpleKeyAllowed) + throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE); + + PushIndentTo(INPUT.column(), IndentMarker::MAP); + } + + // can only put a simple key here if we're in block context + m_simpleKeyAllowed = InBlockContext(); + } + + // eat + Mark mark = INPUT.mark(); + INPUT.eat(1); + m_tokens.push(Token(Token::VALUE, mark)); + } + + // AnchorOrAlias + void Scanner::ScanAnchorOrAlias() + { + bool alias; + std::string name; + + // insert a potential simple key + InsertPotentialSimpleKey(); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + // eat the indicator + Mark mark = INPUT.mark(); + char indicator = INPUT.get(); + alias = (indicator == Keys::Alias); + + // now eat the content + while(INPUT && Exp::Anchor().Matches(INPUT)) + name += INPUT.get(); + + // we need to have
read SOMETHING! + if(name.empty()) + throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND); + + // and needs to end correctly + if(INPUT && !Exp::AnchorEnd().Matches(INPUT)) + throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR); + + // and we're done + Token token(alias ? Token::ALIAS : Token::ANCHOR, mark); + token.value = name; + m_tokens.push(token); + } + + // Tag + void Scanner::ScanTag() + { + // insert a potential simple key + InsertPotentialSimpleKey(); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = false; + + Token token(Token::TAG, INPUT.mark()); + + // eat the indicator + INPUT.get(); + + if(INPUT && INPUT.peek() == Keys::VerbatimTagStart){ + std::string tag = ScanVerbatimTag(INPUT); + + token.value = tag; + token.data = Tag::VERBATIM; + } else { + bool canBeHandle; + token.value = ScanTagHandle(INPUT, canBeHandle); + if(!canBeHandle && token.value.empty()) + token.data = Tag::NON_SPECIFIC; + else if(token.value.empty()) + token.data = Tag::SECONDARY_HANDLE; + else + token.data = Tag::PRIMARY_HANDLE; + + // is there a suffix? + if(canBeHandle && INPUT.peek() == Keys::Tag) { + // eat the indicator + INPUT.get(); + token.params.push_back(ScanTagSuffix(INPUT)); + token.data = Tag::NAMED_HANDLE; + } + } + + m_tokens.push(token); + } + + // PlainScalar + void Scanner::ScanPlainScalar() + { + std::string scalar; + + // set up the scanning parameters + ScanScalarParams params; + params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) || (Exp::BlankOrBreak() + Exp::Comment()); + params.eatEnd = false; + params.indent = (InFlowContext() ? 
0 : GetTopIndent() + 1); + params.fold = FOLD_FLOW; + params.eatLeadingWhitespace = true; + params.trimTrailingSpaces = true; + params.chomp = STRIP; + params.onDocIndicator = BREAK; + params.onTabInIndentation = THROW; + + // insert a potential simple key + InsertPotentialSimpleKey(); + + Mark mark = INPUT.mark(); + scalar = ScanScalar(INPUT, params); + + // can have a simple key only if we ended the scalar by starting a new line + m_simpleKeyAllowed = params.leadingSpaces; + m_canBeJSONFlow = false; + + // finally, check and see if we ended on an illegal character + //if(Exp::IllegalCharInScalar.Matches(INPUT)) + // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR); + + Token token(Token::PLAIN_SCALAR, mark); + token.value = scalar; + m_tokens.push(token); + } + + // QuotedScalar + void Scanner::ScanQuotedScalar() + { + std::string scalar; + + // peek at single or double quote (don't eat because we need to preserve (for the time being) the input position) + char quote = INPUT.peek(); + bool single = (quote == '\''); + + // setup the scanning parameters + ScanScalarParams params; + params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote)); + params.eatEnd = true; + params.escape = (single ? '\'' : '\\'); + params.indent = 0; + params.fold = FOLD_FLOW; + params.eatLeadingWhitespace = true; + params.trimTrailingSpaces = false; + params.chomp = CLIP; + params.onDocIndicator = THROW; + + // insert a potential simple key + InsertPotentialSimpleKey(); + + Mark mark = INPUT.mark(); + + // now eat that opening quote + INPUT.get(); + + // and scan + scalar = ScanScalar(INPUT, params); + m_simpleKeyAllowed = false; + m_canBeJSONFlow = true; + + Token token(Token::NON_PLAIN_SCALAR, mark); + token.value = scalar; + m_tokens.push(token); + } + + // BlockScalarToken + // . These need a little extra processing beforehand. + // . 
We need to scan the line where the indicator is (this doesn't count as part of the scalar), + // and then we need to figure out what level of indentation we'll be using. + void Scanner::ScanBlockScalar() + { + std::string scalar; + + ScanScalarParams params; + params.indent = 1; + params.detectIndent = true; + + // eat block indicator ('|' or '>') + Mark mark = INPUT.mark(); + char indicator = INPUT.get(); + params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD); + + // eat chomping/indentation indicators + params.chomp = CLIP; + int n = Exp::Chomp().Match(INPUT); + for(int i=0;i= 0) + params.indent += GetTopIndent(); + + params.eatLeadingWhitespace = false; + params.trimTrailingSpaces = false; + params.onTabInIndentation = THROW; + + scalar = ScanScalar(INPUT, params); + + // simple keys always ok after block scalars (since we're gonna start a new line anyways) + m_simpleKeyAllowed = true; + m_canBeJSONFlow = false; + + Token token(Token::NON_PLAIN_SCALAR, mark); + token.value = scalar; + m_tokens.push(token); + } +} diff --git a/yaml-cpp/src/setting.h b/yaml-cpp/src/setting.h new file mode 100644 index 0000000..806ccda --- /dev/null +++ b/yaml-cpp/src/setting.h @@ -0,0 +1,105 @@ +#ifndef SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include +#include +#include "yaml-cpp/noncopyable.h" + +namespace YAML +{ + class SettingChangeBase; + + template + class Setting + { + public: + Setting(): m_value() {} + + const T get() const { return m_value; } + std::auto_ptr set(const T& value); + void restore(const Setting& oldSetting) { + m_value = oldSetting.get(); + } + + private: + T m_value; + }; + + class SettingChangeBase + { + public: + virtual ~SettingChangeBase() {} + virtual void pop() = 0; + }; + + template + 
class SettingChange: public SettingChangeBase + { + public: + SettingChange(Setting *pSetting): m_pCurSetting(pSetting) { + // copy old setting to save its state + m_oldSetting = *pSetting; + } + + virtual void pop() { + m_pCurSetting->restore(m_oldSetting); + } + + private: + Setting *m_pCurSetting; + Setting m_oldSetting; + }; + + template + inline std::auto_ptr Setting::set(const T& value) { + std::auto_ptr pChange(new SettingChange (this)); + m_value = value; + return pChange; + } + + class SettingChanges: private noncopyable + { + public: + SettingChanges() {} + ~SettingChanges() { clear(); } + + void clear() { + restore(); + + for(setting_changes::const_iterator it=m_settingChanges.begin();it!=m_settingChanges.end();++it) + delete *it; + m_settingChanges.clear(); + } + + void restore() { + for(setting_changes::const_iterator it=m_settingChanges.begin();it!=m_settingChanges.end();++it) + (*it)->pop(); + } + + void push(std::auto_ptr pSettingChange) { + m_settingChanges.push_back(pSettingChange.release()); + } + + // like std::auto_ptr - assignment is transfer of ownership + SettingChanges& operator = (SettingChanges& rhs) { + if(this == &rhs) + return *this; + + clear(); + m_settingChanges = rhs.m_settingChanges; + rhs.m_settingChanges.clear(); + return *this; + } + + private: + typedef std::vector setting_changes; + setting_changes m_settingChanges; + }; +} + +#endif // SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/simplekey.cpp b/yaml-cpp/src/simplekey.cpp new file mode 100644 index 0000000..857a9e0 --- /dev/null +++ b/yaml-cpp/src/simplekey.cpp @@ -0,0 +1,139 @@ +#include "scanner.h" +#include "token.h" +#include "yaml-cpp/exceptions.h" +#include "exp.h" + +namespace YAML +{ + Scanner::SimpleKey::SimpleKey(const Mark& mark_, int flowLevel_) + : mark(mark_), flowLevel(flowLevel_), pIndent(0), pMapStart(0), pKey(0) + { + } + + void Scanner::SimpleKey::Validate() + { + // Note: pIndent will *not* be garbage here; + // we "garbage 
collect" them so we can + // always refer to them + if(pIndent) + pIndent->status = IndentMarker::VALID; + if(pMapStart) + pMapStart->status = Token::VALID; + if(pKey) + pKey->status = Token::VALID; + } + + void Scanner::SimpleKey::Invalidate() + { + if(pIndent) + pIndent->status = IndentMarker::INVALID; + if(pMapStart) + pMapStart->status = Token::INVALID; + if(pKey) + pKey->status = Token::INVALID; + } + + // CanInsertPotentialSimpleKey + bool Scanner::CanInsertPotentialSimpleKey() const + { + if(!m_simpleKeyAllowed) + return false; + + return !ExistsActiveSimpleKey(); + } + + // ExistsActiveSimpleKey + // . Returns true if there's a potential simple key at our flow level + // (there's allowed at most one per flow level, i.e., at the start of the flow start token) + bool Scanner::ExistsActiveSimpleKey() const + { + if(m_simpleKeys.empty()) + return false; + + const SimpleKey& key = m_simpleKeys.top(); + return key.flowLevel == GetFlowLevel(); + } + + // InsertPotentialSimpleKey + // . If we can, add a potential simple key to the queue, + // and save it on a stack. + void Scanner::InsertPotentialSimpleKey() + { + if(!CanInsertPotentialSimpleKey()) + return; + + SimpleKey key(INPUT.mark(), GetFlowLevel()); + + // first add a map start, if necessary + if(InBlockContext()) { + key.pIndent = PushIndentTo(INPUT.column(), IndentMarker::MAP); + if(key.pIndent) { + key.pIndent->status = IndentMarker::UNKNOWN; + key.pMapStart = key.pIndent->pStartToken; + key.pMapStart->status = Token::UNVERIFIED; + } + } + + // then add the (now unverified) key + m_tokens.push(Token(Token::KEY, INPUT.mark())); + key.pKey = &m_tokens.back(); + key.pKey->status = Token::UNVERIFIED; + + m_simpleKeys.push(key); + } + + // InvalidateSimpleKey + // . 
Automatically invalidate the simple key in our flow level + void Scanner::InvalidateSimpleKey() + { + if(m_simpleKeys.empty()) + return; + + // grab top key + SimpleKey& key = m_simpleKeys.top(); + if(key.flowLevel != GetFlowLevel()) + return; + + key.Invalidate(); + m_simpleKeys.pop(); + } + + // VerifySimpleKey + // . Determines whether the latest simple key to be added is valid, + // and if so, makes it valid. + bool Scanner::VerifySimpleKey() + { + if(m_simpleKeys.empty()) + return false; + + // grab top key + SimpleKey key = m_simpleKeys.top(); + + // only validate if we're in the correct flow level + if(key.flowLevel != GetFlowLevel()) + return false; + + m_simpleKeys.pop(); + + bool isValid = true; + + // needs to be less than 1024 characters and inline + if(INPUT.line() != key.mark.line || INPUT.pos() - key.mark.pos > 1024) + isValid = false; + + // invalidate key + if(isValid) + key.Validate(); + else + key.Invalidate(); + + return isValid; + } + + void Scanner::PopAllSimpleKeys() + { + while(!m_simpleKeys.empty()) + m_simpleKeys.pop(); + } +} + diff --git a/yaml-cpp/src/singledocparser.cpp b/yaml-cpp/src/singledocparser.cpp new file mode 100644 index 0000000..47759c3 --- /dev/null +++ b/yaml-cpp/src/singledocparser.cpp @@ -0,0 +1,381 @@ +#include "singledocparser.h" +#include "collectionstack.h" +#include "directives.h" +#include "yaml-cpp/eventhandler.h" +#include "yaml-cpp/exceptions.h" +#include "scanner.h" +#include "tag.h" +#include "token.h" +#include +#include +#include + +namespace YAML +{ + SingleDocParser::SingleDocParser(Scanner& scanner, const Directives& directives): m_scanner(scanner), m_directives(directives), m_pCollectionStack(new CollectionStack), m_curAnchor(0) + { + } + + SingleDocParser::~SingleDocParser() + { + } + + // HandleDocument + // . Handles the next document + // . Throws a ParserException on error. 
+ void SingleDocParser::HandleDocument(EventHandler& eventHandler) + { + assert(!m_scanner.empty()); // guaranteed that there are tokens + assert(!m_curAnchor); + + eventHandler.OnDocumentStart(m_scanner.peek().mark); + + // eat doc start + if(m_scanner.peek().type == Token::DOC_START) + m_scanner.pop(); + + // recurse! + HandleNode(eventHandler); + + eventHandler.OnDocumentEnd(); + + // and finally eat any doc ends we see + while(!m_scanner.empty() && m_scanner.peek().type == Token::DOC_END) + m_scanner.pop(); + } + + void SingleDocParser::HandleNode(EventHandler& eventHandler) + { + // an empty node *is* a possibility + if(m_scanner.empty()) { + eventHandler.OnNull(Mark::null(), NullAnchor); + return; + } + + // save location + Mark mark = m_scanner.peek().mark; + + // special case: a value node by itself must be a map, with no header + if(m_scanner.peek().type == Token::VALUE) { + eventHandler.OnMapStart(mark, "", NullAnchor); + HandleMap(eventHandler); + eventHandler.OnMapEnd(); + return; + } + + // special case: an alias node + if(m_scanner.peek().type == Token::ALIAS) { + eventHandler.OnAlias(mark, LookupAnchor(mark, m_scanner.peek().value)); + m_scanner.pop(); + return; + } + + std::string tag; + anchor_t anchor; + ParseProperties(tag, anchor); + + const Token& token = m_scanner.peek(); + + // add non-specific tags + if(tag.empty()) + tag = (token.type == Token::NON_PLAIN_SCALAR ? "!" 
: "?"); + + // now split based on what kind of node we should be + switch(token.type) { + case Token::PLAIN_SCALAR: + case Token::NON_PLAIN_SCALAR: + eventHandler.OnScalar(mark, tag, anchor, token.value); + m_scanner.pop(); + return; + case Token::FLOW_SEQ_START: + case Token::BLOCK_SEQ_START: + eventHandler.OnSequenceStart(mark, tag, anchor); + HandleSequence(eventHandler); + eventHandler.OnSequenceEnd(); + return; + case Token::FLOW_MAP_START: + case Token::BLOCK_MAP_START: + eventHandler.OnMapStart(mark, tag, anchor); + HandleMap(eventHandler); + eventHandler.OnMapEnd(); + return; + case Token::KEY: + // compact maps can only go in a flow sequence + if(m_pCollectionStack->GetCurCollectionType() == CollectionType::FlowSeq) { + eventHandler.OnMapStart(mark, tag, anchor); + HandleMap(eventHandler); + eventHandler.OnMapEnd(); + return; + } + break; + default: + break; + } + + if(tag == "?") + eventHandler.OnNull(mark, anchor); + else + eventHandler.OnScalar(mark, tag, anchor, ""); + } + + void SingleDocParser::HandleSequence(EventHandler& eventHandler) + { + // split based on start token + switch(m_scanner.peek().type) { + case Token::BLOCK_SEQ_START: HandleBlockSequence(eventHandler); break; + case Token::FLOW_SEQ_START: HandleFlowSequence(eventHandler); break; + default: break; + } + } + + void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) + { + // eat start token + m_scanner.pop(); + m_pCollectionStack->PushCollectionType(CollectionType::BlockSeq); + + while(1) { + if(m_scanner.empty()) + throw ParserException(Mark::null(), ErrorMsg::END_OF_SEQ); + + Token token = m_scanner.peek(); + if(token.type != Token::BLOCK_ENTRY && token.type != Token::BLOCK_SEQ_END) + throw ParserException(token.mark, ErrorMsg::END_OF_SEQ); + + m_scanner.pop(); + if(token.type == Token::BLOCK_SEQ_END) + break; + + // check for null + if(!m_scanner.empty()) { + const Token& token = m_scanner.peek(); + if(token.type == Token::BLOCK_ENTRY || token.type == 
Token::BLOCK_SEQ_END) { + eventHandler.OnNull(token.mark, NullAnchor); + continue; + } + } + + HandleNode(eventHandler); + } + + m_pCollectionStack->PopCollectionType(CollectionType::BlockSeq); + } + + void SingleDocParser::HandleFlowSequence(EventHandler& eventHandler) + { + // eat start token + m_scanner.pop(); + m_pCollectionStack->PushCollectionType(CollectionType::FlowSeq); + + while(1) { + if(m_scanner.empty()) + throw ParserException(Mark::null(), ErrorMsg::END_OF_SEQ_FLOW); + + // first check for end + if(m_scanner.peek().type == Token::FLOW_SEQ_END) { + m_scanner.pop(); + break; + } + + // then read the node + HandleNode(eventHandler); + + // now eat the separator (or could be a sequence end, which we ignore - but if it's neither, then it's a bad node) + Token& token = m_scanner.peek(); + if(token.type == Token::FLOW_ENTRY) + m_scanner.pop(); + else if(token.type != Token::FLOW_SEQ_END) + throw ParserException(token.mark, ErrorMsg::END_OF_SEQ_FLOW); + } + + m_pCollectionStack->PopCollectionType(CollectionType::FlowSeq); + } + + void SingleDocParser::HandleMap(EventHandler& eventHandler) + { + // split based on start token + switch(m_scanner.peek().type) { + case Token::BLOCK_MAP_START: HandleBlockMap(eventHandler); break; + case Token::FLOW_MAP_START: HandleFlowMap(eventHandler); break; + case Token::KEY: HandleCompactMap(eventHandler); break; + case Token::VALUE: HandleCompactMapWithNoKey(eventHandler); break; + default: break; + } + } + + void SingleDocParser::HandleBlockMap(EventHandler& eventHandler) + { + // eat start token + m_scanner.pop(); + m_pCollectionStack->PushCollectionType(CollectionType::BlockMap); + + while(1) { + if(m_scanner.empty()) + throw ParserException(Mark::null(), ErrorMsg::END_OF_MAP); + + Token token = m_scanner.peek(); + if(token.type != Token::KEY && token.type != Token::VALUE && token.type != Token::BLOCK_MAP_END) + throw ParserException(token.mark, ErrorMsg::END_OF_MAP); + + if(token.type == Token::BLOCK_MAP_END) { + 
m_scanner.pop(); + break; + } + + // grab key (if non-null) + if(token.type == Token::KEY) { + m_scanner.pop(); + HandleNode(eventHandler); + } else { + eventHandler.OnNull(token.mark, NullAnchor); + } + + // now grab value (optional) + if(!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) { + m_scanner.pop(); + HandleNode(eventHandler); + } else { + eventHandler.OnNull(token.mark, NullAnchor); + } + } + + m_pCollectionStack->PopCollectionType(CollectionType::BlockMap); + } + + void SingleDocParser::HandleFlowMap(EventHandler& eventHandler) + { + // eat start token + m_scanner.pop(); + m_pCollectionStack->PushCollectionType(CollectionType::FlowMap); + + while(1) { + if(m_scanner.empty()) + throw ParserException(Mark::null(), ErrorMsg::END_OF_MAP_FLOW); + + Token& token = m_scanner.peek(); + // first check for end + if(token.type == Token::FLOW_MAP_END) { + m_scanner.pop(); + break; + } + + // grab key (if non-null) + if(token.type == Token::KEY) { + m_scanner.pop(); + HandleNode(eventHandler); + } else { + eventHandler.OnNull(token.mark, NullAnchor); + } + + // now grab value (optional) + if(!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) { + m_scanner.pop(); + HandleNode(eventHandler); + } else { + eventHandler.OnNull(token.mark, NullAnchor); + } + + // now eat the separator (or could be a map end, which we ignore - but if it's neither, then it's a bad node) + Token& nextToken = m_scanner.peek(); + if(nextToken.type == Token::FLOW_ENTRY) + m_scanner.pop(); + else if(nextToken.type != Token::FLOW_MAP_END) + throw ParserException(nextToken.mark, ErrorMsg::END_OF_MAP_FLOW); + } + + m_pCollectionStack->PopCollectionType(CollectionType::FlowMap); + } + + // . 
Single "key: value" pair in a flow sequence + void SingleDocParser::HandleCompactMap(EventHandler& eventHandler) + { + m_pCollectionStack->PushCollectionType(CollectionType::CompactMap); + + // grab key + Mark mark = m_scanner.peek().mark; + m_scanner.pop(); + HandleNode(eventHandler); + + // now grab value (optional) + if(!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) { + m_scanner.pop(); + HandleNode(eventHandler); + } else { + eventHandler.OnNull(mark, NullAnchor); + } + + m_pCollectionStack->PopCollectionType(CollectionType::CompactMap); + } + + // . Single ": value" pair in a flow sequence + void SingleDocParser::HandleCompactMapWithNoKey(EventHandler& eventHandler) + { + m_pCollectionStack->PushCollectionType(CollectionType::CompactMap); + + // null key + eventHandler.OnNull(m_scanner.peek().mark, NullAnchor); + + // grab value + m_scanner.pop(); + HandleNode(eventHandler); + + m_pCollectionStack->PopCollectionType(CollectionType::CompactMap); + } + + // ParseProperties + // . Grabs any tag or anchor tokens and deals with them. 
+ void SingleDocParser::ParseProperties(std::string& tag, anchor_t& anchor) + { + tag.clear(); + anchor = NullAnchor; + + while(1) { + if(m_scanner.empty()) + return; + + switch(m_scanner.peek().type) { + case Token::TAG: ParseTag(tag); break; + case Token::ANCHOR: ParseAnchor(anchor); break; + default: return; + } + } + } + + void SingleDocParser::ParseTag(std::string& tag) + { + Token& token = m_scanner.peek(); + if(!tag.empty()) + throw ParserException(token.mark, ErrorMsg::MULTIPLE_TAGS); + + Tag tagInfo(token); + tag = tagInfo.Translate(m_directives); + m_scanner.pop(); + } + + void SingleDocParser::ParseAnchor(anchor_t& anchor) + { + Token& token = m_scanner.peek(); + if(anchor) + throw ParserException(token.mark, ErrorMsg::MULTIPLE_ANCHORS); + + anchor = RegisterAnchor(token.value); + m_scanner.pop(); + } + + anchor_t SingleDocParser::RegisterAnchor(const std::string& name) + { + if(name.empty()) + return NullAnchor; + + return m_anchors[name] = ++m_curAnchor; + } + + anchor_t SingleDocParser::LookupAnchor(const Mark& mark, const std::string& name) const + { + Anchors::const_iterator it = m_anchors.find(name); + if(it == m_anchors.end()) + throw ParserException(mark, ErrorMsg::UNKNOWN_ANCHOR); + + return it->second; + } +} diff --git a/yaml-cpp/src/singledocparser.h b/yaml-cpp/src/singledocparser.h new file mode 100644 index 0000000..3798dcc --- /dev/null +++ b/yaml-cpp/src/singledocparser.h @@ -0,0 +1,65 @@ +#ifndef SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include "yaml-cpp/anchor.h" +#include "yaml-cpp/noncopyable.h" +#include +#include +#include + +namespace YAML +{ + struct Directives; + struct Mark; + struct Token; + class CollectionStack; + class EventHandler; + class Node; + class Scanner; + 
+ class SingleDocParser: private noncopyable + { + public: + SingleDocParser(Scanner& scanner, const Directives& directives); + ~SingleDocParser(); + + void HandleDocument(EventHandler& eventHandler); + + private: + void HandleNode(EventHandler& eventHandler); + + void HandleSequence(EventHandler& eventHandler); + void HandleBlockSequence(EventHandler& eventHandler); + void HandleFlowSequence(EventHandler& eventHandler); + + void HandleMap(EventHandler& eventHandler); + void HandleBlockMap(EventHandler& eventHandler); + void HandleFlowMap(EventHandler& eventHandler); + void HandleCompactMap(EventHandler& eventHandler); + void HandleCompactMapWithNoKey(EventHandler& eventHandler); + + void ParseProperties(std::string& tag, anchor_t& anchor); + void ParseTag(std::string& tag); + void ParseAnchor(anchor_t& anchor); + + anchor_t RegisterAnchor(const std::string& name); + anchor_t LookupAnchor(const Mark& mark, const std::string& name) const; + + private: + Scanner& m_scanner; + const Directives& m_directives; + std::auto_ptr m_pCollectionStack; + + typedef std::map Anchors; + Anchors m_anchors; + + anchor_t m_curAnchor; + }; +} + +#endif // SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/stream.cpp b/yaml-cpp/src/stream.cpp new file mode 100644 index 0000000..5cfb1bb --- /dev/null +++ b/yaml-cpp/src/stream.cpp @@ -0,0 +1,448 @@ +#include "stream.h" +#include +#include "exp.h" + +#ifndef YAML_PREFETCH_SIZE +#define YAML_PREFETCH_SIZE 2048 +#endif + +#define S_ARRAY_SIZE( A ) (sizeof(A)/sizeof(*(A))) +#define S_ARRAY_END( A ) ((A) + S_ARRAY_SIZE(A)) + +#define CP_REPLACEMENT_CHARACTER (0xFFFD) + +namespace YAML +{ + enum UtfIntroState { + uis_start, + uis_utfbe_b1, + uis_utf32be_b2, + uis_utf32be_bom3, + uis_utf32be, + uis_utf16be, + uis_utf16be_bom1, + uis_utfle_bom1, + uis_utf16le_bom2, + uis_utf32le_bom3, + uis_utf16le, + uis_utf32le, + uis_utf8_imp, + uis_utf16le_imp, + uis_utf32le_imp3, + uis_utf8_bom1, + uis_utf8_bom2, + uis_utf8, + 
uis_error + }; + + enum UtfIntroCharType { + uict00, + uictBB, + uictBF, + uictEF, + uictFE, + uictFF, + uictAscii, + uictOther, + uictMax + }; + + static bool s_introFinalState[] = { + false, //uis_start + false, //uis_utfbe_b1 + false, //uis_utf32be_b2 + false, //uis_utf32be_bom3 + true, //uis_utf32be + true, //uis_utf16be + false, //uis_utf16be_bom1 + false, //uis_utfle_bom1 + false, //uis_utf16le_bom2 + false, //uis_utf32le_bom3 + true, //uis_utf16le + true, //uis_utf32le + false, //uis_utf8_imp + false, //uis_utf16le_imp + false, //uis_utf32le_imp3 + false, //uis_utf8_bom1 + false, //uis_utf8_bom2 + true, //uis_utf8 + true, //uis_error + }; + + static UtfIntroState s_introTransitions[][uictMax] = { + // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther + {uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1, uis_utf16be_bom1, uis_utfle_bom1, uis_utf8_imp, uis_utf8}, + {uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8}, + {uis_utf32be, uis_utf8, uis_utf8, uis_utf8, uis_utf32be_bom3, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf32be, uis_utf8, uis_utf8}, + {uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be}, + {uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8, uis_utf8}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16le_bom2, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, 
uis_utf32le}, + {uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, + {uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, + {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, + }; + + static char s_introUngetCount[][uictMax] = { + // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther + {0, 1, 1, 0, 0, 0, 0, 1}, + {0, 2, 2, 2, 2, 2, 2, 2}, + {3, 3, 3, 3, 0, 3, 3, 3}, + {4, 4, 4, 4, 4, 0, 4, 4}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {2, 2, 2, 2, 2, 0, 2, 2}, + {2, 2, 2, 2, 0, 2, 2, 2}, + {0, 1, 1, 1, 1, 1, 1, 1}, + {0, 2, 2, 2, 2, 2, 2, 2}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {0, 2, 2, 2, 2, 2, 2, 2}, + {0, 3, 3, 3, 3, 3, 3, 3}, + {4, 4, 4, 4, 4, 4, 4, 4}, + {2, 0, 2, 2, 2, 2, 2, 2}, + {3, 3, 0, 3, 3, 3, 3, 3}, + {1, 1, 1, 1, 1, 1, 1, 1}, + }; + + inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch) + { + if (std::istream::traits_type::eof() == ch) { + return uictOther; + } + + switch (ch) { + case 0: return uict00; + case 0xBB: return uictBB; + case 0xBF: return uictBF; + case 0xEF: return uictEF; + case 0xFE: return uictFE; + case 0xFF: return uictFF; + } + + if ((ch > 0) && (ch < 0xFF)) { + return uictAscii; + } + + return uictOther; + } + + inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits, unsigned char rshift) + { + const unsigned char header = ((1 << lead_bits) - 1) << (8 - lead_bits); + const unsigned char mask = (0xFF >> (lead_bits + 1)); + return static_cast(static_cast( + header | ((ch >> rshift) & mask) + )); + } + + inline void QueueUnicodeCodepoint(std::deque& q, 
unsigned long ch) + { + // We are not allowed to queue the Stream::eof() codepoint, so + // replace it with CP_REPLACEMENT_CHARACTER + if (static_cast(Stream::eof()) == ch) + { + ch = CP_REPLACEMENT_CHARACTER; + } + + if (ch < 0x80) + { + q.push_back(Utf8Adjust(ch, 0, 0)); + } + else if (ch < 0x800) + { + q.push_back(Utf8Adjust(ch, 2, 6)); + q.push_back(Utf8Adjust(ch, 1, 0)); + } + else if (ch < 0x10000) + { + q.push_back(Utf8Adjust(ch, 3, 12)); + q.push_back(Utf8Adjust(ch, 1, 6)); + q.push_back(Utf8Adjust(ch, 1, 0)); + } + else + { + q.push_back(Utf8Adjust(ch, 4, 18)); + q.push_back(Utf8Adjust(ch, 1, 12)); + q.push_back(Utf8Adjust(ch, 1, 6)); + q.push_back(Utf8Adjust(ch, 1, 0)); + } + } + + Stream::Stream(std::istream& input) + : m_input(input), + m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]), + m_nPrefetchedAvailable(0), m_nPrefetchedUsed(0) + { + typedef std::istream::traits_type char_traits; + + if(!input) + return; + + // Determine (or guess) the character-set by reading the BOM, if any. See + // the YAML specification for the determination algorithm. 
+ char_traits::int_type intro[4]; + int nIntroUsed = 0; + UtfIntroState state = uis_start; + for(; !s_introFinalState[state]; ) { + std::istream::int_type ch = input.get(); + intro[nIntroUsed++] = ch; + UtfIntroCharType charType = IntroCharTypeOf(ch); + UtfIntroState newState = s_introTransitions[state][charType]; + int nUngets = s_introUngetCount[state][charType]; + if(nUngets > 0) { + input.clear(); + for(; nUngets > 0; --nUngets) { + if(char_traits::eof() != intro[--nIntroUsed]) + input.putback(char_traits::to_char_type(intro[nIntroUsed])); + } + } + state = newState; + } + + switch (state) { + case uis_utf8: m_charSet = utf8; break; + case uis_utf16le: m_charSet = utf16le; break; + case uis_utf16be: m_charSet = utf16be; break; + case uis_utf32le: m_charSet = utf32le; break; + case uis_utf32be: m_charSet = utf32be; break; + default: m_charSet = utf8; break; + } + + ReadAheadTo(0); + } + + Stream::~Stream() + { + delete[] m_pPrefetched; + } + + char Stream::peek() const + { + if (m_readahead.empty()) + { + return Stream::eof(); + } + + return m_readahead[0]; + } + + Stream::operator bool() const + { + return m_input.good() || (!m_readahead.empty() && m_readahead[0] != Stream::eof()); + } + + // get + // . Extracts a character from the stream and updates our position + char Stream::get() + { + char ch = peek(); + AdvanceCurrent(); + m_mark.column++; + + if(ch == '\n') { + m_mark.column = 0; + m_mark.line++; + } + + return ch; + } + + // get + // . Extracts 'n' characters from the stream and updates our position + std::string Stream::get(int n) + { + std::string ret; + ret.reserve(n); + for(int i=0;i i; + } + + void Stream::StreamInUtf8() const + { + unsigned char b = GetNextByte(); + if (m_input.good()) + { + m_readahead.push_back(b); + } + } + + void Stream::StreamInUtf16() const + { + unsigned long ch = 0; + unsigned char bytes[2]; + int nBigEnd = (m_charSet == utf16be) ? 
0 : 1; + + bytes[0] = GetNextByte(); + bytes[1] = GetNextByte(); + if (!m_input.good()) + { + return; + } + ch = (static_cast(bytes[nBigEnd]) << 8) | + static_cast(bytes[1 ^ nBigEnd]); + + if (ch >= 0xDC00 && ch < 0xE000) + { + // Trailing (low) surrogate...ugh, wrong order + QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); + return; + } + else if (ch >= 0xD800 && ch < 0xDC00) + { + // ch is a leading (high) surrogate + + // Four byte UTF-8 code point + + // Read the trailing (low) surrogate + for (;;) + { + bytes[0] = GetNextByte(); + bytes[1] = GetNextByte(); + if (!m_input.good()) + { + QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); + return; + } + unsigned long chLow = (static_cast(bytes[nBigEnd]) << 8) | + static_cast(bytes[1 ^ nBigEnd]); + if (chLow < 0xDC00 || ch >= 0xE000) + { + // Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the stream. + QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); + + // Deal with the next UTF-16 unit + if (chLow < 0xD800 || ch >= 0xE000) + { + // Easiest case: queue the codepoint and return + QueueUnicodeCodepoint(m_readahead, ch); + return; + } + else + { + // Start the loop over with the new high surrogate + ch = chLow; + continue; + } + } + + // Select the payload bits from the high surrogate + ch &= 0x3FF; + ch <<= 10; + + // Include bits from low surrogate + ch |= (chLow & 0x3FF); + + // Add the surrogacy offset + ch += 0x10000; + } + } + + QueueUnicodeCodepoint(m_readahead, ch); + } + + inline char* ReadBuffer(unsigned char* pBuffer) + { + return reinterpret_cast(pBuffer); + } + + unsigned char Stream::GetNextByte() const + { + if (m_nPrefetchedUsed >= m_nPrefetchedAvailable) + { + std::streambuf *pBuf = m_input.rdbuf(); + m_nPrefetchedAvailable = pBuf->sgetn(ReadBuffer(m_pPrefetched), + YAML_PREFETCH_SIZE); + m_nPrefetchedUsed = 0; + if (!m_nPrefetchedAvailable) + { + m_input.setstate(std::ios_base::eofbit); + } + + if (0 == m_nPrefetchedAvailable) + { + return 0; + 
} + } + + return m_pPrefetched[m_nPrefetchedUsed++]; + } + + void Stream::StreamInUtf32() const + { + static int indexes[2][4] = { + {3, 2, 1, 0}, + {0, 1, 2, 3} + }; + + unsigned long ch = 0; + unsigned char bytes[4]; + int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0]; + + bytes[0] = GetNextByte(); + bytes[1] = GetNextByte(); + bytes[2] = GetNextByte(); + bytes[3] = GetNextByte(); + if (!m_input.good()) + { + return; + } + + for (int i = 0; i < 4; ++i) + { + ch <<= 8; + ch |= bytes[pIndexes[i]]; + } + + QueueUnicodeCodepoint(m_readahead, ch); + } +} diff --git a/yaml-cpp/src/stream.h b/yaml-cpp/src/stream.h new file mode 100644 index 0000000..87f48dc --- /dev/null +++ b/yaml-cpp/src/stream.h @@ -0,0 +1,79 @@ +#ifndef STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include "yaml-cpp/noncopyable.h" +#include "yaml-cpp/mark.h" +#include +#include +#include +#include +#include +#include + +namespace YAML +{ + class Stream: private noncopyable + { + public: + friend class StreamCharSource; + + Stream(std::istream& input); + ~Stream(); + + operator bool() const; + bool operator !() const { return !static_cast (*this); } + + char peek() const; + char get(); + std::string get(int n); + void eat(int n = 1); + + static char eof() { return 0x04; } + + const Mark mark() const { return m_mark; } + int pos() const { return m_mark.pos; } + int line() const { return m_mark.line; } + int column() const { return m_mark.column; } + void ResetColumn() { m_mark.column = 0; } + + private: + enum CharacterSet {utf8, utf16le, utf16be, utf32le, utf32be}; + + std::istream& m_input; + Mark m_mark; + + CharacterSet m_charSet; + mutable std::deque m_readahead; + unsigned char* const m_pPrefetched; + mutable size_t m_nPrefetchedAvailable; + 
mutable size_t m_nPrefetchedUsed; + + void AdvanceCurrent(); + char CharAt(size_t i) const; + bool ReadAheadTo(size_t i) const; + bool _ReadAheadTo(size_t i) const; + void StreamInUtf8() const; + void StreamInUtf16() const; + void StreamInUtf32() const; + unsigned char GetNextByte() const; + }; + + // CharAt + // . Unchecked access + inline char Stream::CharAt(size_t i) const { + return m_readahead[i]; + } + + inline bool Stream::ReadAheadTo(size_t i) const { + if(m_readahead.size() > i) + return true; + return _ReadAheadTo(i); + } +} + +#endif // STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/streamcharsource.h b/yaml-cpp/src/streamcharsource.h new file mode 100644 index 0000000..21fae4e --- /dev/null +++ b/yaml-cpp/src/streamcharsource.h @@ -0,0 +1,48 @@ +#ifndef STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include "yaml-cpp/noncopyable.h" +#include + +namespace YAML +{ + class StreamCharSource + { + public: + StreamCharSource(const Stream& stream): m_offset(0), m_stream(stream) {} + StreamCharSource(const StreamCharSource& source): m_offset(source.m_offset), m_stream(source.m_stream) {} + ~StreamCharSource() {} + + operator bool() const; + char operator [] (std::size_t i) const { return m_stream.CharAt(m_offset + i); } + bool operator !() const { return !static_cast(*this); } + + const StreamCharSource operator + (int i) const; + + private: + std::size_t m_offset; + const Stream& m_stream; + + StreamCharSource& operator = (const StreamCharSource&); // non-assignable + }; + + inline StreamCharSource::operator bool() const { + return m_stream.ReadAheadTo(m_offset); + } + + inline const StreamCharSource StreamCharSource::operator + (int i) const { + StreamCharSource source(*this); + 
if(static_cast (source.m_offset) + i >= 0) + source.m_offset += i; + else + source.m_offset = 0; + return source; + } +} + +#endif // STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/stringsource.h b/yaml-cpp/src/stringsource.h new file mode 100644 index 0000000..21be3c9 --- /dev/null +++ b/yaml-cpp/src/stringsource.h @@ -0,0 +1,47 @@ +#ifndef STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + + +#include + +namespace YAML +{ + class StringCharSource + { + public: + StringCharSource(const char *str, std::size_t size): m_str(str), m_size(size), m_offset(0) {} + + operator bool() const { return m_offset < m_size; } + char operator [] (std::size_t i) const { return m_str[m_offset + i]; } + bool operator !() const { return !static_cast(*this); } + + const StringCharSource operator + (int i) const { + StringCharSource source(*this); + if(static_cast (source.m_offset) + i >= 0) + source.m_offset += i; + else + source.m_offset = 0; + return source; + } + + StringCharSource& operator ++ () { + ++m_offset; + return *this; + } + + StringCharSource& operator += (std::size_t offset) { + m_offset += offset; + return *this; + } + private: + const char *m_str; + std::size_t m_size; + std::size_t m_offset; + }; +} + +#endif // STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/tag.cpp b/yaml-cpp/src/tag.cpp new file mode 100644 index 0000000..82a4704 --- /dev/null +++ b/yaml-cpp/src/tag.cpp @@ -0,0 +1,52 @@ +#include "tag.h" +#include "directives.h" +#include "token.h" +#include +#include + +namespace YAML +{ + Tag::Tag(const Token& token): type(static_cast(token.data)) + { + switch(type) { + case VERBATIM: + value = token.value; + break; + case PRIMARY_HANDLE: + value = 
token.value; + break; + case SECONDARY_HANDLE: + value = token.value; + break; + case NAMED_HANDLE: + handle = token.value; + value = token.params[0]; + break; + case NON_SPECIFIC: + break; + default: + assert(false); + } + } + + const std::string Tag::Translate(const Directives& directives) + { + switch(type) { + case VERBATIM: + return value; + case PRIMARY_HANDLE: + return directives.TranslateTagHandle("!") + value; + case SECONDARY_HANDLE: + return directives.TranslateTagHandle("!!") + value; + case NAMED_HANDLE: + return directives.TranslateTagHandle("!" + handle + "!") + value; + case NON_SPECIFIC: + // TODO: + return "!"; + default: + assert(false); + } + throw std::runtime_error("yaml-cpp: internal error, bad tag type"); + } +} + diff --git a/yaml-cpp/src/tag.h b/yaml-cpp/src/tag.h new file mode 100644 index 0000000..5f77548 --- /dev/null +++ b/yaml-cpp/src/tag.h @@ -0,0 +1,28 @@ +#ifndef TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma once +#endif + +#include + +namespace YAML +{ + struct Token; + struct Directives; + + struct Tag { + enum TYPE { + VERBATIM, PRIMARY_HANDLE, SECONDARY_HANDLE, NAMED_HANDLE, NON_SPECIFIC + }; + + Tag(const Token& token); + const std::string Translate(const Directives& directives); + + TYPE type; + std::string handle, value; + }; +} + +#endif // TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/src/token.h b/yaml-cpp/src/token.h new file mode 100644 index 0000000..9807e25 --- /dev/null +++ b/yaml-cpp/src/token.h @@ -0,0 +1,85 @@ +#ifndef TOKEN_H_62B23520_7C8E_11DE_8A39_0800200C9A66 +#define TOKEN_H_62B23520_7C8E_11DE_8A39_0800200C9A66 + +#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 +#pragma 
once +#endif + + +#include "yaml-cpp/mark.h" +#include +#include +#include + +namespace YAML +{ + const std::string TokenNames[] = { + "DIRECTIVE", + "DOC_START", + "DOC_END", + "BLOCK_SEQ_START", + "BLOCK_MAP_START", + "BLOCK_SEQ_END", + "BLOCK_MAP_END", + "BLOCK_ENTRY", + "FLOW_SEQ_START", + "FLOW_MAP_START", + "FLOW_SEQ_END", + "FLOW_MAP_END", + "FLOW_MAP_COMPACT", + "FLOW_ENTRY", + "KEY", + "VALUE", + "ANCHOR", + "ALIAS", + "TAG", + "SCALAR" + }; + + struct Token { + // enums + enum STATUS { VALID, INVALID, UNVERIFIED }; + enum TYPE { + DIRECTIVE, + DOC_START, + DOC_END, + BLOCK_SEQ_START, + BLOCK_MAP_START, + BLOCK_SEQ_END, + BLOCK_MAP_END, + BLOCK_ENTRY, + FLOW_SEQ_START, + FLOW_MAP_START, + FLOW_SEQ_END, + FLOW_MAP_END, + FLOW_MAP_COMPACT, + FLOW_ENTRY, + KEY, + VALUE, + ANCHOR, + ALIAS, + TAG, + PLAIN_SCALAR, + NON_PLAIN_SCALAR + }; + + // data + Token(TYPE type_, const Mark& mark_): status(VALID), type(type_), mark(mark_), data(0) {} + + friend std::ostream& operator << (std::ostream& out, const Token& token) { + out << TokenNames[token.type] << std::string(": ") << token.value; + for(std::size_t i=0;i params; + int data; + }; +} + +#endif // TOKEN_H_62B23520_7C8E_11DE_8A39_0800200C9A66 diff --git a/yaml-cpp/util/api.cpp b/yaml-cpp/util/api.cpp new file mode 100644 index 0000000..e5180a8 --- /dev/null +++ b/yaml-cpp/util/api.cpp @@ -0,0 +1,129 @@ +// a sketch of what the new API might look like + +#include "yaml-cpp/yaml.h" +#include + +int main() +{ + { + // test.yaml + // - foo + // - primes: [2, 3, 5, 7, 11] + // odds: [1, 3, 5, 7, 9, 11] + // - [x, y] + + // move-like semantics + YAML::Value root = YAML::Parse("test.yaml"); + + std::cout << root[0].as(); // "foo" + std::cout << str(root[0]); // "foo", shorthand? + std::cout << root[1]["primes"][3].as(); // "7" + std::cout << root[1]["odds"][6].as(); // throws? 
+ + root[2].push_back(5); + root[3] = "Hello, World"; + root[0].reset(); + root[0]["key"] = "value"; + + std::cout << root; + // # not sure about formatting + // - {key: value} + // - primes: [2, 3, 5, 7, 11] + // odds: [1, 3, 5, 7, 9, 11] + // - [x, y, 5] + // - Hello, World + } + + { + // for all copy-like commands, think of python's "name/value" semantics + YAML::Value root = "Hello"; // Hello + root = YAML::Sequence(); // [] + root[0] = 0; // [0] + root[2] = "two"; // [0, ~, two] # forces root[1] to be initialized to null + + YAML::Value other = root; // both point to the same thing + other[0] = 5; // now root[0] is 0 also + other.push_back(root); // &1 [5, ~, two, *1] + other[3][0] = 0; // &1 [0, ~, two, *1] # since it's a true alias + other.push_back(Copy(root)); // &1 [0, ~, two, *1, &2 [0, ~, two, *2]] + other[4][0] = 5; // &1 [0, ~, two, *1, &2 [5, ~, two, *2]] # they're really different + } + + { + YAML::Value node; // ~ + node[0] = 1; // [1] # auto-construct a sequence + node["key"] = 5; // {0: 1, key: 5} # auto-turn it into a map + node.push_back(10); // error, can't turn a map into a sequence + node.erase("key"); // {0: 1} # still a map, even if we remove the key that caused the problem + node = "Hello"; // Hello # assignment overwrites everything, so it's now just a plain scalar + } + + { + YAML::Value map; // ~ + map[3] = 1; // {3: 1} # auto-constructs a map, *not* a sequence + + YAML::Value seq; // ~ + seq = YAML::Sequence(); // [] + seq[3] = 1; // [~, ~, ~, 1] + } + + { + YAML::Value node; // ~ + node[0] = node; // &1 [*1] # fun stuff + } + + { + YAML::Value node; + YAML::Value subnode = node["key"]; // 'subnode' is not instantiated ('node' is still null) + subnode = "value"; // {key: value} # now it is + YAML::Value subnode2 = node["key2"]; + node["key3"] = subnode2; // subnode2 is still not instantiated, but node["key3"] is "pseudo" aliased to it + subnode2 = "monkey"; // {key: value, key2: &1 monkey, key3: *1} # bam! 
it instantiates both + } + + { + YAML::Value seq = YAML::Sequence(); + seq[0] = "zero"; // [zero] + seq[1] = seq[0]; // [&1 zero, *1] + seq[0] = seq[1]; // [&1 zero, *1] # no-op (they both alias the same thing, so setting them equal is nothing) + Is(seq[0], seq[1]); // true + seq[1] = "one"; // [&1 one, *1] + UnAlias(seq[1]); // [one, one] + Is(seq[0], seq[1]); // false + } + + { + YAML::Value root; + root.push_back("zero"); + root.push_back("one"); + root.push_back("two"); + YAML::Value two = root[2]; + root = "scalar"; // 'two' is still "two", even though 'root' is "scalar" (the sequence effectively no longer exists) + + // Note: in all likelihood, the memory for nodes "zero" and "one" is still allocated. How can it go away? Weak pointers? + } + + { + YAML::Value root; // ~ + root[0] = root; // &1 [*1] + root[0] = 5; // [5] + } + + { + YAML::Value root; + YAML::Value key; + key["key"] = "value"; + root[key] = key; // &1 {key: value}: *1 + } + + { + YAML::Value root; + root[0] = "hi"; + root[1][0] = "bye"; + root[1][1] = root; // &1 [hi, [bye, *1]] # root + YAML::Value sub = root[1]; // &1 [bye, [hi, *1]] # sub + root = "gone"; // [bye, gone] # sub + } + + return 0; +} diff --git a/yaml-cpp/util/parse.cpp b/yaml-cpp/util/parse.cpp new file mode 100644 index 0000000..d02a76a --- /dev/null +++ b/yaml-cpp/util/parse.cpp @@ -0,0 +1,65 @@ +#include "yaml-cpp/yaml.h" +#include "yaml-cpp/eventhandler.h" +#include +#include +#include + +struct Params { + bool hasFile; + std::string fileName; +}; + +Params ParseArgs(int argc, char **argv) { + Params p; + + std::vector args(argv + 1, argv + argc); + + return p; +} + +class NullEventHandler: public YAML::EventHandler +{ +public: + virtual void OnDocumentStart(const YAML::Mark&) {} + virtual void OnDocumentEnd() {} + + virtual void OnNull(const YAML::Mark&, YAML::anchor_t) {} + virtual void OnAlias(const YAML::Mark&, YAML::anchor_t) {} + virtual void OnScalar(const YAML::Mark&, const std::string&, YAML::anchor_t, const 
std::string&) {} + + virtual void OnSequenceStart(const YAML::Mark&, const std::string&, YAML::anchor_t) {} + virtual void OnSequenceEnd() {} + + virtual void OnMapStart(const YAML::Mark&, const std::string&, YAML::anchor_t) {} + virtual void OnMapEnd() {} +}; + +void parse(std::istream& input) +{ + try { + YAML::Parser parser(input); + YAML::Node doc; + while(parser.GetNextDocument(doc)) { + YAML::Emitter emitter; + emitter << doc; + std::cout << emitter.c_str() << "\n"; + } + } catch(const YAML::Exception& e) { + std::cerr << e.what() << "\n"; + } +} + +int main(int argc, char **argv) +{ + Params p = ParseArgs(argc, argv); + + if(argc > 1) { + std::ifstream fin; + fin.open(argv[1]); + parse(fin); + } else { + parse(std::cin); + } + + return 0; +} diff --git a/yaml-cpp/yaml-cpp.pc.cmake b/yaml-cpp/yaml-cpp.pc.cmake new file mode 100644 index 0000000..04d343f --- /dev/null +++ b/yaml-cpp/yaml-cpp.pc.cmake @@ -0,0 +1,11 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=@CMAKE_INSTALL_PREFIX@ +libdir=${prefix}/@LIB_INSTALL_DIR@ +includedir=${prefix}/@INCLUDE_INSTALL_ROOT_DIR@ + +Name: Yaml-cpp +Description: A YAML parser and emitter for C++ +Version: @YAML_CPP_VERSION@ +Requires: +Libs: -L${libdir} -lyaml-cpp +Cflags: -I${includedir}