PLYwoot
Header-only C++17 library for parsing and writing PLY files
Loading...
Searching...
No Matches
header_scanner.hpp
Go to the documentation of this file.
1/*
2 This file is part of PLYwoot, a header-only PLY parser.
3
4 Copyright (C) 2023-2025, Ton van den Heuvel
5
6 PLYwoot is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20#ifndef PLYWOOT_HEADER_SCANNER_HPP
21#define PLYWOOT_HEADER_SCANNER_HPP
22
24
25#include "std.hpp"
26
27#include <cstdint>
28#include <cstring>
29#include <istream>
30#include <string>
31#include <string_view>
32
33namespace plywoot {
34
37{
41 HeaderScannerException(const std::string &message) : Exception("scanner error: " + message) {}
42};
43
51
52}
53
54namespace plywoot::detail {
55
/// Keyword that marks the end of the header section of a PLY file.
static constexpr char endHeaderToken[]{"end_header"};
57
/// Lookup table indexed by byte value (as `unsigned char`) that flags the
/// bytes terminating a token: horizontal tab (0x09), line feed (0x0a),
/// carriage return (0x0d), space (0x20) and 0xff. All other bytes are part of
/// a token.
// clang-format off
constexpr bool isTokenDelimiter[256] = {
  /* 0x00 - 0x0f: '\t', '\n' and '\r' */
  false, false, false, false, false, false, false, false, false, true,  true,  false, false, true,  false, false,
  /* 0x10 - 0x1f */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0x20 - 0x2f: ' ' */
  true,  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0x30 - 0x3f */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0x40 - 0x4f */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0x50 - 0x5f */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0x60 - 0x6f */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0x70 - 0x7f */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0x80 - 0x8f */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0x90 - 0x9f */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0xa0 - 0xaf */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0xb0 - 0xbf */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0xc0 - 0xcf */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0xd0 - 0xdf */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0xe0 - 0xef */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
  /* 0xf0 - 0xff: 0xff */
  false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true,
};
// clang-format on
88
91class HeaderScanner
92{
93public:
96 HeaderScanner(std::istream &is)
97 {
98 if (!is) { throw InvalidInputStream{}; }
99
100 std::string line;
101 while (bool(std::getline(is, line)) && line != endHeaderToken)
102 {
103 buffer_.append(line);
104 buffer_.push_back('\n');
105 }
106
107 if (line == endHeaderToken)
108 {
109 buffer_.append(line);
110 buffer_.push_back('\n');
111 }
112 else { buffer_.push_back(EOF); }
113
114 // Initialize the read head to the start of the buffered data.
115 c_ = buffer_.data();
116 }
117
119 enum class Token {
120 Unknown = 0,
121 Ascii,
122 BinaryBigEndian,
123 BinaryLittleEndian,
124 Char,
125 Comment,
126 Double,
127 Element,
128 EndHeader,
129 Eof,
130 Float,
131 FloatingPointNumber,
132 Format,
133 Identifier,
134 Int,
135 List,
136 MagicNumber,
137 Number,
138 Property,
139 Short,
140 UChar,
141 UInt,
142 UShort,
143 };
144
146 Token nextToken() noexcept
147 {
148 // Ignore all whitespace, read upto the first non-whitespace character.
149 const char *last = buffer_.data() + buffer_.size();
150 while (c_ < last && 0 <= *c_ && *c_ <= 0x20)
151 {
152 if (*c_ == '\n') ++line_;
153 ++c_;
154 }
155
156 // Read an identifier. After an identifier is read, the read head is
157 // positioned at the start of the next token or whitespace.
158 const char *tokenStart = c_;
159 while (!isTokenDelimiter[(unsigned char)*c_]) { c_++; }
160 tokenString_ = std::string_view(tokenStart, c_ - tokenStart);
161
162 // In case the identifier is one of the reserved keywords, handle it as
163 // such. Use first character for quick comparison.
164 switch (tokenString_.front())
165 {
166 case 'a':
167 token_ = !tokenString_.compare("ascii") ? Token::Ascii : Token::Identifier;
168 break;
169 case 'b':
170 if (!tokenString_.compare("binary_big_endian"))
171 token_ = Token::BinaryBigEndian;
172 else if (!tokenString_.compare("binary_little_endian"))
173 token_ = Token::BinaryLittleEndian;
174 else
175 token_ = Token::Identifier;
176 break;
177 case 'c':
178 if (!tokenString_.compare("char")) { token_ = Token::Char; }
179 else if (!tokenString_.compare("comment"))
180 {
181 token_ = Token::Comment;
182 readComment();
183 }
184 else { token_ = Token::Identifier; }
185 break;
186 case 'd':
187 token_ = (!tokenString_.compare("double") ? Token::Double : Token::Identifier);
188 break;
189 case 'e':
190 if (!tokenString_.compare("element"))
191 token_ = Token::Element;
192 else if (!tokenString_.compare("end_header"))
193 token_ = Token::EndHeader;
194 else
195 token_ = Token::Identifier;
196 break;
197 case 'f':
198 if (!tokenString_.compare("format"))
199 token_ = Token::Format;
200 else if (!tokenString_.compare("float") || !tokenString_.compare("float32"))
201 token_ = Token::Float;
202 else if (!tokenString_.compare("float64"))
203 token_ = Token::Double;
204 else
205 token_ = Token::Identifier;
206 break;
207 case 'l':
208 token_ = (!tokenString_.compare("list") ? Token::List : Token::Identifier);
209 break;
210 case 'i':
211 if (!tokenString_.compare("int") || !tokenString_.compare("int32"))
212 token_ = Token::Int;
213 else if (!tokenString_.compare("int8"))
214 token_ = Token::Char;
215 else if (!tokenString_.compare("int16"))
216 token_ = Token::Short;
217 else
218 token_ = Token::Identifier;
219 break;
220 case 'p':
221 if (!tokenString_.compare("ply"))
222 token_ = Token::MagicNumber;
223 else if (!tokenString_.compare("property"))
224 token_ = Token::Property;
225 else
226 token_ = Token::Identifier;
227 break;
228 case 's':
229 token_ = (!tokenString_.compare("short") ? Token::Short : Token::Identifier);
230 break;
231 case 'u':
232 if (!tokenString_.compare("uint8") || !tokenString_.compare("uchar"))
233 token_ = Token::UChar;
234 else if (!tokenString_.compare("uint16") || !tokenString_.compare("ushort"))
235 token_ = Token::UShort;
236 else if (!tokenString_.compare("uint32") || !tokenString_.compare("uint"))
237 token_ = Token::UInt;
238 else
239 token_ = Token::Identifier;
240 break;
241 case '-':
242 case '+':
243 case '.':
244 case '0':
245 case '1':
246 case '2':
247 case '3':
248 case '4':
249 case '5':
250 case '6':
251 case '7':
252 case '8':
253 case '9':
254 // TODO(ton): scientific notation for floating point numbers?
255 token_ = (tokenString_.find('.') != std::string::npos) ? Token::FloatingPointNumber : Token::Number;
256 break;
257 case EOF:
258 token_ = Token::Eof;
259 break;
260 default:
261 token_ = Token::Identifier;
262 break;
263 }
264
265 return token_;
266 }
267
269 static constexpr bool isKeyword(Token token)
270 {
271 switch (token)
272 {
273 case Token::Ascii:
274 case Token::BinaryBigEndian:
275 case Token::BinaryLittleEndian:
276 case Token::Char:
277 case Token::Double:
278 case Token::Element:
279 case Token::EndHeader:
280 case Token::Float:
281 case Token::Format:
282 case Token::Int:
283 case Token::List:
284 case Token::Property:
285 case Token::Short:
286 case Token::UChar:
287 case Token::UInt:
288 case Token::UShort:
289 return true;
290 default:
291 break;
292 }
293
294 return false;
295 }
296
300 Comment comment() const { return {line_, tokenString()}; }
301
303 Token token() const noexcept { return token_; }
304
306 std::size_t tokenNumber() const noexcept
307 {
308 return static_cast<std::size_t>(std::strtoull(tokenString_.data(), nullptr, 10));
309 }
310
312 std::string tokenString() const noexcept { return std::string(tokenString_.data(), tokenString_.size()); }
313
314private:
317 void readComment()
318 {
319 // Skip spaces and tabs and the first non-whitespace character.
320 const char *end = buffer_.data() + buffer_.size();
321 while (c_ < end && (*c_ == ' ' || *c_ == '\t')) { ++c_; }
322
323 const std::size_t remainingBytes = buffer_.size() - (c_ - buffer_.data());
324 const char *last = static_cast<const char *>(::memchr(c_, '\n', remainingBytes));
325 if (last != nullptr)
326 {
327 tokenString_ = std::string_view(c_, last - c_);
328 c_ = last;
329 }
330 }
331
333 std::string buffer_;
339 const char *c_{buffer_.data()};
340
342 Token token_{Token::Unknown};
344 std::string_view tokenString_;
346 std::uint32_t line_{0};
347};
348
349}
350
351#endif
Base class for all exceptions thrown by PLYwoot.
constexpr bool isTokenDelimiter[256]
Base class for all header scanner exceptions.
HeaderScannerException(const std::string &message)
InvalidInputStream()
Constructs an invalid input stream exception.