diff --git a/libdap-chain-global-db b/libdap-chain-global-db deleted file mode 160000 index b7223d98a7a262c2a7a56039e10c5fccd5fc79cb..0000000000000000000000000000000000000000 --- a/libdap-chain-global-db +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b7223d98a7a262c2a7a56039e10c5fccd5fc79cb diff --git a/libdap-chain-global-db/.gitignore b/libdap-chain-global-db/.gitignore new file mode 100755 index 0000000000000000000000000000000000000000..c6127b38c1aa25968a88db3940604d41529e4cf5 --- /dev/null +++ b/libdap-chain-global-db/.gitignore @@ -0,0 +1,52 @@ +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf diff --git a/libdap-chain-global-db/CMakeLists.txt b/libdap-chain-global-db/CMakeLists.txt new file mode 100755 index 0000000000000000000000000000000000000000..4f677d879eebfcf558c2237f5c5200fa526fb0cd --- /dev/null +++ b/libdap-chain-global-db/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.1) +project (dap_chain_global_db C) + +file(GLOB DAP_CHAIN_GLOBAL_DB_SRC *.c) +file(GLOB DAP_CHAIN_GLOBAL_DB_HDR *.h) + +if(WIN32) + include_directories(../3rdparty/wepoll/) + include_directories(../3rdparty/uthash/src/) +endif() + + +add_library(${PROJECT_NAME} STATIC ${DAP_CHAIN_GLOBAL_DB_SRC} ${DAP_CHAIN_GLOBAL_DB_HDR}) + +add_subdirectory(libdap-cuttdb) + +target_link_libraries(${PROJECT_NAME} dap_core dap_crypto dap_chain dap_chain_crypto sqlite3 dap_cuttdb json-c) + +target_include_directories(dap_chain_global_db INTERFACE .) 
+ +#set(${PROJECT_NAME}_DEFINITIONS CACHE INTERNAL "${PROJECT_NAME}: Definitions" FORCE) + +#set(${PROJECT_NAME}_INCLUDE_DIRS ${PROJECT_SOURCE_DIR} CACHE INTERNAL "${PROJECT_NAME}: Include Directories" FORCE) diff --git a/libdap-chain-global-db/LICENSE b/libdap-chain-global-db/LICENSE new file mode 100755 index 0000000000000000000000000000000000000000..94a9ed024d3859793618152ea559a168bbcbb5e2 --- /dev/null +++ b/libdap-chain-global-db/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. 
Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. 
To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. 
If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. 
If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. diff --git a/libdap-chain-global-db/README.md b/libdap-chain-global-db/README.md new file mode 100755 index 0000000000000000000000000000000000000000..0cc1c25d41d34ffc88c7f96a7130c91a79922ceb --- /dev/null +++ b/libdap-chain-global-db/README.md @@ -0,0 +1,2 @@ +# libdap-chain-mine +DapChain mining library for PoW tokens diff --git a/libdap-chain-global-db/dap_chain_global_db.c b/libdap-chain-global-db/dap_chain_global_db.c new file mode 100755 index 0000000000000000000000000000000000000000..fa753cb3c3aad93d3bda43bf74d86473c12d2250 --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db.c @@ -0,0 +1,911 @@ +/* + * Authors: + * Alexander Lysikov <alexander.lysikov@demlabs.net> + * DeM Labs Inc. https://demlabs.net + * Kelvin Project https://github.com/kelvinblockchain +* Copyright (c) 2019-2020 + * All rights reserved. + + This file is part of DAP (Deus Applications Prototypes) the open source project + + DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + DAP is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdint.h> +#include <pthread.h> +#include <time.h> +#include <assert.h> +//#include <string.h> + +#include "uthash.h" + +#include "dap_chain_common.h" +#include "dap_strfuncs.h" +//#include "dap_chain_global_db_pvt.h" +#include "dap_chain_global_db_hist.h" +#include "dap_chain_global_db.h" + +#ifdef WIN32 +#include "registry.h" +#include <string.h> +#endif + +#ifndef MAX_PATH +#define MAX_PATH 120 +#endif + +#define LOG_TAG "dap_global_db" + + +// for access from several streams +//static pthread_mutex_t ldb_mutex_ = PTHREAD_MUTEX_INITIALIZER; + +static inline void lock() +{ + //pthread_mutex_lock(&ldb_mutex_); +} + +static inline void unlock() +{ + //pthread_mutex_unlock(&ldb_mutex_); +} + +// Callback table item +typedef struct history_group_item +{ + char prefix[32]; + uint8_t padding[7]; + bool auto_track; // Track history actions automaticly + dap_global_db_obj_callback_notify_t callback_notify; + void * callback_arg; + char *group_name_for_history; + UT_hash_handle hh; +} history_group_item_t; + +// Callback table item +typedef struct history_extra_group_item +{ + char *group_name; + char *group_name_for_history; + dap_global_db_obj_callback_notify_t callback_notify; + void * callback_arg; + UT_hash_handle hh; +} history_extra_group_item_t; + +// Tacked group callbacks +static history_group_item_t * s_history_group_items = NULL; +static char *s_storage_path = NULL; +static history_extra_group_item_t * s_history_extra_group_items = NULL; + +char * extract_group_prefix(const char * a_group); + +/** + * @brief extract_group_prefix + * @param a_group + * @return + */ +char * extract_group_prefix(const char* a_group) +{ + char * l_group_prefix = NULL, *l_delimeter; + size_t l_group_prefix_size; + +// l_delimeter = index(a_group, '.'); + l_delimeter = strchr(a_group, '.'); + + if(l_delimeter == NULL) { + l_group_prefix = dap_strdup(a_group); + l_group_prefix_size = dap_strlen(l_group_prefix) + 1; + } else { + 
l_group_prefix_size = (size_t) l_delimeter - (size_t) a_group; + if(l_group_prefix_size > 1) + l_group_prefix = strndup(a_group, l_group_prefix_size); + } + return l_group_prefix; +} + +/* + * Get history group by group name: returns a dap_strdup() copy of the history group registered for a_group_name, or NULL when the name is NULL or not registered + */ +char* dap_chain_global_db_get_history_group_by_group_name(const char * a_group_name) +{ + if(!s_history_extra_group_items || !a_group_name) + return NULL; + history_extra_group_item_t * l_history_extra_group_item = NULL; + HASH_FIND_STR(s_history_extra_group_items, a_group_name, l_history_extra_group_item); + if(!l_history_extra_group_item) + return NULL; // not found: the original fell off the end of a non-void function here + return dap_strdup(l_history_extra_group_item->group_name_for_history); +} + +/** + * @brief dap_chain_global_db_add_history_group_prefix + * @details Add group prefix that will be tracking all changes + * @param a_group_prefix + * @param a_group_name_for_history + */ +void dap_chain_global_db_add_history_group_prefix(const char * a_group_prefix, const char * a_group_name_for_history) +{ + history_group_item_t * l_item = DAP_NEW_Z(history_group_item_t); + snprintf(l_item->prefix, sizeof(l_item->prefix), "%s", a_group_prefix); + l_item->group_name_for_history = dap_strdup(a_group_name_for_history);//GROUP_LOCAL_HISTORY + l_item->auto_track = true; + HASH_ADD_STR(s_history_group_items, prefix, l_item); +} + +/** + * @brief dap_chain_global_db_add_history_callback_notify + * @param a_group_prefix + * @param a_callback + */ +void dap_chain_global_db_add_history_callback_notify(const char * a_group_prefix, + dap_global_db_obj_callback_notify_t a_callback, void * a_arg) +{ + history_group_item_t * l_item = NULL; + HASH_FIND_STR(s_history_group_items, a_group_prefix, l_item); + if(l_item) { + l_item->callback_notify = a_callback; + l_item->callback_arg = a_arg; + } else + log_it(L_WARNING, "Can't setup notify callback for groups with prefix %s. 
Possible not in history track state", + a_group_prefix); +} + +/** + * @brief dap_chain_global_db_add_history_extra_group + * @details Add group prefix that will be tracking all changes + * @param a_group_prefix + */ +const char* dap_chain_global_db_add_history_extra_group(const char * a_group_name, dap_chain_node_addr_t *a_nodes, size_t *a_nodes_count) +{ + history_extra_group_item_t* l_item = DAP_NEW_Z(history_extra_group_item_t); + l_item->group_name = dap_strdup(a_group_name); + l_item->group_name_for_history = dap_strdup_printf("local.history.%s", a_group_name); + HASH_ADD_STR(s_history_extra_group_items, group_name, l_item); + return (const char*)l_item->group_name_for_history; +} + +/** + * @brief dap_chain_global_db_add_history_extra_group_callback_notify + * @param a_group_prefix + * @param a_callback + */ +void dap_chain_global_db_add_history_extra_group_callback_notify(const char * a_group_prefix, + dap_global_db_obj_callback_notify_t a_callback, void * a_arg) +{ + history_extra_group_item_t * l_item = NULL; + HASH_FIND_STR(s_history_extra_group_items, a_group_prefix, l_item); + if(l_item) { + l_item->callback_notify = a_callback; + l_item->callback_arg = a_arg; + } else + log_it(L_WARNING, "Can't setup notify callback for extra groups with prefix %s. 
Possible not in history track state", + a_group_prefix); +} + +/** + * Clean struct dap_global_db_obj_t + */ +void dap_chain_global_db_obj_clean(dap_global_db_obj_t *obj) +{ + if(!obj) + return; + DAP_DELETE(obj->key); + DAP_DELETE(obj->value); + obj->key = NULL; + obj->value = NULL; +} + +/** + * Delete struct dap_global_db_obj_t + */ +void dap_chain_global_db_obj_delete(dap_global_db_obj_t *obj) +{ + dap_chain_global_db_obj_clean(obj); + DAP_DELETE(obj); +} + +/** + * Delete mass of struct dap_global_db_obj_t + */ +void dap_chain_global_db_objs_delete(dap_global_db_obj_t *objs, size_t a_count) +{ + //int i = 0; + //while(objs) { + for(size_t i = 0; i < a_count; i++) { + //if(!(objs[i])) + // break; + dap_chain_global_db_obj_clean(objs + i); + //i++; + } + DAP_DELETE(objs); +} + +/** + * @brief dap_chain_global_db_init + * @param g_config + * @return + */ +int dap_chain_global_db_init(dap_config_t * g_config) +{ + s_storage_path= dap_strdup( + dap_config_get_item_str(g_config, "resources", "dap_global_db_path") ); + //const char *l_driver_name = dap_config_get_item_str_default(g_config, "resources", "dap_global_db_driver", "sqlite"); + const char *l_driver_name = dap_config_get_item_str_default(g_config, "resources", "dap_global_db_driver", "cdb"); + lock(); + int res = dap_db_driver_init(l_driver_name, s_storage_path); + unlock(); + if( res != 0 ) + log_it(L_CRITICAL, "Hadn't initialized db driver \"%s\" on path \"%s\"", l_driver_name, s_storage_path ); + return res; +} + +/** + * @brief dap_chain_global_db_deinit + */ +void dap_chain_global_db_deinit(void) +{ + lock(); + dap_db_driver_deinit(); + //dap_db_deinit(); + unlock(); + history_group_item_t * l_item = NULL, *l_item_tmp = NULL; + HASH_ITER(hh, s_history_group_items, l_item, l_item_tmp) + { + DAP_DELETE(l_item->group_name_for_history); + DAP_DELETE(l_item); + } + history_extra_group_item_t * l_add_item = NULL, *l_add_item_tmp = NULL; + HASH_ITER(hh, s_history_extra_group_items, l_add_item, l_add_item_tmp) 
+ { + DAP_DELETE(l_add_item->group_name); + DAP_DELETE(l_add_item->group_name_for_history); + DAP_DELETE(l_add_item); + } + s_history_group_items = NULL; + s_history_extra_group_items = NULL; // these items were just freed as well; a stale head would make later lookups walk freed memory +} + +/** + * @brief dap_chain_global_db_flush + * @return value returned by dap_db_driver_flush() + */ +int dap_chain_global_db_flush(void){ + lock(); + int res = dap_db_driver_flush(); + unlock(); + return res; +} + +/** + * Get entry from base + * + * return dap_store_obj_t* + */ +void* dap_chain_global_db_obj_get(const char *a_key, const char *a_group) +{ + size_t l_count = 1; + // read one item + dap_store_obj_t *l_store_data = dap_chain_global_db_driver_read(a_group, a_key, &l_count); + return l_store_data; + + /* size_t count = 0; + if(!a_key) + return NULL; + size_t query_len = (size_t) snprintf(NULL, 0, "(&(cn=%s)(objectClass=%s))", a_key, a_group); + char *query = DAP_NEW_Z_SIZE(char, query_len + 1); //char query[32 + strlen(a_key)]; + snprintf(query, query_len + 1, "(&(cn=%s)(objectClass=%s))", a_key, a_group); // objectClass != ou + lock(); + dap_store_obj_t *store_data = dap_db_read_data(query, &count); + unlock(); + assert(count <= 1); + DAP_DELETE(query); + return store_data;*/ +} + +/** + * @brief dap_chain_global_db_obj_gr_get + * @param a_key + * @param a_data_out + * @param a_group + * @return + */ +dap_store_obj_t* dap_chain_global_db_obj_gr_get(const char *a_key, size_t *a_data_len_out, const char *a_group) +{ + //uint8_t *l_ret_value = NULL; + // read several items, 0 - no limits + size_t l_data_len_out = 0; + if(a_data_len_out) + l_data_len_out = *a_data_len_out; + dap_store_obj_t *l_store_data = dap_chain_global_db_driver_read(a_group, a_key, &l_data_len_out); + if(l_store_data) { + //l_ret_value = (l_store_data->value) ? DAP_NEW_SIZE(uint8_t, l_store_data->value_len) : NULL; //ret_value = (store_data->value) ? 
strdup(store_data->value) : NULL; + //memcpy(l_ret_value, l_store_data->value, l_store_data->value_len); + if(a_data_len_out) + *a_data_len_out = l_data_len_out;//l_store_data->value_len; + //dap_store_obj_free(l_store_data, l_data_len_out); + } + return l_store_data; +} + +/** + * @brief dap_chain_global_db_gr_get + * @param a_key + * @param a_data_len_out in: forwarded to the driver read as its count argument; out: length of the returned value + * @param a_group + * @return newly allocated copy of the stored value, or NULL when nothing was read or the record has no value + */ +uint8_t * dap_chain_global_db_gr_get(const char *a_key, size_t *a_data_len_out, const char *a_group) +{ + uint8_t *l_ret_value = NULL; + // read several items, 0 - no limits + size_t l_data_len_out = 0; + if(a_data_len_out) + l_data_len_out = *a_data_len_out; + dap_store_obj_t *l_store_data = dap_chain_global_db_driver_read(a_group, a_key, &l_data_len_out); + if(l_store_data) { + l_ret_value = (l_store_data->value) ? DAP_NEW_SIZE(uint8_t, l_store_data->value_len) : NULL; //ret_value = (store_data->value) ? strdup(store_data->value) : NULL; + if(l_ret_value) memcpy(l_ret_value, l_store_data->value, l_store_data->value_len); // destination is NULL for an empty record or a failed allocation + if(a_data_len_out) + *a_data_len_out = l_store_data->value_len; + dap_store_obj_free(l_store_data, l_data_len_out); + } + return l_ret_value; + + /*ldb + * uint8_t *l_ret_value = NULL; + size_t l_count = 0; + if(!a_key) + return NULL; + size_t l_query_len =(size_t) snprintf(NULL, 0, "(&(cn=%s)(objectClass=%s))", a_key, a_group); + + char *l_query = DAP_NEW_Z_SIZE(char, l_query_len + 1); //char query[32 + strlen(a_key)]; + snprintf(l_query, l_query_len + 1, "(&(cn=%s)(objectClass=%s))", a_key, a_group); // objectClass != ou + lock(); + pdap_store_obj_t store_data = dap_db_read_data(l_query, &l_count); + unlock(); + if(l_count == 1 && store_data && !strcmp(store_data->key, a_key)) { + l_ret_value = (store_data->value) ? DAP_NEW_SIZE(uint8_t, store_data->value_len) : NULL; //ret_value = (store_data->value) ? 
strdup(store_data->value) : NULL; + memcpy(l_ret_value, store_data->value, store_data->value_len); + if(a_data_out) + *a_data_out = store_data->value_len; + } + dap_store_obj_free(store_data, l_count); + DAP_DELETE(l_query); + return l_ret_value;*/ +} + +uint8_t * dap_chain_global_db_get(const char *a_key, size_t *a_data_out) +{ + return dap_chain_global_db_gr_get(a_key, a_data_out, GROUP_LOCAL_GENERAL); +} + + +/** + * Add info about the deleted entry to the base + */ +static bool global_db_gr_del_add(char *a_key,const char *a_group, time_t a_timestamp) +{ + dap_store_obj_t store_data;// = DAP_NEW_Z_SIZE(dap_store_obj_t, sizeof(struct dap_store_obj)); + memset(&store_data, 0, sizeof(dap_store_obj_t)); + store_data.type = 'a'; + store_data.key = a_key;//dap_strdup(a_key); + // no data + store_data.value = NULL; + store_data.value_len = 0; + // group = parent group + '.del' + store_data.group = dap_strdup_printf("%s.del", a_group); + store_data.timestamp = a_timestamp;//time(NULL); + lock(); + int l_res = dap_chain_global_db_driver_add(&store_data, 1); + unlock(); + DAP_DELETE(store_data.group); + if(l_res>=0) + return true; + return false; +} + +/** + * Delete info about the deleted entry from the base + */ +static bool global_db_gr_del_del(char *a_key,const char *a_group) +{ + if(!a_key) + return NULL; + dap_store_obj_t store_data;// = DAP_NEW_Z_SIZE(dap_store_obj_t, sizeof(struct dap_store_obj)); + memset(&store_data, 0, sizeof(dap_store_obj_t)); + store_data.key = a_key; + // store_data->c_key = a_key; + store_data.group = dap_strdup_printf("%s.del", a_group); + //store_data->c_group = a_group; + lock(); + int l_res = 0; + if(dap_chain_global_db_driver_is(store_data.group, store_data.key)) + l_res = dap_chain_global_db_driver_delete(&store_data, 1); + unlock(); + DAP_DELETE(store_data.group); + if(l_res>=0) + return true; + return false; +} + +/** + * Get timestamp of the deleted entry + */ +time_t global_db_gr_del_get_timestamp(const char *a_group, char *a_key) 
+{ + time_t l_timestamp = 0; + if(!a_key) + return l_timestamp; + dap_store_obj_t store_data; + memset(&store_data, 0, sizeof(dap_store_obj_t)); + store_data.key = a_key; + store_data.group = dap_strdup_printf("%s.del", a_group); + lock(); + if(dap_chain_global_db_driver_is(store_data.group, store_data.key)) { + size_t l_count_out = 0; + dap_store_obj_t *l_obj = dap_chain_global_db_driver_read(store_data.group, store_data.key, &l_count_out); + assert(l_count_out <= 1); + if(l_obj) { // the read may still return nothing; keep the 0 timestamp instead of dereferencing NULL + l_timestamp = l_obj->timestamp; + dap_store_obj_free(l_obj, l_count_out); + } + } + unlock(); + DAP_DELETE(store_data.group); + return l_timestamp; +} + +/** + * + */ + +/** + * @brief dap_chain_global_db_gr_set + * @param a_key + * @param a_value + * @param a_value_len + * @param a_group + * @details Set one entry to base. IMPORTANT: a_key and a_value should be passed without free after (it will be released by gdb itself) + * @return + */ +bool dap_chain_global_db_gr_set(char *a_key, void *a_value, size_t a_value_len, const char *a_group) +{ + dap_store_obj_t store_data;// = DAP_NEW_Z_SIZE(dap_store_obj_t, sizeof(struct dap_store_obj)); + memset(&store_data, 0, sizeof(dap_store_obj_t)); + store_data.type = 'a'; + store_data.key = a_key;//dap_strdup(a_key); + store_data.value = a_value;//DAP_NEW_Z_SIZE(uint8_t, a_value_len); + + //memcpy(store_data.value, a_value, a_value_len); + + store_data.value_len = (a_value_len == (size_t) -1) ? 
dap_strlen((const char*) a_value) : a_value_len; + store_data.group = dap_strdup(a_group); + store_data.timestamp = time(NULL); + lock(); + int l_res = dap_chain_global_db_driver_add(&store_data, 1); + unlock(); + + // Extract prefix if added successfuly, add history log and call notify callback if present + if(!l_res) { + // Delete info about the deleted entry from the base if one present + global_db_gr_del_del(a_key, a_group); + + char * l_group_prefix = extract_group_prefix(a_group); + history_group_item_t * l_history_group_item = NULL; + if(l_group_prefix) + HASH_FIND_STR(s_history_group_items, l_group_prefix, l_history_group_item); + + if(l_history_group_item) { + if(l_history_group_item->auto_track) { + lock(); + dap_db_history_add('a', &store_data, 1, l_history_group_item->group_name_for_history); + unlock(); + } + if(l_history_group_item->callback_notify) + l_history_group_item->callback_notify(l_history_group_item->callback_arg, 'a', l_group_prefix, a_group, + a_key, a_value, store_data.value_len); // resolved length: a_value_len may be the (size_t)-1 "use strlen" marker + } + // looking for extra group + else { + history_extra_group_item_t * l_history_extra_group_item = NULL; + HASH_FIND_STR(s_history_extra_group_items, a_group, l_history_extra_group_item); + + if(l_history_extra_group_item) { + lock(); + dap_db_history_add('a', &store_data, 1, l_history_extra_group_item->group_name_for_history); + unlock(); + if(l_history_extra_group_item->callback_notify) + l_history_extra_group_item->callback_notify(l_history_extra_group_item->callback_arg, 'a', + l_group_prefix, + a_group, + a_key, a_value, store_data.value_len); // resolved length, same as the prefix branch + } + } + if(l_group_prefix) + DAP_DELETE(l_group_prefix); + } else { + log_it(L_ERROR, "Save error: %d", l_res); + } + DAP_DELETE(store_data.group); // dap_strdup() copy made above; nothing else releases it + + return !l_res; +} + +bool dap_chain_global_db_set( char *a_key, void *a_value, size_t a_value_len) +{ + return dap_chain_global_db_gr_set(a_key, a_value, a_value_len, GROUP_LOCAL_GENERAL); +} + +/** + * Delete entry from base + */ +bool dap_chain_global_db_gr_del(char *a_key,const char 
*a_group) +{ + if(!a_key) + return false; + pdap_store_obj_t store_data = DAP_NEW_Z_SIZE(dap_store_obj_t, sizeof(struct dap_store_obj)); + store_data->key = a_key; + store_data->group = dap_strdup(a_group); + lock(); + int l_res = dap_chain_global_db_driver_delete(store_data, 1); + unlock(); + // do not add to history if l_res=1 (already deleted) + if(!l_res) { + // added to Del group + global_db_gr_del_add(a_key, a_group, time(NULL)); + // Extract prefix + char * l_group_prefix = extract_group_prefix(a_group); + history_group_item_t * l_history_group_item = NULL; + if(l_group_prefix) + HASH_FIND_STR(s_history_group_items, l_group_prefix, l_history_group_item); + if(l_history_group_item) { + if(l_history_group_item->auto_track) { + lock(); + dap_db_history_add('d', store_data, 1, l_history_group_item->group_name_for_history); + unlock(); + } + if(l_history_group_item->callback_notify) + l_history_group_item->callback_notify(l_history_group_item->callback_arg, 'd', l_group_prefix, a_group, + a_key, NULL, 0); + } + // looking for extra group + else { + history_extra_group_item_t * l_history_extra_group_item = NULL; + HASH_FIND_STR(s_history_extra_group_items, a_group, l_history_extra_group_item); + + if(l_history_extra_group_item) { + lock(); + dap_db_history_add('d', store_data, 1, l_history_extra_group_item->group_name_for_history); + unlock(); + if(l_history_extra_group_item->callback_notify) + l_history_extra_group_item->callback_notify(l_history_extra_group_item->callback_arg, 'd', + l_group_prefix, a_group, a_key, NULL, 0); + } + } + if(l_group_prefix) + DAP_DELETE(l_group_prefix); + } + DAP_DELETE(store_data->group); // temporary record is no longer used below: release its group copy and the record (a_key stays with the caller) + DAP_DELETE(store_data); + if(l_res>=0){ + // the l_res == 0 path above has already written the ".del" marker; only the already-deleted case still needs it + if(l_res) + global_db_gr_del_add(a_key, a_group, time(NULL)); + /*/ read del info + char *l_group = dap_strdup_printf("%s.del", a_group); + size_t l_data_size_out = 0; + dap_store_obj_t *l_objs = dap_chain_global_db_obj_gr_get(a_key, 
&l_data_size_out,l_group); + // update timestamp + if(l_objs){ + if(l_objs->timestamp<time(NULL)) + dap_store_obj_free(l_objs, l_data_size_out); + } + DAP_DELETE(l_group);*/ + return true; + } + return false; +} +bool dap_chain_global_db_del(char *a_key) +{ + return dap_chain_global_db_gr_del(a_key, GROUP_LOCAL_GENERAL); +} + +/** + * Read last item in global_db + * + * @param data_size[out] size of output array + * @return array (note:not Null-terminated string) on NULL in case of an error + */ +dap_store_obj_t* dap_chain_global_db_get_last(const char *a_group) +{ + // Read data + lock(); + dap_store_obj_t *l_store_obj = dap_chain_global_db_driver_read_last(a_group); + unlock(); + return l_store_obj; +} + +/** + * Read the entire database with condition into an array of size bytes + * + * @param data_size[out] size of output array + * @return array (note:not Null-terminated string) on NULL in case of an error + */ +dap_store_obj_t* dap_chain_global_db_cond_load(const char *a_group, uint64_t a_first_id, size_t *a_data_size_out) +{ + // Read data + lock(); + dap_store_obj_t *l_store_obj = dap_chain_global_db_driver_cond_read(a_group, a_first_id, a_data_size_out); + unlock(); + return l_store_obj; +} + +/** + * Read the entire database into an array of size bytes + * + * @param data_size[out] size of output array + * @return array (note:not Null-terminated string) on NULL in case of an error + */ +dap_global_db_obj_t* dap_chain_global_db_gr_load(const char *a_group, size_t *a_data_size_out) +{ + size_t count = 0; + // Read data + lock(); + dap_store_obj_t *l_store_obj = dap_chain_global_db_driver_read(a_group, NULL, &count); + unlock(); + if(!l_store_obj || !count){ + if(a_data_size_out) + *a_data_size_out = 0; + return NULL; + } + dap_global_db_obj_t *l_data = DAP_NEW_Z_SIZE(dap_global_db_obj_t, + (count + 1) * sizeof(dap_global_db_obj_t)); // last item in mass must be zero + // clear only last item + //memset(&l_data[count], 0, sizeof(dap_global_db_obj_t)); + 
for(size_t i = 0; i < count; i++) { + dap_store_obj_t *l_store_obj_cur = l_store_obj + i; + /*assert(l_store_obj_cur); + l_data[i] = DAP_NEW(dap_global_db_obj_t); + l_data[i]->key = dap_strdup(l_store_obj_cur->key); + l_data[i]->value_len = l_store_obj_cur->value_len; + l_data[i]->value = DAP_NEW_Z_SIZE(uint8_t, l_store_obj_cur->value_len + 1); + memcpy(l_data[i]->value, l_store_obj_cur->value, l_store_obj_cur->value_len);*/ + //dap_global_db_obj_t *l_data = l_data0 + i; + l_data[i].key = l_store_obj_cur->key; + l_data[i].value_len = l_store_obj_cur->value_len; + l_data[i].value = l_store_obj_cur->value; + DAP_DELETE(l_store_obj_cur->group); + } + // inner data are use in l_data0 + DAP_DELETE(l_store_obj); //dap_store_obj_free(l_store_obj, count); + if(a_data_size_out) + *a_data_size_out = count; + return l_data; +} + +dap_global_db_obj_t* dap_chain_global_db_load(size_t *a_data_size_out) +{ + return dap_chain_global_db_gr_load(GROUP_LOCAL_GENERAL, a_data_size_out); +} +/** + * Apply an array of a_objs_count dap_store_obj_t records (a_store_data) to the database; record type 'a' adds, 'd' deletes + * + * @return + */ +bool dap_chain_global_db_obj_save(void* a_store_data, size_t a_objs_count) +{ + // save/delete data + if(!a_objs_count) + return true; + + lock(); + int l_res = dap_chain_global_db_driver_appy(a_store_data, a_objs_count); + unlock(); + + // Extract prefix if added successfuly, add history log and call notify callback if present + if(!l_res) { + for(size_t i = 0; i < a_objs_count; i++) { + + dap_store_obj_t *a_store_obj = (dap_store_obj_t*)a_store_data + i; // cast before indexing: arithmetic on void* steps by bytes (GNU extension), not by records + if(a_store_obj->type == 'a') + // delete info about the deleted entry from the base if one present + global_db_gr_del_del(a_store_obj->key, a_store_obj->group); + else if(a_store_obj->type == 'd') + // add to Del group + global_db_gr_del_add(a_store_obj->key, a_store_obj->group, a_store_obj->timestamp); + + + history_group_item_t * l_history_group_item = NULL; + dap_store_obj_t* l_obj = (dap_store_obj_t*)a_store_data + i; + char * l_group_prefix = 
extract_group_prefix(l_obj->group); + if(l_group_prefix) + HASH_FIND_STR(s_history_group_items, l_group_prefix, l_history_group_item); + + if(l_history_group_item) { + if(l_history_group_item->auto_track) { + lock(); + dap_db_history_add((char)l_obj->type, l_obj, 1, l_history_group_item->group_name_for_history); + unlock(); + } + if(l_history_group_item->callback_notify) { + if(l_obj) { + l_history_group_item->callback_notify(l_history_group_item->callback_arg, + (const char)l_obj->type, + l_group_prefix, l_obj->group, l_obj->key, + l_obj->value, l_obj->value_len); + } else { + break; + } + } + } + // looking for extra group + else { + history_extra_group_item_t * l_history_extra_group_item = NULL; + HASH_FIND_STR(s_history_extra_group_items, l_obj->group, l_history_extra_group_item); + + if(l_history_extra_group_item) { + lock(); + dap_db_history_add((char)l_obj->type, l_obj, 1, l_history_extra_group_item->group_name_for_history); + unlock(); + if(l_history_extra_group_item->callback_notify) + l_history_extra_group_item->callback_notify(l_history_extra_group_item->callback_arg, + (const char)l_obj->type, + l_group_prefix, l_obj->group, l_obj->key, + l_obj->value, l_obj->value_len); + } + } + + DAP_DELETE(l_group_prefix); + } + + } + if(l_res >= 0) { + return true; + } + return false; +} + +bool dap_chain_global_db_gr_save(dap_global_db_obj_t* a_objs, size_t a_objs_count, const char *a_group) +{ + dap_store_obj_t *l_store_data = DAP_NEW_Z_SIZE(dap_store_obj_t, a_objs_count * sizeof(struct dap_store_obj)); + time_t l_timestamp = time(NULL); + char *l_group = dap_strdup(a_group); + for(size_t q = 0; q < a_objs_count; ++q) { + dap_store_obj_t *store_data_cur = l_store_data + q; + dap_global_db_obj_t *a_obj_cur = a_objs + q; + store_data_cur->key = a_obj_cur->key; + store_data_cur->group = l_group; + store_data_cur->value = a_obj_cur->value; + store_data_cur->value_len = a_obj_cur->value_len; + store_data_cur->timestamp = l_timestamp; + } + if(l_store_data) { + lock(); 
+ //log_it(L_DEBUG,"Added %u objects", a_objs_count); + int l_res = dap_chain_global_db_driver_add(l_store_data, a_objs_count); + unlock(); + if(!l_res) { + for(size_t i = 0; i < a_objs_count; i++) { + history_group_item_t * l_history_group_item = NULL; + dap_store_obj_t *l_obj = l_store_data + i; + + char * l_group_prefix = extract_group_prefix(l_obj->group); + if(l_group_prefix) + HASH_FIND_STR(s_history_group_items, l_group_prefix, l_history_group_item); + + if(l_history_group_item) { + if(l_history_group_item->auto_track) { + lock(); + dap_db_history_add('a', l_obj, 1, l_history_group_item->group_name_for_history); // log the current record; passing l_store_data logged record 0 on every iteration + unlock(); + } + if(l_history_group_item->callback_notify) { + if(l_obj) { + l_history_group_item->callback_notify(l_history_group_item->callback_arg, 'a', + l_group_prefix, l_obj->group, l_obj->key, + l_obj->value, l_obj->value_len); + } else { + break; + } + } + } + DAP_DELETE(l_group_prefix); + } + + } + DAP_DELETE(l_store_data); //dap_store_obj_free(store_data, a_objs_count); + if(!l_res) { + DAP_DELETE(l_group); + return true; + } + } + DAP_DELETE(l_group); + return false; +} + +bool dap_chain_global_db_save(dap_global_db_obj_t* a_objs, size_t a_objs_count) +{ + return dap_chain_global_db_gr_save(a_objs, a_objs_count, GROUP_LOCAL_GENERAL); +} + +/** + * Calc hash for data + * + * return hash or NULL + */ +char* dap_chain_global_db_hash(const uint8_t *data, size_t data_size) +{ + return dap_chain_global_db_driver_hash(data, data_size); +} + +/** + * Parse data from dap_db_log_pack() + * + * return dap_store_obj_t* + */ +void* dap_db_log_unpack(const void *a_data, size_t a_data_size, size_t *a_store_obj_count) +{ + const dap_store_obj_pkt_t *l_pkt = (const dap_store_obj_pkt_t*) a_data; + if(!l_pkt || a_data_size < sizeof(dap_store_obj_pkt_t) || l_pkt->data_size != ((size_t) a_data_size - sizeof(dap_store_obj_pkt_t))) // size check first: a short buffer must not be read as a header, and the subtraction would wrap + return NULL; + size_t l_store_obj_count = 0; + dap_store_obj_t *l_obj = dap_store_unpacket_multiple(l_pkt, &l_store_obj_count); + if(a_store_obj_count) + 
*a_store_obj_count = l_store_obj_count; + + return l_obj; +} + +/** + * Get timestamp from dap_db_log_pack(); returns 0 when the buffer is NULL, shorter than the packet header, or its size does not match the header + */ +time_t dap_db_log_unpack_get_timestamp(uint8_t *a_data, size_t a_data_size) +{ + dap_store_obj_pkt_t *l_pkt = (dap_store_obj_pkt_t*) a_data; + if(!l_pkt || a_data_size < sizeof(dap_store_obj_pkt_t) || l_pkt->data_size != (a_data_size - sizeof(dap_store_obj_pkt_t))) // size check first: a short buffer must not be read as a header, and the subtraction would wrap + return 0; + return l_pkt->timestamp; +} + +/** + * Get log diff as string + */ +char* dap_db_log_get_diff(size_t *a_data_size_out) +{ + //DapList *l_group_list = dap_list_append(l_group_list,GROUP_HISTORY); + size_t l_data_size_out = 0; + dap_global_db_obj_t *l_objs = dap_chain_global_db_gr_load(GROUP_LOCAL_HISTORY, &l_data_size_out); + // make keys & val vector + char **l_keys_vals0 = DAP_NEW_SIZE(char*, sizeof(char*) * (l_data_size_out * 2 + 2)); + char **l_keys_vals = l_keys_vals0 + 1; + size_t i; + // first element - number of records + l_keys_vals0[0] = dap_strdup_printf("%zu", l_data_size_out); // size_t needs %zu; %d reads it as int + for(i = 0; i < l_data_size_out; i++) { + dap_global_db_obj_t *l_obj_cur = l_objs + i; + l_keys_vals[i] = l_obj_cur->key; + l_keys_vals[i + l_data_size_out] = (char*) l_obj_cur->value; + } + if(a_data_size_out) + *a_data_size_out = l_data_size_out; + // last element - NULL (marker) + l_keys_vals[l_data_size_out * 2] = NULL; + char *l_keys_vals_flat = dap_strjoinv(GLOBAL_DB_HIST_KEY_SEPARATOR, l_keys_vals0); + DAP_DELETE(l_keys_vals0[0]); + DAP_DELETE(l_keys_vals0); + //dap_strfreev(l_keys_vals0); + dap_chain_global_db_objs_delete(l_objs, l_data_size_out); + return l_keys_vals_flat; +} diff --git a/libdap-chain-global-db/dap_chain_global_db.h b/libdap-chain-global-db/dap_chain_global_db.h new file mode 100755 index 0000000000000000000000000000000000000000..0a0ecda751202a7c9370571baee4a83a295b61c6 --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db.h @@ -0,0 +1,142 @@ +#pragma once + +#include <stdint.h> +#include <stdbool.h> +#include <time.h> + +#include "dap_common.h" +#include "dap_config.h" +#include "dap_list.h" +#include 
"dap_chain_common.h" +#include "dap_chain_global_db_driver.h" + + +#define GROUP_LOCAL_HISTORY "local.history" +#define GROUP_LOCAL_NODE_LAST_ID "local.node.last_id" +#define GROUP_LOCAL_GENERAL "local.general" +#define GROUP_LOCAL_NODE_ADDR "local.node-addr" + +typedef struct dap_global_db_obj { + uint64_t id; + char *key; + uint8_t *value; + size_t value_len; +}DAP_ALIGN_PACKED dap_global_db_obj_t, *pdap_global_db_obj_t; + +typedef void (*dap_global_db_obj_callback_notify_t) (void * a_arg, const char a_op_code, const char * a_prefix, const char * a_group, + const char * a_key, const void * a_value, + const size_t a_value_len); + +/** + * Flush DB + */ +int dap_chain_global_db_flush(void); + +/** + * Clean struct dap_global_db_obj_t + */ +void dap_chain_global_db_obj_clean(dap_global_db_obj_t *obj); +/** + * Delete struct dap_global_db_obj_t + */ +void dap_chain_global_db_obj_delete(dap_global_db_obj_t *obj); + +/** + * Delete mass of struct dap_global_db_obj_t + */ +void dap_chain_global_db_objs_delete(dap_global_db_obj_t *objs, size_t a_count); + +int dap_chain_global_db_init(dap_config_t * a_config); + +void dap_chain_global_db_deinit(void); + +/* + * Get history group by group name + */ +char* dap_chain_global_db_get_history_group_by_group_name(const char * a_group_name); + +/** + * Setup callbacks and filters + */ +// Add group prefix that will be tracking all changes +void dap_chain_global_db_add_history_group_prefix(const char * a_group_prefix, const char * a_group_name_for_history); +void dap_chain_global_db_add_history_callback_notify(const char * a_group_prefix, + dap_global_db_obj_callback_notify_t a_callback, void * a_arg); +const char* dap_chain_global_db_add_history_extra_group(const char * a_group_name, dap_chain_node_addr_t *a_nodes, size_t *a_nodes_count); +void dap_chain_global_db_add_history_extra_group_callback_notify(const char * a_group_prefix, + dap_global_db_obj_callback_notify_t a_callback, void * a_arg); +/** + * Get entry from base + */ 
+void* dap_chain_global_db_obj_get(const char *a_key, const char *a_group); +dap_store_obj_t* dap_chain_global_db_obj_gr_get(const char *a_key, size_t *a_data_len_out, const char *a_group); +uint8_t * dap_chain_global_db_gr_get(const char *a_key, size_t *a_data_out, const char *a_group); +uint8_t * dap_chain_global_db_get(const char *a_key, size_t *a_data_out); + +/** + * Set one entry to base + */ +bool dap_chain_global_db_gr_set(char *a_key, void *a_value, size_t a_value_len,const char *a_group); +bool dap_chain_global_db_set( char *a_key, void *a_value, size_t a_value_len); + +/** + * Delete entry from base + */ +bool dap_chain_global_db_gr_del(char *a_key, const char *a_group); +bool dap_chain_global_db_del(char *a_key); + +/** + * Get timestamp of the deleted entry + */ +time_t global_db_gr_del_get_timestamp(const char *a_group, char *a_key); + +/** + * Read the entire database into an array of size bytes + * + * @param data_size[out] size of output array + * @return array (note:not Null-terminated string) on NULL in case of an error + */ +dap_store_obj_t* dap_chain_global_db_get_last(const char *a_group); +dap_store_obj_t* dap_chain_global_db_cond_load(const char *a_group, uint64_t a_first_id, size_t *a_data_size_out); +dap_global_db_obj_t* dap_chain_global_db_gr_load(const char *a_group, size_t *a_data_size_out); +dap_global_db_obj_t* dap_chain_global_db_load(size_t *a_data_size_out); + +/** + * Write to the database from an array of data_size bytes + * + * @return + */ +bool dap_chain_global_db_obj_save(void* a_store_data, size_t a_objs_count); +bool dap_chain_global_db_gr_save(dap_global_db_obj_t* a_objs, size_t a_objs_count, const char *a_group); +bool dap_chain_global_db_save(dap_global_db_obj_t* a_objs, size_t a_objs_count); + +/** + * Calc hash for data + * + * return hash or NULL + */ +char* dap_chain_global_db_hash(const uint8_t *data, size_t data_size); +char* dap_chain_global_db_hash_fast(const uint8_t *data, size_t data_size); + +// Get data 
according the history log +uint8_t* dap_db_log_pack(dap_global_db_obj_t *a_obj, size_t *a_data_size_out); + +// Get data according the history log +//char* dap_db_history_tx(dap_chain_hash_fast_t * a_tx_hash, const char *a_group_mempool); +//char* dap_db_history_addr(dap_chain_addr_t * a_addr, const char *a_group_mempool); +//char* dap_db_history_filter(dap_chain_addr_t * a_addr, const char *a_group_mempool); + +// Parse data from dap_db_log_pack() +void* dap_db_log_unpack(const void *a_data, size_t a_data_size, size_t *a_store_obj_count); +// Get timestamp from dap_db_log_pack() +//time_t dap_db_log_unpack_get_timestamp(uint8_t *a_data, size_t a_data_size); + +// Get last id in log +uint64_t dap_db_log_get_group_history_last_id(const char *a_history_group_name); +uint64_t dap_db_log_get_last_id(void); +// Get log diff as list +dap_list_t* dap_db_log_get_list(uint64_t first_id); +// Free list getting from dap_db_log_get_list() +void dap_db_log_del_list(dap_list_t *a_list); +// Get log diff as string +char* dap_db_log_get_diff(size_t *a_data_size_out); diff --git a/libdap-chain-global-db/dap_chain_global_db_driver.c b/libdap-chain-global-db/dap_chain_global_db_driver.c new file mode 100755 index 0000000000000000000000000000000000000000..775bf47fda6b453fd61c004c2f055fd70c3bd53c --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db_driver.c @@ -0,0 +1,645 @@ +/* + * Authors: + * Alexander Lysikov <alexander.lysikov@demlabs.net> + * DeM Labs Inc. https://demlabs.net + * Kelvin Project https://github.com/kelvinblockchain + * Copyright (c) 2019 + * All rights reserved. + + This file is part of DAP (Deus Applications Prototypes) the open source project + + DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + DAP is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <pthread.h> +#include <assert.h> + +#include "dap_common.h" +#include "dap_strfuncs.h" +#include "dap_list.h" +#include "dap_hash.h" + +#include "dap_chain_global_db_driver_sqlite.h" +#include "dap_chain_global_db_driver_cdb.h" +#include "dap_chain_global_db_driver.h" + +#define LOG_TAG "db_driver" + +static char *s_used_driver = NULL; + +//#define USE_WRITE_BUFFER + +#ifdef USE_WRITE_BUFFER +static int save_write_buf(void); + +// for write buffer +pthread_mutex_t s_mutex_add_start = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t s_mutex_add_end = PTHREAD_MUTEX_INITIALIZER; +//pthread_rwlock_rdlock +// new data in buffer to write +pthread_mutex_t s_mutex_cond = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t s_cond_add_end; // = PTHREAD_COND_INITIALIZER; +// writing ended +pthread_mutex_t s_mutex_write_end = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t s_cond_write_end; // = PTHREAD_COND_INITIALIZER; + +dap_list_t *s_list_begin = NULL; +dap_list_t *s_list_end = NULL; + +pthread_t s_write_buf_thread; +volatile static bool s_write_buf_state = 0; +static void* func_write_buf(void * arg); +#endif //USE_WRITE_BUFFER + +static dap_db_driver_callbacks_t s_drv_callback; + +/** + * Select driver + * driver_name may be "ldb", "sqlite" + * + * return 0 OK, <0 Error + */ +int dap_db_driver_init(const char *a_driver_name, const char *a_filename_db) +{ + int l_ret = -1; + if(s_used_driver) + dap_db_driver_deinit(); + s_used_driver = dap_strdup(a_driver_name); + memset(&s_drv_callback, 0, 
sizeof(dap_db_driver_callbacks_t)); + if(!dap_strcmp(s_used_driver, "ldb")) + l_ret = -1; + else if(!dap_strcmp(s_used_driver, "sqlite")) + l_ret = dap_db_driver_sqlite_init(a_filename_db, &s_drv_callback); + else if(!dap_strcmp(s_used_driver, "cdb")) + l_ret = dap_db_driver_cdb_init(a_filename_db, &s_drv_callback); + else + log_it(L_ERROR, "Unknown global_db driver \"%s\"", a_driver_name); +#ifdef USE_WRITE_BUFFER + if(!l_ret) { + pthread_condattr_t l_condattr; + pthread_condattr_init(&l_condattr); + pthread_condattr_setclock(&l_condattr, CLOCK_MONOTONIC); + pthread_cond_init(&s_cond_add_end, &l_condattr); + pthread_cond_init(&s_cond_write_end, &l_condattr); + // thread for save buffer to database + s_write_buf_state = true; + pthread_create(&s_write_buf_thread, NULL, func_write_buf, NULL); + } +#endif + return l_ret; +} + +/** + * Shutting down the db library + */ + +void dap_db_driver_deinit(void) +{ +#ifdef USE_WRITE_BUFFER + // wait for close thread + { + pthread_mutex_lock(&s_mutex_cond); + pthread_cond_broadcast(&s_cond_add_end); + pthread_mutex_unlock(&s_mutex_cond); + + s_write_buf_state = false; + pthread_join(s_write_buf_thread, NULL); + } + + //save_write_buf(); + pthread_mutex_lock(&s_mutex_add_end); + pthread_mutex_lock(&s_mutex_add_start); + while(s_list_begin != s_list_end) { + // free memory + dap_store_obj_free((dap_store_obj_t*) s_list_begin->data, 1); + dap_list_free1(s_list_begin); + s_list_begin = dap_list_next(s_list_begin); + } + //dap_store_obj_free((dap_store_obj_t*) s_list_begin->data, 1); + dap_list_free1(s_list_begin); + s_list_begin = s_list_end = NULL; + pthread_mutex_unlock(&s_mutex_add_start); + pthread_mutex_unlock(&s_mutex_add_end); + pthread_cond_destroy(&s_cond_add_end); +#endif + // deinit driver + if(s_drv_callback.deinit) + s_drv_callback.deinit(); + if(s_used_driver){ + DAP_DELETE(s_used_driver); + s_used_driver = NULL; + } +} + +int dap_db_driver_flush(void) +{ + return s_drv_callback.flush(); +} + +dap_store_obj_t* 
dap_store_obj_copy(dap_store_obj_t *a_store_obj, size_t a_store_count) +{ + if(!a_store_obj || !a_store_count) + return NULL; + dap_store_obj_t *l_store_obj = DAP_NEW_SIZE(dap_store_obj_t, sizeof(dap_store_obj_t) * a_store_count); + for(size_t i = 0; i < a_store_count; i++) { + dap_store_obj_t *l_store_obj_dst = l_store_obj + i; + dap_store_obj_t *l_store_obj_src = a_store_obj + i; + memcpy(l_store_obj_dst, l_store_obj_src, sizeof(dap_store_obj_t)); + l_store_obj_dst->group = dap_strdup(l_store_obj_src->group); + l_store_obj_dst->key = dap_strdup(l_store_obj_src->key); + l_store_obj_dst->value = DAP_NEW_SIZE(uint8_t, l_store_obj_dst->value_len); + memcpy(l_store_obj_dst->value, l_store_obj_src->value, l_store_obj_dst->value_len); + } + return l_store_obj; +} + +void dap_store_obj_free(dap_store_obj_t *a_store_obj, size_t a_store_count) +{ + if(!a_store_obj) + return; + for(size_t i = 0; i < a_store_count; i++) { + dap_store_obj_t *l_store_obj_cur = a_store_obj + i; + DAP_DELETE(l_store_obj_cur->group); + DAP_DELETE(l_store_obj_cur->key); + DAP_DELETE(l_store_obj_cur->value); + } + DAP_DELETE(a_store_obj); +} + +static size_t dap_db_get_size_pdap_store_obj_t(pdap_store_obj_t store_obj) +{ + size_t size = sizeof(uint32_t) + 2 * sizeof(uint16_t) + sizeof(size_t) + sizeof(time_t) + + sizeof(uint64_t) + dap_strlen(store_obj->group) + + dap_strlen(store_obj->key) + store_obj->value_len; + return size; +} + +/** + * serialization + * @param a_store_obj_count count of structures store_obj + * @param a_timestamp create data time + * @param a_size_out[out] size of output structure + * @return NULL in case of an error + */ +dap_store_obj_pkt_t *dap_store_packet_multiple(pdap_store_obj_t a_store_obj, time_t a_timestamp, + size_t a_store_obj_count) +{ + if(!a_store_obj || a_store_obj_count < 1) + return NULL; + size_t l_data_size_out = sizeof(uint32_t); // size of output data + // calculate output structure size + for(size_t l_q = 0; l_q < a_store_obj_count; ++l_q) + 
l_data_size_out += dap_db_get_size_pdap_store_obj_t(&a_store_obj[l_q]); + + dap_store_obj_pkt_t *l_pkt = DAP_NEW_Z_SIZE(dap_store_obj_pkt_t, sizeof(dap_store_obj_pkt_t) + l_data_size_out); + l_pkt->data_size = l_data_size_out; + l_pkt->timestamp = a_timestamp; + uint64_t l_offset = 0; + uint32_t l_count = (uint32_t) a_store_obj_count; + memcpy(l_pkt->data + l_offset, &l_count, sizeof(uint32_t)); + l_offset += sizeof(uint32_t); + for(size_t l_q = 0; l_q < a_store_obj_count; ++l_q) { + dap_store_obj_t obj = a_store_obj[l_q]; + //uint16_t section_size = (uint16_t) dap_strlen(obj.section); + uint16_t group_size = (uint16_t) dap_strlen(obj.group); + uint16_t key_size = (uint16_t) dap_strlen(obj.key); + memcpy(l_pkt->data + l_offset, &obj.type, sizeof(int)); + l_offset += sizeof(int); + //memcpy(l_pkt->data + l_offset, §ion_size, sizeof(uint16_t)); + //l_offset += sizeof(uint16_t); + //memcpy(l_pkt->data + l_offset, obj.section, section_size); + //l_offset += section_size; + memcpy(l_pkt->data + l_offset, &group_size, sizeof(uint16_t)); + l_offset += sizeof(uint16_t); + memcpy(l_pkt->data + l_offset, obj.group, group_size); + l_offset += group_size; + memcpy(l_pkt->data + l_offset, &obj.id, sizeof(uint64_t)); + l_offset += sizeof(uint64_t); + memcpy(l_pkt->data + l_offset, &obj.timestamp, sizeof(time_t)); + l_offset += sizeof(time_t); + memcpy(l_pkt->data + l_offset, &key_size, sizeof(uint16_t)); + l_offset += sizeof(uint16_t); + memcpy(l_pkt->data + l_offset, obj.key, key_size); + l_offset += key_size; + memcpy(l_pkt->data + l_offset, &obj.value_len, sizeof(size_t)); + l_offset += sizeof(size_t); + memcpy(l_pkt->data + l_offset, obj.value, obj.value_len); + l_offset += obj.value_len; + } + assert(l_data_size_out == l_offset); + return l_pkt; +} +/** + * deserialization + * @param store_obj_count[out] count of the output structures store_obj + * @return NULL in case of an error* + */ + +dap_store_obj_t *dap_store_unpacket_multiple(const dap_store_obj_pkt_t *pkt, size_t 
*store_obj_count) +{ + if(!pkt || pkt->data_size < 1) + return NULL; + uint64_t offset = 0; + uint32_t count; + memcpy(&count, pkt->data, sizeof(uint32_t)); + offset += sizeof(uint32_t); + dap_store_obj_t *store_obj = DAP_NEW_Z_SIZE(dap_store_obj_t, count * sizeof(struct dap_store_obj)); + for(size_t q = 0; q < count; ++q) { + dap_store_obj_t *obj = store_obj + q; + uint16_t str_size; + memcpy(&obj->type, pkt->data + offset, sizeof(int)); + offset += sizeof(int); + + //memcpy(&str_size, pkt->data + offset, sizeof(uint16_t)); + //offset += sizeof(uint16_t); + //obj->section = DAP_NEW_Z_SIZE(char, str_size + 1); + //memcpy(obj->section, pkt->data + offset, str_size); + //offset += str_size; + + memcpy(&str_size, pkt->data + offset, sizeof(uint16_t)); + offset += sizeof(uint16_t); + obj->group = DAP_NEW_Z_SIZE(char, str_size + 1); + memcpy(obj->group, pkt->data + offset, str_size); + offset += str_size; + + memcpy(&obj->id, pkt->data + offset, sizeof(uint64_t)); + offset += sizeof(uint64_t); + + memcpy(&obj->timestamp, pkt->data + offset, sizeof(time_t)); + offset += sizeof(time_t); + + memcpy(&str_size, pkt->data + offset, sizeof(uint16_t)); + offset += sizeof(uint16_t); + obj->key = DAP_NEW_Z_SIZE(char, str_size + 1); + memcpy(obj->key, pkt->data + offset, str_size); + offset += str_size; + + memcpy(&obj->value_len, pkt->data + offset, sizeof(size_t)); + offset += sizeof(size_t); + + obj->value = DAP_NEW_Z_SIZE(uint8_t, obj->value_len + 1); + memcpy(obj->value, pkt->data + offset, obj->value_len); + offset += obj->value_len; + } + assert(pkt->data_size == offset); + if(store_obj_count) + *store_obj_count = count; + return store_obj; +} + +/** + * Calc hash for data + * + * return hash or NULL + */ +char* dap_chain_global_db_driver_hash(const uint8_t *data, size_t data_size) +{ + if(!data || data_size <= 0) + return NULL; + dap_chain_hash_fast_t l_hash; + memset(&l_hash, 0, sizeof(dap_chain_hash_fast_t)); + dap_hash_fast(data, data_size, &l_hash); + size_t a_str_max 
= (sizeof(l_hash.raw) + 1) * 2 + 2; /* heading 0x */ + char *a_str = DAP_NEW_Z_SIZE(char, a_str_max); + size_t hash_len = (size_t)dap_chain_hash_fast_to_str(&l_hash, a_str, a_str_max); + if(!hash_len) { + DAP_DELETE(a_str); + return NULL; + } + return a_str; +} + +/** + * Wait data to write buffer + * return 0 - Ok, 1 - timeout + */ +static int wait_data(pthread_mutex_t *a_mutex, pthread_cond_t *a_cond, int l_timeout_ms) +{ + int l_res = 0; + pthread_mutex_lock(a_mutex); + // endless waiting + if(l_timeout_ms == -1) + l_res = pthread_cond_wait(a_cond, a_mutex); + // waiting no more than timeout in milliseconds + else { + struct timespec l_to; + clock_gettime(CLOCK_MONOTONIC, &l_to); + int64_t l_nsec_new = l_to.tv_nsec + l_timeout_ms * 1000000ll; + // if the new number of nanoseconds is more than a second + if(l_nsec_new > (long) 1e9) { + l_to.tv_sec += l_nsec_new / (long) 1e9; + l_to.tv_nsec = l_nsec_new % (long) 1e9; + } + else + l_to.tv_nsec = (long) l_nsec_new; + l_res = pthread_cond_timedwait(a_cond, a_mutex, &l_to); + } + pthread_mutex_unlock(a_mutex); + if(l_res == ETIMEDOUT) + return 1; + return l_res; +} + +#ifdef USE_WRITE_BUFFER +// return 0 if buffer empty, 1 data present +static bool check_fill_buf(void) +{ + dap_list_t *l_list_begin; + dap_list_t *l_list_end; + pthread_mutex_lock(&s_mutex_add_start); + pthread_mutex_lock(&s_mutex_add_end); + l_list_end = s_list_end; + l_list_begin = s_list_begin; + pthread_mutex_unlock(&s_mutex_add_end); + pthread_mutex_unlock(&s_mutex_add_start); + + bool l_ret = (l_list_begin != l_list_end) ? 
1 : 0; +// if(l_ret) +// printf("** Wait s_beg=0x%x s_end=0x%x \n", l_list_begin, l_list_end); + return l_ret; +} + +// wait apply write buffer +static void wait_write_buf() +{ +// printf("** Start wait data\n"); + // wait data + while(1) { + if(!check_fill_buf()) + break; + if(!wait_data(&s_mutex_write_end, &s_cond_write_end, 50)) + break; + } +// printf("** End wait data\n"); +} + +// save data from buffer to database +static int save_write_buf(void) +{ + dap_list_t *l_list_end; + // fix end of buffer + pthread_mutex_lock(&s_mutex_add_end); + l_list_end = s_list_end; + pthread_mutex_unlock(&s_mutex_add_end); + // save data from begin to fixed end + pthread_mutex_lock(&s_mutex_add_start); + if(s_list_begin != l_list_end) { + if(s_drv_callback.transaction_start) + s_drv_callback.transaction_start(); + int cnt = 0; + while(s_list_begin != l_list_end) { + // apply to database + dap_store_obj_t *l_obj = s_list_begin->data; + assert(l_obj); + if(s_drv_callback.apply_store_obj) { + int l_ret_tmp = s_drv_callback.apply_store_obj(l_obj); + if(l_ret_tmp == 1) { + log_it(L_INFO, "item is missing (may be already deleted) %s/%s\n", l_obj->group, l_obj->key); + l_ret = 1; + } + if(l_ret_tmp < 0) { + log_it(L_ERROR, "Can't write item %s/%s\n", l_obj->group, l_obj->key); + l_ret -= 1; + } + /*if(!s_drv_callback.apply_store_obj(l_obj)) { + //log_it(L_INFO, "Write item Ok %s/%s\n", l_obj->group, l_obj->key); + } + else { + log_it(L_ERROR, "Can't write item %s/%s\n", l_obj->group, l_obj->key); + }*/ + } + + s_list_begin = dap_list_next(s_list_begin); +// printf("** ap2*record *l_beg=0x%x l_nex=0x%x d_beg=0x%x l_end=0x%x d_end=0x%x sl_end=0x%x\n", s_list_begin, + // s_list_begin->next, s_list_begin->data, l_list_end, l_list_end->data, s_list_end); + + //printf("** free data=0x%x list=0x%x\n", s_list_begin->prev->data, s_list_begin->prev); + // free memory + dap_store_obj_free((dap_store_obj_t*) s_list_begin->prev->data, 1); + dap_list_free1(s_list_begin->prev); + s_list_begin->prev 
= NULL; + cnt++; + } + if(s_drv_callback.transaction_end) + s_drv_callback.transaction_end(); + //printf("** writing ended cnt=%d\n", cnt); + // writing ended + pthread_mutex_lock(&s_mutex_write_end); + pthread_cond_broadcast(&s_cond_write_end); + pthread_mutex_unlock(&s_mutex_write_end); + } + pthread_mutex_unlock(&s_mutex_add_start); + return 0; +} + +// thread for save data from buffer to database +static void* func_write_buf(void * arg) +{ + while(1) { + if(!s_write_buf_state) + break; + //save_write_buf + if(save_write_buf() == 0) { + if(!s_write_buf_state) + break; + // wait data + wait_data(&s_mutex_cond, &s_cond_add_end, 2000); // 2 sec + } + } + pthread_exit(0); +} +#endif //USE_WRITE_BUFFER + +int dap_chain_global_db_driver_appy(pdap_store_obj_t a_store_obj, size_t a_store_count) +{ + //dap_store_obj_t *l_store_obj = dap_store_obj_copy(a_store_obj, a_store_count); + if(!a_store_obj || !a_store_count) + return -1; +#ifdef USE_WRITE_BUFFER + // add all records into write buffer + pthread_mutex_lock(&s_mutex_add_end); + for(size_t i = 0; i < a_store_count; i++) { + dap_store_obj_t *l_store_obj_cur = dap_store_obj_copy(a_store_obj + i, 1); + // first record in buf + if(!s_list_end) { + s_list_end = dap_list_append(s_list_end, l_store_obj_cur); + pthread_mutex_lock(&s_mutex_add_start); + s_list_begin = s_list_end; + pthread_mutex_unlock(&s_mutex_add_start); + //log_it(L_DEBUG,"First record in list: *!!add record=0x%x / 0x%x obj=0x%x / 0x%x\n", s_list_end, s_list_end->data, s_list_end->prev); + } + else + s_list_end->data = l_store_obj_cur; + dap_list_append(s_list_end, NULL); + s_list_end = dap_list_last(s_list_end); + //log_it(L_DEBUG, "**+add record l_cur=0x%x / 0x%x l_new=0x%x / 0x%x\n", s_list_end->prev, s_list_end->prev->data,s_list_end, s_list_end->data); + } + // buffer changed + pthread_mutex_lock(&s_mutex_cond); + pthread_cond_broadcast(&s_cond_add_end); + pthread_mutex_unlock(&s_mutex_cond); + pthread_mutex_unlock(&s_mutex_add_end); + return 0; 
+#else + int l_ret = 0; + // apply to database + if(a_store_count > 1 && s_drv_callback.transaction_start) + s_drv_callback.transaction_start(); + + if(s_drv_callback.apply_store_obj) + for(size_t i = 0; i < a_store_count; i++) { + dap_store_obj_t *l_store_obj_cur = a_store_obj + i; + assert(l_store_obj_cur); + int l_ret_tmp = s_drv_callback.apply_store_obj(l_store_obj_cur); + if(l_ret_tmp == 1) { + log_it(L_INFO, "item is missing (may be already deleted) %s/%s\n", l_store_obj_cur->group, l_store_obj_cur->key); + l_ret = 1; + } + if(l_ret_tmp < 0) { + log_it(L_ERROR, "Can't write item %s/%s\n", l_store_obj_cur->group, l_store_obj_cur->key); + l_ret -= 1; + } + } + + if(a_store_count > 1 && s_drv_callback.transaction_end) + s_drv_callback.transaction_end(); + return l_ret; +#endif + +} + +int dap_chain_global_db_driver_add(pdap_store_obj_t a_store_obj, size_t a_store_count) +{ + for(size_t i = 0; i < a_store_count; i++) + a_store_obj[i].type = 'a'; + return dap_chain_global_db_driver_appy(a_store_obj, a_store_count); +} + +int dap_chain_global_db_driver_delete(pdap_store_obj_t a_store_obj, size_t a_store_count) +{ + for(size_t i = 0; i < a_store_count; i++) + a_store_obj[i].type = 'd'; + return dap_chain_global_db_driver_appy(a_store_obj, a_store_count); +} + +/** + * Read the number of items + * + * a_group - group name + * a_id - from this id + */ +size_t dap_chain_global_db_driver_count(const char *a_group, uint64_t id) +{ + size_t l_count_out = 0; + // read the number of items + if(s_drv_callback.read_count_store) + l_count_out = s_drv_callback.read_count_store(a_group, id); + return l_count_out; +} + +/** + * Get group matching the pattern + * Check whether the groups match the pattern a_group_mask, which is a shell wildcard pattern + * patterns: [] {} [!] * ? 
+ * https://en.wikipedia.org/wiki/Glob_(programming) + * a_group_mask - group mask + */ +dap_list_t* dap_chain_global_db_driver_get_groups_by_mask(const char *a_group_mask) +{ + dap_list_t *l_list = NULL; + if(s_drv_callback.get_groups_by_mask) + l_list = s_drv_callback.get_groups_by_mask(a_group_mask); + return l_list; +} + + +/** + * Read last items + * + * a_group - group name + */ +dap_store_obj_t* dap_chain_global_db_driver_read_last(const char *a_group) +{ + dap_store_obj_t *l_ret = NULL; +#ifdef USE_WRITE_BUFFER + // wait apply write buffer + wait_write_buf(); +#endif + // read records using the selected database engine + if(s_drv_callback.read_last_store_obj) + l_ret = s_drv_callback.read_last_store_obj(a_group); + return l_ret; +} + +/** + * Read several items + * + * a_group - group name + * a_key - key name, may by NULL, it means reading the whole group + * a_id - from this id + * a_count_out[in], how many items to read, 0 - no limits + * a_count_out[out], how many items was read + */ +dap_store_obj_t* dap_chain_global_db_driver_cond_read(const char *a_group, uint64_t id, size_t *a_count_out) +{ + dap_store_obj_t *l_ret = NULL; +#ifdef USE_WRITE_BUFFER + // wait apply write buffer + wait_write_buf(); +#endif + // read records using the selected database engine + if(s_drv_callback.read_cond_store_obj) + l_ret = s_drv_callback.read_cond_store_obj(a_group, id, a_count_out); + return l_ret; +} + +/** + * Read several items + * + * a_group - group name + * a_key - key name, may by NULL, it means reading the whole group + * a_count_out[in], how many items to read, 0 - no limits + * a_count_out[out], how many items was read + */ +dap_store_obj_t* dap_chain_global_db_driver_read(const char *a_group, const char *a_key, size_t *a_count_out) +{ + dap_store_obj_t *l_ret = NULL; +#ifdef USE_WRITE_BUFFER + // wait apply write buffer + wait_write_buf(); +#endif + // read records using the selected database engine + if(s_drv_callback.read_store_obj) + l_ret = 
s_drv_callback.read_store_obj(a_group, a_key, a_count_out); + return l_ret; +} + +/** + * Check an element in the database + * + * a_group - group name + * a_key - key name + */ +bool dap_chain_global_db_driver_is(const char *a_group, const char *a_key) +{ + bool l_ret = NULL; + // read records using the selected database engine + if(s_drv_callback.is_obj) + l_ret = s_drv_callback.is_obj(a_group, a_key); + return l_ret; +} diff --git a/libdap-chain-global-db/dap_chain_global_db_driver.h b/libdap-chain-global-db/dap_chain_global_db_driver.h new file mode 100755 index 0000000000000000000000000000000000000000..c8d7ef9c5297971699256e5cfe12ed8e68f53e9d --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db_driver.h @@ -0,0 +1,96 @@ +/* + * Authors: + * Alexander Lysikov <alexander.lysikov@demlabs.net> + * DeM Labs Inc. https://demlabs.net + * Kelvin Project https://github.com/kelvinblockchain + * Copyright (c) 2019 + * All rights reserved. + + This file is part of DAP (Deus Applications Prototypes) the open source project + + DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + DAP is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include <stddef.h> +#include <stdint.h> +#include "dap_common.h" +#include "dap_list.h" + +typedef struct dap_store_obj { + uint64_t id; + time_t timestamp; + uint8_t type; + char *group; + char *key; + //const char *c_group; + const char *c_key; + uint8_t *value; + size_t value_len; +}DAP_ALIGN_PACKED dap_store_obj_t, *pdap_store_obj_t; + +typedef struct dap_store_obj_pkt { + time_t timestamp; + size_t data_size; + uint8_t data[]; +}__attribute__((packed)) dap_store_obj_pkt_t; + +typedef int (*dap_db_driver_write_callback_t)(dap_store_obj_t*); +typedef dap_store_obj_t* (*dap_db_driver_read_callback_t)(const char *,const char *, size_t *); +typedef dap_store_obj_t* (*dap_db_driver_read_cond_callback_t)(const char *,uint64_t , size_t *); +typedef dap_store_obj_t* (*dap_db_driver_read_last_callback_t)(const char *); +typedef size_t (*dap_db_driver_read_count_callback_t)(const char *,uint64_t); +typedef dap_list_t* (*dap_db_driver_get_groups_callback_t)(const char *); +typedef bool (*dap_db_driver_is_obj_callback_t)(const char *, const char *); +typedef int (*dap_db_driver_callback_t)(void); + +typedef struct dap_db_driver_callbacks { + dap_db_driver_write_callback_t apply_store_obj; + dap_db_driver_read_callback_t read_store_obj; + dap_db_driver_read_last_callback_t read_last_store_obj; + dap_db_driver_read_cond_callback_t read_cond_store_obj; + dap_db_driver_read_count_callback_t read_count_store; + dap_db_driver_get_groups_callback_t get_groups_by_mask; + dap_db_driver_is_obj_callback_t is_obj; + dap_db_driver_callback_t transaction_start; + dap_db_driver_callback_t transaction_end; + dap_db_driver_callback_t deinit; + dap_db_driver_callback_t flush; +} dap_db_driver_callbacks_t; + + +int dap_db_driver_init(const char *driver_name, const char *a_filename_db); +void dap_db_driver_deinit(void); + +dap_store_obj_t* dap_store_obj_copy(dap_store_obj_t *a_store_obj, size_t a_store_count); +void dap_store_obj_free(dap_store_obj_t *a_store_obj, 
size_t a_store_count); +int dap_db_driver_flush(void); + +char* dap_chain_global_db_driver_hash(const uint8_t *data, size_t data_size); + +int dap_chain_global_db_driver_appy(pdap_store_obj_t a_store_obj, size_t a_store_count); +int dap_chain_global_db_driver_add(pdap_store_obj_t a_store_obj, size_t a_store_count); +int dap_chain_global_db_driver_delete(pdap_store_obj_t a_store_obj, size_t a_store_count); +dap_store_obj_t* dap_chain_global_db_driver_read_last(const char *a_group); +dap_store_obj_t* dap_chain_global_db_driver_cond_read(const char *a_group, uint64_t id, size_t *a_count_out); +dap_store_obj_t* dap_chain_global_db_driver_read(const char *a_group, const char *a_key, size_t *count_out); +bool dap_chain_global_db_driver_is(const char *a_group, const char *a_key); +size_t dap_chain_global_db_driver_count(const char *a_group, uint64_t id); +dap_list_t* dap_chain_global_db_driver_get_groups_by_mask(const char *a_group_mask); + +dap_store_obj_pkt_t *dap_store_packet_multiple(pdap_store_obj_t a_store_obj, + time_t a_timestamp, size_t a_store_obj_count); +dap_store_obj_t *dap_store_unpacket_multiple(const dap_store_obj_pkt_t *a_pkt, + size_t *a_store_obj_count); diff --git a/libdap-chain-global-db/dap_chain_global_db_driver_cdb.c b/libdap-chain-global-db/dap_chain_global_db_driver_cdb.c new file mode 100644 index 0000000000000000000000000000000000000000..d6a19b116b4d09c61ef1858ac49c5f8b68fd977f --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db_driver_cdb.c @@ -0,0 +1,534 @@ +/* + * Authors: + * Konstantin Papizh <konstantin.papizh@demlabs.net> + * DeM Labs Inc. https://demlabs.net + * Kelvin Project https://github.com/kelvinblockchain + * Copyright (c) 2019 + * All rights reserved. 
/**
 * Store the low `size` bytes of val into arr, most significant byte first
 *
 * @param arr destination buffer of at least `size` bytes
 * @param val value to store
 * @param size number of bytes to write; callers in this file pass sizeof() of
 *        integer types no wider than 64 bits
 */
static inline void dap_cdb_uint_to_hex(char *arr, uint64_t val, short size) {
    // fill from the last position backwards, peeling off one byte per step
    for (short l_pos = size; l_pos > 0; --l_pos) {
        arr[l_pos - 1] = (char)(val & 0xFFu);
        val >>= 8;
    }
}
- 'A' + 10; + }*/ + val = (val << 8) | (byte & 0xFFu); + } + return val; +} + +static void cdb_serialize_val_to_dap_store_obj(pdap_store_obj_t a_obj, const char *key, const char *val) { + if (!key || !val) { + a_obj = NULL; + return; + } + int offset = 0; + a_obj->key = dap_strdup(key); + a_obj->id = dap_cdb_hex_to_uint(val, sizeof(uint64_t)); + offset += sizeof(uint64_t); + a_obj->value_len = dap_cdb_hex_to_uint(val + offset, sizeof(unsigned long)); + offset += sizeof(unsigned long); + a_obj->value = DAP_NEW_SIZE(uint8_t, a_obj->value_len); + memcpy(a_obj->value, val + offset, a_obj->value_len); + offset += a_obj->value_len; + a_obj->timestamp = (time_t)dap_cdb_hex_to_uint(val + offset, sizeof(time_t)); +} + +bool dap_cdb_get_last_obj_iter_callback(void *arg, const char *key, int ksize, const char *val, int vsize, uint32_t expire, uint64_t oid) { + UNUSED(ksize); + UNUSED(val); + UNUSED(vsize); + UNUSED(expire); + UNUSED(oid); + + if (--((pobj_arg)arg)->q == 0) { + cdb_serialize_val_to_dap_store_obj((pdap_store_obj_t)(((pobj_arg)arg)->o), key, val); + return false; + } + return true; +} + +bool dap_cdb_get_some_obj_iter_callback(void *arg, const char *key, int ksize, const char *val, int vsize, uint32_t expire, uint64_t oid) { + UNUSED(ksize); + UNUSED(val); + UNUSED(vsize); + UNUSED(expire); + UNUSED(oid); + + pdap_store_obj_t l_obj = (pdap_store_obj_t)((pobj_arg)arg)->o; + cdb_serialize_val_to_dap_store_obj(&l_obj[((pobj_arg)arg)->n - ((pobj_arg)arg)->q], key, val); + if (--((pobj_arg)arg)->q == 0) { + return false; + } + return true; +} + +bool dap_cdb_get_cond_obj_iter_callback(void *arg, const char *key, int ksize, const char *val, int vsize, uint32_t expire, uint64_t oid) { + UNUSED(ksize); + UNUSED(val); + UNUSED(vsize); + UNUSED(expire); + UNUSED(oid); + + if (dap_cdb_hex_to_uint(val, sizeof(uint64_t)) < ((pobj_arg)arg)->id) { + return true; + } + pdap_store_obj_t l_obj = (pdap_store_obj_t)((pobj_arg)arg)->o; + 
cdb_serialize_val_to_dap_store_obj(&l_obj[((pobj_arg)arg)->n - ((pobj_arg)arg)->q], key, val); + if (--((pobj_arg)arg)->q == 0) { + return false; + } + return true; +} + +pcdb_instance dap_cdb_init_group(char *a_group, int a_flags) { + pcdb_instance l_cdb_i = NULL; + pthread_mutex_lock(&cdb_mutex); + char l_cdb_path[strlen(s_cdb_path) + strlen(a_group) + 2]; + HASH_FIND_STR(s_cdb, a_group, l_cdb_i); + if (l_cdb_i && !(a_flags & (1 << 1))) { + goto FIN; + } + l_cdb_i = DAP_NEW(cdb_instance); + l_cdb_i->local_group = dap_strdup(a_group); + l_cdb_i->cdb = cdb_new(); + memset(l_cdb_path, '\0', sizeof(l_cdb_path)); + dap_snprintf(l_cdb_path, sizeof(l_cdb_path), "%s/%s", s_cdb_path, a_group); + cdb_options l_opts = { 1000000, 128, 1024 }; + if (cdb_option(l_cdb_i->cdb, l_opts.hsize, l_opts.pcacheMB, l_opts.rcacheMB) != CDB_SUCCESS) { + log_it(L_ERROR, "Options are inacceptable: \"%s\"", cdb_errmsg(cdb_errno(l_cdb_i->cdb))); + goto ERR; + } + if (cdb_open(l_cdb_i->cdb, l_cdb_path, a_flags) != CDB_SUCCESS) { + log_it(L_ERROR, "An error occured while opening CDB: \"%s\"", cdb_errmsg(cdb_errno(l_cdb_i->cdb))); + goto ERR; + } + if (!(a_flags & (1 << 1))) { + CDBSTAT l_cdb_stat; + cdb_stat(l_cdb_i->cdb, &l_cdb_stat); + if (l_cdb_stat.rnum > 0) { + void *l_iter = cdb_iterate_new(l_cdb_i->cdb, 0); + obj_arg l_arg; + l_arg.o = DAP_NEW_Z(dap_store_obj_t); + l_arg.q = l_cdb_stat.rnum; + cdb_iterate(l_cdb_i->cdb, dap_cdb_get_last_obj_iter_callback, (void*)&l_arg, l_iter); + cdb_iterate_destroy(l_cdb_i->cdb, l_iter); + l_cdb_i->id = l_arg.o->id; + log_it(L_INFO, "Group \"%s\" found" , l_cdb_i->local_group); + log_it(L_INFO, "Records: %-24u" , l_cdb_stat.rnum); + log_it(L_INFO, "Average read latency: %-24u" , l_cdb_stat.rlatcy); + log_it(L_INFO, "Average write latency: %-24u" , l_cdb_stat.wlatcy); + log_it(L_INFO, "Last id: %-24u" , l_cdb_i->id); + DAP_DELETE(l_arg.o); + } else { + log_it(L_INFO, "Group \"%s\" created" , l_cdb_i->local_group); + l_cdb_i->id = 0; + } + 
HASH_ADD_KEYPTR(hh, s_cdb, l_cdb_i->local_group, strlen(l_cdb_i->local_group), l_cdb_i);
+    } else {
+        log_it(L_INFO, "Group \"%s\" truncated" , l_cdb_i->local_group);
+        l_cdb_i->id = 0;
+    }
+
+FIN:
+    pthread_mutex_unlock(&cdb_mutex);
+    return l_cdb_i;
+ERR:
+    cdb_destroy(l_cdb_i->cdb);
+    DAP_DELETE(l_cdb_i->local_group);
+    DAP_DELETE(l_cdb_i);
+    pthread_mutex_unlock(&cdb_mutex);
+    return NULL;
+}
+
+/**
+ * Initialise the CDB driver: remember the storage directory, open one CDB
+ * instance per directory entry found there and fill the driver callback table.
+ *
+ * a_cdb_path     - storage directory, a single trailing '/' is dropped
+ * a_drv_callback - callback table to fill
+ *
+ * return: CDB_SUCCESS on success, -1 if the arguments are invalid or the
+ *         directory can't be opened, -2 if a group can't be initialised
+ */
+int dap_db_driver_cdb_init(const char *a_cdb_path, dap_db_driver_callbacks_t *a_drv_callback) {
+    if (!a_cdb_path || !a_drv_callback) {
+        log_it(L_ERROR, "CDB init: path or callbacks table is NULL");
+        return -1;
+    }
+    s_cdb_path = dap_strdup(a_cdb_path);
+    if (!s_cdb_path) {
+        return -1;
+    }
+    // The last character lives at index len - 1; index len is always the terminator.
+    // A bare "/" is kept as is.
+    size_t l_path_len = strlen(s_cdb_path);
+    if (l_path_len > 1 && s_cdb_path[l_path_len - 1] == '/') {
+        s_cdb_path[l_path_len - 1] = '\0';
+    }
+    dap_mkdir_with_parents(s_cdb_path);
+    DIR *dir = opendir(s_cdb_path);
+    if (!dir) {
+        log_it(L_ERROR, "Couldn't open db directory");
+        DAP_DELETE(s_cdb_path);
+        s_cdb_path = NULL;
+        return -1;
+    }
+    for (struct dirent *d = readdir(dir); d; d = readdir(dir)) {
+        if (!dap_strcmp(d->d_name, ".") || !dap_strcmp(d->d_name, "..")) {
+            continue;
+        }
+        pcdb_instance l_cdb_i = dap_cdb_init_group(d->d_name, CDB_CREAT | CDB_PAGEWARMUP);
+        if (!l_cdb_i) {
+            closedir(dir);
+            dap_db_driver_cdb_deinit();
+            // deinit releases the path string; make sure no stale pointer survives it
+            s_cdb_path = NULL;
+            return -2;
+        }
+    }
+    a_drv_callback->read_last_store_obj = dap_db_driver_cdb_read_last_store_obj;
+    a_drv_callback->apply_store_obj     = dap_db_driver_cdb_apply_store_obj;
+    a_drv_callback->read_store_obj      = dap_db_driver_cdb_read_store_obj;
+    a_drv_callback->read_cond_store_obj = dap_db_driver_cdb_read_cond_store_obj;
+    a_drv_callback->read_count_store    = dap_db_driver_cdb_read_count_store;
+    a_drv_callback->get_groups_by_mask  = dap_db_driver_cdb_get_groups_by_mask;
+    a_drv_callback->is_obj              = dap_db_driver_cdb_is_obj;
+    a_drv_callback->deinit              = dap_db_driver_cdb_deinit;
+    a_drv_callback->flush               = dap_db_driver_cdb_flush;
+
+    closedir(dir);
+    return CDB_SUCCESS;
+}
+
+pcdb_instance dap_cdb_get_db_by_group(const char *a_group) {
+    pcdb_instance l_cdb_i = NULL;
+    pthread_mutex_lock(&cdb_mutex);
+    HASH_FIND_STR(s_cdb, a_group, l_cdb_i);
+    pthread_mutex_unlock(&cdb_mutex);
+    return l_cdb_i;
+}
+
+int
dap_cdb_add_group(const char *a_group) {
+    // Creates the directory "<s_cdb_path>/<a_group>" for a new group.
+    // The mkdir() result is not inspected, so an already existing directory is not an error.
+    if (!a_group || !s_cdb_path) {
+        return -1;
+    }
+    char l_cdb_path[strlen(s_cdb_path) + strlen(a_group) + 2];
+    memset(l_cdb_path, '\0', sizeof(l_cdb_path));
+    dap_snprintf(l_cdb_path, sizeof(l_cdb_path), "%s/%s", s_cdb_path, a_group);
+#ifdef _WIN32
+    mkdir(l_cdb_path);
+#else
+    mkdir(l_cdb_path, 0755);
+#endif
+    return 0;
+}
+
+/**
+ * Destroy every opened CDB instance, empty the group table and release the
+ * stored path. Safe to call more than once.
+ *
+ * return: CDB_SUCCESS
+ */
+int dap_db_driver_cdb_deinit() {
+    pcdb_instance cur_cdb, tmp;
+    pthread_mutex_lock(&cdb_mutex);
+    HASH_ITER(hh, s_cdb, cur_cdb, tmp) {
+        // unlink first: the hash key is local_group, which is released right below
+        HASH_DEL(s_cdb, cur_cdb);
+        cdb_destroy(cur_cdb->cdb);
+        DAP_DELETE(cur_cdb->local_group);
+        DAP_DELETE(cur_cdb);
+    }
+    pthread_mutex_unlock(&cdb_mutex);
+    if (s_cdb_path) {
+        DAP_DELETE(s_cdb_path);
+        // reset, otherwise a repeated deinit would release the same block twice
+        s_cdb_path = NULL;
+    }
+    return CDB_SUCCESS;
+}
+
+/**
+ * Flush all groups to disk by closing and re-opening every CDB instance.
+ *
+ * return: 0 on success, -1 if the options were rejected, -2 if a re-open failed.
+ *         The loop stops at the first failure.
+ */
+int dap_db_driver_cdb_flush(void) {
+    int ret = 0;
+    log_it(L_INFO, "Flushing CDB to disk");
+    cdb_instance *cur_cdb, *tmp;
+    pthread_mutex_lock(&cdb_mutex);
+    HASH_ITER(hh, s_cdb, cur_cdb, tmp) {
+        cdb_close(cur_cdb->cdb);
+        char l_cdb_path[strlen(s_cdb_path) + strlen(cur_cdb->local_group) + 2];
+        memset(l_cdb_path, '\0', sizeof(l_cdb_path));
+        dap_snprintf(l_cdb_path, sizeof(l_cdb_path), "%s/%s", s_cdb_path, cur_cdb->local_group);
+// Re-application of options might be required
+        // NOTE(review): pcacheMB is passed before rcacheMB here, the reverse of the
+        // cdb_options field order - confirm against the cdb_option() prototype
+        cdb_options l_opts = { 1000000, 128, 1024 };
+        if (cdb_option(cur_cdb->cdb, l_opts.hsize, l_opts.pcacheMB, l_opts.rcacheMB) != CDB_SUCCESS) {
+            log_it(L_ERROR, "Options are inacceptable: \"%s\"", cdb_errmsg(cdb_errno(cur_cdb->cdb)));
+            ret = -1;
+            goto RET;
+        }
+        if(cdb_open(cur_cdb->cdb, l_cdb_path, CDB_CREAT | CDB_PAGEWARMUP) != CDB_SUCCESS) {
+            log_it(L_ERROR, "An error occured while opening CDB: \"%s\"", cdb_errmsg(cdb_errno(cur_cdb->cdb)));
+            ret = -2;
+            goto RET;
+        }
+    }
+    log_it(L_INFO, "All data dumped");
+RET:
+    pthread_mutex_unlock(&cdb_mutex);
+    return ret;
+}
+
+dap_store_obj_t *dap_db_driver_cdb_read_last_store_obj(const char* a_group) {
+    if (!a_group) {
+        return NULL;
+    }
+    pcdb_instance l_cdb_i = dap_cdb_get_db_by_group(a_group);
+    if (!l_cdb_i) {
+        return NULL;
+    }
+    CDB *l_cdb =
l_cdb_i->cdb; + CDBSTAT l_cdb_stat; + cdb_stat(l_cdb, &l_cdb_stat); + void *l_iter = cdb_iterate_new(l_cdb, 0); + obj_arg l_arg; + l_arg.o = DAP_NEW_Z(dap_store_obj_t); + l_arg.q = l_cdb_stat.rnum; + cdb_iterate(l_cdb, dap_cdb_get_last_obj_iter_callback, (void*)&l_arg, l_iter); + cdb_iterate_destroy(l_cdb, l_iter); + l_arg.o->group = dap_strdup(a_group); + return l_arg.o; +} + +bool dap_db_driver_cdb_is_obj(const char *a_group, const char *a_key) +{ + bool l_ret = false; + if(!a_group) { + return false; + } + pcdb_instance l_cdb_i = dap_cdb_get_db_by_group(a_group); + if(!l_cdb_i) { + return false; + } + CDB *l_cdb = l_cdb_i->cdb; + if(a_key) { + //int l_vsize; + if(!cdb_is(l_cdb, a_key, (int) dap_strlen(a_key))) + l_ret = true; + } + return l_ret; +} + +dap_store_obj_t *dap_db_driver_cdb_read_store_obj(const char *a_group, const char *a_key, size_t *a_count_out) { + if (!a_group) { + return NULL; + } + pcdb_instance l_cdb_i = dap_cdb_get_db_by_group(a_group); + if (!l_cdb_i) { + return NULL; + } + CDB *l_cdb = l_cdb_i->cdb; + dap_store_obj_t *l_obj = NULL; + if (a_key) { + char *l_value; + int l_vsize; + cdb_get(l_cdb, a_key, (int)strlen(a_key), (void**)&l_value, &l_vsize); + if (!l_value) { + return NULL; + } + l_obj = DAP_NEW_Z(dap_store_obj_t); + cdb_serialize_val_to_dap_store_obj(l_obj, a_key, l_value); + l_obj->group = dap_strdup(a_group); + cdb_free_val((void**)&l_value); + if(a_count_out) { + *a_count_out = 1; + } + } else { + uint64_t l_count_out = 0; + if(a_count_out) { + l_count_out = *a_count_out; + } + CDBSTAT l_cdb_stat; + cdb_stat(l_cdb, &l_cdb_stat); + if ((l_count_out == 0) || (l_count_out > l_cdb_stat.rnum)) { + l_count_out = l_cdb_stat.rnum; + } + obj_arg l_arg; + l_arg.o = DAP_NEW_Z_SIZE(dap_store_obj_t, l_count_out * sizeof(dap_store_obj_t)); + l_arg.q = l_count_out; + l_arg.n = l_count_out; + void *l_iter = cdb_iterate_new(l_cdb, 0); + /*l_count_out = */cdb_iterate(l_cdb, dap_cdb_get_some_obj_iter_callback, (void*)&l_arg, l_iter); + 
cdb_iterate_destroy(l_cdb, l_iter); + if(a_count_out) { + *a_count_out = l_count_out; + } + for (uint64_t i = 0; i < l_count_out; ++i) { + l_arg.o[i].group = dap_strdup(a_group); + } + l_obj = l_arg.o; + } + return l_obj; +} + +dap_store_obj_t* dap_db_driver_cdb_read_cond_store_obj(const char *a_group, uint64_t a_id, size_t *a_count_out) { + if (!a_group) { + return NULL; + } + pcdb_instance l_cdb_i = dap_cdb_get_db_by_group(a_group); + if (!l_cdb_i) { + return NULL; + } + CDB *l_cdb = l_cdb_i->cdb; + uint64_t l_count_out = 0; + if(a_count_out) { + l_count_out = *a_count_out; + } + CDBSTAT l_cdb_stat; + cdb_stat(l_cdb, &l_cdb_stat); + + if (l_count_out == 0 || l_count_out > l_cdb_stat.rnum) { + l_count_out = l_cdb_stat.rnum; + } + obj_arg l_arg; + l_arg.o = DAP_NEW_Z_SIZE(dap_store_obj_t, l_count_out * sizeof(dap_store_obj_t)); + l_arg.n = l_count_out; + l_arg.q = l_count_out; + l_arg.id = a_id; + void *l_iter = cdb_iterate_new(l_cdb, 0); + /*l_count_out = */cdb_iterate(l_cdb, dap_cdb_get_cond_obj_iter_callback, (void*)&l_arg, l_iter); + cdb_iterate_destroy(l_cdb, l_iter); + if (l_arg.q > 0) { + l_count_out = l_arg.n - l_arg.q; + void *tmp = DAP_REALLOC(l_arg.o, l_count_out * sizeof(dap_store_obj_t)); + if (!tmp && l_count_out) { + log_it(L_CRITICAL, "Couldn't re-allocate memory for portion of store objects!"); + DAP_DELETE(l_arg.o); + return NULL; + } + l_arg.o = tmp; + } + if(a_count_out) { + *a_count_out = l_count_out; + } + for (uint64_t i = 0; i < l_count_out; ++i) { + l_arg.o[i].group = dap_strdup(a_group); + } + return l_arg.o; +} + +size_t dap_db_driver_cdb_read_count_store(const char *a_group, uint64_t a_id) +{ + if(!a_group) { + return 0; + } + pcdb_instance l_cdb_i = dap_cdb_get_db_by_group(a_group); + if(!l_cdb_i) { + return 0; + } + CDB *l_cdb = l_cdb_i->cdb; + CDBSTAT l_cdb_stat; + cdb_stat(l_cdb, &l_cdb_stat); + if(a_id > l_cdb_stat.rnum) + return 0; + return (size_t) l_cdb_stat.rnum - a_id + 1; +} + +/** + * Check whether the groups match the 
pattern a_group_mask, which is a shell wildcard pattern + */ +dap_list_t* dap_db_driver_cdb_get_groups_by_mask(const char *a_group_mask) +{ + dap_list_t *l_ret_list = NULL; + if(!a_group_mask) + return NULL; + cdb_instance *cur_cdb, *tmp; + pthread_mutex_lock(&cdb_mutex); + HASH_ITER(hh, s_cdb, cur_cdb, tmp) + { + if(!fnmatch(a_group_mask, cur_cdb->local_group, 0)) + if(fnmatch("*.del", cur_cdb->local_group, 0)) + l_ret_list = dap_list_prepend(l_ret_list, dap_strdup(cur_cdb->local_group)); + } + pthread_mutex_unlock(&cdb_mutex); + return l_ret_list; +} + +int dap_db_driver_cdb_apply_store_obj(pdap_store_obj_t a_store_obj) { + if(!a_store_obj || !a_store_obj->group) { + return -1; + } + int ret = 0; + pcdb_instance l_cdb_i = dap_cdb_get_db_by_group(a_store_obj->group); + if (!l_cdb_i) { + dap_cdb_add_group(a_store_obj->group); + l_cdb_i = dap_cdb_init_group(a_store_obj->group, CDB_CREAT | CDB_PAGEWARMUP); + } + if (!l_cdb_i) { + return -1; + } + if(a_store_obj->type == 'a') { + if(!a_store_obj->key) {// || !a_store_obj->value || !a_store_obj->value_len){ + return -2; + } + cdb_record l_rec; + l_rec.key = dap_strdup(a_store_obj->key); + int offset = 0; + char *l_val = DAP_NEW_Z_SIZE(char, sizeof(uint64_t) + sizeof(unsigned long) + a_store_obj->value_len + sizeof(time_t)); + dap_cdb_uint_to_hex(l_val, ++l_cdb_i->id, sizeof(uint64_t)); + offset += sizeof(uint64_t); + dap_cdb_uint_to_hex(l_val + offset, a_store_obj->value_len, sizeof(unsigned long)); + offset += sizeof(unsigned long); + if(a_store_obj->value && a_store_obj->value_len){ + memcpy(l_val + offset, a_store_obj->value, a_store_obj->value_len); + } + offset += a_store_obj->value_len; + unsigned long l_time = (unsigned long)a_store_obj->timestamp; + dap_cdb_uint_to_hex(l_val + offset, l_time, sizeof(time_t)); + offset += sizeof(time_t); + l_rec.val = l_val; + if (cdb_set2(l_cdb_i->cdb, l_rec.key, (int)strlen(l_rec.key), l_rec.val, offset, CDB_INSERTCACHE | CDB_OVERWRITE, 0) != CDB_SUCCESS) { + log_it(L_ERROR, 
"Couldn't add record with key [%s] to CDB: \"%s\"", l_rec.key, cdb_errmsg(cdb_errno(l_cdb_i->cdb))); + ret = -1; + } + DAP_DELETE(l_rec.key); + DAP_DELETE(l_rec.val); + } else if(a_store_obj->type == 'd') { + if(a_store_obj->key) { + if(cdb_del(l_cdb_i->cdb, a_store_obj->key, (int) strlen(a_store_obj->key)) == -3) + ret = 1; + } else { + cdb_destroy(l_cdb_i->cdb); + if (!dap_cdb_init_group(a_store_obj->group, CDB_TRUNC | CDB_PAGEWARMUP)) { + ret = -1; + } + } + } + return ret; +} diff --git a/libdap-chain-global-db/dap_chain_global_db_driver_cdb.h b/libdap-chain-global-db/dap_chain_global_db_driver_cdb.h new file mode 100644 index 0000000000000000000000000000000000000000..f3e036f19f91e333d5a5797632b788523db3ee07 --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db_driver_cdb.h @@ -0,0 +1,51 @@ +/* + * Authors: + * Konstantin Papizh <konstantin.papizh@demlabs.net> + * DeM Labs Inc. https://demlabs.net + * Kelvin Project https://github.com/kelvinblockchain + * Copyright (c) 2019 + * All rights reserved. + + This file is part of DAP (Deus Applications Prototypes) the open source project + + DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + DAP is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "cuttdb.h" +#include "dap_chain_global_db_driver.h" +#define UNUSED(x) (void)(x) + +typedef struct _cdb_options { + int hsize; // Main hash table size, 1%-10% of total records, immutable + int rcacheMB; // Record cache in MBytes + int pcacheMB; // Index page cache in MBytes +} cdb_options, *pcdb_options; + +typedef struct _cdb_record { + char *key; + char *val; +} cdb_record, *pcdb_record; + +int dap_db_driver_cdb_init(const char*, dap_db_driver_callbacks_t*); +int dap_db_driver_cdb_deinit(); +int dap_db_driver_cdb_flush(void); + +int dap_db_driver_cdb_apply_store_obj(pdap_store_obj_t); + +dap_store_obj_t *dap_db_driver_cdb_read_last_store_obj(const char*); +dap_store_obj_t *dap_db_driver_cdb_read_store_obj(const char*, const char*, size_t*); +size_t dap_db_driver_cdb_read_count_store(const char *a_group, uint64_t a_id); +dap_list_t* dap_db_driver_cdb_get_groups_by_mask(const char *a_group_mask); +dap_store_obj_t* dap_db_driver_cdb_read_cond_store_obj(const char*, uint64_t, size_t*); +bool dap_db_driver_cdb_is_obj(const char *a_group, const char *a_key); diff --git a/libdap-chain-global-db/dap_chain_global_db_driver_sqlite.c b/libdap-chain-global-db/dap_chain_global_db_driver_sqlite.c new file mode 100755 index 0000000000000000000000000000000000000000..e79d6f49844cc679e314f42abc359305b675ddf7 --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db_driver_sqlite.c @@ -0,0 +1,780 @@ +/* + * Authors: + * Dmitriy A. Gearasimov <gerasimov.dmitriy@demlabs.net> + * Alexander Lysikov <alexander.lysikov@demlabs.net> + * DeM Labs Inc. https://demlabs.net + * CellFrame https://cellframe.net + * Sources https://gitlab.demlabs.net/cellframe + * Copyright (c) 2017-2019 + * All rights reserved. 
+ + This file is part of CellFrame SDK the open source project + + CellFrame SDK is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + CellFrame SDK is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with any CellFrame SDK based project. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stddef.h> +#include <string.h> +#ifdef DAP_OS_UNIX +#include <unistd.h> +#endif +#include "dap_common.h" +#include "dap_hash.h" +#include "dap_strfuncs.h" +#include "dap_chain_global_db_driver_sqlite.h" + +#define LOG_TAG "db_sqlite" + +static sqlite3 *s_db = NULL; +static char *s_filename_db = NULL; + +// Value of one field in the table +typedef struct _SQLITE_VALUE_ +{ + int32_t len; + char type; + /* + #define SQLITE_INTEGER 1 + #define SQLITE_FLOAT 2 + #define SQLITE_TEXT 3 + #define SQLITE_BLOB 4 + #define SQLITE_NULL 5 + */ + uint8_t reserv[3]; + union + { + int val_int; + long long val_int64; + double val_float; + const char *val_str; + const unsigned char *val_blob; + } val; +} SQLITE_VALUE; + +// Content of one row in the table +typedef struct _SQLITE_ROW_VALUE_ +{ + int count; // number of columns in a row + int reserv; + SQLITE_VALUE *val; // array of field values +} SQLITE_ROW_VALUE; + +static int dap_db_driver_sqlite_exec(sqlite3 *l_db, const char *l_query, char **l_error_message); + +/** + * SQLite library initialization, no thread safe + * + * return 0 if Ok, else error code >0 + */ +int dap_db_driver_sqlite_init(const char *a_filename_db, dap_db_driver_callbacks_t *a_drv_callback) +{ + int l_ret = -1; + if(sqlite3_threadsafe() 
&& !sqlite3_config(SQLITE_CONFIG_SERIALIZED))
+        l_ret = sqlite3_initialize();
+    if(l_ret != SQLITE_OK) {
+        log_it(L_ERROR, "Can't init sqlite err=%d", l_ret);
+        return l_ret;
+    }
+    char *l_error_message = NULL;
+    s_db = dap_db_driver_sqlite_open(a_filename_db, SQLITE_OPEN_READWRITE, &l_error_message);
+    if(!s_db) {
+        // the message is text, and the failure has to be reported to the caller:
+        // l_ret still holds SQLITE_OK from sqlite3_initialize() at this point
+        log_it(L_ERROR, "Can't init sqlite err=%s", l_error_message ? l_error_message : "unknown");
+        dap_db_driver_sqlite_free(l_error_message);
+        l_ret = -3;
+    }
+    else {
+        if(!dap_db_driver_sqlite_set_pragma(s_db, "synchronous", "NORMAL")) // 0 | OFF | 1 | NORMAL | 2 | FULL
+            printf("can't set new synchronous mode\n");
+        if(!dap_db_driver_sqlite_set_pragma(s_db, "journal_mode", "OFF")) // DELETE | TRUNCATE | PERSIST | MEMORY | WAL | OFF
+            printf("can't set new journal mode\n");
+
+        if(!dap_db_driver_sqlite_set_pragma(s_db, "page_size", "1024")) // page size in bytes
+            printf("can't set page_size\n");
+        // *PRAGMA page_size = bytes; // page size DB; it is reasonable to make it equal to the size of the disk cluster 4096
+        // *PRAGMA cache_size = -kibibytes; // by default it is equal to 2000 pages of database
+//
+        a_drv_callback->apply_store_obj = dap_db_driver_sqlite_apply_store_obj;
+        a_drv_callback->read_store_obj = dap_db_driver_sqlite_read_store_obj;
+        a_drv_callback->read_cond_store_obj = dap_db_driver_sqlite_read_cond_store_obj;
+        a_drv_callback->read_last_store_obj = dap_db_driver_sqlite_read_last_store_obj;
+        a_drv_callback->transaction_start = dap_db_driver_sqlite_start_transaction;
+        a_drv_callback->transaction_end = dap_db_driver_sqlite_end_transaction;
+        a_drv_callback->deinit = dap_db_driver_sqlite_deinit;
+        a_drv_callback->flush = dap_db_driver_sqlite_flush;
+        s_filename_db = strdup(a_filename_db);
+    }
+    return l_ret;
+}
+
+int dap_db_driver_sqlite_deinit(void)
+{
+    dap_db_driver_sqlite_close(s_db);
+    s_db = NULL;
+    return sqlite3_shutdown();
+}
+
+/**
+ * Additional SQL function byte_to_bin(blob): yields the first byte of the
+ * argument as an integer.
+ * The result is NULL when the argument count is not 1, when the blob is
+ * NULL/empty, or when its first byte is zero.
+ */
+static void byte_to_bin(sqlite3_context *l_context, int a_argc, sqlite3_value **a_argv)
+{
+    const unsigned char *l_text;
+    if(a_argc != 1) {
+        sqlite3_result_null(l_context);
+        // stop here: a_argv[0] must not be read for an unexpected argument list
+        return;
+    }
+    l_text = (const unsigned char *) sqlite3_value_blob(a_argv[0]);
+    if(l_text && l_text[0])
+    {
+        int l_result = (int) l_text[0];
+        sqlite3_result_int(l_context, l_result);
+        return;
+    }
+    sqlite3_result_null(l_context);
+}
+
+/**
+ * Open SQLite database
+ * a_filename_utf8 - database file name
+ * a_flags - database access flags (SQLITE_OPEN_READONLY, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE)
+ * a_error_message[out] - Error messages (the memory requires deletion via sqlite_free ())
+ *
+ * return: database identifier, NULL when an error occurs.
+ */
+sqlite3* dap_db_driver_sqlite_open(const char *a_filename_utf8, int a_flags, char **a_error_message)
+{
+    sqlite3 *l_db = NULL;
+
+    int l_rc = sqlite3_open_v2(a_filename_utf8, &l_db, a_flags | SQLITE_OPEN_FULLMUTEX, NULL);
+    // if unable to open the database file
+    if(l_rc == SQLITE_CANTOPEN) {
+        sqlite3_close(l_db);
+        // try to create database
+        l_rc = sqlite3_open_v2(a_filename_utf8, &l_db, a_flags | SQLITE_OPEN_FULLMUTEX | SQLITE_OPEN_CREATE, NULL);
+    }
+    if(l_rc != SQLITE_OK)
+    {
+        if(a_error_message)
+            *a_error_message = sqlite3_mprintf("Can't open database: %s\n", sqlite3_errmsg(l_db));
+        sqlite3_close(l_db);
+        return NULL;
+    }
+    // added user functions
+    sqlite3_create_function(l_db, "byte_to_bin", 1, SQLITE_UTF8, NULL, &byte_to_bin, NULL, NULL);
+    return l_db;
+}
+
+/**
+ * Close the database
+ */
+void dap_db_driver_sqlite_close(sqlite3 *l_db)
+{
+    if(l_db)
+        sqlite3_close(l_db);
+}
+/*
+ * Clear the memory allocated via sqlite3_mprintf()
+ */
+void dap_db_driver_sqlite_free(char *memory)
+{
+    if(memory)
+        sqlite3_free(memory);
+}
+
+/**
+ * Set specific pragma statements
+ * www.sqlite.org/pragma.html
+ *
+ *PRAGMA page_size = bytes; // page size DB; it is reasonable to make it equal to the size of the disk cluster 4096
+ *PRAGMA cache_size = -kibibytes; // by default it
is equal to 2000 pages of database
+ *PRAGMA encoding = "UTF-8"; // default = UTF-8
+ *PRAGMA foreign_keys = 1; // default = 0
+ *PRAGMA journal_mode = DELETE | TRUNCATE | PERSIST | MEMORY | WAL | OFF;
+ *PRAGMA synchronous = 0 | OFF | 1 | NORMAL | 2 | FULL;
+ *
+ * return: true if the statement was executed, false otherwise
+ */
+bool dap_db_driver_sqlite_set_pragma(sqlite3 *a_db, char *a_param, char *a_mode)
+{
+    if(!a_param || !a_mode)
+    {
+        printf("[sqlite_set_pragma] err!!! no param or mode\n");
+        return false;
+    }
+    char *l_str_query = sqlite3_mprintf("PRAGMA %s = %s", a_param, a_mode);
+    // sqlite3_mprintf() yields NULL when out of memory; nothing was executed then
+    if(!l_str_query)
+        return false;
+    int l_rc = dap_db_driver_sqlite_exec(a_db, l_str_query, NULL); // default synchronous=FULL
+    sqlite3_free(l_str_query);
+    return l_rc == SQLITE_OK;
+}
+
+/**
+ * Flush the database by closing it and opening it again, then re-apply the
+ * pragmas used by this driver.
+ *
+ * return: 0 if Ok, -3 if the database can't be re-opened (s_db is NULL then)
+ */
+int dap_db_driver_sqlite_flush()
+{
+    log_it(L_DEBUG, "Start flush sqlite data base.");
+    dap_db_driver_sqlite_close(s_db);
+    char *l_error_message = NULL;
+    s_db = dap_db_driver_sqlite_open(s_filename_db, SQLITE_OPEN_READWRITE, &l_error_message);
+    if(!s_db) {
+        // the message is text, so it is printed with %s
+        log_it(L_ERROR, "Can't init sqlite err=%s", l_error_message ? l_error_message : "unknown");
+        dap_db_driver_sqlite_free(l_error_message);
+        return -3;
+    }
+#ifndef _WIN32
+    sync();
+#endif
+    if(!dap_db_driver_sqlite_set_pragma(s_db, "synchronous", "NORMAL")) // 0 | OFF | 1 | NORMAL | 2 | FULL
+        log_it(L_WARNING, "Can't set new synchronous mode\n");
+    if(!dap_db_driver_sqlite_set_pragma(s_db, "journal_mode", "OFF")) // DELETE | TRUNCATE | PERSIST | MEMORY | WAL | OFF
+        log_it(L_WARNING, "Can't set new journal mode\n");
+
+    if(!dap_db_driver_sqlite_set_pragma(s_db, "page_size", "1024")) // page size in bytes
+        log_it(L_WARNING, "Can't set page_size\n");
+    return 0;
+}
+
+/**
+ * Execute SQL query to database that does not return data
+ *
+ * return 0 if Ok, else error code >0
+ */
+static int dap_db_driver_sqlite_exec(sqlite3 *l_db, const char *l_query, char **l_error_message)
+{
+    char *l_zErrMsg = NULL;
+    int l_rc = sqlite3_exec(l_db, l_query, NULL, 0, &l_zErrMsg);
+    //printf("%s\n",l_query);
+    if(l_rc !=
SQLITE_OK) + { + if(l_error_message && l_zErrMsg) + *l_error_message = sqlite3_mprintf("SQL error: %s", l_zErrMsg); + if(l_zErrMsg) + sqlite3_free(l_zErrMsg); + return l_rc; + } + if(l_zErrMsg) + sqlite3_free(l_zErrMsg); + return l_rc; +} + +/** + * Create table + * + * return 0 if Ok, else error code + */ +static int dap_db_driver_sqlite_create_group_table(const char *a_table_name) +{ + char *l_error_message = NULL; + if(!s_db || !a_table_name) + return -1; + char *l_query = + dap_strdup_printf( + "create table if not exists '%s'(id INTEGER NOT NULL PRIMARY KEY, key TEXT KEY, hash BLOB, ts INTEGER KEY, value BLOB)", + a_table_name); + if(dap_db_driver_sqlite_exec(s_db, (const char*) l_query, &l_error_message) != SQLITE_OK) + { + log_it(L_ERROR, "Creatу_table : %s\n", l_error_message); + dap_db_driver_sqlite_free(l_error_message); + DAP_DELETE(l_query); + return -1; + } + DAP_DELETE(l_query); + // create unique index - key + l_query = dap_strdup_printf("create unique index if not exists 'idx_key_%s' ON '%s' (key)", a_table_name, + a_table_name); + if(dap_db_driver_sqlite_exec(s_db, (const char*) l_query, &l_error_message) != SQLITE_OK) { + log_it(L_ERROR, "Create unique index : %s\n", l_error_message); + dap_db_driver_sqlite_free(l_error_message); + DAP_DELETE(l_query); + return -1; + } + DAP_DELETE(l_query); + return 0; +} + +/** + * Prepare SQL query for database + * l_query [in] SQL-string with a query to database, example: + * SELECT * FROM data + * SELECT id, sd FROM data LIMIT 300 + * SELECT id, sd FROM data ORDER BY id ASC/DESC + * SELECT * FROM data WHERE time>449464766900000 and time<449464766910000" + * SELECT * FROM data WHERE hex(sd) LIKE '%370%' + * hex(x'0806') -> '08f6' или quote(sd) -> X'08f6' + * substr(x'031407301210361320690000',3,2) -> x'0730' + * + * CAST(substr(sd,5,2) as TEXT) + * additional function of line to number _uint8 + * byte_to_bin(x'ff') -> 255 + */ +static int dap_db_driver_sqlite_query(sqlite3 *db, char *query, sqlite3_stmt 
**l_res, char **l_error_message) +{ + const char *pzTail; // OUT: Pointer to unused portion of zSql + int l_rc = sqlite3_prepare_v2(db, query, -1, l_res, &pzTail); + if(l_rc != SQLITE_OK) + { + if(l_error_message) + { + const char *zErrMsg = sqlite3_errmsg(db); + if(zErrMsg) + *l_error_message = sqlite3_mprintf("SQL Query error: %s\n", zErrMsg); + } + return l_rc; + } + return l_rc; +} + +/** + * Clear memory after fetching a string + * + * return 0 if Ok, else -1 + */ +static void dap_db_driver_sqlite_row_free(SQLITE_ROW_VALUE *row) +{ + if(row) { + // delete the whole string + sqlite3_free(row->val); + // delete structure + sqlite3_free(row); + } +} + +/** + * Selects the next entry from the result of the query and returns an array + * + * l_res: identifier received in sqlite_query () + * l_row_out [out]: pointer to a column or NULL + * + * return: + * SQLITE_ROW(100) has another row ready + * SQLITE_DONE(101) finished executing, + * SQLITE_CONSTRAINT(19) data is not unique and will not be added + */ +static int dap_db_driver_sqlite_fetch_array(sqlite3_stmt *l_res, SQLITE_ROW_VALUE **l_row_out) +{ + SQLITE_ROW_VALUE *l_row = NULL; + // go to next the string + int l_rc = sqlite3_step(l_res); + if(l_rc == SQLITE_ROW) // SQLITE_ROW(100) or SQLITE_DONE(101) or SQLITE_BUSY(5) + { + int l_iCol; // number of the column in the row + // allocate memory for a row with data + l_row = (SQLITE_ROW_VALUE*) sqlite3_malloc(sizeof(SQLITE_ROW_VALUE)); + int l_count = sqlite3_column_count(l_res); // get the number of columns + // allocate memory for all columns + l_row->val = (SQLITE_VALUE*) sqlite3_malloc(l_count * (int)sizeof(SQLITE_VALUE)); + if(l_row->val) + { + l_row->count = l_count; // number of columns + for(l_iCol = 0; l_iCol < l_row->count; l_iCol++) + { + SQLITE_VALUE *cur_val = l_row->val + l_iCol; + cur_val->len = sqlite3_column_bytes(l_res, l_iCol); // how many bytes will be needed + cur_val->type = (signed char)sqlite3_column_type(l_res, l_iCol); // field type + 
if(cur_val->type == SQLITE_INTEGER) + { + cur_val->val.val_int64 = sqlite3_column_int64(l_res, l_iCol); + cur_val->val.val_int = sqlite3_column_int(l_res, l_iCol); + } + else if(cur_val->type == SQLITE_FLOAT) + cur_val->val.val_float = sqlite3_column_double(l_res, l_iCol); + else if(cur_val->type == SQLITE_BLOB) + cur_val->val.val_blob = (const unsigned char*) sqlite3_column_blob(l_res, l_iCol); + else if(cur_val->type == SQLITE_TEXT) + cur_val->val.val_str = (const char*) sqlite3_column_text(l_res, l_iCol); //sqlite3_mprintf("%s",sqlite3_column_text(l_res,iCol)); + else + cur_val->val.val_str = NULL; + } + } + else + l_row->count = 0; // number of columns + } + if(l_row_out) + *l_row_out = l_row; + else + dap_db_driver_sqlite_row_free(l_row); + return l_rc; +} + +/** + * Clear memory when request processing is complete + */ +static bool dap_db_driver_sqlite_query_free(sqlite3_stmt *l_res) +{ + if(!l_res) + return false; + int rc = sqlite3_finalize(l_res); + if(rc != SQLITE_OK) + return false; + return true; +} + +/** + * Convert the array into a string to save to blob + */ +static char* dap_db_driver_get_string_from_blob(uint8_t *blob, int len) +{ + char *str_out; + int ret; + if(!blob) + return NULL; + str_out = (char*) sqlite3_malloc(len * 2 + 1); + ret = (int)dap_bin2hex(str_out, (const void*)blob, (size_t)len); + str_out[len * 2] = 0; + return str_out; + +} + +/** + * Cleaning the database from the deleted data + * + * return 0 if Ok, else error code >0 + */ +int dap_db_driver_sqlite_vacuum(sqlite3 *l_db) +{ + if(!s_db) + return -1; + int l_rc = dap_db_driver_sqlite_exec(l_db, "VACUUM", NULL); + return l_rc; +} + +/** + * Start a transaction + */ +int dap_db_driver_sqlite_start_transaction(void) +{ + if(s_db) + { + if(SQLITE_OK == dap_db_driver_sqlite_exec(s_db, "BEGIN", NULL)) + return 0; + else + return -1; + } + else + return -1; +} + +/** + * End of transaction + */ +int dap_db_driver_sqlite_end_transaction(void) +{ + if(s_db) + { + if(SQLITE_OK == 
dap_db_driver_sqlite_exec(s_db, "COMMIT", NULL))
+            return 0;
+        else
+            return -1;
+    }
+    else
+        return -1;
+}
+
+/**
+ * Build the table name for a group: a copy of the group name with every '.'
+ * replaced by '_'.
+ *
+ * return: newly allocated string (release with DAP_DELETE), NULL if the copy failed
+ */
+char *dap_db_driver_sqlite_make_table_name(const char *a_group_name)
+{
+    char *l_group_name = dap_strdup(a_group_name);
+    if(!l_group_name)
+        return NULL;
+    // replace '.' to '_' in a single pass
+    for(char *l_pos = l_group_name; *l_pos; ++l_pos) {
+        if(*l_pos == '.')
+            *l_pos = '_';
+    }
+    return l_group_name;
+}
+
+/**
+ * Apply data (write or delete)
+ *
+ * type 'a': insert the record; if the table is missing it is created and the
+ *           insert repeated; if the key already exists the old record is
+ *           deleted and the insert repeated
+ * type 'd': delete the record with the given key, or drop the whole group
+ *           table when no key is given
+ *
+ * return: 0 if Ok, -1 on invalid arguments or when the statement failed
+ */
+int dap_db_driver_sqlite_apply_store_obj(dap_store_obj_t *a_store_obj)
+{
+    if(!a_store_obj || !a_store_obj->group)
+        return -1;
+    char *l_query = NULL;
+    char *l_error_message = NULL;
+    // every statement below has to address the same table, so derive its name once
+    char *l_table_name = dap_db_driver_sqlite_make_table_name(a_store_obj->group);
+    if(!l_table_name)
+        return -1;
+    if(a_store_obj->type == 'a') {
+        if(!a_store_obj->key || !a_store_obj->value || !a_store_obj->value_len) {
+            DAP_DELETE(l_table_name);
+            return -1;
+        }
+        const char *l_blob_hash = ""; // the hash column is stored empty
+        char *l_blob_value = dap_db_driver_get_string_from_blob(a_store_obj->value, (int)a_store_obj->value_len);
+        if(!l_blob_value) {
+            DAP_DELETE(l_table_name);
+            return -1;
+        }
+        //add one record; %q doubles any quote inside the name and the key
+        l_query = sqlite3_mprintf("insert into '%q' values(NULL, '%q', x'%s', '%lld', x'%s')",
+                l_table_name, a_store_obj->key, l_blob_hash, (long long)a_store_obj->timestamp, l_blob_value);
+        dap_db_driver_sqlite_free(l_blob_value);
+    }
+    else if(a_store_obj->type == 'd') {
+        //delete one record
+        if(a_store_obj->key)
+            l_query = sqlite3_mprintf("delete from '%q' where key = '%q'",
+                    l_table_name, a_store_obj->key);
+        // remove all group
+        else
+            l_query = sqlite3_mprintf("drop table if exists '%q'", l_table_name);
+    }
+    else {
+        log_it(L_ERROR, "Unknown store_obj type '0x%x'", a_store_obj->type);
+        DAP_DELETE(l_table_name);
+        return -1;
+    }
+    if(!l_query) {
+        log_it(L_ERROR, "sqlite apply error: %s", "can't build the query");
+        DAP_DELETE(l_table_name);
+        return -1;
+    }
+    // execute request
+    int l_ret = dap_db_driver_sqlite_exec(s_db, l_query, &l_error_message);
+    if(l_ret == SQLITE_ERROR) {
+        dap_db_driver_sqlite_free(l_error_message);
+        l_error_message = NULL;
+        // create table
+        dap_db_driver_sqlite_create_group_table(l_table_name);
+        // repeat request
+        l_ret = dap_db_driver_sqlite_exec(s_db, l_query, &l_error_message);
+    }
+    // entry with the same key is already present
+    if(l_ret == SQLITE_CONSTRAINT) {
+        dap_db_driver_sqlite_free(l_error_message);
+        l_error_message = NULL;
+        //delete exist record
+        char *l_query_del = sqlite3_mprintf("delete from '%q' where key = '%q'", l_table_name, a_store_obj->key);
+        l_ret = dap_db_driver_sqlite_exec(s_db, l_query_del, &l_error_message);
+        dap_db_driver_sqlite_free(l_query_del);
+        if(l_ret != SQLITE_OK) {
+            log_it(L_INFO, "Entry with the same key is already present and can't delete, %s",
+                    l_error_message ? l_error_message : "unknown");
+            dap_db_driver_sqlite_free(l_error_message);
+            l_error_message = NULL;
+        }
+        // repeat request
+        l_ret = dap_db_driver_sqlite_exec(s_db, l_query, &l_error_message);
+    }
+    // missing database
+    if(l_ret != SQLITE_OK) {
+        log_it(L_ERROR, "sqlite apply error: %s", l_error_message ? l_error_message : "unknown");
+        dap_db_driver_sqlite_free(l_error_message);
+        l_ret = -1;
+    }
+    dap_db_driver_sqlite_free(l_query);
+    DAP_DELETE(l_table_name);
+    return l_ret;
+}
+
+static void fill_one_item(const char *a_group, dap_store_obj_t *a_obj, SQLITE_ROW_VALUE *a_row)
+{
+    a_obj->group = dap_strdup(a_group);
+
+    for(int l_iCol = 0; l_iCol < a_row->count; l_iCol++) {
+        SQLITE_VALUE *l_cur_val = a_row->val + l_iCol;
+        switch (l_iCol) {
+        case 0:
+            if(l_cur_val->type == SQLITE_INTEGER)
+                a_obj->id = (uint64_t)l_cur_val->val.val_int64;
+            break; // id
+        case 1:
+            if(l_cur_val->type == SQLITE_INTEGER)
+                a_obj->timestamp = l_cur_val->val.val_int64;
+            break; // ts
+        case
2: + if(l_cur_val->type == SQLITE_TEXT) + a_obj->key = dap_strdup(l_cur_val->val.val_str); + break; // key + case 3: + if(l_cur_val->type == SQLITE_BLOB) + { + a_obj->value_len = (size_t) l_cur_val->len; + a_obj->value = DAP_NEW_SIZE(uint8_t, a_obj->value_len); + memcpy(a_obj->value, l_cur_val->val.val_blob, a_obj->value_len); + } + break; // value + } + } + +} + +/** + * Read last items + * + * a_group - group name + */ +dap_store_obj_t* dap_db_driver_sqlite_read_last_store_obj(const char *a_group) +{ + dap_store_obj_t *l_obj = NULL; + char *l_error_message = NULL; + sqlite3_stmt *l_res; + if(!a_group) + return NULL; + char *l_str_query = sqlite3_mprintf("SELECT id,ts,key,value FROM '%s' ORDER BY id DESC LIMIT 1", a_group); + int l_ret = dap_db_driver_sqlite_query(s_db, l_str_query, &l_res, &l_error_message); + sqlite3_free(l_str_query); + if(l_ret != SQLITE_OK) { + log_it(L_ERROR, "read last l_ret=%d, %s\n", sqlite3_errcode(s_db), sqlite3_errmsg(s_db)); + dap_db_driver_sqlite_free(l_error_message); + return NULL; + } + + SQLITE_ROW_VALUE *l_row = NULL; + l_ret = dap_db_driver_sqlite_fetch_array(l_res, &l_row); + if(l_ret != SQLITE_ROW && l_ret != SQLITE_DONE) + { + log_it(L_ERROR, "read l_ret=%d, %s\n", sqlite3_errcode(s_db), sqlite3_errmsg(s_db)); + } + if(l_ret == SQLITE_ROW && l_row) { + l_obj = DAP_NEW_Z(dap_store_obj_t); + fill_one_item(a_group, l_obj, l_row); + } + dap_db_driver_sqlite_row_free(l_row); + dap_db_driver_sqlite_query_free(l_res); + + return l_obj; +} + +/** + * Read several items with conditoin + * + * a_group - group name + * a_id - read from this id + * a_count_out[in], how many items to read, 0 - no limits + * a_count_out[out], how many items was read + */ +dap_store_obj_t* dap_db_driver_sqlite_read_cond_store_obj(const char *a_group, uint64_t a_id, size_t *a_count_out) +{ + dap_store_obj_t *l_obj = NULL; + char *l_error_message = NULL; + sqlite3_stmt *l_res; + if(!a_group) + return NULL; + // no limit + int l_count_out = 0; + 
if(a_count_out) + l_count_out = (int)*a_count_out; + char *l_str_query; + if(l_count_out) + l_str_query = sqlite3_mprintf("SELECT id,ts,key,value FROM '%s' WHERE id>'%lld' ORDER BY id ASC LIMIT %d", + a_group, a_id, l_count_out); + else + l_str_query = sqlite3_mprintf("SELECT id,ts,key,value FROM '%s' WHERE id>'%lld' ORDER BY id ASC", + a_group, a_id); + int l_ret = dap_db_driver_sqlite_query(s_db, l_str_query, &l_res, &l_error_message); + sqlite3_free(l_str_query); + if(l_ret != SQLITE_OK) { + log_it(L_ERROR, "read l_ret=%d, %s\n", sqlite3_errcode(s_db), sqlite3_errmsg(s_db)); + dap_db_driver_sqlite_free(l_error_message); + return NULL; + } + + //int b = qlite3_column_count(s_db); + SQLITE_ROW_VALUE *l_row = NULL; + l_count_out = 0; + int l_count_sized = 0; + do { + l_ret = dap_db_driver_sqlite_fetch_array(l_res, &l_row); + if(l_ret != SQLITE_ROW && l_ret != SQLITE_DONE) + { + log_it(L_ERROR, "read l_ret=%d, %s\n", sqlite3_errcode(s_db), sqlite3_errmsg(s_db)); + } + if(l_ret == SQLITE_ROW && l_row) { + // realloc memory + if(l_count_out >= l_count_sized) { + l_count_sized += 10; + l_obj = DAP_REALLOC(l_obj, sizeof(dap_store_obj_t) * (uint64_t)l_count_sized); + memset(l_obj + l_count_out, 0, sizeof(dap_store_obj_t) * (uint64_t)(l_count_sized - l_count_out)); + } + // fill current item + dap_store_obj_t *l_obj_cur = l_obj + l_count_out; + fill_one_item(a_group, l_obj_cur, l_row); + l_count_out++; + } + dap_db_driver_sqlite_row_free(l_row); + } while(l_row); + + dap_db_driver_sqlite_query_free(l_res); + + if(a_count_out) + *a_count_out = (size_t)l_count_out; + return l_obj; +} + +/** + * Read several items + * + * a_group - group name + * a_key - key name, may by NULL, it means reading the whole group + * a_count_out[in], how many items to read, 0 - no limits + * a_count_out[out], how many items was read + */ +dap_store_obj_t* dap_db_driver_sqlite_read_store_obj(const char *a_group, const char *a_key, size_t *a_count_out) +{ + dap_store_obj_t *l_obj = NULL; + char 
*l_error_message = NULL; + sqlite3_stmt *l_res; + if(!a_group) + return NULL; + // no limit + uint64_t l_count_out = 0; + if(a_count_out) + l_count_out = *a_count_out; + char *l_str_query; + if(a_key) { + if(l_count_out) + l_str_query = sqlite3_mprintf("SELECT id,ts,key,value FROM '%s' WHERE key='%s' ORDER BY id ASC LIMIT %d", + a_group, a_key, l_count_out); + else + l_str_query = sqlite3_mprintf("SELECT id,ts,key,value FROM '%s' WHERE key='%s' ORDER BY id ASC", + a_group, a_key); + } + else { + if(l_count_out) + l_str_query = sqlite3_mprintf("SELECT id,ts,key,value FROM '%s' ORDER BY id ASC LIMIT %d", + a_group, l_count_out); + else + l_str_query = sqlite3_mprintf("SELECT id,ts,key,value FROM '%s' ORDER BY id ASC", a_group); + } + int l_ret = dap_db_driver_sqlite_query(s_db, l_str_query, &l_res, &l_error_message); + sqlite3_free(l_str_query); + if(l_ret != SQLITE_OK) { + log_it(L_ERROR, "read l_ret=%d, %s\n", sqlite3_errcode(s_db), sqlite3_errmsg(s_db)); + dap_db_driver_sqlite_free(l_error_message); + return NULL; + } + + //int b = qlite3_column_count(s_db); + SQLITE_ROW_VALUE *l_row = NULL; + l_count_out = 0; + uint64_t l_count_sized = 0; + do { + l_ret = dap_db_driver_sqlite_fetch_array(l_res, &l_row); + if(l_ret != SQLITE_ROW && l_ret != SQLITE_DONE) + { + log_it(L_ERROR, "read l_ret=%d, %s\n", sqlite3_errcode(s_db), sqlite3_errmsg(s_db)); + } + if(l_ret == SQLITE_ROW && l_row) { + // realloc memory + if(l_count_out >= l_count_sized) { + l_count_sized += 10; + l_obj = DAP_REALLOC(l_obj, sizeof(dap_store_obj_t) * l_count_sized); + memset(l_obj + l_count_out, 0, sizeof(dap_store_obj_t) * (l_count_sized - l_count_out)); + } + // fill currrent item + dap_store_obj_t *l_obj_cur = l_obj + l_count_out; + fill_one_item(a_group, l_obj_cur, l_row); + l_count_out++; + } + dap_db_driver_sqlite_row_free(l_row); + } while(l_row); + + dap_db_driver_sqlite_query_free(l_res); + + if(a_count_out) + *a_count_out = l_count_out; + return l_obj; +} diff --git 
a/libdap-chain-global-db/dap_chain_global_db_driver_sqlite.h b/libdap-chain-global-db/dap_chain_global_db_driver_sqlite.h new file mode 100755 index 0000000000000000000000000000000000000000..2d1175df09dee149211876af69383abed045162f --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db_driver_sqlite.h @@ -0,0 +1,52 @@ +/* + * Authors: + * Dmitriy A. Gearasimov <gerasimov.dmitriy@demlabs.net> + * Alexander Lysikov <alexander.lysikov@demlabs.net> + * DeM Labs Inc. https://demlabs.net + * CellFrame https://cellframe.net + * Sources https://gitlab.demlabs.net/cellframe + * Copyright (c) 2017-2019 + * All rights reserved. + + This file is part of CellFrame SDK the open source project + + CellFrame SDK is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + CellFrame SDK is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with any CellFrame SDK based project. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <sqlite3.h> +#include "dap_chain_global_db_driver.h" + +int dap_db_driver_sqlite_init(const char *a_filename_db, dap_db_driver_callbacks_t *a_drv_callback); +int dap_db_driver_sqlite_deinit(void); + +sqlite3* dap_db_driver_sqlite_open(const char *a_filename_utf8, int a_flags, char **error_message); +void dap_db_driver_sqlite_close(sqlite3 *l_db); +void dap_db_driver_sqlite_free(char *memory); +bool dap_db_driver_sqlite_set_pragma(sqlite3 *a_db, char *a_param, char *a_mode); +int dap_db_driver_sqlite_flush(void); + + +// ** SQLite callbacks ** + +// Start a transaction +int dap_db_driver_sqlite_start_transaction(void); +// End of transaction +int dap_db_driver_sqlite_end_transaction(void); + +// Apply data (write or delete) +int dap_db_driver_sqlite_apply_store_obj(dap_store_obj_t *a_store_obj); +// Read data +dap_store_obj_t* dap_db_driver_sqlite_read_last_store_obj(const char *a_group); +dap_store_obj_t* dap_db_driver_sqlite_read_cond_store_obj(const char *a_group, uint64_t a_id, size_t *a_count_out); +dap_store_obj_t* dap_db_driver_sqlite_read_store_obj(const char *a_group, const char *a_key, size_t *a_count_out); diff --git a/libdap-chain-global-db/dap_chain_global_db_hist.c b/libdap-chain-global-db/dap_chain_global_db_hist.c new file mode 100755 index 0000000000000000000000000000000000000000..1f63e6839dd29ff2b9e19e664d9a5a40d1a3b3c1 --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db_hist.c @@ -0,0 +1,1492 @@ +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <pthread.h> + +#include <dap_common.h> +#include <dap_strfuncs.h> +#include <dap_string.h> +#include <dap_hash.h> +#include "dap_chain_datum_tx_items.h" + +#include "dap_chain_global_db_hist.h" + +#include "uthash.h" +// for dap_db_history_filter() +typedef struct dap_tx_data{ + dap_chain_hash_fast_t tx_hash; + char tx_hash_str[70]; + char token_ticker[10]; + size_t obj_num; + size_t pos_num; + dap_chain_addr_t addr; + char reserv[3]; + UT_hash_handle hh; +} 
dap_tx_data_t; + +#define LOG_TAG "dap_chain_global_db_hist" + +static char* dap_db_history_pack_hist(dap_global_db_hist_t *a_rec) +{ + char *l_ret = dap_strdup_printf("%c%s%u%s%s%s%s", a_rec->type, GLOBAL_DB_HIST_REC_SEPARATOR, a_rec->keys_count, + GLOBAL_DB_HIST_REC_SEPARATOR, a_rec->group, GLOBAL_DB_HIST_REC_SEPARATOR, a_rec->keys); + return l_ret; +} + +static int dap_db_history_unpack_hist(char *l_str_in, dap_global_db_hist_t *a_rec_out) +{ + char **l_strv = dap_strsplit(l_str_in, GLOBAL_DB_HIST_REC_SEPARATOR, -1); + size_t l_count = dap_str_countv(l_strv); + if(l_count != 4) + return -1; + a_rec_out->type = l_strv[0][0]; + a_rec_out->keys_count = strtoul(l_strv[1], NULL, 10); + a_rec_out->group = dap_strdup(l_strv[2]); + a_rec_out->keys = dap_strdup(l_strv[3]); + dap_strfreev(l_strv); + return 1; +} + +static char* dap_db_new_history_timestamp() +{ + static pthread_mutex_t s_mutex = PTHREAD_MUTEX_INITIALIZER; + uint64_t l_suffix = 0; + time_t l_cur_time; + // get unique key + pthread_mutex_lock(&s_mutex); + static time_t s_last_time = 0; + static uint64_t s_suffix = 0; + time_t l_cur_time_tmp = time(NULL); + if(s_last_time == l_cur_time_tmp) + s_suffix++; + else { + s_suffix = 0; + s_last_time = l_cur_time_tmp; + } + // save tmp values + l_cur_time = l_cur_time_tmp; + l_suffix = s_suffix; + pthread_mutex_unlock(&s_mutex); + + char *l_str = dap_strdup_printf("%lld_%lld", (uint64_t) l_cur_time, l_suffix); + return l_str; +} + +/** + * Get data according the history log + * + * return dap_store_obj_pkt_t* + */ +uint8_t* dap_db_log_pack(dap_global_db_obj_t *a_obj, size_t *a_data_size_out) +{ + if(!a_obj) + return NULL; + dap_global_db_hist_t l_rec; + if(dap_db_history_unpack_hist((char*) a_obj->value, &l_rec) == -1) + return NULL; + time_t l_timestamp = strtoll(a_obj->key, NULL, 10); + + // parse global_db records in a history record + char **l_keys = dap_strsplit(l_rec.keys, GLOBAL_DB_HIST_KEY_SEPARATOR, -1); + size_t l_count = dap_str_countv(l_keys); + // read 
records from global_db + int i = 0; + dap_store_obj_t *l_store_obj = DAP_NEW_Z_SIZE(dap_store_obj_t, l_count * sizeof(dap_store_obj_t)); + while(l_keys[i]) { + dap_store_obj_t *l_obj = NULL; + // add record - read record + if(l_rec.type == 'a'){ + l_obj = (dap_store_obj_t*) dap_chain_global_db_obj_get(l_keys[i], l_rec.group); + // l_obj may be NULL, if this record has been deleted but it is present in history + if(l_obj) + l_obj->id = a_obj->id; + } + // delete record - save only key for record + else if(l_rec.type == 'd') { // //section=strdup("kelvin_nodes"); + l_obj = (dap_store_obj_t*) DAP_NEW_Z(dap_store_obj_t); + l_obj->id = a_obj->id; + l_obj->group = dap_strdup(l_rec.group); + l_obj->key = dap_strdup(l_keys[i]); + l_obj->timestamp = global_db_gr_del_get_timestamp(l_obj->group, l_obj->key); + } + if(l_obj == NULL) { + dap_store_obj_free(l_store_obj, l_count); + dap_strfreev(l_keys); + return NULL; + } + // save record type: 'a' or 'd' + l_obj->type = (uint8_t)l_rec.type; + + memcpy(l_store_obj + i, l_obj, sizeof(dap_store_obj_t)); + DAP_DELETE(l_obj); + i++; + } + // serialize data + dap_store_obj_pkt_t *l_data_out = dap_store_packet_multiple(l_store_obj, l_timestamp, l_count); + + dap_store_obj_free(l_store_obj, l_count); + dap_strfreev(l_keys); + + if(l_data_out && a_data_size_out) { + *a_data_size_out = sizeof(dap_store_obj_pkt_t) + l_data_out->data_size; + } + return (uint8_t*) l_data_out; + +} + + +// for dap_db_history_filter() +static dap_store_obj_t* get_prev_tx(dap_global_db_obj_t *a_objs, dap_tx_data_t *a_tx_data) +{ + if(!a_objs || !a_tx_data) + return NULL; + dap_global_db_obj_t *l_obj_cur = a_objs + a_tx_data->obj_num; + dap_global_db_hist_t l_rec; + if(dap_db_history_unpack_hist((char*) l_obj_cur->value, &l_rec) == -1) + return NULL; + char **l_keys = dap_strsplit(l_rec.keys, GLOBAL_DB_HIST_KEY_SEPARATOR, -1); + size_t l_count = dap_str_countv(l_keys); + if(a_tx_data->pos_num >= l_count) { + dap_strfreev(l_keys); + return NULL; + } + 
dap_store_obj_t *l_obj = + (dap_store_obj_t*) l_keys ? dap_chain_global_db_obj_get(l_keys[a_tx_data->pos_num], l_rec.group) : NULL; + dap_strfreev(l_keys); + return l_obj; +} + +/** + * Get data according the history log + * + * return history string + */ +#if 0 +char* dap_db_history_tx(dap_chain_hash_fast_t* a_tx_hash, const char *a_group_mempool) +{ + dap_string_t *l_str_out = dap_string_new(NULL); + // load history + size_t l_data_size_out = 0; + dap_global_db_obj_t *l_objs = dap_chain_global_db_gr_load(GROUP_LOCAL_HISTORY, &l_data_size_out); + size_t i, j; + bool l_tx_hash_found = false; + dap_tx_data_t *l_tx_data_hash = NULL; + for(i = 0; i < l_data_size_out; i++) { + dap_global_db_obj_t *l_obj_cur = l_objs + i; + + // parse global_db records in a history record + dap_global_db_hist_t l_rec; + if(dap_db_history_unpack_hist((char*) l_obj_cur->value, &l_rec) == -1) + continue; + // use only groups with datums + if(dap_strcmp(a_group_mempool, l_rec.group)) + continue; + + char **l_keys = dap_strsplit(l_rec.keys, GLOBAL_DB_HIST_KEY_SEPARATOR, -1); + size_t l_count = dap_str_countv(l_keys); + dap_store_obj_t *l_obj = NULL; + // all objs in one history records + for(j = 0; j < l_count; j++) { + + if(l_rec.type != 'a') + continue; + l_obj = (dap_store_obj_t*) dap_chain_global_db_obj_get(l_keys[j], l_rec.group); + if(!l_obj) + continue; + // datum + dap_chain_datum_t *l_datum = (dap_chain_datum_t*) l_obj->value; + if(!l_datum && l_datum->header.type_id != DAP_CHAIN_DATUM_TX) + continue; + + dap_tx_data_t *l_tx_data = NULL; + + // transaction + dap_chain_datum_tx_t *l_tx = (dap_chain_datum_tx_t*) l_datum->data; + + // find Token items - present in emit transaction + dap_list_t *l_list_tx_token = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_TOKEN, NULL); + + // find OUT items + dap_list_t *l_list_out_items = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_OUT, NULL); + dap_list_t *l_list_tmp = l_list_out_items; + while(l_list_tmp) { + const dap_chain_tx_out_t 
*l_tx_out = (const dap_chain_tx_out_t*) l_list_tmp->data; + // save OUT item l_tx_out + if(!l_tx_data) + { + // save tx hash + l_tx_data = DAP_NEW_Z(dap_tx_data_t); + dap_chain_hash_fast_t l_tx_hash; + dap_hash_fast(l_tx, dap_chain_datum_tx_get_size(l_tx), &l_tx_hash); + memcpy(&l_tx_data->tx_hash, &l_tx_hash, sizeof(dap_chain_hash_fast_t)); + memcpy(&l_tx_data->addr, &l_tx_out->addr, sizeof(dap_chain_addr_t)); + dap_chain_hash_fast_to_str(&l_tx_data->tx_hash, l_tx_data->tx_hash_str, + sizeof(l_tx_data->tx_hash_str)); + l_tx_data->obj_num = i; + l_tx_data->pos_num = j; + // save token name + if(l_list_tx_token) { + dap_chain_tx_token_t *tk = l_list_tx_token->data; + int d = sizeof(l_tx_data->token_ticker); + memcpy(l_tx_data->token_ticker, tk->header.ticker, sizeof(l_tx_data->token_ticker)); + } + // take token from prev out item + else { + + // find IN items + dap_list_t *l_list_in_items = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_IN, NULL); + dap_list_t *l_list_tmp_in = l_list_in_items; + // find token_ticker in prev OUT items + while(l_list_tmp_in) { + const dap_chain_tx_in_t *l_tx_in = + (const dap_chain_tx_in_t*) l_list_tmp_in->data; + dap_chain_hash_fast_t tx_prev_hash = l_tx_in->header.tx_prev_hash; + + //find prev OUT item + dap_tx_data_t *l_tx_data_prev = NULL; + HASH_FIND(hh, l_tx_data_hash, &tx_prev_hash, sizeof(dap_chain_hash_fast_t), + l_tx_data_prev); + if(l_tx_data_prev != NULL) { + // fill token in l_tx_data from prev transaction + if(l_tx_data) { + // get token from prev tx + memcpy(l_tx_data->token_ticker, l_tx_data_prev->token_ticker, + sizeof(l_tx_data->token_ticker)); + break; + } + l_list_tmp_in = dap_list_next(l_list_tmp_in); + } + } + if(l_list_in_items) + dap_list_free(l_list_in_items); + } + HASH_ADD(hh, l_tx_data_hash, tx_hash, sizeof(dap_chain_hash_fast_t), l_tx_data); + } + l_list_tmp = dap_list_next(l_list_tmp); + } + if(l_list_out_items) + dap_list_free(l_list_out_items); + + // calc hash + dap_chain_hash_fast_t l_tx_hash; + 
dap_hash_fast(l_tx, dap_chain_datum_tx_get_size(l_tx), &l_tx_hash); + // search tx with a_tx_hash + if(!dap_hash_fast_compare(a_tx_hash, &l_tx_hash)) + continue; + // found a_tx_hash now + + // transaction time + char *l_time_str = NULL; + if(l_tx->header.ts_created > 0) { + time_t rawtime = (time_t) l_tx->header.ts_created; + struct tm * timeinfo; + timeinfo = localtime(&rawtime); + if(timeinfo) { + dap_string_append_printf(l_str_out, " %s", asctime(timeinfo)); + } + } + + // find all OUT items in transaction + l_list_out_items = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_OUT, NULL); + l_list_tmp = l_list_out_items; + while(l_list_tmp) { + const dap_chain_tx_out_t *l_tx_out = (const dap_chain_tx_out_t*) l_list_tmp->data; + dap_tx_data_t *l_tx_data_prev = NULL; + + const char *l_token_str = NULL; + if(l_tx_data) + l_token_str = l_tx_data->token_ticker; + char *l_dst_to_str = + (l_tx_out) ? dap_chain_addr_to_str(&l_tx_out->addr) : + NULL; + dap_string_append_printf(l_str_out, " OUT item %lld %s to %s\n", + l_tx_out->header.value, + dap_strlen(l_token_str) > 0 ? l_token_str : "?", + l_dst_to_str ? l_dst_to_str : "?" 
+ ); + DAP_DELETE(l_dst_to_str); + l_list_tmp = dap_list_next(l_list_tmp); + } + // find all IN items in transaction + dap_list_t *l_list_in_items = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_IN, NULL); + l_list_tmp = l_list_in_items; + // find cur addr in prev OUT items + while(l_list_tmp) { + const dap_chain_tx_in_t *l_tx_in = (const dap_chain_tx_in_t*) l_list_tmp->data; + dap_chain_hash_fast_t tx_prev_hash = l_tx_in->header.tx_prev_hash; + char l_tx_hash_str[70]; + if(!dap_hash_fast_is_blank(&tx_prev_hash)) + dap_chain_hash_fast_to_str(&tx_prev_hash, l_tx_hash_str, sizeof(l_tx_hash_str)); + else + strcpy(l_tx_hash_str,"Null"); + dap_string_append_printf(l_str_out, " IN item \n prev tx_hash %s\n", l_tx_hash_str); + + //find prev OUT item + dap_tx_data_t *l_tx_data_prev = NULL; + HASH_FIND(hh, l_tx_data_hash, &tx_prev_hash, sizeof(dap_chain_hash_fast_t), l_tx_data_prev); + if(l_tx_data_prev != NULL) { + + dap_store_obj_t *l_obj_prev = get_prev_tx(l_objs, l_tx_data_prev); + dap_chain_datum_t *l_datum_prev = + l_obj_prev ? (dap_chain_datum_t*) l_obj_prev->value : NULL; + dap_chain_datum_tx_t *l_tx_prev = + l_datum_prev ? (dap_chain_datum_tx_t*) l_datum_prev->data : NULL; + + // find OUT items in prev datum + dap_list_t *l_list_out_prev_items = dap_chain_datum_tx_items_get(l_tx_prev, + TX_ITEM_TYPE_OUT, NULL); + // find OUT item for IN item; + dap_list_t *l_list_out_prev_item = dap_list_nth(l_list_out_prev_items, + l_tx_in->header.tx_out_prev_idx); + dap_chain_tx_out_t *l_tx_prev_out = + l_list_out_prev_item ? 
+ (dap_chain_tx_out_t*) l_list_out_prev_item->data : + NULL; + // print value from prev out item + dap_string_append_printf(l_str_out, " prev OUT item value=%lld", + l_tx_prev_out->header.value + ); + } + dap_string_append_printf(l_str_out, "\n"); + l_list_tmp = dap_list_next(l_list_tmp); + } + + if(l_list_tx_token) + dap_list_free(l_list_tx_token); + if(l_list_out_items) + dap_list_free(l_list_out_items); + if(l_list_in_items) + dap_list_free(l_list_in_items); + l_tx_hash_found = true; + break; + } + dap_list_t *l_records_out = NULL; + + DAP_DELETE(l_obj); + dap_strfreev(l_keys); + // transaction was found -> exit + if(l_tx_hash_found) + break; + } + dap_chain_global_db_objs_delete(l_objs, l_data_size_out); + // if no history + if(!l_str_out->len) + dap_string_append(l_str_out, "empty"); + char *l_ret_str = l_str_out ? dap_string_free(l_str_out, false) : NULL; + return l_ret_str; +} +#endif + +/** + * Get data according the history log + * + * return history string + */ +#if 0 +char* dap_db_history_addr(dap_chain_addr_t * a_addr, const char *a_group_mempool) +{ + dap_string_t *l_str_out = dap_string_new(NULL); + // load history + size_t l_data_size_out = 0; + dap_global_db_obj_t *l_objs = dap_chain_global_db_gr_load(GROUP_LOCAL_HISTORY, &l_data_size_out); + size_t i, j; + dap_tx_data_t *l_tx_data_hash = NULL; + for(i = 0; i < l_data_size_out; i++) { + dap_global_db_obj_t *l_obj_cur = l_objs + i; + // parse global_db records in a history record + dap_global_db_hist_t l_rec; + if(dap_db_history_unpack_hist((char*) l_obj_cur->value, &l_rec) == -1) + continue; + // use only groups with datums + if(dap_strcmp(a_group_mempool, l_rec.group)) + continue; + + char **l_keys = dap_strsplit(l_rec.keys, GLOBAL_DB_HIST_KEY_SEPARATOR, -1); + size_t l_count = dap_str_countv(l_keys); + dap_store_obj_t *l_obj = NULL; + // all objs in one history records + for(j = 0; j < l_count; j++) { + if(l_rec.type != 'a') + continue; + l_obj = (dap_store_obj_t*) 
dap_chain_global_db_obj_get(l_keys[j], l_rec.group); + if(!l_obj) + continue; + // datum + dap_chain_datum_t *l_datum = (dap_chain_datum_t*) l_obj->value; + if(!l_datum && l_datum->header.type_id != DAP_CHAIN_DATUM_TX) + continue; + + // transaction + dap_chain_datum_tx_t *l_tx = (dap_chain_datum_tx_t*) l_datum->data; + dap_list_t *l_records_out = NULL; + // transaction time + char *l_time_str = NULL; + { + if(l_tx->header.ts_created > 0) { + time_t rawtime = (time_t) l_tx->header.ts_created; + struct tm * timeinfo; + timeinfo = localtime(&rawtime); + if(timeinfo) + l_time_str = dap_strdup(asctime(timeinfo)); + } + else + l_time_str = dap_strdup(" "); + } + + // transaction + dap_tx_data_t *l_tx_data = NULL; + + // find Token items - present in emit transaction + dap_list_t *l_list_tx_token = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_TOKEN, NULL); + + // find OUT items + dap_list_t *l_list_out_items = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_OUT, NULL); + dap_list_t *l_list_tmp = l_list_out_items; + while(l_list_tmp) { + const dap_chain_tx_out_t *l_tx_out = (const dap_chain_tx_out_t*) l_list_tmp->data; + // save OUT item l_tx_out + { + // save tx hash + l_tx_data = DAP_NEW_Z(dap_tx_data_t); + dap_chain_hash_fast_t l_tx_hash; + dap_hash_fast(l_tx, dap_chain_datum_tx_get_size(l_tx), &l_tx_hash); + memcpy(&l_tx_data->tx_hash, &l_tx_hash, sizeof(dap_chain_hash_fast_t)); + memcpy(&l_tx_data->addr, &l_tx_out->addr, sizeof(dap_chain_addr_t)); + dap_chain_hash_fast_to_str(&l_tx_data->tx_hash, l_tx_data->tx_hash_str, + sizeof(l_tx_data->tx_hash_str)); + l_tx_data->obj_num = i; + l_tx_data->pos_num = j; + // save token name + if(l_tx_data && l_list_tx_token) { + dap_chain_tx_token_t *tk = l_list_tx_token->data; + int d = sizeof(l_tx_data->token_ticker); + memcpy(l_tx_data->token_ticker, tk->header.ticker, sizeof(l_tx_data->token_ticker)); + } + HASH_ADD(hh, l_tx_data_hash, tx_hash, sizeof(dap_chain_hash_fast_t), l_tx_data); + + // save OUT items to list + { + 
l_records_out = dap_list_append(l_records_out, (void*) l_tx_out); + } + } + l_list_tmp = dap_list_next(l_list_tmp); + } + + // find IN items + l_count = 0; + dap_list_t *l_list_in_items = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_IN, NULL); + l_list_tmp = l_list_in_items; + // find cur addr in prev OUT items + bool l_is_use_all_cur_out = false; + { + while(l_list_tmp) { + const dap_chain_tx_in_t *l_tx_in = (const dap_chain_tx_in_t*) l_list_tmp->data; + dap_chain_hash_fast_t tx_prev_hash = l_tx_in->header.tx_prev_hash; + + //find prev OUT item + dap_tx_data_t *l_tx_data_prev = NULL; + HASH_FIND(hh, l_tx_data_hash, &tx_prev_hash, sizeof(dap_chain_hash_fast_t), l_tx_data_prev); + if(l_tx_data_prev != NULL) { + // fill token in l_tx_data from prev transaction + if(l_tx_data) { + // get token from prev tx + memcpy(l_tx_data->token_ticker, l_tx_data_prev->token_ticker, + sizeof(l_tx_data->token_ticker)); + dap_store_obj_t *l_obj_prev = get_prev_tx(l_objs, l_tx_data_prev); + dap_chain_datum_t *l_datum_prev = + l_obj_prev ? (dap_chain_datum_t*) l_obj_prev->value : NULL; + dap_chain_datum_tx_t *l_tx_prev = + l_datum_prev ? (dap_chain_datum_tx_t*) l_datum_prev->data : NULL; + + // find OUT items in prev datum + dap_list_t *l_list_out_prev_items = dap_chain_datum_tx_items_get(l_tx_prev, + TX_ITEM_TYPE_OUT, NULL); + // find OUT item for IN item; + dap_list_t *l_list_out_prev_item = dap_list_nth(l_list_out_prev_items, + l_tx_in->header.tx_out_prev_idx); + dap_chain_tx_out_t *l_tx_prev_out = + l_list_out_prev_item ? 
+ (dap_chain_tx_out_t*) l_list_out_prev_item->data : + NULL; + if(l_tx_prev_out && !memcmp(&l_tx_prev_out->addr, a_addr, sizeof(dap_chain_addr_t))) + l_is_use_all_cur_out = true; + + } + } + + // find prev OUT items for IN items + l_list_tmp = l_list_in_items; + while(l_list_tmp) { + const dap_chain_tx_in_t *l_tx_in = (const dap_chain_tx_in_t*) l_list_tmp->data; + dap_chain_hash_fast_t tx_prev_hash = l_tx_in->header.tx_prev_hash; + // if first transaction - empty prev OUT item + if(dap_hash_fast_is_blank(&tx_prev_hash)) { + // add emit info to ret string + if(!memcmp(&l_tx_data->addr, a_addr, sizeof(dap_chain_addr_t))) + { + dap_list_t *l_records_tmp = l_records_out; + while(l_records_tmp) { + + const dap_chain_tx_out_t *l_tx_out = (const dap_chain_tx_out_t*) l_records_tmp->data; + dap_string_append_printf(l_str_out, "tx hash %s \n emit %lld %s\n", + l_tx_data->tx_hash_str, + l_tx_out->header.value, + l_tx_data->token_ticker); + l_records_tmp = dap_list_next(l_records_tmp); + } + } + dap_list_free(l_records_out); + } + // in other transactions except first one + else { + //find prev OUT item + dap_tx_data_t *l_tx_data_prev = NULL; + HASH_FIND(hh, l_tx_data_hash, &tx_prev_hash, sizeof(dap_chain_hash_fast_t), l_tx_data_prev); + if(l_tx_data_prev != NULL) { + char *l_src_str = NULL; + bool l_src_str_is_cur = false; + if(l_tx_data) { + // get token from prev tx + memcpy(l_tx_data->token_ticker, l_tx_data_prev->token_ticker, + sizeof(l_tx_data->token_ticker)); + + dap_store_obj_t *l_obj_prev = get_prev_tx(l_objs, l_tx_data_prev); + dap_chain_datum_t *l_datum_prev = + l_obj_prev ? (dap_chain_datum_t*) l_obj_prev->value : NULL; + dap_chain_datum_tx_t *l_tx_prev = + l_datum_prev ? 
(dap_chain_datum_tx_t*) l_datum_prev->data : NULL; + + // find OUT items in prev datum + dap_list_t *l_list_out_prev_items = dap_chain_datum_tx_items_get(l_tx_prev, + TX_ITEM_TYPE_OUT, NULL); + // find OUT item for IN item; + dap_list_t *l_list_out_prev_item = dap_list_nth(l_list_out_prev_items, + l_tx_in->header.tx_out_prev_idx); + dap_chain_tx_out_t *l_tx_prev_out = + l_list_out_prev_item ? + (dap_chain_tx_out_t*) l_list_out_prev_item->data : + NULL; + // if use src addr + bool l_is_use_src_addr = false; + // find source addrs + dap_string_t *l_src_addr = dap_string_new(NULL); + { + // find IN items in prev datum - for get destination addr + dap_list_t *l_list_in_prev_items = dap_chain_datum_tx_items_get(l_tx_prev, + TX_ITEM_TYPE_IN, NULL); + dap_list_t *l_list_tmp = l_list_in_prev_items; + while(l_list_tmp) { + dap_chain_tx_in_t *l_tx_prev_in = l_list_tmp->data; + dap_chain_hash_fast_t l_tx_prev_prev_hash = + l_tx_prev_in->header.tx_prev_hash; + //find prev OUT item + dap_tx_data_t *l_tx_data_prev_prev = NULL; + HASH_FIND(hh, l_tx_data_hash, &l_tx_prev_prev_hash, + sizeof(dap_chain_hash_fast_t), l_tx_data_prev_prev); + if(l_tx_data_prev_prev) { + // if use src addr + if(!memcmp(&l_tx_data_prev_prev->addr, a_addr, + sizeof(dap_chain_addr_t))) + l_is_use_src_addr = true; + char *l_str = dap_chain_addr_to_str(&l_tx_data_prev_prev->addr); + if(l_src_addr->len > 0) + dap_string_append_printf(l_src_addr, "\n %s", l_str); + else + dap_string_append_printf(l_src_addr, "%s", l_str); // first record + DAP_DELETE(l_str); + } + l_list_tmp = dap_list_next(l_list_tmp); + } + } + + char *l_dst_to_str = + (l_tx_prev_out) ? dap_chain_addr_to_str(&l_tx_prev_out->addr) : + NULL; + // if use dst addr + bool l_is_use_dst_addr = false; + if(!memcmp(&l_tx_prev_out->addr, a_addr, sizeof(dap_chain_addr_t))) + l_is_use_dst_addr = true; + + l_src_str_is_cur = l_is_use_src_addr; + if(l_src_addr->len <= 1) { + l_src_str = + (l_tx_data) ? 
dap_chain_addr_to_str(&l_tx_data->addr) : + NULL; + if(!memcmp(&l_tx_prev_out->addr, a_addr, sizeof(dap_chain_addr_t))) + l_src_str_is_cur = true; + dap_string_free(l_src_addr, true); + } + else + l_src_str = dap_string_free(l_src_addr, false); + if(l_is_use_src_addr && !l_is_use_dst_addr) { + dap_string_append_printf(l_str_out, + "tx hash %s \n %s in send %lld %s from %s\n to %s\n", + l_tx_data->tx_hash_str, + l_time_str ? l_time_str : "", + l_tx_prev_out->header.value, + l_tx_data->token_ticker, + l_src_str ? l_src_str : "", + l_dst_to_str); + } else if(l_is_use_dst_addr && !l_is_use_src_addr) { + if(!l_src_str_is_cur) + dap_string_append_printf(l_str_out, + "tx hash %s \n %s in recv %lld %s from %s\n", + l_tx_data->tx_hash_str, + l_time_str ? l_time_str : "", + l_tx_prev_out->header.value, + l_tx_data->token_ticker, + l_src_str ? l_src_str : ""); + } + + DAP_DELETE(l_dst_to_str); + dap_list_free(l_list_out_prev_items); + DAP_DELETE(l_obj_prev); + } + + // OUT items + dap_list_t *l_records_tmp = l_records_out; + while(l_records_tmp) { + + const dap_chain_tx_out_t *l_tx_out = (const dap_chain_tx_out_t*) l_records_tmp->data; + + if(l_is_use_all_cur_out + || !memcmp(&l_tx_out->addr, a_addr, sizeof(dap_chain_addr_t))) { + + char *l_addr_str = (l_tx_out) ? dap_chain_addr_to_str(&l_tx_out->addr) : NULL; + + if(!memcmp(&l_tx_out->addr, a_addr, sizeof(dap_chain_addr_t))) { + if(!l_src_str_is_cur) + dap_string_append_printf(l_str_out, "tx hash %s \n %s recv %lld %s from %s\n", + l_tx_data->tx_hash_str, + l_time_str ? l_time_str : "", + l_tx_out->header.value, + l_tx_data_prev->token_ticker, + l_src_str ? l_src_str : "?"); + } + else { + dap_string_append_printf(l_str_out, "tx hash %s \n %s send %lld %s to %sd\n", + l_tx_data->tx_hash_str, + l_time_str ? l_time_str : "", + l_tx_out->header.value, + l_tx_data_prev->token_ticker, + l_addr_str ? 
l_addr_str : ""); + } + DAP_DELETE(l_addr_str); + } + l_records_tmp = dap_list_next(l_records_tmp); + } + dap_list_free(l_records_out); + DAP_DELETE(l_src_str); + + } + } + l_list_tmp = dap_list_next(l_list_tmp); + } + l_list_tmp = dap_list_next(l_list_tmp); + } + } + + + + if(l_list_tx_token) + dap_list_free(l_list_tx_token); + if(l_list_out_items) + dap_list_free(l_list_out_items); + if(l_list_in_items) + dap_list_free(l_list_in_items); + + DAP_DELETE(l_time_str); + } + DAP_DELETE(l_obj); + dap_strfreev(l_keys); + + } + // delete hashes + dap_tx_data_t *l_iter_current, *l_item_tmp; + HASH_ITER(hh, l_tx_data_hash , l_iter_current, l_item_tmp) + { + // delete struct + DAP_DELETE(l_iter_current); + HASH_DEL(l_tx_data_hash, l_iter_current); + } + dap_chain_global_db_objs_delete(l_objs, l_data_size_out); + // if no history + if(!l_str_out->len) + dap_string_append(l_str_out, " empty"); + char *l_ret_str = l_str_out ? dap_string_free(l_str_out, false) : NULL; + return l_ret_str; +} +#endif + + +/** + * Get data according the history log + * + * return history string + */ +char* dap_db_history_filter(dap_chain_addr_t * a_addr, const char *a_group_mempool) +{ + dap_string_t *l_str_out = dap_string_new(NULL); + // load history + size_t l_data_size_out = 0; + dap_global_db_obj_t *l_objs = dap_chain_global_db_gr_load(GROUP_LOCAL_HISTORY, &l_data_size_out); + size_t i, j; + dap_tx_data_t *l_tx_data_hash = NULL; + for(i = 0; i < l_data_size_out; i++) { + dap_global_db_obj_t *l_obj_cur = l_objs + i; + + // parse global_db records in a history record + dap_global_db_hist_t l_rec; + if(dap_db_history_unpack_hist((char*) l_obj_cur->value, &l_rec) == -1) + continue; + // use only groups with datums + if(dap_strcmp(a_group_mempool, l_rec.group)) + continue; + + char **l_keys = dap_strsplit(l_rec.keys, GLOBAL_DB_HIST_KEY_SEPARATOR, -1); + size_t l_count = dap_str_countv(l_keys); + dap_store_obj_t *l_obj = NULL; + // all objs in one history records + for(j = 0; j < l_count; j++) { + 
// add record + if(l_rec.type == 'a') { + l_obj = (dap_store_obj_t*) dap_chain_global_db_obj_get(l_keys[j], l_rec.group); + if(!l_obj) + continue; + dap_chain_datum_t *l_datum = (dap_chain_datum_t*) l_obj->value; + if(!l_datum) + continue; + switch (l_datum->header.type_id) { + /* case DAP_CHAIN_DATUM_TOKEN_DECL: { + dap_chain_datum_token_t *l_token = (dap_chain_datum_token_t*) l_datum->data; + } + break; + case DAP_CHAIN_DATUM_TOKEN_EMISSION: { + dap_chain_datum_token_emission_t *l_token_emission = + (dap_chain_datum_token_emission_t*) l_datum->data; + } + break;*/ + // find transaction + case DAP_CHAIN_DATUM_TX: { + dap_chain_datum_tx_t *l_tx = (dap_chain_datum_tx_t*) l_datum->data; + dap_list_t *l_records_out = NULL; + + // transaction time + char *l_time_str = NULL; + if(l_tx->header.ts_created > 0) { + time_t rawtime = (time_t) l_tx->header.ts_created; + struct tm * timeinfo; + timeinfo = localtime(&rawtime); + if(timeinfo) + l_time_str = dap_strdup(asctime(timeinfo)); + } + else + l_time_str = dap_strdup(" "); + + int l_count = 0; + dap_tx_data_t *l_tx_data = NULL; + // find Token items - present in emit transaction + l_count = 0; + dap_list_t *l_list_tx_token = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_TOKEN, &l_count); + + // find OUT items + dap_list_t *l_list_out_items = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_OUT, &l_count); + dap_list_t *l_list_tmp = l_list_out_items; + while(l_list_tmp) { + dap_chain_tx_out_t *l_tx_out = (dap_chain_tx_out_t*) l_list_tmp->data; + // save OUT item l_tx_out + { + // save tx hash + l_tx_data = DAP_NEW_Z(dap_tx_data_t); + dap_chain_hash_fast_t l_tx_hash; + dap_hash_fast(l_tx, dap_chain_datum_tx_get_size(l_tx), &l_tx_hash); + memcpy(&l_tx_data->tx_hash, &l_tx_hash, sizeof(dap_chain_hash_fast_t)); + memcpy(&l_tx_data->addr, &l_tx_out->addr, sizeof(dap_chain_addr_t)); + l_tx_data->obj_num = i; + l_tx_data->pos_num = j; + // save token name + if(l_tx_data && l_list_tx_token) { + dap_chain_tx_token_t *tk = 
l_list_tx_token->data; +// int d = sizeof(l_tx_data->token_ticker); + memcpy(l_tx_data->token_ticker, tk->header.ticker, sizeof(l_tx_data->token_ticker)); + } + HASH_ADD(hh, l_tx_data_hash, tx_hash, sizeof(dap_chain_hash_fast_t), l_tx_data); + + // save OUT items to list + { + l_records_out = dap_list_append(l_records_out, (void*) l_tx_out); + } + } + l_list_tmp = dap_list_next(l_list_tmp); + } + + // find IN items + l_count = 0; + dap_list_t *l_list_in_items = dap_chain_datum_tx_items_get(l_tx, TX_ITEM_TYPE_IN, &l_count); + l_list_tmp = l_list_in_items; + + // find cur addr in prev OUT items + bool l_is_use_all_cur_out = false; + { + while(l_list_tmp) { + const dap_chain_tx_in_t *l_tx_in = (const dap_chain_tx_in_t*) l_list_tmp->data; + dap_chain_hash_fast_t tx_prev_hash = l_tx_in->header.tx_prev_hash; + + //find prev OUT item + dap_tx_data_t *l_tx_data_prev = NULL; + HASH_FIND(hh, l_tx_data_hash, &tx_prev_hash, sizeof(dap_chain_hash_fast_t), l_tx_data_prev); + if(l_tx_data_prev != NULL) { + // fill token in l_tx_data from prev transaction + if(l_tx_data) { + // get token from prev tx + memcpy(l_tx_data->token_ticker, l_tx_data_prev->token_ticker, + sizeof(l_tx_data->token_ticker)); + dap_store_obj_t *l_obj_prev = get_prev_tx(l_objs, l_tx_data_prev); + dap_chain_datum_t *l_datum_prev = + l_obj_prev ? (dap_chain_datum_t*) l_obj_prev->value : NULL; + dap_chain_datum_tx_t *l_tx_prev = + l_datum_prev ? (dap_chain_datum_tx_t*) l_datum_prev->data : NULL; + + // find OUT items in prev datum + dap_list_t *l_list_out_prev_items = dap_chain_datum_tx_items_get(l_tx_prev, + TX_ITEM_TYPE_OUT, &l_count); + // find OUT item for IN item; + dap_list_t *l_list_out_prev_item = dap_list_nth(l_list_out_prev_items, + l_tx_in->header.tx_out_prev_idx); + dap_chain_tx_out_t *l_tx_prev_out = + l_list_out_prev_item ? 
+ (dap_chain_tx_out_t*) l_list_out_prev_item->data : + NULL; + if(l_tx_prev_out && !memcmp(&l_tx_prev_out->addr, a_addr, sizeof(dap_chain_addr_t))) + l_is_use_all_cur_out = true; + + } + } + l_list_tmp = dap_list_next(l_list_tmp); + } + } + + // find prev OUT items for IN items + l_list_tmp = l_list_in_items; + while(l_list_tmp) { + const dap_chain_tx_in_t *l_tx_in = (const dap_chain_tx_in_t*) l_list_tmp->data; + dap_chain_hash_fast_t tx_prev_hash = l_tx_in->header.tx_prev_hash; + // if first transaction - empty prev OUT item + if(dap_hash_fast_is_blank(&tx_prev_hash)) { + // add emit info to ret string + if(!memcmp(&l_tx_data->addr, a_addr, sizeof(dap_chain_addr_t))) + { + dap_list_t *l_records_tmp = l_records_out; + while(l_records_tmp) { + const dap_chain_tx_out_t *l_tx_out = (const dap_chain_tx_out_t*) l_records_tmp->data; + dap_string_append_printf(l_str_out, "emit %lld %s\n", + l_tx_out->header.value, + l_tx_data->token_ticker); + l_records_tmp = dap_list_next(l_records_tmp); + } + } + dap_list_free(l_records_out); + } + // in other transactions except first one + else { + //find prev OUT item + dap_tx_data_t *l_tx_data_prev = NULL; + HASH_FIND(hh, l_tx_data_hash, &tx_prev_hash, sizeof(dap_chain_hash_fast_t), l_tx_data_prev); + if(l_tx_data_prev != NULL) { + char *l_src_str = NULL; + bool l_src_str_is_cur = false; + if(l_tx_data) { + // get token from prev tx + memcpy(l_tx_data->token_ticker, l_tx_data_prev->token_ticker, + sizeof(l_tx_data->token_ticker)); + + dap_store_obj_t *l_obj_prev = get_prev_tx(l_objs, l_tx_data_prev); + dap_chain_datum_t *l_datum_prev = + l_obj_prev ? (dap_chain_datum_t*) l_obj_prev->value : NULL; + dap_chain_datum_tx_t *l_tx_prev = + l_datum_prev ? 
(dap_chain_datum_tx_t*) l_datum_prev->data : NULL; + + // find OUT items in prev datum + dap_list_t *l_list_out_prev_items = dap_chain_datum_tx_items_get(l_tx_prev, + TX_ITEM_TYPE_OUT, &l_count); + // find OUT item for IN item; + dap_list_t *l_list_out_prev_item = dap_list_nth(l_list_out_prev_items, + l_tx_in->header.tx_out_prev_idx); + dap_chain_tx_out_t *l_tx_prev_out = + l_list_out_prev_item ? + (dap_chain_tx_out_t*) l_list_out_prev_item->data : + NULL; + // if use src addr + bool l_is_use_src_addr = false; + // find source addrs + dap_string_t *l_src_addr = dap_string_new(NULL); + { + // find IN items in prev datum - for get destination addr + dap_list_t *l_list_in_prev_items = dap_chain_datum_tx_items_get(l_tx_prev, + TX_ITEM_TYPE_IN, &l_count); + dap_list_t *l_list_tmp = l_list_in_prev_items; + while(l_list_tmp) { + dap_chain_tx_in_t *l_tx_prev_in = l_list_tmp->data; + dap_chain_hash_fast_t l_tx_prev_prev_hash = + l_tx_prev_in->header.tx_prev_hash; + //find prev OUT item + dap_tx_data_t *l_tx_data_prev_prev = NULL; + HASH_FIND(hh, l_tx_data_hash, &l_tx_prev_prev_hash, + sizeof(dap_chain_hash_fast_t), l_tx_data_prev_prev); + if(l_tx_data_prev_prev) { + // if use src addr + if(!memcmp(&l_tx_data_prev_prev->addr, a_addr, + sizeof(dap_chain_addr_t))) + l_is_use_src_addr = true; + char *l_str = dap_chain_addr_to_str(&l_tx_data_prev_prev->addr); + if(l_src_addr->len > 0) + dap_string_append_printf(l_src_addr, "\n %s", l_str); + else + dap_string_append_printf(l_src_addr, "%s", l_str); // first record + DAP_DELETE(l_str); + } + l_list_tmp = dap_list_next(l_list_tmp); + } + } + + char *l_dst_to_str = + (l_tx_prev_out) ? dap_chain_addr_to_str(&l_tx_prev_out->addr) : + NULL; + // if use dst addr + bool l_is_use_dst_addr = false; + if(!memcmp(&l_tx_prev_out->addr, a_addr, sizeof(dap_chain_addr_t))) + l_is_use_dst_addr = true; + + l_src_str_is_cur = l_is_use_src_addr; + if(l_src_addr->len <= 1) { + l_src_str = + (l_tx_data) ? 
dap_chain_addr_to_str(&l_tx_data->addr) : + NULL; + if(!memcmp(&l_tx_prev_out->addr, a_addr, sizeof(dap_chain_addr_t))) + l_src_str_is_cur = true; + dap_string_free(l_src_addr, true); + } + else + l_src_str = dap_string_free(l_src_addr, false); + if(l_is_use_src_addr && !l_is_use_dst_addr) { + dap_string_append_printf(l_str_out, + "%s in send %lld %s from %s\n to %s\n", + l_time_str ? l_time_str : "", + l_tx_prev_out->header.value, + l_tx_data->token_ticker, + l_src_str ? l_src_str : "", + l_dst_to_str); + } else if(l_is_use_dst_addr && !l_is_use_src_addr) { + if(!l_src_str_is_cur) + dap_string_append_printf(l_str_out, + "%s in recv %lld %s from %s\n", + l_time_str ? l_time_str : "", + l_tx_prev_out->header.value, + l_tx_data->token_ticker, + l_src_str ? l_src_str : ""); + } + + DAP_DELETE(l_dst_to_str); + dap_list_free(l_list_out_prev_items); + DAP_DELETE(l_obj_prev); + } + + // OUT items + dap_list_t *l_records_tmp = l_records_out; + while(l_records_tmp) { + + const dap_chain_tx_out_t *l_tx_out = (const dap_chain_tx_out_t*) l_records_tmp->data; + + if(l_is_use_all_cur_out + || !memcmp(&l_tx_out->addr, a_addr, sizeof(dap_chain_addr_t))) { + + char *l_addr_str = (l_tx_out) ? dap_chain_addr_to_str(&l_tx_out->addr) : NULL; + + if(!memcmp(&l_tx_out->addr, a_addr, sizeof(dap_chain_addr_t))) { + if(!l_src_str_is_cur) + dap_string_append_printf(l_str_out, "%s recv %lld %s from %s\n", + l_time_str ? l_time_str : "", + l_tx_out->header.value, + l_tx_data_prev->token_ticker, + l_src_str ? l_src_str : "?"); + } + else { + dap_string_append_printf(l_str_out, "%s send %lld %s to %sd\n", + l_time_str ? l_time_str : "", + l_tx_out->header.value, + l_tx_data_prev->token_ticker, + l_addr_str ? 
l_addr_str : ""); + } + DAP_DELETE(l_addr_str); + } + l_records_tmp = dap_list_next(l_records_tmp); + } + dap_list_free(l_records_out); + DAP_DELETE(l_src_str); + + } + } + l_list_tmp = dap_list_next(l_list_tmp); + } + if(l_list_tx_token) + dap_list_free(l_list_tx_token); + if(l_list_out_items) + dap_list_free(l_list_out_items); + if(l_list_in_items) + dap_list_free(l_list_in_items); + + DAP_DELETE(l_time_str); + } + break; + default: + continue; + } + } + // delete record + else if(l_rec.type == 'd') { + //printf("del_gr%d_%d=%s\n", i, j, l_rec.group); + } + } + DAP_DELETE(l_obj); + dap_strfreev(l_keys); + } + // delete hashes + dap_tx_data_t *l_iter_current, *l_item_tmp; + HASH_ITER(hh, l_tx_data_hash , l_iter_current, l_item_tmp) + { + // delete struct + DAP_DELETE(l_iter_current); + HASH_DEL(l_tx_data_hash, l_iter_current); + } + dap_chain_global_db_objs_delete(l_objs, l_data_size_out); + // if no history + if(!l_str_out->len) + dap_string_append(l_str_out, "empty"); + char *l_ret_str = l_str_out ? 
dap_string_free(l_str_out, false) : NULL;
+    return l_ret_str;
+}
+
+/**
+ * Add a record to the history log.
+ *
+ * Packs the keys of the given store objects into one history record and writes it
+ * into a_group under a freshly generated timestamp key.
+ *
+ * @param a_type record type, stored as-is in the history record ('a' add or 'd' delete)
+ * @param a_store_obj array of objects whose keys are logged; the group of the first
+ *        element is recorded for the whole batch
+ * @param a_dap_store_count number of elements in a_store_obj
+ * @param a_group history group the record is written to
+ * @return true if the driver accepted the record, false on bad arguments or any failure
+ */
+bool dap_db_history_add(char a_type, pdap_store_obj_t a_store_obj, size_t a_dap_store_count, const char *a_group)
+{
+    if(!a_store_obj || !a_dap_store_count)
+        return false;
+    dap_global_db_hist_t l_rec;
+    l_rec.keys_count = a_dap_store_count;
+    l_rec.type = a_type;
+    // group name should be always the same
+    l_rec.group = a_store_obj->group;
+    // joined key string owned by this function; stays NULL while the caller's key is borrowed
+    char *l_keys_joined = NULL;
+    if(a_dap_store_count == 1)
+        l_rec.keys = a_store_obj->key;
+    else {
+        // make keys vector (NULL-terminated, hence the extra slot)
+        char **l_keys = DAP_NEW_Z_SIZE(char*, sizeof(char*) * (a_dap_store_count + 1));
+        // Separate write index: skipped objects must not leave NULL holes in the vector,
+        // because the join stops at the first NULL and would drop every key after the hole.
+        size_t l_kept = 0;
+        for(size_t i = 0; i < a_dap_store_count; i++) {
+            // if it is marked, the data has not been saved
+            if(a_store_obj[i].timestamp == (time_t) -1)
+                continue;
+            l_keys[l_kept++] = a_store_obj[i].key;
+        }
+        l_keys[l_kept] = NULL;
+        // NOTE(review): keys_count still reports the number of objects passed in, not l_kept;
+        // confirm what the unpack side expects before changing it.
+        l_keys_joined = dap_strjoinv(GLOBAL_DB_HIST_KEY_SEPARATOR, l_keys);
+        l_rec.keys = l_keys_joined;
+        DAP_DELETE(l_keys);
+    }
+
+    char *l_str = dap_db_history_pack_hist(&l_rec);
+    if(!l_str) {
+        // nothing to store; do not call strlen() on NULL
+        DAP_DELETE(l_keys_joined);
+        return false;
+    }
+    dap_store_obj_t l_store_data;
+    // zero the fields that are not assigned below so the driver never sees stack garbage
+    memset(&l_store_data, 0, sizeof(l_store_data));
+    // key - timestamp
+    // value - keys of added/deleted data
+    // NOTE(review): the key string is never released here; confirm whether
+    // dap_db_new_history_timestamp() returns heap memory that must be freed after the add.
+    l_store_data.key = dap_db_new_history_timestamp();
+    l_store_data.value = (uint8_t*) l_str;
+    l_store_data.value_len = strlen(l_str) + 1; // stored together with the terminating NUL
+    l_store_data.group = a_group;//GROUP_LOCAL_HISTORY;
+    l_store_data.timestamp = time(NULL);
+    int l_res = dap_chain_global_db_driver_add(&l_store_data, 1);
+    DAP_DELETE(l_keys_joined);
+    DAP_DELETE(l_str);
+    // the driver reports success with 0
+    return l_res == 0;
+}
+
+/**
+ * Truncate the history log
+ *
+ * Not implemented yet: does nothing and always reports success.
+ */
+bool dap_db_history_truncate(void)
+{
+    // TODO
+    return true;
+}
+
+/**
+ * Get last id in log
+ *
+ * @param a_history_group_name history group to look into
+ * @return id of the last object of the group, or 0 when no object was returned
+ */
+uint64_t dap_db_log_get_group_history_last_id(const char *a_history_group_name)
+{
+    uint64_t l_last_id = 0;
+    dap_store_obj_t *l_last_obj = dap_chain_global_db_get_last(a_history_group_name);
+    if(l_last_obj) {
+        l_last_id = l_last_obj->id;
+        // release the fetched object instead of leaking it on every call
+        dap_store_obj_free(l_last_obj, 1);
+    }
+    return l_last_id;
+} + +/** + * Get last id in log + */ +uint64_t dap_db_log_get_last_id(void) +{ + return dap_db_log_get_group_history_last_id(GROUP_LOCAL_HISTORY); +} + +/*static int compare_items(const void * l_a, const void * l_b) +{ + const dap_global_db_obj_t *l_item_a = (const dap_global_db_obj_t*) l_a; + const dap_global_db_obj_t *l_item_b = (const dap_global_db_obj_t*) l_b; + int l_ret = strcmp(l_item_a->key, l_item_b->key); + return l_ret; +}*/ + +/** + * Get log diff as list + */ +dap_list_t* dap_db_log_get_list(uint64_t first_id) +{ + dap_list_t *l_list = NULL; + size_t l_data_size_out = 0; + //log_it(L_DEBUG,"loading db list..."); + dap_store_obj_t *l_objs = dap_chain_global_db_cond_load(GROUP_LOCAL_HISTORY, first_id, &l_data_size_out); + //dap_global_db_obj_t *l_objs = dap_chain_global_db_gr_load(GROUP_LOCAL_HISTORY, first_timestamp, &l_data_size_out); + for(size_t i = 0; i < l_data_size_out; i++) { + dap_store_obj_t *l_obj_cur = l_objs + i; + dap_global_db_obj_t *l_item = DAP_NEW(dap_global_db_obj_t); + l_item->id = l_obj_cur->id; + l_item->key = dap_strdup(l_obj_cur->key); + l_item->value = (uint8_t*) dap_strdup((char*) l_obj_cur->value); + l_list = dap_list_append(l_list, l_item); + } + //log_it(L_DEBUG,"loaded db list n=%d", l_data_size_out); + dap_store_obj_free(l_objs, l_data_size_out); + + return l_list; + /* + size_t l_list_count = 0; + char *l_first_key_str = dap_strdup_printf("%lld", (int64_t) first_timestamp); + size_t l_data_size_out = 0; + + for(size_t i = 0; i < l_data_size_out; i++) { + dap_global_db_obj_t *l_obj_cur = l_objs[i]; + // log_it(L_DEBUG,"%lld and %lld tr",strtoll(l_obj_cur->key,NULL,10), first_timestamp ); + if( strtoll(l_obj_cur->key,NULL,10) > (long long) first_timestamp ) { + dap_global_db_obj_t *l_item = DAP_NEW(dap_global_db_obj_t); + l_item->key = dap_strdup(l_obj_cur->key); + l_item->value =(uint8_t*) dap_strdup((char*) l_obj_cur->value); + l_list = dap_list_append(l_list, l_item); + l_list_count++; + } + } + // sort list by key (time 
str) + //dap_list_sort(l_list, (dap_callback_compare_t) compare_items); + log_it(L_DEBUG,"Prepared %u items (list size %u)", l_list_count, dap_list_length(l_list)); + DAP_DELETE(l_first_key_str); + dap_chain_global_db_objs_delete(l_objs); + */ + /*/ dbg - sort result + l_data_size_out = dap_list_length(l_list); + for(size_t i = 0; i < l_data_size_out; i++) { + dap_list_t *l_list_tmp = dap_list_nth(l_list, i); + dap_global_db_obj_t *l_item = l_list_tmp->data; + printf("2 %d %s\n", i, l_item->key); + }*/ + +} + +/** + * Free list getting from dap_db_log_get_list() + */ +void dap_db_log_del_list(dap_list_t *a_list) +{ + dap_list_free_full(a_list, (dap_callback_destroyed_t) dap_chain_global_db_obj_delete); +} + + + + +/** + * Thread for reading log list + * instead dap_db_log_get_list() + */ +static void *s_list_thread_proc(void *arg) +{ + dap_db_log_list_t *l_dap_db_log_list = (dap_db_log_list_t*) arg; + size_t l_items_number = 0; + while(1) { + bool is_process; + char *l_group_cur_name = NULL; + // check for break process + pthread_mutex_lock(&l_dap_db_log_list->list_mutex); + is_process = l_dap_db_log_list->is_process; + size_t l_item_start = l_dap_db_log_list->item_start; + size_t l_item_last = l_dap_db_log_list->item_last; + if(l_dap_db_log_list->group_cur == -1) + l_group_cur_name = GROUP_LOCAL_HISTORY; + else + l_group_cur_name = l_dap_db_log_list->group_names[l_dap_db_log_list->group_cur]; + pthread_mutex_unlock(&l_dap_db_log_list->list_mutex); + if(!is_process) + break; + // calculating how many items required to read + int64_t l_item_count = min(10, (int64_t)l_item_last - (int64_t)l_item_start + 1); + dap_store_obj_t *l_objs = NULL; + // read next 1...10 items + if(l_item_count > 0) + l_objs = dap_chain_global_db_cond_load(l_group_cur_name, l_item_start, &l_item_count); + // go to next group + if(!l_objs) { + pthread_mutex_lock(&l_dap_db_log_list->list_mutex); + while(l_dap_db_log_list->group_cur < l_dap_db_log_list->group_number) { + 
l_dap_db_log_list->group_cur++; + // check for empty group + if(l_dap_db_log_list->group_number_items[l_dap_db_log_list->group_cur] < 1) { + continue; + } + break; + } + // end of all groups + if(l_dap_db_log_list->group_cur >= l_dap_db_log_list->group_number) { + pthread_mutex_unlock(&l_dap_db_log_list->list_mutex); + break; + } + l_dap_db_log_list->item_start = 0; + l_dap_db_log_list->item_last = l_dap_db_log_list->group_last_id[l_dap_db_log_list->group_cur]; + l_item_start = l_dap_db_log_list->item_start; + l_item_last = l_dap_db_log_list->item_last; + if(l_dap_db_log_list->group_cur == -1) + l_group_cur_name = GROUP_LOCAL_HISTORY; + else + l_group_cur_name = l_dap_db_log_list->group_names[l_dap_db_log_list->group_cur]; + pthread_mutex_unlock(&l_dap_db_log_list->list_mutex); + + //l_item_count = min(10, (int64_t)l_item_last - (int64_t)l_item_start + 1); + //if(l_item_count<=0) + // continue; + // read next 1...10 items + //l_objs = dap_chain_global_db_cond_load(l_group_cur_name, l_item_start, &l_item_count); + continue; + } + //if(!l_objs) + //continue; + dap_list_t *l_list = NULL; + for(size_t i = 0; i < l_item_count; i++) { + dap_store_obj_t *l_obj_cur = l_objs + i; + dap_global_db_obj_t *l_item = DAP_NEW(dap_global_db_obj_t); + l_item->id = l_obj_cur->id; + l_item->key = dap_strdup(l_obj_cur->key); + l_item->value = (uint8_t*) dap_strdup((char*) l_obj_cur->value); + l_list = dap_list_append(l_list, l_item); + } + pthread_mutex_lock(&l_dap_db_log_list->list_mutex); + // add l_list to list_write + l_dap_db_log_list->list_write = dap_list_concat(l_dap_db_log_list->list_write, l_list); + // init read list if it ended already + if(!l_dap_db_log_list->list_read) + l_dap_db_log_list->list_read = l_list; + // set new start pos = lastitem pos + 1 + if(l_item_count > 0) + l_dap_db_log_list->item_start = l_objs[l_item_count - 1].id + 1; + //else + // l_dap_db_log_list->item_start += l_data_size_out; + pthread_mutex_unlock(&l_dap_db_log_list->list_mutex); + 
l_items_number += l_item_count;
+        //log_it(L_DEBUG, "loaded items n=%u/%u", l_data_size_out, l_items_number);
+        dap_store_obj_free(l_objs, l_item_count);
+    }
+
+    pthread_mutex_lock(&l_dap_db_log_list->list_mutex);
+    l_dap_db_log_list->is_process = false;
+    pthread_mutex_unlock(&l_dap_db_log_list->list_mutex);
+    pthread_exit(0);
+    return 0;
+}
+
+/**
+ * Start loading the history log in the background (instead dap_db_log_get_list())
+ *
+ * Counts the items of GROUP_LOCAL_HISTORY starting from first_id and of the history group
+ * of every group matched by the given masks, then starts s_list_thread_proc() to read them.
+ *
+ * @param first_id id of the first item of GROUP_LOCAL_HISTORY to read
+ * @param a_add_groups_mask list of group masks for additional groups; on success the list is
+ *        stored in the returned object (add_groups), on a NULL return it is left untouched
+ * @return new log list object, or NULL when there is nothing to load
+ */
+dap_db_log_list_t* dap_db_log_list_start(uint64_t first_id, dap_list_t *a_add_groups_mask)
+{
+
+    //log_it(L_DEBUG, "Start loading db list_write...");
+    dap_db_log_list_t *l_dap_db_log_list = DAP_NEW_Z(dap_db_log_list_t);
+
+    size_t l_add_groups_num = 0;// number of group
+    //size_t l_add_groups_items_num = 0;// number items in all groups
+    dap_list_t *l_add_groups_mask = a_add_groups_mask;
+    // calc l_add_groups_num
+    while(l_add_groups_mask) {
+        // do not count the del groups
+        dap_list_t *l_groups = dap_chain_global_db_driver_get_groups_by_mask(l_add_groups_mask->data);
+        l_add_groups_num += dap_list_length(l_groups);
+        dap_list_free_full(l_groups, (dap_callback_destroyed_t) free);
+        l_add_groups_mask = dap_list_next(l_add_groups_mask);
+    }
+
+    //size_t l_add_groups_num = dap_list_length(a_add_groups_mask);
+    size_t l_data_size_out_main = dap_chain_global_db_driver_count(GROUP_LOCAL_HISTORY, first_id);
+    size_t *l_data_size_out_add_items = DAP_NEW_Z_SIZE(size_t, sizeof(size_t) * l_add_groups_num);
+    uint64_t *l_group_last_id = DAP_NEW_Z_SIZE(uint64_t, sizeof(uint64_t) * l_add_groups_num);
+    char **l_group_names = DAP_NEW_Z_SIZE(char*, sizeof(char*) * l_add_groups_num);
+    size_t l_data_size_out_add = 0;
+    // Running index over ALL matched groups. It must persist across masks: restarting it
+    // for each mask would overwrite (and leak) the entries written for the previous masks.
+    size_t l_group_cur = 0;
+    l_add_groups_mask = a_add_groups_mask;
+    while(l_add_groups_mask){
+        dap_list_t *l_groups0 = dap_chain_global_db_driver_get_groups_by_mask(l_add_groups_mask->data);
+        dap_list_t *l_groups = l_groups0;
+        // the bound keeps the arrays safe if this pass returns more groups than were counted above
+        while(l_groups && l_group_cur < l_add_groups_num){
+            const char *l_group_name = (const char *) l_groups->data;
+            l_group_names[l_group_cur] = dap_strdup(dap_chain_global_db_get_history_group_by_group_name(l_group_name));
+            dap_store_obj_t *l_obj = dap_chain_global_db_driver_read_last(l_group_names[l_group_cur]);
+            if(l_obj) {
+                l_group_last_id[l_group_cur] = l_obj->id;
+                dap_store_obj_free(l_obj, 1);
+            }
+            l_data_size_out_add_items[l_group_cur] = dap_chain_global_db_driver_count(l_group_names[l_group_cur], 1);
+            l_data_size_out_add += l_data_size_out_add_items[l_group_cur];
+            l_group_cur++;
+            l_groups = dap_list_next(l_groups);
+        }
+        dap_list_free_full(l_groups0, (dap_callback_destroyed_t) free);
+        l_add_groups_mask = dap_list_next(l_add_groups_mask);
+    }
+    if(!(l_data_size_out_main + l_data_size_out_add)) {
+        // nothing to load: release everything allocated above instead of leaking it
+        for(size_t i = 0; i < l_group_cur; i++)
+            DAP_DELETE(l_group_names[i]);
+        DAP_DELETE(l_group_names);
+        DAP_DELETE(l_group_last_id);
+        DAP_DELETE(l_data_size_out_add_items);
+        DAP_DELETE(l_dap_db_log_list);
+        return NULL;
+    }
+    // debug
+//    if(l_data_size_out>11)
+//        l_data_size_out = 11;
+    l_dap_db_log_list->item_start = first_id;
+    l_dap_db_log_list->item_last = first_id + l_data_size_out_main;
+    l_dap_db_log_list->items_number_main = l_data_size_out_main;
+    l_dap_db_log_list->items_number_add = l_data_size_out_add;
+    l_dap_db_log_list->items_number = l_data_size_out_main + l_data_size_out_add;
+    l_dap_db_log_list->items_rest = l_dap_db_log_list->items_number;
+    l_dap_db_log_list->group_number = (int64_t)l_add_groups_num;
+    l_dap_db_log_list->group_number_items = l_data_size_out_add_items;
+    l_dap_db_log_list->group_last_id = l_group_last_id;
+    l_dap_db_log_list->group_names = l_group_names;
+    // -1 selects the main group (GROUP_LOCAL_HISTORY) in the reader thread
+    l_dap_db_log_list->group_cur = -1;
+    l_dap_db_log_list->add_groups = a_add_groups_mask;
+    // there are too few items, read items right now
+    // NOTE(review): this branch is disabled by if(0) and no longer matches the variable types
+    if(0) {//l_data_size_out <= 10) {
+        dap_list_t *l_list = NULL;
+        // read first items
+        dap_store_obj_t *l_objs = dap_chain_global_db_cond_load(GROUP_LOCAL_HISTORY, first_id, &l_data_size_out_add_items);
+        for(size_t i = 0; i < l_data_size_out_add_items; i++) {
+            dap_store_obj_t *l_obj_cur = l_objs + i;
+            dap_global_db_obj_t *l_item = DAP_NEW(dap_global_db_obj_t);
+            l_item->id = l_obj_cur->id;
+            l_item->key = dap_strdup(l_obj_cur->key);
+            l_item->value = (uint8_t*)
dap_strdup((char*) l_obj_cur->value); + l_list = dap_list_append(l_list, l_item); + } + l_dap_db_log_list->list_write = l_list; + l_dap_db_log_list->list_read = l_list; + //log_it(L_DEBUG, "loaded items n=%d", l_data_size_out); + dap_store_obj_free(l_objs, l_data_size_out_add_items); + } + // start thread for items loading + else { + l_dap_db_log_list->is_process = true; + pthread_mutex_init(&l_dap_db_log_list->list_mutex, NULL); + pthread_create(&l_dap_db_log_list->thread, NULL, s_list_thread_proc, l_dap_db_log_list); + } + return l_dap_db_log_list; +} + +/** + * Get number of items + */ +size_t dap_db_log_list_get_count(dap_db_log_list_t *a_db_log_list) +{ + if(!a_db_log_list) + return 0; + size_t l_items_number; + pthread_mutex_lock(&a_db_log_list->list_mutex); + l_items_number = a_db_log_list->items_number; + pthread_mutex_unlock(&a_db_log_list->list_mutex); + return l_items_number; +} + +size_t dap_db_log_list_get_count_rest(dap_db_log_list_t *a_db_log_list) +{ + if(!a_db_log_list) + return 0; + size_t l_items_rest; + pthread_mutex_lock(&a_db_log_list->list_mutex); + l_items_rest = a_db_log_list->items_rest; + pthread_mutex_unlock(&a_db_log_list->list_mutex); + return l_items_rest; +} +/** + * Get one item from log_list + */ +dap_global_db_obj_t* dap_db_log_list_get(dap_db_log_list_t *a_db_log_list) +{ + if(!a_db_log_list) + return NULL; + dap_list_t *l_list; + bool l_is_process; + int l_count = 0; + while(1) { + pthread_mutex_lock(&a_db_log_list->list_mutex); + l_is_process = a_db_log_list->is_process; + // check next item + l_list = a_db_log_list->list_read; + if (l_list){ + a_db_log_list->list_read = dap_list_next(a_db_log_list->list_read); + a_db_log_list->items_rest--; + } + pthread_mutex_unlock(&a_db_log_list->list_mutex); + // wait reading next item, no more 1 sec (50 ms * 100 times) + if(!l_list && l_is_process) { + dap_usleep(DAP_USEC_PER_SEC / 200); + l_count++; + if(l_count > 100) + break; + } + else + break; + } + //log_it(L_DEBUG, "get item n=%d", 
a_db_log_list->items_number - a_db_log_list->items_rest); + return (dap_global_db_obj_t*) l_list ? l_list->data : NULL; + //return l_list; +} + +/** + * Get log diff as list_write + */ +void dap_db_log_list_delete(dap_db_log_list_t *a_db_log_list) +{ + if(!a_db_log_list) + return; + // stop thread if it has created + if(a_db_log_list->thread) { + pthread_mutex_lock(&a_db_log_list->list_mutex); + a_db_log_list->is_process = false; + pthread_mutex_unlock(&a_db_log_list->list_mutex); + pthread_join(a_db_log_list->thread, NULL); + } + for(size_t i = 0; i < a_db_log_list->group_number; i++) + DAP_DELETE(a_db_log_list->group_names[i]); + DAP_DELETE(a_db_log_list->group_names); + DAP_DELETE(a_db_log_list->group_last_id); + DAP_DELETE(a_db_log_list->group_number_items); + dap_list_free(a_db_log_list->add_groups); + dap_list_free_full(a_db_log_list->list_write, (dap_callback_destroyed_t) dap_chain_global_db_obj_delete); + pthread_mutex_destroy(&a_db_log_list->list_mutex); + DAP_DELETE(a_db_log_list); +} diff --git a/libdap-chain-global-db/dap_chain_global_db_hist.h b/libdap-chain-global-db/dap_chain_global_db_hist.h new file mode 100755 index 0000000000000000000000000000000000000000..9738cde0f84215fd61bf8e22f7cabb8d8dfdc3f4 --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db_hist.h @@ -0,0 +1,51 @@ +#pragma once + +#include <stdbool.h> +#include <dap_list.h> +#include "dap_chain_global_db.h" +#include "dap_chain_global_db_driver.h" + +#define GLOBAL_DB_HIST_REC_SEPARATOR "\r;" +#define GLOBAL_DB_HIST_KEY_SEPARATOR "\a;" + +typedef struct dap_global_db_hist { + char type;// 'a' add or 'd' delete + const char *group; + size_t keys_count; + char *keys; +} dap_global_db_hist_t; + +//Add data to the history log +bool dap_db_history_add(char a_type, pdap_store_obj_t a_store_obj, size_t a_dap_store_count, const char *a_group); + +// Truncate the history log +bool dap_db_history_truncate(void); + + +// for dap_db_log_list_xxx() +typedef struct dap_db_log_list { + 
dap_list_t *list_write; // writed list + dap_list_t *list_read; // readed list (inside list_write) + bool is_process; + size_t item_start; // first item to read from db + size_t item_last; // last item to read from db + size_t items_rest; // rest items to read from list_read + size_t items_number_main; + size_t items_number_add; + size_t items_number; // remaining items in list_write after reading from db + char **group_names; + int64_t group_number; // number of group + int64_t group_cur; // current group number, -1 for the main group, 0 ... group_count for the additional group + size_t *group_number_items; // number of items for each group + int64_t *group_last_id; + dap_list_t *add_groups; // additional group for sync + pthread_t thread; + pthread_mutex_t list_mutex; +} dap_db_log_list_t; + +dap_db_log_list_t* dap_db_log_list_start(uint64_t first_id, dap_list_t *a_add_groups); +size_t dap_db_log_list_get_count(dap_db_log_list_t *a_db_log_list); +size_t dap_db_log_list_get_count_rest(dap_db_log_list_t *a_db_log_list); +dap_global_db_obj_t* dap_db_log_list_get(dap_db_log_list_t *a_db_log_list); +void dap_db_log_list_delete(dap_db_log_list_t *a_db_log_list); + diff --git a/libdap-chain-global-db/dap_chain_global_db_remote.c b/libdap-chain-global-db/dap_chain_global_db_remote.c new file mode 100755 index 0000000000000000000000000000000000000000..d8beff46eefb32c5141ece1b239d9dbb0bf94b6c --- /dev/null +++ b/libdap-chain-global-db/dap_chain_global_db_remote.c @@ -0,0 +1,133 @@ +#include <string.h> +#include <stdlib.h> +#include <time.h> + +#include <dap_common.h> +#include <dap_strfuncs.h> +#include <dap_string.h> +//#include "dap_chain_node.h" +#include "dap_chain_global_db.h" +#include "dap_chain_global_db_remote.h" + +#define LOG_TAG "dap_chain_global_db_remote" + +// default time of node address expired in hours +#define NODE_TIME_EXPIRED_DEFAULT 720 + +static bool dap_db_set_cur_node_addr_common(uint64_t a_address, char *a_net_name, time_t a_expire_time) +{ + 
if(!a_net_name) + return false; + char *l_key = dap_strdup_printf("cur_node_addr_%s", a_net_name); + uint64_t * l_address = DAP_NEW_Z(uint64_t); + *l_address = a_address; + bool l_ret = dap_chain_global_db_gr_set(l_key, (uint8_t*) l_address, sizeof(a_address), GROUP_LOCAL_GENERAL); + //DAP_DELETE(l_key); + if(l_ret) { + time_t *l_cur_time = DAP_NEW_Z(time_t); + *l_cur_time= a_expire_time; + char *l_key_time = dap_strdup_printf("cur_node_addr_%s_time", a_net_name); + l_ret = dap_chain_global_db_gr_set( dap_strdup(l_key_time), (uint8_t*) l_cur_time, sizeof(time_t), GROUP_LOCAL_GENERAL); + DAP_DELETE(l_key_time); + } + return l_ret; +} + +/** + * Set addr for current node and no expire time + */ +bool dap_db_set_cur_node_addr(uint64_t a_address, char *a_net_name ) +{ + return dap_db_set_cur_node_addr_common(a_address,a_net_name,0); +} + +/** + * Set addr for current node and expire time + */ +bool dap_db_set_cur_node_addr_exp(uint64_t a_address, char *a_net_name ) +{ + time_t l_cur_time = time(NULL); + return dap_db_set_cur_node_addr_common(a_address,a_net_name,l_cur_time); +} + + + +/** + * Get addr for current node + */ +uint64_t dap_db_get_cur_node_addr(char *a_net_name) +{ + size_t l_node_addr_len = 0, l_node_time_len = 0; + if(!a_net_name) + return 0; + char *l_key = dap_strdup_printf("cur_node_addr_%s", a_net_name); + char *l_key_time = dap_strdup_printf("cur_node_addr_%s_time", a_net_name); + uint8_t *l_node_addr_data = dap_chain_global_db_gr_get(l_key, &l_node_addr_len, GROUP_LOCAL_GENERAL); + uint8_t *l_node_time_data = dap_chain_global_db_gr_get(l_key_time, &l_node_time_len, GROUP_LOCAL_GENERAL); + uint64_t l_node_addr_ret = 0; + time_t l_node_time = 0; + if(l_node_addr_data && l_node_addr_len == sizeof(uint64_t)) + memcpy(&l_node_addr_ret, l_node_addr_data, l_node_addr_len); + if(l_node_time_data && l_node_time_len == sizeof(time_t)) + memcpy(&l_node_time, l_node_time_data, l_node_time_len); + // time delta in seconds + static int64_t addr_time_expired = 
-1; + // read time-expired + if(addr_time_expired == -1) { + dap_string_t *l_cfg_path = dap_string_new("network/"); + dap_string_append(l_cfg_path, a_net_name); + dap_config_t *l_cfg; + if((l_cfg = dap_config_open(l_cfg_path->str)) == NULL) { + log_it(L_ERROR, "Can't open default network config"); + addr_time_expired = 0; + } else { + addr_time_expired = 3600 * + dap_config_get_item_int64_default(l_cfg, "general", "node-addr-expired", + NODE_TIME_EXPIRED_DEFAULT); + } + dap_string_free(l_cfg_path, true); + } + time_t l_dt = time(NULL) - l_node_time; + //NODE_TIME_EXPIRED + if(l_node_time && l_dt > addr_time_expired) { + //log_it(L_NOTICE, "Node 0x%016X set last synced timestamp %llu", a_id); + l_node_addr_ret = 0; + } + DAP_DELETE(l_key); + DAP_DELETE(l_key_time); + DAP_DELETE(l_node_addr_data); + DAP_DELETE(l_node_time_data); + return l_node_addr_ret; +} + +/** + * Set last id for remote node + */ +bool dap_db_log_set_last_id_remote(uint64_t a_node_addr, uint64_t a_id) +{ + dap_global_db_obj_t l_objs; + l_objs.key = dap_strdup_printf("%ju", a_node_addr); + l_objs.value = (uint8_t*) &a_id; + l_objs.value_len = sizeof(uint64_t); + bool l_ret = dap_chain_global_db_gr_save(&l_objs, 1, GROUP_LOCAL_NODE_LAST_ID); + DAP_DELETE(l_objs.key); + //log_it( L_DEBUG, "Node 0x%016X set last synced timestamp %llu",a_id); + return l_ret; +} + +/** + * Get last id for remote node + */ +uint64_t dap_db_log_get_last_id_remote(uint64_t a_node_addr) +{ + char *l_node_addr_str = dap_strdup_printf("%ju", a_node_addr); + size_t l_timestamp_len = 0; + uint8_t *l_timestamp = dap_chain_global_db_gr_get((const char*) l_node_addr_str, &l_timestamp_len, + GROUP_LOCAL_NODE_LAST_ID); + uint64_t l_ret_timestamp = 0; + if(l_timestamp && l_timestamp_len == sizeof(uint64_t)) + memcpy(&l_ret_timestamp, l_timestamp, l_timestamp_len); + DAP_DELETE(l_node_addr_str); + DAP_DELETE(l_timestamp); + return l_ret_timestamp; +} diff --git a/libdap-chain-global-db/dap_chain_global_db_remote.h 
b/libdap-chain-global-db/dap_chain_global_db_remote.h
new file mode 100755
index 0000000000000000000000000000000000000000..e14480244632655888716d36a694c8f6f2995825
--- /dev/null
+++ b/libdap-chain-global-db/dap_chain_global_db_remote.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <stdbool.h>
+#include <time.h>
+
+#include "dap_chain_common.h"
+// Set addr for current node: store a_address for network a_net_name without an expiry timestamp.
+bool dap_db_set_cur_node_addr(uint64_t a_address, char *a_net_name);
+bool dap_db_set_cur_node_addr_exp(uint64_t a_address, char *a_net_name ); // same, but stamps the entry with the current time so it can expire
+uint64_t dap_db_get_cur_node_addr(char *a_net_name); // stored address for a_net_name, or 0 when absent or expired
+
+// Set last id for remote node: remember a_id as the last id seen for a_node_addr.
+bool dap_db_log_set_last_id_remote(uint64_t a_node_addr, uint64_t a_id);
+// Get last id for remote node: value saved by dap_db_log_set_last_id_remote(), or 0 when none is stored.
+uint64_t dap_db_log_get_last_id_remote(uint64_t a_node_addr);
diff --git a/libdap-chain-global-db/libdap-cuttdb/CMakeLists.txt b/libdap-chain-global-db/libdap-cuttdb/CMakeLists.txt
new file mode 100755
index 0000000000000000000000000000000000000000..50812739634e5c846bdb91a8679c809e8b6365b0
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/CMakeLists.txt
@@ -0,0 +1,36 @@
+cmake_minimum_required(VERSION 3.1)
+
+project(dap_cuttdb C)
+
+add_definitions ("-D_GNU_SOURCE")
+set(CMAKE_C_FLAGS " -Wall -Wextra -fPIC")
+
+file(GLOB cuttdb_src src/*.c)
+file(GLOB cuttdb_h src/*.h)
+
+# The server part isn't ported and therefore isn't built; neither are the tests.
+list(FILTER cuttdb_src EXCLUDE REGEX "ae_") # drops the ae_epoll / ae_kqueue / ae_select event-loop backends
+list(FILTER cuttdb_h EXCLUDE REGEX "ae_")
+list(FILTER cuttdb_src EXCLUDE REGEX "server.") # "." is a regex wildcard here, not a literal dot
+list(FILTER cuttdb_h EXCLUDE REGEX "server.")
+list(FILTER cuttdb_src EXCLUDE REGEX "dump.")
+list(FILTER cuttdb_h EXCLUDE REGEX "dump.")
+list(FILTER cuttdb_src EXCLUDE REGEX "builddb.") # cdb_builddb.c defines its own main()
+list(FILTER cuttdb_h EXCLUDE REGEX "builddb.")
+list(FILTER cuttdb_src EXCLUDE REGEX "test_mt.")
+list(FILTER cuttdb_h EXCLUDE REGEX "test_mt.")
+
+if(UNIX)
+    list(FILTER cuttdb_src EXCLUDE REGEX "mman.") # mman.* sources are left out on UNIX builds
+    list(FILTER cuttdb_h EXCLUDE REGEX "mman.")
+endif()
+
+add_library(${PROJECT_NAME} STATIC ${cuttdb_src} ${cuttdb_h})
+set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE C)
+set_target_properties(${PROJECT_NAME} PROPERTIES COMPILER_LANGUAGE C) # NOTE(review): COMPILER_LANGUAGE does not look like a documented target property - confirm
+
+set_target_properties(dap_cuttdb PROPERTIES LINKER_LANGUAGE C) # same target as ${PROJECT_NAME}; repeats the setting above
+
+target_link_libraries(${PROJECT_NAME}) # no libraries are listed
+
+target_include_directories(${PROJECT_NAME} INTERFACE src)
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/ae_epoll.c b/libdap-chain-global-db/libdap-cuttdb/src/ae_epoll.c
new file mode 100644
index 0000000000000000000000000000000000000000..ff8591d86c3d7962c1b01e6ad51b09144ba74f04
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/ae_epoll.c
@@ -0,0 +1,109 @@
+/* Linux epoll(2) based ae.c module
+ * Copyright (C) 2009-2010 Salvatore Sanfilippo - antirez@gmail.com
+ * Released under the BSD license. See the COPYING file for more info. 
*/ + +#include <sys/epoll.h> +#include <errno.h> + +typedef struct aeApiState { + int epfd; + struct epoll_event events[AE_SETSIZE]; +} aeApiState; + +static int aeApiCreate(EventLoop *eventLoop) { + aeApiState *state = malloc(sizeof(aeApiState)); + + if (!state) return -1; + state->epfd = epoll_create(1024); /* 1024 is just an hint for the kernel */ + if (state->epfd == -1) return -1; + eventLoop->apidata = state; + return 0; +} + +/* + be not referenced anywhere +static void aeApiFree(EventLoop *eventLoop) { + aeApiState *state = eventLoop->apidata; + + close(state->epfd); + free(state); +} +*/ + +static int aeApiAddEvent(EventLoop *eventLoop, int fd, int mask) { + aeApiState *state = eventLoop->apidata; + struct epoll_event ee; + ee.events = EPOLLONESHOT; + if (mask & AE_READABLE) ee.events |= EPOLLIN; + if (mask & AE_WRITABLE) ee.events |= EPOLLOUT; + ee.data.u64 = 0; /* avoid valgrind warning */ + ee.data.fd = fd; + if (epoll_ctl(state->epfd, EPOLL_CTL_ADD,fd,&ee) == -1 && errno != EEXIST) { + fprintf(stderr, "epoll_ctl(%d,%d) failed: %d\n", EPOLL_CTL_ADD,fd,errno); + return -1; + } + return 0; +} + +static int aeApiUpdateEvent(EventLoop *eventLoop, int fd, int mask) { + aeApiState *state = eventLoop->apidata; + struct epoll_event ee; + ee.events = EPOLLONESHOT; + if (mask & AE_READABLE) ee.events |= EPOLLIN; + if (mask & AE_WRITABLE) ee.events |= EPOLLOUT; + ee.data.u64 = 0; /* avoid valgrind warning */ + ee.data.fd = fd; + if (epoll_ctl(state->epfd, EPOLL_CTL_MOD,fd,&ee) == -1) { + fprintf(stderr, "epoll_ctl(%d,%d) failed: %d\n", EPOLL_CTL_ADD,fd,errno); + return -1; + } + return 0; +} + +static int aeApiDelEvent(EventLoop *eventLoop, int fd) { + aeApiState *state = eventLoop->apidata; + struct epoll_event ee; + + ee.events = 0; + ee.data.u64 = 0; /* avoid valgrind warning */ + ee.data.fd = fd; + /* Note, Kernel < 2.6.9 requires a non null event pointer even for + * EPOLL_CTL_DEL. 
*/ + if ( epoll_ctl(state->epfd,EPOLL_CTL_DEL,fd,&ee) == -1 + && errno != ENOENT && errno != EBADF) { + fprintf(stderr, "epoll_ctl(%d,%d) failed: %d\n", EPOLL_CTL_DEL,fd,errno); + return -1; + } + return 0; +} + +int aeApiPoll(EventLoop *eventLoop, struct timeval *tvp) { + aeApiState *state = eventLoop->apidata; + int retval, numevents = 0; + + retval = epoll_wait(state->epfd,state->events,AE_SETSIZE, + tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1); + if (retval > 0) { + int j; + + numevents = retval; + for (j = 0; j < numevents; j++) { + int mask = 0; + struct epoll_event *e = state->events+j; + + if (e->events & EPOLLIN) mask |= AE_READABLE; + if (e->events & EPOLLOUT) mask |= AE_WRITABLE; + eventLoop->fired[j] = e->data.fd; + } + } + return numevents; +} + + +/* + be not referenced anywhere +static char *aeApiName(void) { + return "epoll"; +} +*/ + diff --git a/libdap-chain-global-db/libdap-cuttdb/src/ae_kqueue.c b/libdap-chain-global-db/libdap-cuttdb/src/ae_kqueue.c new file mode 100644 index 0000000000000000000000000000000000000000..cd80a57be2d19d485f2a2ce3485b42494ba43640 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/ae_kqueue.c @@ -0,0 +1,91 @@ +/* Kqueue(2)-based ae.c module + * Copyright (C) 2009 Harish Mallipeddi - harish.mallipeddi@gmail.com + * Released under the BSD license. See the COPYING file for more info. 
*/ + +#include <sys/types.h> +#include <sys/event.h> +#include <sys/time.h> + +typedef struct aeApiState { + int kqfd; + struct kevent events[AE_SETSIZE]; +} aeApiState; + +static int aeApiCreate(EventLoop *eventLoop) { + aeApiState *state = malloc(sizeof(aeApiState)); + + if (!state) return -1; + state->kqfd = kqueue(); + if (state->kqfd == -1) return -1; + eventLoop->apidata = state; + + return 0; +} + +static void aeApiFree(EventLoop *eventLoop) { + aeApiState *state = eventLoop->apidata; + + close(state->kqfd); + free(state); +} + +static int aeApiAddEvent(EventLoop *eventLoop, int fd, int mask) { + aeApiState *state = eventLoop->apidata; + struct kevent ke; + + if (mask & AE_READABLE) { + EV_SET(&ke, fd, EVFILT_READ, EV_ADD, 0, 0, NULL); + if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1; + } + if (mask & AE_WRITABLE) { + EV_SET(&ke, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL); + if (kevent(state->kqfd, &ke, 1, NULL, 0, NULL) == -1) return -1; + } + return 0; +} + +static int aeApiUpdateEvent(EventLoop *eventLoop, int fd, int mask) { + return aeApiAddEvent(eventLoop, fd, mask); +} + +static int aeApiDelEvent(EventLoop *eventLoop, int fd) { + aeApiState *state = eventLoop->apidata; + struct kevent ke; + + EV_SET(&ke, fd, EVFILT_READ | EVFILT_WRITE, EV_DELETE, 0, 0, NULL); + kevent(state->kqfd, &ke, 1, NULL, 0, NULL); + return 0; +} + +static int aeApiPoll(EventLoop *eventLoop, struct timeval *tvp) { + aeApiState *state = eventLoop->apidata; + int retval, numevents = 0; + + if (tvp != NULL) { + struct timespec timeout; + timeout.tv_sec = tvp->tv_sec; + timeout.tv_nsec = tvp->tv_usec * 1000; + retval = kevent(state->kqfd, NULL, 0, state->events, AE_SETSIZE, &timeout); + } else { + retval = kevent(state->kqfd, NULL, 0, state->events, AE_SETSIZE, NULL); + } + + if (retval > 0) { + int j; + + numevents = retval; + for(j = 0; j < numevents; j++) { + int mask = 0; + struct kevent *e = state->events+j; + + if (e->filter == EVFILT_READ) mask |= AE_READABLE; + 
if (e->filter == EVFILT_WRITE) mask |= AE_WRITABLE; + eventLoop->fired[j] = e->ident; + } + } + return numevents; +} + +static char *aeApiName(void) { + return "kqueue"; +} diff --git a/libdap-chain-global-db/libdap-cuttdb/src/ae_select.c b/libdap-chain-global-db/libdap-cuttdb/src/ae_select.c new file mode 100644 index 0000000000000000000000000000000000000000..1e5d3ae91aa886a4b086ff07c28b9e10045ea292 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/ae_select.c @@ -0,0 +1,72 @@ +/* Select()-based ae.c module + * Copyright (C) 2009-2010 Salvatore Sanfilippo - antirez@gmail.com + * Released under the BSD license. See the COPYING file for more info. */ + +#include <string.h> + +typedef struct aeApiState { + fd_set rfds, wfds; + /* We need to have a copy of the fd sets as it's not safe to reuse + * FD sets after select(). */ + fd_set _rfds, _wfds; +} aeApiState; + +static int aeApiCreate(EventLoop *eventLoop) { + aeApiState *state = malloc(sizeof(aeApiState)); + + if (!state) return -1; + FD_ZERO(&state->rfds); + FD_ZERO(&state->wfds); + eventLoop->apidata = state; + return 0; +} + +static void aeApiFree(EventLoop *eventLoop) { + free(eventLoop->apidata); +} + +static int aeApiAddEvent(EventLoop *eventLoop, int fd, int mask) { + aeApiState *state = eventLoop->apidata; + + if (mask & AE_READABLE) FD_SET(fd,&state->rfds); + if (mask & AE_WRITABLE) FD_SET(fd,&state->wfds); + return 0; +} + +static void aeApiDelEvent(EventLoop *eventLoop, int fd, int mask) { + aeApiState *state = eventLoop->apidata; + + if (mask & AE_READABLE) FD_CLR(fd,&state->rfds); + if (mask & AE_WRITABLE) FD_CLR(fd,&state->wfds); +} + +static int aeApiPoll(EventLoop *eventLoop, struct timeval *tvp) { + aeApiState *state = eventLoop->apidata; + int retval, j, numevents = 0; + + memcpy(&state->_rfds,&state->rfds,sizeof(fd_set)); + memcpy(&state->_wfds,&state->wfds,sizeof(fd_set)); + + retval = select(eventLoop->maxfd+1, + &state->_rfds,&state->_wfds,NULL,tvp); + if (retval > 0) { + for (j = 
0; j <= eventLoop->maxfd; j++) { + int mask = 0; + aeFileEvent *fe = &eventLoop->events[j]; + + if (fe->mask == AE_NONE) continue; + if (fe->mask & AE_READABLE && FD_ISSET(j,&state->_rfds)) + mask |= AE_READABLE; + if (fe->mask & AE_WRITABLE && FD_ISSET(j,&state->_wfds)) + mask |= AE_WRITABLE; + eventLoop->fired[numevents].fd = j; + eventLoop->fired[numevents].mask = mask; + numevents++; + } + } + return numevents; +} + +static char *aeApiName(void) { + return "select"; +} diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_bgtask.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_bgtask.c new file mode 100644 index 0000000000000000000000000000000000000000..da998c2b0a0f164350186c74e1bed80f95f5d633 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_bgtask.c @@ -0,0 +1,128 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. 
+ * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#include "cdb_bgtask.h" +#include <stdlib.h> +#ifndef _WIN32 +#include <sys/signal.h> +#else +#include <signal.h> +#endif + + +/* where thread begins */ +static void *_cdb_bgtask_func(void *arg); + + +CDBBGTASK *cdb_bgtask_new() +{ + CDBBGTASK *bt = (CDBBGTASK *)malloc(sizeof(CDBBGTASK)); + + bt->tnum = 0; + bt->run = 0; + bt->tid = 0; + pthread_cond_init(&bt->scond, NULL); + pthread_mutex_init(&bt->smutex, NULL); + return bt; +} + + +/* add a task into task list, must called before the thread run */ +int cdb_bgtask_add(CDBBGTASK *bt, TASKFUNC func, void *arg, int intval) +{ + TASK *task = &bt->tasks[bt->tnum]; + + if (bt->tid || bt->tnum > MAXTASKNUM) + return -1; + + task->arg = arg; + task->func = func; + task->intval = intval; + task->ltime = time(NULL); + bt->tnum++; + return 0; +} + + +static void *_cdb_bgtask_func(void *arg) +{ + CDBBGTASK *bt = (CDBBGTASK *)arg; +#ifndef _WIN32 + /* block all signals coming into current thread */ + sigset_t smask; + sigfillset(&smask); + pthread_sigmask(SIG_BLOCK, &smask, NULL); +#endif + /* loop */ + while(bt->run) { + time_t now = time(NULL); + struct timespec timeout; + + /* check should run some tasks every 1 second */ + timeout.tv_sec = now + 1; + timeout.tv_nsec = 0; + + /* iterate and run the tasks */ + for(int i = 0; i < bt->tnum; i++) { + TASK *task = &bt->tasks[i]; + if (now >= task->ltime + task->intval) { + task->func(task->arg); + task->ltime = now; + } + } + pthread_cond_timedwait(&bt->scond, &bt->smutex, &timeout); + } + + return NULL; +} + + +/* create a thread for tasks */ +void cdb_bgtask_start(CDBBGTASK *bt) +{ + if (bt->run) + return; + + bt->run = 1; + pthread_create(&bt->tid, NULL, _cdb_bgtask_func, bt); + return; +} + + +/* wait for the task thread exits */ +void cdb_bgtask_stop(CDBBGTASK *bt) +{ + if (bt->run) { + void **ret = NULL; + bt->run = 0; + pthread_cond_signal(&bt->scond); + 
pthread_join(bt->tid, ret); + } + + bt->tnum = 0; +} + + +void cdb_bgtask_destroy(CDBBGTASK *bt) +{ + cdb_bgtask_stop(bt); + pthread_cond_destroy(&bt->scond); + pthread_mutex_destroy(&bt->smutex); + free(bt); +} + + + + diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_bgtask.h b/libdap-chain-global-db/libdap-cuttdb/src/cdb_bgtask.h new file mode 100644 index 0000000000000000000000000000000000000000..6dee1b992d21bac8a496e7f8f89431c570b9358a --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_bgtask.h @@ -0,0 +1,62 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. + * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#ifndef _CDB_BGTASK_H_ +#define _CDB_BGTASK_H_ +#include <time.h> +#include <pthread.h> + + +/* 16 tasks at most in a task thread */ +#define MAXTASKNUM 16 + +typedef void (*TASKFUNC)(void *); + +/* struct for timer task */ +typedef struct { + /* task function */ + TASKFUNC func; + /* task argument */ + void *arg; + /* task run interval(seconds) */ + int intval; + /* time of last run */ + time_t ltime; +} TASK; + +/* struct for a background task manager */ +typedef struct CDBBGTASK +{ + TASK tasks[MAXTASKNUM]; + /* number of tasks */ + int tnum; + /* is running? 
*/ + int run; + pthread_t tid; + /* for wait the thread exit */ + pthread_mutex_t smutex; + pthread_cond_t scond; +} CDBBGTASK; + + + +CDBBGTASK *cdb_bgtask_new(); +int cdb_bgtask_add(CDBBGTASK *task, TASKFUNC func, void *arg, int intval); +void cdb_bgtask_start(CDBBGTASK *bt); +void cdb_bgtask_stop(CDBBGTASK *task); +void cdb_bgtask_destroy(CDBBGTASK *task); + + +#endif diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_bloomfilter.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_bloomfilter.c new file mode 100644 index 0000000000000000000000000000000000000000..ebf5e2d3e8b508a20985b76bd6a1e974a89c9daf --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_bloomfilter.c @@ -0,0 +1,158 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. + * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#include "cdb_bloomfilter.h" +#include <stdlib.h> +#include <string.h> + +#define CDBBFHASHNUM 16 +#define CDBBFSPLITPOW 6 + +static uint64_t BFSEEDS[CDBBFHASHNUM] = {217636919,290182597,386910137,515880193, + 687840301,917120411,1222827239,1610612741, + 3300450239,3300450259,3300450281,3300450289, + 3221225473ul,4294967291ul,163227661,122420729,}; + +struct CDBBLOOMFILTER +{ + uint8_t *bitmap[1<<CDBBFSPLITPOW]; + uint64_t rnum; + uint64_t size; + int hnum; + int ratio; +}; + + +CDBBLOOMFILTER *cdb_bf_new(uint64_t rnum, uint64_t size) +{ + CDBBLOOMFILTER *bf = (CDBBLOOMFILTER *)malloc(sizeof(CDBBLOOMFILTER)); + bf->rnum = 0; + bf->size = size; + /* number of hash should be 0.7 * ratio */ + bf->hnum = size * 8 * 7 / (rnum * 10); + /* number of hash is limit in [1, 16] */ + if (bf->hnum > CDBBFHASHNUM) + bf->hnum = CDBBFHASHNUM; + if (bf->hnum == 0) + bf->hnum = 1; + /* avoid malloc too much memory once */ + for(int i = 0; i < (1 << CDBBFSPLITPOW); i++) { + 
bf->bitmap[i] = (uint8_t*)malloc(size >> CDBBFSPLITPOW); + memset(bf->bitmap[i], 0, size >> CDBBFSPLITPOW); + } + return bf; +} + + +void cdb_bf_set(CDBBLOOMFILTER *bf, void *key, int ksize) +{ + uint8_t *src = (uint8_t *)key, *end = src + ksize; + uint64_t hval[CDBBFHASHNUM] = {0}; + + for(;src < end; src++) + for(int i = 0; i < bf->hnum; i++) + hval[i] = hval[i] * BFSEEDS[i] + *src; + + for(int i = 0; i < bf->hnum; i++) { + uint64_t p = (hval[i] >> CDBBFSPLITPOW) % ((bf->size >> CDBBFSPLITPOW) << 3); + uint8_t *bitmap = bf->bitmap[hval[i] & ((1<<CDBBFSPLITPOW) - 1)]; + bitmap[p >> 3] |= (1 << (p & 0x07)); + } + + bf->rnum++; +} + + +bool cdb_bf_exist(CDBBLOOMFILTER *bf, void *key, int ksize) +{ + uint8_t *src = (uint8_t *)key, *end = src + ksize; + uint64_t hval[CDBBFHASHNUM] = {0}; + int exist = 0; + + for(;src < end; src++) + for(int i = 0; i < bf->hnum; i++) + hval[i] = hval[i] * BFSEEDS[i] + *src; + + for(int i = 0; i < bf->hnum; i++) { + uint64_t p = (hval[i] >> CDBBFSPLITPOW) % ((bf->size >> CDBBFSPLITPOW) << 3); + uint8_t *bitmap = bf->bitmap[hval[i] & ((1<<CDBBFSPLITPOW) - 1)]; + if (bitmap[p >> 3] & (1 << (p & 0x07))) + exist++; + else + break; + } + + return (exist == bf->hnum); +} + +void cdb_bf_clean(CDBBLOOMFILTER *bf) +{ + for(int i = 0; i < (1 << CDBBFSPLITPOW); i++) + memset(bf->bitmap[i], 0, bf->size >> CDBBFSPLITPOW); + + bf->rnum = 0; +} + + +void cdb_bf_destroy(CDBBLOOMFILTER *bf) +{ + for(int i = 0; i < (1 << CDBBFSPLITPOW); i++) + free(bf->bitmap[i]); + free(bf); +} + + +#ifdef _UT_CDBBF_ +#include <stdio.h> +#include <stdlib.h> +#include "cdb_bloomfilter.h" + +int main(int argc, char *argv[]) +{ + int size = 1048576; + int rnum = 1048576; + if (argc > 1) + rnum = atoi(argv[1]); + if (argc > 2) + size = atoi(argv[2]); + + CDBBLOOMFILTER *bf = cdb_bf_new(rnum, size); + for(int i = 0; i < rnum; i++) { + int j = 2 * i; + cdb_bf_set(bf, &j, 4); + } + + int exist = 0; + for(int i = 0; i < rnum; i++) { + int j = 2 * i; + if (cdb_bf_exist(bf, &j, 
4)) + exist++; + } + printf("right positive: %.2f%%%%\n", (float)exist/(float)rnum*10000); + + exist = 0; + for(int i = 0; i < rnum * 2; i++) { + int j = 2 * i + 1; + if (cdb_bf_exist(bf, &j, 4)) + exist++; + } + + printf("false positive: %.2f%%%% %d/%d\n", (float)exist/(float)rnum*5000, exist, rnum * 2); + printf("element num: %d\n", bf->rnum); + cdb_bf_destroy(bf); + return 0; +} +#endif + diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_bloomfilter.h b/libdap-chain-global-db/libdap-cuttdb/src/cdb_bloomfilter.h new file mode 100644 index 0000000000000000000000000000000000000000..6ccdab1fccc1d92d843f072046550741a5cdaf37 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_bloomfilter.h @@ -0,0 +1,34 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. + * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +/* +Bloom Filter is currently not used in cuttdb +*/ +#ifndef _CDB_BLOOMFILTER_H_ +#define _CDB_BLOOMFILTER_H_ +#include <stdbool.h> +#include <stdint.h> + +typedef struct CDBBLOOMFILTER CDBBLOOMFILTER; + +#define CDBBFRATIO 8 + +CDBBLOOMFILTER *cdb_bf_new(uint64_t rnum, uint64_t size); +void cdb_bf_set(CDBBLOOMFILTER *bf, void *key, int ksize); +bool cdb_bf_exist(CDBBLOOMFILTER *bf, void *key, int ksize); +void cdb_bf_clean(CDBBLOOMFILTER *bf); +void cdb_bf_destroy(CDBBLOOMFILTER *bf); + +#endif diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_builddb.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_builddb.c new file mode 100644 index 0000000000000000000000000000000000000000..fc5f18dc68c9dbd78de89441d88ada791a8b97a5 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_builddb.c @@ -0,0 +1,72 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, 
Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. + * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + +#include "cuttdb.h" +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <time.h> + +int main(int argc, char *argv[]) +{ + CDB *db = cdb_new(); + if (argc < 2) { + fprintf(stderr, "Usage: %s db_path [hsize = 2000000]\n", argv[0]); + return 0; + } + + /* 1TB memory limit(unlimited) */ + cdb_option(db, argc >= 3? atoi(argv[2]):2000000 , 0, 1048576); + cdb_seterrcb(db, cdb_deferrorcb, NULL); + if (cdb_open(db, argv[1], CDB_CREAT | CDB_PAGEWARMUP) < 0) { + return -1; + } + char *buf = NULL; + long count = 0; + + size_t size, size2; + while((size = getline(&buf, &size2, stdin)) != -1) { + /* remove the delimiter*/ + buf[--size] = '\0'; + int klen = -1; + int vlen = -1; + uint32_t expire = 0; + int parsenum = 0; + for(int i = 0; i < size; i++) { + if (buf[i] == '\t') { + if (klen == -1) + klen = i; + else { + vlen = i - klen - 1; + parsenum = 1; + } + } else if (buf[i] >= '0' && buf[i] <= '9' && parsenum) { + expire = expire * 10 + buf[i] - '0'; + } + } + + if (klen > 0 && vlen > 0) { + cdb_set2(db, buf, klen, buf + klen + 1, vlen, + CDB_OVERWRITE, expire > 0? expire - time(NULL): 0); + count++; + } + free(buf); + buf = NULL; + } + cdb_destroy(db); + fprintf(stderr, "imported %ld records\n", count); + return 0; +} + + diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_core.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_core.c new file mode 100644 index 0000000000000000000000000000000000000000..db2320d3a73e3c92d7d3dc6f2b752d1e5aa2cb5f --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_core.c @@ -0,0 +1,1452 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. 
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+
+#include "cuttdb.h"
+#include "cdb_crc64.h"
+#include "cdb_types.h"
+#include "cdb_hashtable.h"
+#include "cdb_bloomfilter.h"
+#include "cdb_lock.h"
+#include "cdb_bgtask.h"
+#include "cdb_errno.h"
+#include "cdb_vio.h"
+#include "cdb_core.h"
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <pthread.h>
+
+static void _cdb_pageout(CDB *db);
+static void _cdb_defparam(CDB *db);
+static void _cdb_recout(CDB *db);
+static uint32_t _pagehash(const void *key, int len);
+static void _cdb_flushdpagetask(void *arg);
+static void _cdb_timerreset(struct timespec *ts);
+static uint32_t _cdb_timermicrosec(struct timespec *ts);
+static void _cdb_pagewarmup(CDB *db, bool loadbf);
+
+
+/* it isn't necessary to rehash bid in hash table cache:
+   the first 4 bytes of key are returned as the hash and len is ignored */
+static uint32_t _pagehash(const void *key, int len)
+{
+    (void) len;
+    return *(uint32_t*)key;
+}
+
+
+/* used to get the duration of a procedure:
+   stores the current CLOCK_MONOTONIC time in *ts as the start mark */
+static void _cdb_timerreset(struct timespec *ts)
+{
+    clock_gettime(CLOCK_MONOTONIC, ts);
+}
+
+
+/* Return the microseconds elapsed since the mark stored in *ts by
+   _cdb_timerreset(). The arithmetic is done in uint32_t, so the result
+   wraps for durations longer than 2^32 microseconds. */
+static uint32_t _cdb_timermicrosec(struct timespec *ts)
+{
+    struct timespec ts2;
+    uint32_t diff;
+    clock_gettime(CLOCK_MONOTONIC, &ts2);
+    diff = (ts2.tv_sec - ts->tv_sec) * 1000000;
+    diff += ts2.tv_nsec / 1000;
+    diff -= ts->tv_nsec / 1000;
+    return diff;
+}
+
+
+/* reset the parameters to their defaults: 128 MB record cache limit,
+   1024 MB page cache limit, 1000000 hash buckets, 4 KB read-ahead size,
+   bloom filter off. Pointer members are only overwritten with NULL here;
+   nothing they referred to is released by this function. */
+static void _cdb_defparam(CDB *db)
+{
+    db->rnum = 0;
+    db->bfsize = 0;
+    db->rclimit = 128 * MB;
+    db->pclimit = 1024 * MB;
+    db->hsize = 1000000;
+    db->rcache = db->pcache = db->dpcache = NULL;
+    db->bf = NULL;
+    db->opened = false;
+    db->vio = NULL;
+    db->mtable = NULL;
+    db->oid = 0;
+    db->roid = 0;
+    db->errcbarg = NULL;
+    db->errcb = NULL;
+    db->areadsize = 4 * KB;
+    return;
+}
+
+
+/* flush all dirty pages: every page in the dirty page cache is written
+   through the storage layer and the main table is updated with its new
+   offset. No lock is taken in this function. */
+void cdb_flushalldpage(CDB *db)
+{
+    if (db->dpcache) {
+        while (db->dpcache->num) {
+            CDBHTITEM *item = cdb_ht_poptail(db->dpcache);
+            uint32_t bid = 
*(uint32_t*)cdb_ht_itemkey(db->dpcache, item);
+            FOFF off;
+            db->vio->wpage(db->vio, (CDBPAGE*)cdb_ht_itemval(db->dpcache, item), &off);
+            db->mtable[bid] = off;
+            free(item);
+        }
+
+        db->roid = db->oid;
+        db->vio->cleanpoint(db->vio);
+    }
+}
+
+
+/* flush oldest dirty index page to disk, it runs in another thread and triggered by timer.
+   arg is the CDB handle. Pages are taken from the tail of the dirty page
+   cache while they are older than DPAGETIMEOUT (or unconditionally during a
+   full clean), written out, and moved into the clean page cache. The
+   function returns early, leaving the rest for the next run, as soon as the
+   tail page is not expired or its main-table lock cannot be taken. */
+static void _cdb_flushdpagetask(void *arg)
+{
+    CDB *db = (CDB *)arg;
+    CDBHTITEM *item;
+    CDBPAGE *page;
+    time_t now = time(NULL);
+    bool cleandcache = false;
+    uint32_t bid;
+
+    if (!db->dpcache)
+        /* no dirty page cache */
+        return;
+
+    /* if there isn't too much dirty page and some time passed since last clean,
+       write out all dirty pages to make a recovery point(oid):
+       fewer than 1024 dirty pages and more than 120 seconds since ndpltime */
+    if (db->dpcache->num < 1024 && now > db->ndpltime + 120)
+        cleandcache = true;
+
+    while(db->dpcache->num) {
+        FOFF off;
+        cdb_lock_lock(db->dpclock);
+        item = cdb_ht_gettail(db->dpcache);
+        /* no item in dpcache after lock */
+        if (item == NULL) {
+            cdb_lock_unlock(db->dpclock);
+            return;
+        }
+        page = (CDBPAGE *)cdb_ht_itemval(db->dpcache, item);
+        /* bid = page->bid; also OK */
+        bid = *(uint32_t*)cdb_ht_itemkey(db->dpcache, item);
+        /* been dirty for too long? 
*/
+        if (now > page->mtime + DPAGETIMEOUT || cleandcache) {
+            if (cdb_lock_trylock(db->mlock[page->bid % MLOCKNUM])) {
+                /* avoid dead lock, since dpclock is holding;
+                   any non-zero trylock result is treated as "not acquired" */
+                cdb_lock_unlock(db->dpclock);
+                return;
+            }
+            /* remove it from dpcache */
+            cdb_ht_poptail(db->dpcache);
+            cdb_lock_unlock(db->dpclock);
+
+            /* write to disk, accounting the write in wcount / wtime */
+            struct timespec ts;
+            _cdb_timerreset(&ts);
+            db->vio->wpage(db->vio, page, &off);
+            db->wcount++;
+            db->wtime += _cdb_timermicrosec(&ts);
+            db->mtable[bid] = off;
+
+            /* move the clean page into pcache */
+            cdb_lock_lock(db->pclock);
+            cdb_ht_insert(db->pcache, item);
+            cdb_lock_unlock(db->pclock);
+            cdb_lock_unlock(db->mlock[bid % MLOCKNUM]);
+        } else {
+            /* tail in dpcache isn't expired */
+            cdb_lock_unlock(db->dpclock);
+            return;
+        }
+    }
+
+    if (db->dpcache->num == 0 && cleandcache)
+        db->ndpltime = now;
+
+    if (cleandcache) {
+        /* clean succeed if goes here, remember the recovery point */
+        /* it's not necessary to lock */
+        db->roid = db->oid;
+        db->vio->cleanpoint(db->vio);
+    }
+}
+
+
+/* fill the index page cache, and set the bloomfilter if necessary.
+   Walks every stored index page through the storage iterator. Only pages
+   whose own offset equals the main-table entry of their bucket are used.
+   With loadbf set, every key hash of such a page is added to db->bf.
+   Without loadbf the walk stops once the page cache exceeds pclimit. */
+static void _cdb_pagewarmup(CDB *db, bool loadbf)
+{
+    char sbuf[SBUFSIZE];
+    void *it = db->vio->pageitfirst(db->vio, 0);
+
+    if (it == NULL)
+        return;
+
+    for(;;) {
+        CDBPAGE *page = (CDBPAGE *)sbuf;
+        if (db->vio->pageitnext(db->vio, &page, it) < 0)
+            break;
+
+        /* the page is the newest one because its offset matches the one in main table */
+        if (OFFEQ(page->ooff, db->mtable[page->bid])) {
+            if (loadbf) {
+                /* iterate key hashes in page, set to the filter */
+                cdb_lock_lock(db->bflock);
+                for(uint32_t i = 0; i < page->num; i++) {
+                    uint64_t hash = (page->bid << 24) | (page->items[i].hash.i2 << 8)
+                        | (page->items[i].hash.i1);
+                    /* bloom filter use the combined record hash as key */
+                    cdb_bf_set(db->bf, &hash, SI8);
+                }
+                cdb_lock_unlock(db->bflock);
+            }
+
+            /* set the page to pcache if it doesn't exceed the limit size */
+            if (db->pcache && db->pcache->size < db->pclimit) { 
+                cdb_lock_lock(db->pclock);
+                cdb_ht_insert2(db->pcache, &page->bid, SI4, page, MPAGESIZE(page));
+                cdb_lock_unlock(db->pclock);
+            }
+        }
+        /* the page may not be still in stack */
+        if (page != (CDBPAGE *)sbuf)
+            free(page);
+
+        if (!loadbf && (db->pcache && db->pcache->size > db->pclimit))
+            break;
+    }
+
+    db->vio->pageitdestroy(db->vio, it);
+}
+
+
+/* generate an incremental global operation id:
+   returns the current db->oid and increments it, under oidlock */
+uint64_t cdb_genoid(CDB *db)
+{
+    uint64_t oid;
+    cdb_lock_lock(db->oidlock);
+    oid = db->oid++;
+    cdb_lock_unlock(db->oidlock);
+    return oid;
+}
+
+
+/* get a new record iterator from the storage layer for the given oid;
+   the result is passed to cdb_iterate() and cdb_iterate_destroy() */
+void *cdb_iterate_new(CDB *db, uint64_t oid)
+{
+    return db->vio->recitfirst(db->vio, oid);
+}
+
+
+
+/* iterate the database by callback.
+   itcb(arg, key, ksize, val, vsize, expire, oid) is called for every record
+   delivered by iter for which cdb_checkoff() returns non-zero. Iteration
+   ends when the iterator is exhausted or the callback returns false.
+   Returns the number of records passed to the callback (0 for a NULL iter). */
+uint64_t cdb_iterate(CDB *db, CDB_ITERCALLBACK itcb, void *arg, void *iter)
+{
+    char sbuf[SBUFSIZE];
+    uint64_t cnt = 0;
+
+    if (iter == NULL)
+        return cnt;
+    for(;;) {
+        /* the rec is a copy from file, may in stack or allocated in heap */
+        CDBREC *rec = (CDBREC *)sbuf;
+        bool ret = true;
+        if (db->vio->recitnext(db->vio, &rec, iter) < 0)
+            break;
+
+        if (cdb_checkoff(db, CDBHASH64(rec->key, rec->ksize), rec->ooff, CDB_NOTLOCKED)) {
+            ret = itcb(arg, rec->key, rec->ksize, rec->val, rec->vsize, rec->expire, rec->oid);
+            cnt++;
+        }
+        if (rec != (CDBREC *)sbuf)
+            free(rec);
+        if (!ret)
+            break;
+    }
+    return cnt;
+}
+
+
+
+/* destroy the iterator obtained from cdb_iterate_new() */
+void cdb_iterate_destroy(CDB *db, void *iter)
+{
+    db->vio->recitdestroy(db->vio, iter);
+}
+
+
+/* difficult to implement */
+/*
+static void _cdb_rcachewarmup(CDB *db)
+{
+}
+*/
+
+
+CDB *cdb_new()
+{
+    CDB *db;
+    db = (CDB *)malloc(sizeof(CDB));
+    /* I assume all operation in this layer is 'fast', so no mutex used here
+       NOTE(review): the malloc() result above is used without a NULL check */
+    for(int i = 0; i < MLOCKNUM; i++)
+        db->mlock[i] = cdb_lock_new(CDB_LOCKSPIN);
+    db->dpclock = cdb_lock_new(CDB_LOCKSPIN);
+    db->pclock = cdb_lock_new(CDB_LOCKSPIN);
+    db->rclock = cdb_lock_new(CDB_LOCKSPIN);
+    db->stlock = cdb_lock_new(CDB_LOCKSPIN);
+    db->oidlock = 
cdb_lock_new(CDB_LOCKSPIN);
+    db->bflock = cdb_lock_new(CDB_LOCKSPIN);
+    db->bgtask = cdb_bgtask_new();
+    /* every thread should has its own errno */
+    db->errkey = (pthread_key_t *)malloc(sizeof(pthread_key_t));
+    pthread_key_create(db->errkey, NULL);
+    /* set default parameter */
+    _cdb_defparam(db);
+    return db;
+}
+
+/* Set the hash table size and the cache limits; always returns 0.
+   bnum below 4096 is raised to 4096. rcacheMB / pcacheMB are in megabytes;
+   a negative value leaves the corresponding limit unchanged. */
+int cdb_option(CDB *db, int bnum, int rcacheMB, int pcacheMB)
+{
+    /* too small bnum is not allowed */
+    db->hsize = bnum > 4096? bnum : 4096;
+
+    if (rcacheMB >= 0)
+        db->rclimit = (uint64_t)rcacheMB * MB;
+    if (pcacheMB >= 0)
+        db->pclimit = (uint64_t)pcacheMB * MB;
+    return 0;
+}
+
+/* Set the bloom filter size used by the next cdb_open(); 0 disables it. */
+void cdb_option_bloomfilter(CDB *db, uint64_t size)
+{
+    db->bfsize = size;
+}
+/* Set the read-ahead size, clamped to the range
+   [1 KB, SBUFSIZE - (sizeof(CDBREC) - RECHSIZE)]. */
+void cdb_option_areadsize(CDB *db, uint32_t size)
+{
+    db->areadsize = size;
+    if (db->areadsize < 1 * KB)
+        db->areadsize = 1 * KB;
+
+    if (db->areadsize > SBUFSIZE - (sizeof(CDBREC) - RECHSIZE))
+        db->areadsize = SBUFSIZE - (sizeof(CDBREC) - RECHSIZE);
+}
+/* Open the database stored at file_name, or a purely in-memory one when
+   file_name equals CDB_MEMDB. Returns 0 on success, -1 on failure. */
+int cdb_open(CDB *db, const char *file_name, int mode)
+{
+    /* it will become a plain hash table when file_name == CDB_MEMDB */
+    int memdb = (strcmp(file_name, CDB_MEMDB) == 0);
+
+    if (db->rclimit)
+        /* record cache is enabled */
+        db->rcache = cdb_ht_new(true, NULL);
+    else if (memdb) {
+        /* record cache is disabled, but in MEMDB mode */
+        cdb_seterrno(db, CDB_MEMDBNOCACHE, __FILE__, __LINE__);
+        goto ERRRET;
+    }
+
+    if (db->pclimit && !memdb) {
+        /* page cache enabled. 
page cache is meaningless under MEMDB mode */
+        db->dpcache = cdb_ht_new(true, _pagehash);
+        db->pcache = cdb_ht_new(true, _pagehash);
+    }
+
+
+    if (!memdb) {
+        if (db->bfsize) {
+            /* bloom filter enabled; bfsize is passed both as the expected
+               record count and as the filter size */
+            db->bf = cdb_bf_new(db->bfsize, db->bfsize);
+        }
+        /* now only one storage format is supported */
+        db->vio = cdb_vio_new(CDBVIOAPND2);
+        db->vio->db = db;
+        if (db->vio->open(db->vio, file_name, mode) < 0)
+            goto ERRRET;
+        if (db->vio->rhead(db->vio) < 0) {
+            db->mtable = (FOFF*)malloc(sizeof(FOFF) * db->hsize);
+            memset(db->mtable, 0, sizeof(FOFF) * db->hsize);
+        }
+        /* dirty index page would be swap to disk by timer control
+           (the flush task is registered with a 1 second interval) */
+        cdb_bgtask_add(db->bgtask, _cdb_flushdpagetask, db, 1);
+        db->ndpltime = time(NULL);
+        /* start background task thread */
+        cdb_bgtask_start(db->bgtask);
+    } else {
+        /* no persistent storage under MEMDB mode
+           NOTE(review): db->bgtask is overwritten with NULL without being
+           destroyed here - confirm the object is released elsewhere */
+        db->vio = NULL;
+        db->bgtask = NULL;
+        db->mtable = NULL;
+    }
+
+    if (db->bf || ((mode & CDB_PAGEWARMUP) && db->pcache)) {
+        /* fill the bloom filter if it is enabled, and fill the page cache */
+        _cdb_pagewarmup(db, !!db->bf);
+    }
+
+    /* reset the statistic info */
+    cdb_stat(db, NULL);
+    db->opened = true;
+    return 0;
+
+ERRRET:
+    if (db->rcache)
+        cdb_ht_destroy(db->rcache);
+    if (db->pcache)
+        cdb_ht_destroy(db->pcache);
+    if (db->dpcache)
+        cdb_ht_destroy(db->dpcache);
+    if (db->bf)
+        cdb_bf_destroy(db->bf);
+    cdb_bgtask_stop(db->bgtask);
+    _cdb_defparam(db);
+    return -1;
+}
+
+
+/* check if the page cache size exceed the limit. 
clean oldest page if necessary */ +static void _cdb_pageout(CDB *db) +{ + while (PCOVERFLOW(db)) { + if (db->pcache->num) { + /* clean page cache is prior */ + cdb_lock_lock(db->pclock); + cdb_ht_removetail(db->pcache); + cdb_lock_unlock(db->pclock); + } else if (db->dpcache->num) { + CDBHTITEM *item; + uint32_t bid; + FOFF off; + cdb_lock_lock(db->dpclock); + item = cdb_ht_gettail(db->dpcache); + if (item == NULL) { + cdb_lock_unlock(db->dpclock); + break; + } + + bid = *(uint32_t*)cdb_ht_itemkey(db->dpcache, item); + /* must lock the main table inside the dpclock protection */ + if (cdb_lock_trylock(db->mlock[bid % MLOCKNUM]) < 0) { + /* avoid dead lock since dpclock is holding */ + cdb_lock_unlock(db->dpclock); + /* do nothing this time */ + break; + } + cdb_ht_poptail(db->dpcache); + cdb_lock_unlock(db->dpclock); + + /* write out dirty page */ + struct timespec ts; + _cdb_timerreset(&ts); + db->vio->wpage(db->vio, (CDBPAGE*)cdb_ht_itemval(db->dpcache, item), &off); + db->wcount++; + db->wtime += _cdb_timermicrosec(&ts); + db->mtable[bid] = off; + cdb_lock_unlock(db->mlock[bid % MLOCKNUM]); + free(item); + } + } +} + + +/* check if the record cache size exceed the limit. clean oldest record if necessary */ +static void _cdb_recout(CDB *db) +{ + while (RCOVERFLOW(db)) { + cdb_lock_lock(db->rclock); + if (db->rcache->num) + cdb_ht_removetail(db->rcache); + cdb_lock_unlock(db->rclock); + } +} + + +/* get all offsets from index(page) by key, even if only one of them at most is valid. + Others are due to the hash collision */ +int cdb_getoff(CDB *db, uint64_t hash, FOFF **offs, int locked) +{ + char sbuf[SBUFSIZE]; + CDBPAGE *page = NULL; + int rnum; + bool incache = true; + uint32_t bid = (hash >> 24) % db->hsize; + PHASH phash; + + phash.i1 = hash & 0xff; + phash.i2 = (hash >> 8) & 0xffff; + + if (db->bf) { + uint64_t bfkey = (bid << 24) | (hash & 0xffffff); + /* check the key-hash in bloom filter? 
return now if not exist */ + cdb_lock_lock(db->bflock); + if (!cdb_bf_exist(db->bf, &bfkey, SI8)) { + cdb_lock_unlock(db->bflock); + return 0; + } + cdb_lock_unlock(db->bflock); + } + + if (locked == CDB_NOTLOCKED) cdb_lock_lock(db->mlock[bid % MLOCKNUM]); + /* page exists in clean page cache? */ + if (db->pcache) { + cdb_lock_lock(db->pclock); + page = cdb_ht_get2(db->pcache, &bid, SI4, true); + cdb_lock_unlock(db->pclock); + } + + /* not in pcache, exists in dirty page cache? */ + if (page == NULL && db->dpcache) { + cdb_lock_lock(db->dpclock); + page = cdb_ht_get2(db->dpcache, &bid, SI4, true); + cdb_lock_unlock(db->dpclock); + } + + if (page == NULL) { + /* not in dpcache either, read from disk */ + incache = false; + db->pcmiss++; + /* page stays in stack by default */ + page = (CDBPAGE *)sbuf; + if (OFFNOTNULL(db->mtable[bid])) { + /* page offset not null in main table */ + int ret; + struct timespec ts; + _cdb_timerreset(&ts); + ret = db->vio->rpage(db->vio, &page, db->mtable[bid]); + db->rcount++; + db->rtime += _cdb_timermicrosec(&ts); + + /* read page error, return */ + if (ret < 0) { + if (locked == CDB_NOTLOCKED) cdb_lock_unlock(db->mlock[bid % MLOCKNUM]); + if (page != (CDBPAGE *)sbuf) + free(page); + return -1; + } + } else { + /* no page in this bucket */ + page->cap = page->num = 0; + page->osize = 0; + OFFZERO(page->ooff); + } + } else { + db->pchit++; + } + + rnum = 0; + for(uint32_t i = 0; i < page->num; i++) { + /* compare every hash in the page */ + if (PHASHEQ(page->items[i].hash, phash)) { + (*offs)[rnum] = page->items[i].off; + /* result offset list stays in stack by default. 
Allocate one in heap if + it exceeds the limit */ + if (++rnum == SFOFFNUM) { + /* very little possibility goes here */ + FOFF *tmp = (FOFF*)malloc((page->num - i + SFOFFNUM + 1) * sizeof(FOFF)); + memcpy(tmp, *offs, SFOFFNUM * sizeof(FOFF)); + *offs = tmp; + } + } + } + + if (!incache) { + /* set into clean page cache if not exists before */ + if (db->pcache) { + cdb_lock_lock(db->pclock); + cdb_ht_insert2(db->pcache, &bid, SI4, page, MPAGESIZE(page)); + cdb_lock_unlock(db->pclock); + } + /* if page now points to heap memory, free it */ + if (page != (CDBPAGE *)sbuf) { + free(page); + } + } + if (locked == CDB_NOTLOCKED) cdb_lock_unlock(db->mlock[bid % MLOCKNUM]); + + /* check page cache overflow */ + if (PCOVERFLOW(db)) + _cdb_pageout(db); + + return rnum; +} + + +/* replace a specified record's offset, may be used at disk space recycling + off indicates its previous offset, noff is the new offset. return negative if not found */ +int cdb_replaceoff(CDB *db, uint64_t hash, FOFF off, FOFF noff, int locked) +{ + char sbuf[SBUFSIZE]; + CDBPAGE *page = NULL; + CDBHTITEM *pitem = NULL; + bool indpcache = false; + uint32_t bid = (hash >> 24) % db->hsize; + PHASH phash; + bool found = false; + + phash.i1 = hash & 0xff; + phash.i2 = (hash >> 8) & 0xffff; + + if (locked == CDB_NOTLOCKED) cdb_lock_lock(db->mlock[bid % MLOCKNUM]); + if (db->pcache) { + /* in clean page cache, since it would be modified, it should be deleted from pcache */ + cdb_lock_lock(db->pclock); + pitem = cdb_ht_del(db->pcache, &bid, SI4); + cdb_lock_unlock(db->pclock); + if (pitem) + page = (CDBPAGE *)cdb_ht_itemval(db->pcache, pitem); + } + if (page == NULL && db->dpcache) { + /* not in pcache, but in dirty page cache */ + cdb_lock_lock(db->dpclock); + page = cdb_ht_get2(db->dpcache, &bid, SI4, true); + cdb_lock_unlock(db->dpclock); + if (page) + indpcache = true; + } + if (page == NULL) { + /* not exists either, read from disk */ + db->pcmiss++; + page = (CDBPAGE *)sbuf; + if 
(OFFNOTNULL(db->mtable[bid])) { + int ret; + struct timespec ts; + _cdb_timerreset(&ts); + ret = db->vio->rpage(db->vio, &page, db->mtable[bid]); + db->rcount++; + db->rtime += _cdb_timermicrosec(&ts); + + if (ret < 0) { + if (locked == CDB_NOTLOCKED) cdb_lock_unlock(db->mlock[bid % MLOCKNUM]); + if (page != (CDBPAGE *)sbuf) + free(page); + return -1; + } + } else { + /* nullified the empty page */ + page->cap = page->num = 0; + page->osize = 0; + OFFZERO(page->ooff); + } + } else { + db->pchit++; + } + + /* check and modify */ + for(uint32_t i = 0; i < page->num; i++) { + if (PHASHEQ(page->items[i].hash, phash) + && OFFEQ(page->items[i].off, off)) { + page->items[i].off = noff; + found = true; + break; + } + } + + if (db->dpcache && !indpcache) { + /* if page already dirty in cache, need not do anything */ + /* dirty page cache is enabled but not exists before */ + if (pitem) { + /* pitem not NULL indicates it belongs to pcache */ + if (found) { + /* modified page */ + cdb_lock_lock(db->dpclock); + cdb_ht_insert(db->dpcache, pitem); + cdb_lock_unlock(db->dpclock); + } else { + /* got from pcache, but not modified */ + cdb_lock_lock(db->pclock); + cdb_ht_insert(db->pcache, pitem); + cdb_lock_unlock(db->pclock); + } + /* page belongs to memory in 'cache', must not free */ + } else if (page != NULL) { + /* page read from disk, but not in cache */ + cdb_lock_lock(db->dpclock); + cdb_ht_insert2(db->dpcache, &bid, SI4, page, MPAGESIZE(page)); + cdb_lock_unlock(db->dpclock); + /* the 'page' won't be use anymore */ + if (page != (CDBPAGE *)sbuf) + free(page); + } + } else if (!db->dpcache){ + /* no page cache. 
Write out dirty page immediately */ + FOFF poff; + struct timespec ts; + _cdb_timerreset(&ts); + db->vio->wpage(db->vio, page, &poff); + db->wcount++; + db->wtime += _cdb_timermicrosec(&ts); + + db->mtable[bid] = poff; + if (page != (CDBPAGE *)sbuf) + free(page); + } + if (locked == CDB_NOTLOCKED) cdb_lock_unlock(db->mlock[bid % MLOCKNUM]); + + /* check page cache overflow */ + if (PCOVERFLOW(db)) + _cdb_pageout(db); + + return 0; +} + + +/* insert/delete a key-offset pair from index page */ +int cdb_updatepage(CDB *db, uint64_t hash, FOFF off, int opt, int locked) +{ + char sbuf[SBUFSIZE], sbuf2[SBUFSIZE]; + CDBPAGE *page = NULL, *npage = NULL; + CDBHTITEM *pitem = NULL, *nitem = NULL; + CDBHASHTABLE *tmpcache = NULL; + CDBLOCK *tmpclock = NULL; + int npsize = 0; + uint32_t bid = (hash >> 24) % db->hsize; + PHASH phash; + + phash.i1 = hash & 0xff; + phash.i2 = (hash >> 8) & 0xffff; + + if (locked == CDB_NOTLOCKED) cdb_lock_lock(db->mlock[bid % MLOCKNUM]); + /* firstly, try move the page out of the cache if possible, + it assumes that the page would be modified(pair exists) */ + if (db->pcache) { + /* try clean page cache */ + cdb_lock_lock(db->pclock); + pitem = cdb_ht_del(db->pcache, &bid, SI4); + cdb_lock_unlock(db->pclock); + if (pitem) { + page = (CDBPAGE *)cdb_ht_itemval(db->pcache, pitem); + tmpcache = db->pcache; + tmpclock = db->pclock; + } + } + if (page == NULL && db->dpcache) { + /* try dirty page cache */ + cdb_lock_lock(db->dpclock); + pitem = cdb_ht_del(db->dpcache, &bid, SI4); + cdb_lock_unlock(db->dpclock); + if (pitem) { + page = (CDBPAGE *)cdb_ht_itemval(db->dpcache, pitem); + tmpcache = db->dpcache; + tmpclock = db->dpclock; + } + } + + if (page == NULL) { + db->pcmiss++; + page = (CDBPAGE *)sbuf; + /* doesn't exist in cache, read from disk */ + if (OFFNOTNULL(db->mtable[bid])) { + int ret; + struct timespec ts; + _cdb_timerreset(&ts); + ret = db->vio->rpage(db->vio, &page, db->mtable[bid]); + db->rcount++; + db->rtime += 
_cdb_timermicrosec(&ts); + + if (ret < 0) { + if (locked == CDB_NOTLOCKED) cdb_lock_unlock(db->mlock[bid % MLOCKNUM]); + if (page != (CDBPAGE *)sbuf) + free(page); + return -1; + } + } else { + page->cap = 0; + page->num = 0; + page->osize = 0; + OFFZERO(page->ooff); + } + } else { + db->pchit++; + } + + npsize = MPAGESIZE(page); + + if (opt == CDB_PAGEDELETEOFF) + ;// npsize = MPAGESIZE(page) - sizeof(PITEM); + /* do not malloc new page on deletion */ + + else if (opt == CDB_PAGEINSERTOFF && page->cap == page->num) { + /* get a new page, from dirty page cache if possible */ + npsize = MPAGESIZE(page) + CDB_PAGEINCR * sizeof(PITEM); + if (db->dpcache) { + nitem = cdb_ht_newitem(db->dpcache, SI4, npsize); + *(uint32_t*)cdb_ht_itemkey(db->dpcache, nitem) = bid; + npage = (CDBPAGE *)cdb_ht_itemval(db->dpcache, nitem); + } else { + /* no dpcache, use stack if size fits */ + if (npsize > SBUFSIZE) + npage = (CDBPAGE *)malloc(npsize); + else + npage = (CDBPAGE *)sbuf2; + } + + /* initialize the new page */ + + npage->bid = bid; + npage->oid = cdb_genoid(db); + npage->osize = page->osize; + npage->ooff = page->ooff; + npage->mtime = time(NULL); + npage->cap = page->cap + CDB_PAGEINCR; + npage->num = page->num; + memcpy(npage->items, page->items, page->num * sizeof(PITEM)); + /* old page got from cache */ + if (pitem) + free(pitem); + /* old page read from disk, if in stack? 
*/ + else if (page != (CDBPAGE *)sbuf) + free(page); + + page = npage; + pitem = nitem; + } + + uint32_t onum = page->num; + + if (opt == CDB_PAGEDELETEOFF) { + bool found = false; + for(uint32_t i = 0; i < page->num; i++) { + if (!found) { + if (PHASHEQ(page->items[i].hash, phash) + && OFFEQ(page->items[i].off, off)) + { + found = true; + /* records num is consistant with index */ + cdb_lock_lock(db->stlock); + db->rnum--; + cdb_lock_unlock(db->stlock); + } + } + if (found && i + 1 < page->num) + page->items[i] = page->items[i+1]; + } + if (found) + page->num--; + } else if (opt == CDB_PAGEINSERTOFF) { + bool found = false; + /* check already exist? */ + for(uint32_t i = 0; i < page->num; i++) { + if (PHASHEQ(page->items[i].hash, phash) + && OFFEQ(page->items[i].off, off)) { + /* avoid exceptional deduplicated item */ + found = true; + break; + } + } + + /* append to the tail */ + if (!found) { + page->items[page->num].hash = phash; + page->items[page->num].off = off; + page->num++; + /* records num is consistant with index */ + cdb_lock_lock(db->stlock); + db->rnum++; + cdb_lock_unlock(db->stlock); + if (db->bf) { + uint64_t bfkey = (((hash >> 24) % db->hsize) << 24) | (hash & 0xffffff); + cdb_lock_lock(db->bflock); + cdb_bf_set(db->bf, &bfkey, SI8); + cdb_lock_unlock(db->bflock); + } + } + } + + if (page->num == onum) { + /* nothing done */ + if (pitem) { + /* insert the item back to the cache where it belongs */ + cdb_lock_lock(tmpclock); + cdb_ht_insert(tmpcache, pitem); + cdb_lock_unlock(tmpclock); + } else { + if (page != (CDBPAGE *)sbuf2 + && page != (CDBPAGE *)sbuf) + free(page); + } + if (locked == CDB_NOTLOCKED) cdb_lock_unlock(db->mlock[bid % MLOCKNUM]); + return -1; + } else { + if (pitem) { + cdb_lock_lock(db->dpclock); + cdb_ht_insert(db->dpcache, pitem); + cdb_lock_unlock(db->dpclock); + } else { + struct timespec ts; + _cdb_timerreset(&ts); + db->vio->wpage(db->vio, page, &off); + db->wcount++; + db->wtime += _cdb_timermicrosec(&ts); + + 
db->mtable[bid] = off; + if (page != (CDBPAGE *)sbuf2 + && page != (CDBPAGE *)sbuf) + free(page); + } + } + + if (locked == CDB_NOTLOCKED) cdb_lock_unlock(db->mlock[bid % MLOCKNUM]); + + /* check page cache overflow */ + if (PCOVERFLOW(db)) + _cdb_pageout(db); + + return 0; +} + + +/* check if an record with specified key-offset exists in index */ +bool cdb_checkoff(CDB *db, uint64_t hash, FOFF off, int locked) +{ + FOFF soffs[SFOFFNUM]; + FOFF *soff = (FOFF *)soffs; + int dupnum; + int ret = false; + + /* get all possible offsets */ + dupnum = cdb_getoff(db, hash, &soff, locked); + for(int i = 0; i < dupnum; i++) { + if (OFFEQ(soff[i], off)) { + ret = true; + break; + } + } + + if (soff != (FOFF *)soffs) { + free(soff); + } + + return ret; +} + + +/* wrapper and simplified of set operation */ +int cdb_set(CDB *db, const char *key, int ksize, const char *val, int vsize) +{ + return cdb_set2(db, key, ksize, val, vsize, CDB_OVERWRITE, 0); +} + + +int cdb_set2(CDB *db, const char *key, int ksize, const char *val, int vsize, int opt, int expire) +{ + CDBREC rec; + FOFF ooff, noff; + uint32_t now = time(NULL); + uint64_t hash; + uint32_t lockid; + bool expired = false; + + if (db->vio == NULL) { + /* if it is a memdb, just operate on the record cache and return */ + cdb_lock_lock(db->rclock); + cdb_ht_insert2(db->rcache, key, ksize, val, vsize); + cdb_lock_unlock(db->rclock); + if (RCOVERFLOW(db)) + _cdb_recout(db); + return 0; + } + + hash = CDBHASH64(key, ksize); + lockid = (hash >> 24) % db->hsize % MLOCKNUM; + OFFZERO(rec.ooff); + OFFZERO(ooff); + rec.osize = 0; + rec.key = (char*)key; + rec.val = (char*)val; + rec.ksize = ksize; + rec.vsize = vsize; + rec.oid = cdb_genoid(db); + rec.expire = expire? 
now + expire : 0; + + cdb_lock_lock(db->mlock[lockid]); + if (db->rcache) { + /* if record already exists, get its old meta info */ + int item_vsize; + char *cval; + uint32_t old_expire = 0; + cdb_lock_lock(db->rclock); + cval = cdb_ht_get(db->rcache, key, ksize, &item_vsize, false); + if (cval) { + /* record already exists */ + ooff = rec.ooff = *(FOFF*)cval; + rec.osize = item_vsize - SFOFF - SI4; + old_expire = *(uint32_t*)(cval + SFOFF); + } + cdb_lock_unlock(db->rclock); + if (old_expire && old_expire <= now) + /* once exist but expired? */ + expired = true; + } + + if (OFFNULL(ooff)) { + FOFF soffs[SFOFFNUM]; + FOFF *soff = soffs; + char sbuf[SBUFSIZE]; + CDBREC *rrec = (CDBREC*)sbuf; + + int retnum; + if ((retnum = cdb_getoff(db, hash, &soff, CDB_LOCKED)) < 0) { + cdb_lock_unlock(db->mlock[lockid]); + return -1; + } + + for(int i = 0; i < retnum; i++) { + /* check for duplicate records/older version*/ + int cret; + if (rrec != (CDBREC*)sbuf) { + free(rrec); + rrec = (CDBREC*)sbuf; + } + + struct timespec ts; + _cdb_timerreset(&ts); + cret = db->vio->rrec(db->vio, &rrec, soff[i], false); + db->rcount++; + db->rtime += _cdb_timermicrosec(&ts); + + if (cret < 0) + continue; + + if (ksize == rrec->ksize && memcmp(rrec->key, key, ksize) == 0) { + /* got its old meta info */ + rec.osize = rrec->osize; + rec.ooff = rrec->ooff; + ooff = rec.ooff; + if (rrec->expire <= now) + expired = true; + break; + } + } + if (soff != soffs) + free(soff); + if (rrec != (CDBREC*)sbuf) + free(rrec); + } + + if (OFFNOTNULL(ooff) && !expired) { + /* record already exists*/ + if (opt & CDB_INSERTIFNOEXIST) { + cdb_lock_unlock(db->mlock[lockid]); + cdb_seterrno(db, CDB_EXIST, __FILE__, __LINE__); + return -2; + } + } else { + if (opt & CDB_INSERTIFEXIST) { + cdb_lock_unlock(db->mlock[lockid]); + cdb_seterrno(db, CDB_NOTFOUND, __FILE__, __LINE__); + return -3; + } + } + + struct timespec ts; + _cdb_timerreset(&ts); + if (db->vio->wrec(db->vio, &rec, &noff) < 0) { + 
cdb_lock_unlock(db->mlock[lockid]); + return -1; + } + db->wcount++; + db->wtime += _cdb_timermicrosec(&ts); + + if (OFFNOTNULL(ooff)) { + cdb_replaceoff(db, hash, ooff, noff, CDB_LOCKED); + } else { + cdb_updatepage(db, hash, noff, CDB_PAGEINSERTOFF, CDB_LOCKED); + } + + if (db->rcache) { + if ((opt & CDB_INSERTCACHE) == CDB_INSERTCACHE) { + char *cval; + CDBHTITEM *item = cdb_ht_newitem(db->rcache, ksize, vsize + SI4 + SFOFF); + memcpy(cdb_ht_itemkey(db->rcache, item), key, ksize); + cval = cdb_ht_itemval(db->rcache, item); + memcpy(cval + SI4 + SFOFF, val, vsize); + *(FOFF*)(cval) = rec.ooff; + *(uint32_t*)(cval + SFOFF) = rec.expire; + cdb_lock_lock(db->rclock); + cdb_ht_insert(db->rcache, item); + cdb_lock_unlock(db->rclock); + } + } + cdb_lock_unlock(db->mlock[lockid]); + + if (RCOVERFLOW(db)) + _cdb_recout(db); + + cdb_seterrno(db, CDB_SUCCESS, __FILE__, __LINE__); + return 0; +} + + +int cdb_is(CDB *db, const char *key, int ksize) +{ + FOFF soffs[SFOFFNUM]; + FOFF *offs; + int dupnum, ret = -3; + uint64_t hash; + uint32_t now = time(NULL); + uint32_t lockid; + + if (db->rcache) { + char *cval; + cdb_lock_lock(db->rclock); + cval = cdb_ht_get(db->rcache, key, ksize, 0, true); + if (cval) { + db->rchit++; + cdb_lock_unlock(db->rclock); + return 0; + } else { + db->rcmiss++; + if (db->vio == NULL) { + cdb_lock_unlock(db->rclock); + return -3; + } + } + cdb_lock_unlock(db->rclock); + } + + offs = soffs; + hash = CDBHASH64(key, ksize); + lockid = (hash >> 24) % db->hsize % MLOCKNUM; + cdb_lock_lock(db->mlock[lockid]); + dupnum = cdb_getoff(db, hash, &offs, CDB_LOCKED); + if (dupnum <= 0) { + cdb_lock_unlock(db->mlock[lockid]); + return -1; + } + else + ret = 0; + cdb_lock_unlock(db->mlock[lockid]); + + if (RCOVERFLOW(db)) + _cdb_recout(db); + + if (offs != soffs) + free(offs); + + if (ret < 0) + cdb_seterrno(db, CDB_NOTFOUND, __FILE__, __LINE__); + else { + db->rcmiss++; + cdb_seterrno(db, CDB_SUCCESS, __FILE__, __LINE__); + } + return ret; +} + + +int 
cdb_get(CDB *db, const char *key, int ksize, void **val, int *vsize) +{ + char sbuf[SBUFSIZE]; + CDBREC *rec = (CDBREC *)sbuf; + FOFF soffs[SFOFFNUM]; + FOFF *offs; + int dupnum, ret = -3; + uint64_t hash; + uint32_t now = time(NULL); + uint32_t lockid; + + *vsize = 0; + *val = NULL; + if (db->rcache) { + char *cval; + cdb_lock_lock(db->rclock); + cval = cdb_ht_get(db->rcache, key, ksize, vsize, true); + if (cval) { + db->rchit++; + if (db->vio) { + (*vsize) -= SI4 + SFOFF; + if (*(uint32_t*)(cval + SFOFF) + && *(uint32_t*)(cval + SFOFF) <= now) { + cdb_lock_unlock(db->rclock); + /* not found no not report error now */ + //cdb_seterrno(db, CDB_NOTFOUND, __FILE__, __LINE__); + return -3; + } + cval = (void*)(cval + SI4 + SFOFF); + } + *val = malloc(*vsize); + memcpy(*val, cval, *vsize); + cdb_lock_unlock(db->rclock); + return 0; + } else { + db->rcmiss++; + if (db->vio == NULL) { + cdb_lock_unlock(db->rclock); + return -3; + } + } + cdb_lock_unlock(db->rclock); + } + + offs = soffs; + hash = CDBHASH64(key, ksize); + lockid = (hash >> 24) % db->hsize % MLOCKNUM; + cdb_lock_lock(db->mlock[lockid]); + dupnum = cdb_getoff(db, hash, &offs, CDB_LOCKED); + if (dupnum < 0) { + cdb_lock_unlock(db->mlock[lockid]); + return -1; + } + + for(int i = 0; i < dupnum; i++) { + int cret; + if (rec != (CDBREC*)sbuf) { + free(rec); + rec = (CDBREC*)sbuf; + } + + struct timespec ts; + _cdb_timerreset(&ts); + cret = db->vio->rrec(db->vio, &rec, offs[i], true); + db->rcount++; + db->rtime += _cdb_timermicrosec(&ts); + + if (cret < 0) + continue; + + if (ksize == rec->ksize && memcmp(rec->key, key, ksize) == 0) { + if (rec->expire && rec->expire <= now) { + break; + } + *vsize = rec->vsize; + *val = malloc(*vsize); + memcpy(*val, rec->val, *vsize); + ret = 0; + break; + } + } + + if (ret == 0 && db->rcache) { + char *cval; + CDBHTITEM *item = cdb_ht_newitem(db->rcache, ksize, *vsize + SI4 + SFOFF); + memcpy(cdb_ht_itemkey(db->rcache, item), key, ksize); + cval = cdb_ht_itemval(db->rcache, 
/*
 * Release a value buffer and clear the caller's pointer to it.
 *
 * val - address of the pointer to free; may be NULL, and *val may be
 *       NULL, in which case nothing is freed. On return *val is NULL.
 */
void cdb_free_val(void **val)
{
    if (val == NULL)
        return;
    /* free(NULL) is a no-op, so no extra guard is needed */
    free(*val);
    *val = NULL;
}
records/older version*/ + int cret; + if (rrec != (CDBREC*)sbuf) { + free(rrec); + rrec = (CDBREC*)sbuf; + } + + struct timespec ts; + _cdb_timerreset(&ts); + cret = db->vio->rrec(db->vio, &rrec, soff[i], false); + db->rcount++; + db->rtime += _cdb_timermicrosec(&ts); + + if (cret < 0) + continue; + + if (ksize == rrec->ksize && memcmp(rrec->key, key, ksize) == 0) { + /* got its old meta info */ + rec.osize = rrec->osize; + rec.ooff = rrec->ooff; + ooff = rec.ooff; + break; + } + } + if (soff != soffs) + free(soff); + if (rrec != (CDBREC*)sbuf) + free(rrec); + } + + if (OFFNOTNULL(ooff)) { + cdb_updatepage(db, hash, ooff, CDB_PAGEDELETEOFF, CDB_LOCKED); + cdb_lock_unlock(db->mlock[lockid]); + + struct timespec ts; + _cdb_timerreset(&ts); + if (db->vio->drec(db->vio, &rec, ooff) < 0) + ; // return -1; succeed or not doesn't matter + db->wcount++; + db->wtime += _cdb_timermicrosec(&ts); + cdb_seterrno(db, CDB_SUCCESS, __FILE__, __LINE__); + return 0; + } else { + cdb_lock_unlock(db->mlock[lockid]); + cdb_seterrno(db, CDB_NOTFOUND, __FILE__, __LINE__); + return -3; + } +} + + +void cdb_stat(CDB *db, CDBSTAT *stat) +{ + if (stat == NULL) { + db->rchit = db->rcmiss = 0; + db->pchit = db->pcmiss = 0; + db->rcount = db->rtime = 0; + db->wcount = db->wtime = 0; + } else { + stat->rnum = db->rnum; + stat->rcnum = db->rcache? db->rcache->num : 0; + stat->pnum = db->hsize; + stat->pcnum = (db->pcache? db->pcache->num : 0) + + (db->dpcache? db->dpcache->num : 0); + stat->rchit = db->rchit; + stat->rcmiss = db->rcmiss; + stat->pchit = db->pchit; + stat->pcmiss = db->pcmiss; + stat->rlatcy = db->rcount ? db->rtime / db->rcount : 0; + stat->wlatcy = db->wcount ? 
/*
 * Default error callback: print the error location, code and message
 * to stderr.
 *
 * arg     - user argument of the callback; unused here
 * errcode - database error code (the parameter was formerly named
 *           `errno`, which is a macro once <errno.h> is included)
 * file    - source file where the error was raised
 * line    - source line where the error was raised
 */
void cdb_deferrorcb(void *arg, int errcode, const char *file, int line)
{
    (void)arg;
    fprintf(stderr, "DBERR: [%s:%d] %d - %s\n", file, line, errcode, cdb_errmsg(errcode));
}
/* the DB object: configuration limits, caches, locks, storage handles
   and statistic counters of one database */
struct CDB
{
    /* size limit for record cache */
    uint64_t rclimit;
    /* size limit for index page cache */
    uint64_t pclimit;
    /* size of bloom filter */
    uint64_t bfsize;
    /* number of records in db */
    uint64_t rnum;
    /* always-incrementing operation id */
    uint64_t oid;
    /* recovery point oid */
    uint64_t roid;
    /* hash table size */
    uint32_t hsize;
    /* last timestamp of the no-dirty-page state */
    uint32_t ndpltime;
    /* whether the database is currently opened */
    bool opened;
    /* the size for a disk seek&read, should not be greater than SBUFSIZE */
    uint32_t areadsize;

    /* record cache */
    CDBHASHTABLE *rcache;
    /* (clean) index page cache */
    CDBHASHTABLE *pcache;
    /* dirty index page cache */
    CDBHASHTABLE *dpcache;
    /* Bloom Filter */
    CDBBLOOMFILTER *bf;

    /* lock for rcache */
    CDBLOCK *rclock;
    /* lock for pcache */
    CDBLOCK *pclock;
    /* lock for dpcache */
    CDBLOCK *dpclock;
    /* locks for hash table operations, split into MLOCKNUM groups */
    CDBLOCK *mlock[MLOCKNUM];
    /* lock for statistics */
    CDBLOCK *stlock;
    /* lock for operation id */
    CDBLOCK *oidlock;
    /* lock for bloom filter */
    CDBLOCK *bflock;
    /* background tasks in another thread */
    CDBBGTASK *bgtask;

    /* main hash table, contains 'hsize' elements */
    FOFF *mtable;
    /* disk i/o layer object */
    CDBVIO *vio;

    /* callback function when an error occurs */
    CDB_ERRCALLBACK errcb;
    /* argument for callback function */
    void *errcbarg;
    /* key to get the error code in the current thread */
    void *errkey;

    /* statistics below, these fields have no lock protection */
    /* record cache hit/miss */
    uint64_t rchit;
    uint64_t rcmiss;
    /* page cache hit/miss */
    uint64_t pchit;
    uint64_t pcmiss;
    /* cumulative disk read time */
    uint64_t rtime;
    /* number of disk read operations */
    uint64_t rcount;
    /* cumulative disk write time */
    uint64_t wtime;
    /* number of disk write operations */
    uint64_t wcount;
};
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+
+/**************************************************************
+*                                                             *
+*  File     : crc64.c                                         *
+*  Purpose  : table-driven CRC-64 computation                 *
+*                                                             *
+**************************************************************/
+#include "cdb_crc64.h"
+
+
+#define CONST64(n) (n##ULL)
+/* Byte-wise lookup table, indexed by (top byte of the running CRC) XOR (input byte).
+ * It is never written, so it is const and can live in read-only storage. */
+static const uint64_t CRC64_Table[256] =
+{
+    CONST64(0x0000000000000000), CONST64(0x42f0e1eba9ea3693),
+    CONST64(0x85e1c3d753d46d26), CONST64(0xc711223cfa3e5bb5),
+    CONST64(0x493366450e42ecdf), CONST64(0x0bc387aea7a8da4c),
+    CONST64(0xccd2a5925d9681f9), CONST64(0x8e224479f47cb76a),
+    CONST64(0x9266cc8a1c85d9be), CONST64(0xd0962d61b56fef2d),
+    CONST64(0x17870f5d4f51b498), CONST64(0x5577eeb6e6bb820b),
+    CONST64(0xdb55aacf12c73561), CONST64(0x99a54b24bb2d03f2),
+    CONST64(0x5eb4691841135847), CONST64(0x1c4488f3e8f96ed4),
+    CONST64(0x663d78ff90e185ef), CONST64(0x24cd9914390bb37c),
+    CONST64(0xe3dcbb28c335e8c9), CONST64(0xa12c5ac36adfde5a),
+    CONST64(0x2f0e1eba9ea36930), CONST64(0x6dfeff5137495fa3),
+    CONST64(0xaaefdd6dcd770416), CONST64(0xe81f3c86649d3285),
+    CONST64(0xf45bb4758c645c51), CONST64(0xb6ab559e258e6ac2),
+    CONST64(0x71ba77a2dfb03177), CONST64(0x334a9649765a07e4),
+    CONST64(0xbd68d2308226b08e), CONST64(0xff9833db2bcc861d),
+    CONST64(0x388911e7d1f2dda8), CONST64(0x7a79f00c7818eb3b),
+    CONST64(0xcc7af1ff21c30bde), CONST64(0x8e8a101488293d4d),
+    CONST64(0x499b3228721766f8), CONST64(0x0b6bd3c3dbfd506b),
+    CONST64(0x854997ba2f81e701), CONST64(0xc7b97651866bd192),
+    CONST64(0x00a8546d7c558a27), CONST64(0x4258b586d5bfbcb4),
+    CONST64(0x5e1c3d753d46d260), CONST64(0x1cecdc9e94ace4f3),
+    CONST64(0xdbfdfea26e92bf46), CONST64(0x990d1f49c77889d5),
+    CONST64(0x172f5b3033043ebf), CONST64(0x55dfbadb9aee082c),
+    CONST64(0x92ce98e760d05399), CONST64(0xd03e790cc93a650a),
+    CONST64(0xaa478900b1228e31), CONST64(0xe8b768eb18c8b8a2),
+    CONST64(0x2fa64ad7e2f6e317), CONST64(0x6d56ab3c4b1cd584),
+    CONST64(0xe374ef45bf6062ee), CONST64(0xa1840eae168a547d),
+    CONST64(0x66952c92ecb40fc8), CONST64(0x2465cd79455e395b),
+    CONST64(0x3821458aada7578f), CONST64(0x7ad1a461044d611c),
+    CONST64(0xbdc0865dfe733aa9), CONST64(0xff3067b657990c3a),
+    CONST64(0x711223cfa3e5bb50), CONST64(0x33e2c2240a0f8dc3),
+    CONST64(0xf4f3e018f031d676), CONST64(0xb60301f359dbe0e5),
+    CONST64(0xda050215ea6c212f), CONST64(0x98f5e3fe438617bc),
+    CONST64(0x5fe4c1c2b9b84c09), CONST64(0x1d14202910527a9a),
+    CONST64(0x93366450e42ecdf0), CONST64(0xd1c685bb4dc4fb63),
+    CONST64(0x16d7a787b7faa0d6), CONST64(0x5427466c1e109645),
+    CONST64(0x4863ce9ff6e9f891), CONST64(0x0a932f745f03ce02),
+    CONST64(0xcd820d48a53d95b7), CONST64(0x8f72eca30cd7a324),
+    CONST64(0x0150a8daf8ab144e), CONST64(0x43a04931514122dd),
+    CONST64(0x84b16b0dab7f7968), CONST64(0xc6418ae602954ffb),
+    CONST64(0xbc387aea7a8da4c0), CONST64(0xfec89b01d3679253),
+    CONST64(0x39d9b93d2959c9e6), CONST64(0x7b2958d680b3ff75),
+    CONST64(0xf50b1caf74cf481f), CONST64(0xb7fbfd44dd257e8c),
+    CONST64(0x70eadf78271b2539), CONST64(0x321a3e938ef113aa),
+    CONST64(0x2e5eb66066087d7e), CONST64(0x6cae578bcfe24bed),
+    CONST64(0xabbf75b735dc1058), CONST64(0xe94f945c9c3626cb),
+    CONST64(0x676dd025684a91a1), CONST64(0x259d31cec1a0a732),
+    CONST64(0xe28c13f23b9efc87), CONST64(0xa07cf2199274ca14),
+    CONST64(0x167ff3eacbaf2af1), CONST64(0x548f120162451c62),
+    CONST64(0x939e303d987b47d7), CONST64(0xd16ed1d631917144),
+    CONST64(0x5f4c95afc5edc62e), CONST64(0x1dbc74446c07f0bd),
+    CONST64(0xdaad56789639ab08), CONST64(0x985db7933fd39d9b),
+    CONST64(0x84193f60d72af34f), CONST64(0xc6e9de8b7ec0c5dc),
+    CONST64(0x01f8fcb784fe9e69), CONST64(0x43081d5c2d14a8fa),
+    CONST64(0xcd2a5925d9681f90), CONST64(0x8fdab8ce70822903),
+    CONST64(0x48cb9af28abc72b6), CONST64(0x0a3b7b1923564425),
+    CONST64(0x70428b155b4eaf1e), CONST64(0x32b26afef2a4998d),
+    CONST64(0xf5a348c2089ac238), CONST64(0xb753a929a170f4ab),
+    CONST64(0x3971ed50550c43c1), CONST64(0x7b810cbbfce67552),
+    CONST64(0xbc902e8706d82ee7), CONST64(0xfe60cf6caf321874),
+    CONST64(0xe224479f47cb76a0), CONST64(0xa0d4a674ee214033),
+    CONST64(0x67c58448141f1b86), CONST64(0x253565a3bdf52d15),
+    CONST64(0xab1721da49899a7f), CONST64(0xe9e7c031e063acec),
+    CONST64(0x2ef6e20d1a5df759), CONST64(0x6c0603e6b3b7c1ca),
+    CONST64(0xf6fae5c07d3274cd), CONST64(0xb40a042bd4d8425e),
+    CONST64(0x731b26172ee619eb), CONST64(0x31ebc7fc870c2f78),
+    CONST64(0xbfc9838573709812), CONST64(0xfd39626eda9aae81),
+    CONST64(0x3a28405220a4f534), CONST64(0x78d8a1b9894ec3a7),
+    CONST64(0x649c294a61b7ad73), CONST64(0x266cc8a1c85d9be0),
+    CONST64(0xe17dea9d3263c055), CONST64(0xa38d0b769b89f6c6),
+    CONST64(0x2daf4f0f6ff541ac), CONST64(0x6f5faee4c61f773f),
+    CONST64(0xa84e8cd83c212c8a), CONST64(0xeabe6d3395cb1a19),
+    CONST64(0x90c79d3fedd3f122), CONST64(0xd2377cd44439c7b1),
+    CONST64(0x15265ee8be079c04), CONST64(0x57d6bf0317edaa97),
+    CONST64(0xd9f4fb7ae3911dfd), CONST64(0x9b041a914a7b2b6e),
+    CONST64(0x5c1538adb04570db), CONST64(0x1ee5d94619af4648),
+    CONST64(0x02a151b5f156289c), CONST64(0x4051b05e58bc1e0f),
+    CONST64(0x87409262a28245ba), CONST64(0xc5b073890b687329),
+    CONST64(0x4b9237f0ff14c443), CONST64(0x0962d61b56fef2d0),
+    CONST64(0xce73f427acc0a965), CONST64(0x8c8315cc052a9ff6),
+    CONST64(0x3a80143f5cf17f13), CONST64(0x7870f5d4f51b4980),
+    CONST64(0xbf61d7e80f251235), CONST64(0xfd913603a6cf24a6),
+    CONST64(0x73b3727a52b393cc), CONST64(0x31439391fb59a55f),
+    CONST64(0xf652b1ad0167feea), CONST64(0xb4a25046a88dc879),
+    CONST64(0xa8e6d8b54074a6ad), CONST64(0xea16395ee99e903e),
+    CONST64(0x2d071b6213a0cb8b), CONST64(0x6ff7fa89ba4afd18),
+    CONST64(0xe1d5bef04e364a72), CONST64(0xa3255f1be7dc7ce1),
+    CONST64(0x64347d271de22754), CONST64(0x26c49cccb40811c7),
+    CONST64(0x5cbd6cc0cc10fafc), CONST64(0x1e4d8d2b65facc6f),
+    CONST64(0xd95caf179fc497da), CONST64(0x9bac4efc362ea149),
+    CONST64(0x158e0a85c2521623), CONST64(0x577eeb6e6bb820b0),
+    CONST64(0x906fc95291867b05), CONST64(0xd29f28b9386c4d96),
+    CONST64(0xcedba04ad0952342), CONST64(0x8c2b41a1797f15d1),
+    CONST64(0x4b3a639d83414e64), CONST64(0x09ca82762aab78f7),
+    CONST64(0x87e8c60fded7cf9d), CONST64(0xc51827e4773df90e),
+    CONST64(0x020905d88d03a2bb), CONST64(0x40f9e43324e99428),
+    CONST64(0x2cffe7d5975e55e2), CONST64(0x6e0f063e3eb46371),
+    CONST64(0xa91e2402c48a38c4), CONST64(0xebeec5e96d600e57),
+    CONST64(0x65cc8190991cb93d), CONST64(0x273c607b30f68fae),
+    CONST64(0xe02d4247cac8d41b), CONST64(0xa2dda3ac6322e288),
+    CONST64(0xbe992b5f8bdb8c5c), CONST64(0xfc69cab42231bacf),
+    CONST64(0x3b78e888d80fe17a), CONST64(0x7988096371e5d7e9),
+    CONST64(0xf7aa4d1a85996083), CONST64(0xb55aacf12c735610),
+    CONST64(0x724b8ecdd64d0da5), CONST64(0x30bb6f267fa73b36),
+    CONST64(0x4ac29f2a07bfd00d), CONST64(0x08327ec1ae55e69e),
+    CONST64(0xcf235cfd546bbd2b), CONST64(0x8dd3bd16fd818bb8),
+    CONST64(0x03f1f96f09fd3cd2), CONST64(0x41011884a0170a41),
+    CONST64(0x86103ab85a2951f4), CONST64(0xc4e0db53f3c36767),
+    CONST64(0xd8a453a01b3a09b3), CONST64(0x9a54b24bb2d03f20),
+    CONST64(0x5d45907748ee6495), CONST64(0x1fb5719ce1045206),
+    CONST64(0x919735e51578e56c), CONST64(0xd367d40ebc92d3ff),
+    CONST64(0x1476f63246ac884a), CONST64(0x568617d9ef46bed9),
+    CONST64(0xe085162ab69d5e3c), CONST64(0xa275f7c11f7768af),
+    CONST64(0x6564d5fde549331a), CONST64(0x279434164ca30589),
+    CONST64(0xa9b6706fb8dfb2e3), CONST64(0xeb46918411358470),
+    CONST64(0x2c57b3b8eb0bdfc5), CONST64(0x6ea7525342e1e956),
+    CONST64(0x72e3daa0aa188782), CONST64(0x30133b4b03f2b111),
+    CONST64(0xf7021977f9cceaa4), CONST64(0xb5f2f89c5026dc37),
+    CONST64(0x3bd0bce5a45a6b5d), CONST64(0x79205d0e0db05dce),
+    CONST64(0xbe317f32f78e067b), CONST64(0xfcc19ed95e6430e8),
+    CONST64(0x86b86ed5267cdbd3), CONST64(0xc4488f3e8f96ed40),
+    CONST64(0x0359ad0275a8b6f5), CONST64(0x41a94ce9dc428066),
+    CONST64(0xcf8b0890283e370c), CONST64(0x8d7be97b81d4019f),
+    CONST64(0x4a6acb477bea5a2a), CONST64(0x089a2aacd2006cb9),
+    CONST64(0x14dea25f3af9026d), CONST64(0x562e43b4931334fe),
+    CONST64(0x913f6188692d6f4b), CONST64(0xd3cf8063c0c759d8),
+    CONST64(0x5dedc41a34bbeeb2), CONST64(0x1f1d25f19d51d821),
+    CONST64(0xd80c07cd676f8394), CONST64(0x9afce626ce85b507)
+};
+
+
+/*
+ * Compute a 64-bit CRC over len bytes starting at buf.
+ *
+ * The register starts with all bits set and is updated one byte at a time,
+ * most-significant byte first; no final XOR is applied, so an empty input
+ * returns 0xFFFFFFFFFFFFFFFF.
+ * buf is only read (it must be valid for len bytes when len > 0).
+ */
+uint64_t cdb_crc64(const void *buf, uint32_t len)
+{
+    const uint8_t *cbuf = (const uint8_t *)buf;
+    uint64_t crc = CONST64(0xFFFFFFFFFFFFFFFF);
+
+    for (uint32_t i = 0; i < len; i++) {
+        /* top byte of the register selects the table entry together with the input byte */
+        crc = CRC64_Table[(uint8_t)(crc >> 56) ^ cbuf[i]] ^ (crc << 8);
+    }
+    return crc;
+}
+
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_crc64.h b/libdap-chain-global-db/libdap-cuttdb/src/cdb_crc64.h
new file mode 100644
index 0000000000000000000000000000000000000000..50744fc844afe84cdcef8ddba5f6cff81ae5599a
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_crc64.h
@@ -0,0 +1,22 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+
+#ifndef _CDB_CRC64_H_
+#define _CDB_CRC64_H_
+#include <stdint.h>
+
+uint64_t cdb_crc64(const void *buf, uint32_t len);
+
+#endif
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_dumpdb.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_dumpdb.c
new file mode 100644
index 0000000000000000000000000000000000000000..99cddbb5355cb44d235ef08b27234a7199c2fe67
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_dumpdb.c
@@ -0,0 +1,68 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+#include "cuttdb.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+
+/*
+ * Iteration callback: print one record as "key<TAB>value<TAB>expire".
+ *
+ * key/val are copied into one NUL-terminated buffer (on the stack when the
+ * record fits in SBUFSIZE bytes, on the heap otherwise) so they can be
+ * printed with %s; output therefore stops at the first embedded NUL byte.
+ * arg and oid are unused. Always returns true.
+ */
+bool itcb(void *arg, const char *key, int ksize, const char *val, int vsize, uint32_t expire, uint64_t oid)
+{
+#define SBUFSIZE 4096
+    char buf[SBUFSIZE];
+    char *kvbuf = buf;
+
+    (void)arg;
+    (void)oid;
+
+    if (ksize + vsize + 2 > SBUFSIZE) {
+        kvbuf = (char*)malloc(ksize + vsize + 2);
+        if (kvbuf == NULL) {
+            /* skip this record rather than writing through a NULL pointer */
+            fprintf(stderr, "Out of memory, record skipped\n");
+            return true;
+        }
+    }
+    memcpy(kvbuf, key, ksize);
+    kvbuf[ksize] = '\t';
+    memcpy(kvbuf + ksize + 1, val, vsize);
+    kvbuf[ksize + vsize + 1] = '\0';
+    printf("%s\t%u\n", kvbuf, expire);
+    if (kvbuf != buf)
+        free(kvbuf);
+    return true;
+}
+
+/*
+ * Usage: <prog> dbpath [cachelimit(MB)]
+ * Opens the database at dbpath and dumps every record through itcb().
+ * Returns 0 on success, -1 on a usage or open error.
+ */
+int main(int argc, char *argv[])
+{
+    /* 1TB */
+    int cache_limit = 1048576;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s dbpath [cachelimit(MB)].... \n", argv[0]);
+        return -1;
+    }
+    if (argc > 2) {
+        cache_limit = atoi(argv[2]);
+    }
+
+    CDB *db = cdb_new();
+    cdb_option(db, 0, 0, cache_limit);
+    if (cdb_open(db, argv[1], CDB_PAGEWARMUP) < 0) {
+        fprintf(stderr, "Database open error, unable to recovery\n");
+        return -1;
+    }
+    void *it = cdb_iterate_new(db, 0);
+    cdb_iterate(db, itcb, NULL, it);
+    cdb_iterate_destroy(db, it);
+    cdb_destroy(db);
+    return 0;
+}
+
+
+
+
+
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_dumpraw.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_dumpraw.c
new file mode 100644
index 0000000000000000000000000000000000000000..53bbe11c6e7bec1723c97fb951b63468889d555a
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_dumpraw.c
@@ -0,0 +1,115 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <stdint.h>
+
+#define SI4 4
+#define SI8 8
+
+/* data record */
+typedef struct {
+    /* disk store starts at following field */
+    uint32_t magic;
+    uint32_t ksize;
+    uint32_t vsize;
+    uint32_t expire;
+    uint64_t oid;
+    char buf[0];
+} __attribute__((packed)) CDBREC;
+
+/* real size of a record header when stored on disk */
+#define RECHSIZE (SI4 * 4 + SI8)
+/* real size of a record when stored on disk */
+#define RECSIZE(r) (RECHSIZE + (r)->ksize + (r)->vsize)
+
+#define FILEMETASIZE 64
+#define ALIGNBYTES 16
+#define RECMAGIC 0x19871022
+#define DELRECMAGIC 0x19871023
+#define FILEMAGICHEADER "CuTtDbFiLePaRtIaL"
+#define FILEMAGICLEN (strlen(FILEMAGICHEADER))
+/* round off up to the next multiple of ALIGNBYTES (off is evaluated more than once) */
+#define OFFALIGNED(off) (((off) & (ALIGNBYTES - 1))? ((off) | (ALIGNBYTES - 1)) + 1: (off))
+
+
+
+/*
+ * Dump every live record of one data file as "key<TAB>value<TAB>expire".
+ *
+ * The file is mapped read-only; after the FILEMETASIZE-byte header the map is
+ * scanned in ALIGNBYTES steps for record magics. Deleted records
+ * (DELRECMAGIC) are skipped. Anything that does not look like a complete
+ * record inside the file is stepped over, so a truncated or corrupt file
+ * cannot make the scan read outside the mapping.
+ * Errors are reported on stderr; the function never aborts the program.
+ */
+void process(const char *filename)
+{
+#define SBUFSIZE 4096
+    char buf[SBUFSIZE];
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        fprintf(stderr, "%s Open failed\n", filename);
+        return;
+    }
+
+    /* also covers lseek() failure (-1); the header alone needs FILEMETASIZE bytes */
+    off_t end = lseek(fd, 0, SEEK_END);
+    if (end < (off_t)FILEMETASIZE) {
+        fprintf(stderr, "%s is not a cuttdb file\n", filename);
+        close(fd);
+        return;
+    }
+
+    size_t filesize = (size_t)end;
+    char *map = (char*)mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);
+    if (map == MAP_FAILED) {
+        fprintf(stderr, "%s mmap failed\n", filename);
+        close(fd);
+        return;
+    }
+    if (memcmp(map, FILEMAGICHEADER, FILEMAGICLEN)) {
+        fprintf(stderr, "%s is not a cuttdb file\n", filename);
+        munmap(map, filesize);
+        close(fd);
+        return;
+    }
+
+    size_t pos = FILEMETASIZE;
+    while (pos < filesize) {
+        size_t remain = filesize - pos;
+        /* no complete record header fits any more */
+        if (remain < (size_t)RECHSIZE)
+            break;
+
+        const CDBREC *rec = (const CDBREC*)&map[pos];
+        if (rec->magic != RECMAGIC && rec->magic != DELRECMAGIC) {
+            pos += ALIGNBYTES;
+            continue;
+        }
+
+        /* 64-bit sum: ksize + vsize come from the file and may overflow 32 bits */
+        uint64_t recsize = (uint64_t)RECHSIZE + rec->ksize + rec->vsize;
+        if (recsize > remain) {
+            /* record claims to extend past end of file: treat as garbage and resync */
+            pos += ALIGNBYTES;
+            continue;
+        }
+
+        pos += OFFALIGNED((size_t)recsize);
+        if (rec->magic != RECMAGIC)
+            continue;
+
+        size_t kvlen = (size_t)rec->ksize + rec->vsize + 2;
+        char *kvbuf = buf;
+        if (kvlen > SBUFSIZE) {
+            kvbuf = (char*)malloc(kvlen);
+            if (kvbuf == NULL) {
+                fprintf(stderr, "%s Out of memory, record skipped\n", filename);
+                continue;
+            }
+        }
+        memcpy(kvbuf, rec->buf, rec->ksize);
+        kvbuf[rec->ksize] = '\t';
+        memcpy(kvbuf + rec->ksize + 1, rec->buf + rec->ksize, rec->vsize);
+        kvbuf[kvlen - 1] = '\0';
+        printf("%s\t%u\n", kvbuf, rec->expire);
+        if (kvbuf != buf)
+            free(kvbuf);
+    }
+
+    munmap(map, filesize);
+    close(fd);
+}
+
+
+
+
+/* Dump each data file named on the command line; always returns 0. */
+int main(int argc, char *argv[])
+{
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s dat########.cdb dat########.cdb .... \n", argv[0]);
+        return 0;
+    }
+    for(int i = 1; i < argc; i++)
+        process(argv[i]);
+    return 0;
+}
+
+
+
+
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_errno.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_errno.c
new file mode 100644
index 0000000000000000000000000000000000000000..432d154ac4dba8d8a3879b16905ae69468c58094
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_errno.c
@@ -0,0 +1,78 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+
+#include "cuttdb.h"
+#include "cdb_errno.h"
+#include "cdb_types.h"
+#include "cdb_core.h"
+#include <pthread.h>
+
+
+/* Return the error code last stored for the calling thread under db->errkey. */
+int cdb_errno(CDB *db)
+{
+    pthread_key_t *key = (pthread_key_t*)db->errkey;
+    void *slot = pthread_getspecific(*key);
+
+    return (long)slot;
+}
+
+/* Map an error code to a static, human-readable message (never NULL). */
+const char *cdb_errmsg(int ecode)
+{
+    const char *msg;
+
+    switch(ecode) {
+        case CDB_SUCCESS:      msg = "Success"; break;
+        case CDB_NOTFOUND:     msg = "Key Not Found"; break;
+        case CDB_EXIST:        msg = "Item Already Exists"; break;
+        case CDB_DIRNOEXIST:   msg = "Path Open Failed"; break;
+        case CDB_OPENERR:      msg = "File Open Failed"; break;
+        case CDB_PIDEXIST:     msg = "Opened By Another Process"; break;
+        case CDB_DATAERRDAT:   msg = "Data File Content Error"; break;
+        case CDB_DATAERRIDX:   msg = "Index File Content Error"; break;
+        case CDB_WRITEERR:     msg = "Write To File Error"; break;
+        case CDB_READERR:      msg = "Read From File Error"; break;
+        case CDB_NOFID:        msg = "Internal File Lost"; break;
+        case CDB_INTERNALERR:  msg = "Internal Error"; break;
+        case CDB_DATAERRMETA:  msg = "File Header Error"; break;
+        case CDB_MEMDBNOCACHE: msg = "MemDB Mode With Zero Record Cache Size"; break;
+        default:               msg = "Error For Errno"; break;
+    }
+    return msg;
+}
+
+
+/* Register (or clear, with errcb == NULL) the error callback and its user argument. */
+void cdb_seterrcb(CDB *db, CDB_ERRCALLBACK errcb, void *arg)
+{
+    db->errcbarg = arg;
+    db->errcb = errcb;
+}
+
+
+/*
+ * Store ecode as the calling thread's error code and, for anything other
+ * than CDB_SUCCESS, notify the registered callback with the reporting
+ * source location.
+ */
+void cdb_seterrno(CDB *db, int ecode, const char *source, int line)
+{
+    pthread_key_t *key = (pthread_key_t*)db->errkey;
+
+    pthread_setspecific(*key, (void*)(long)ecode);
+
+    if (ecode == CDB_SUCCESS || !db->errcb)
+        return;
+    db->errcb(db->errcbarg, ecode, source, line);
+}
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_errno.h b/libdap-chain-global-db/libdap-cuttdb/src/cdb_errno.h
new file mode 100644
index 0000000000000000000000000000000000000000..f274819de73b2133d2648aa6490ea8f5cf66b41c
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_errno.h
@@ -0,0 +1,22 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu.
All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+
+#ifndef _CDB_ERRNO_H_
+#define _CDB_ERRNO_H_
+
+void cdb_seterrno(CDB *db, int ecode, const char *source, int line);
+
+#endif
+
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_hashtable.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_hashtable.c
new file mode 100644
index 0000000000000000000000000000000000000000..1f33f0c1f43b48c33b9b1ffede33519bc6490b17
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_hashtable.c
@@ -0,0 +1,541 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+
+#include "cdb_hashtable.h"
+#include <stdlib.h>
+#include <string.h>
+
+/*
+#define LRUPREV(i) (*(CDBHTITEM**)&((i)->buf[0]))
+#define LRUNEXT(i) (*(CDBHTITEM**)&((i)->buf[sizeof(void*)]))
+*/
+
+/* LRU links of an item; only valid when the table was created with lru == true */
+#define LRUPREV(i) ((i)->lruptr[0])
+#define LRUNEXT(i) ((i)->lruptr[1])
+
+/* Default hash function, used when the caller passes no hash function to cdb_ht_new(). */
+static uint32_t MurmurHash1( const void * key, int len)
+{
+    const unsigned int m = 0xc6a4a793;
+    const int r = 16;
+    unsigned int h = 0x19900917 ^ (len * m);
+    const unsigned char * data = (const unsigned char *)key;
+
+    while(len >= 4)
+    {
+        /* memcpy instead of a cast: key may be unaligned and is not an unsigned int object */
+        unsigned int k;
+        memcpy(&k, data, sizeof(k));
+        h += k; h *= m; h ^= h >> 16;
+        data += 4; len -= 4;
+    }
+
+    switch(len)
+    {
+        case 3:
+            h += data[2] << 16;
+            /* fallthrough */
+        case 2:
+            h += data[1] << 8;
+            /* fallthrough */
+        case 1:
+            h += data[0];
+            h *= m;
+            h ^= h >> r;
+            break;
+        default:
+            break;
+    }
+
+    h *= m; h ^= h >> 10;
+    h *= m; h ^= h >> 17;
+    return h;
+}
+
+/* Address of the key bytes inside item (after the two LRU pointers in LRU mode). */
+void *cdb_ht_itemkey(CDBHASHTABLE *ht, CDBHTITEM *item)
+{
+    return (void *)(item->buf + ht->lru * 2 * sizeof(void*));
+}
+
+/* Address of the value bytes inside item (directly after the key). */
+void *cdb_ht_itemval(CDBHASHTABLE *ht, CDBHTITEM *item)
+{
+    return (void *)(item->buf + ht->lru * 2 * sizeof(void*) + item->ksize);
+}
+
+/*
+ * Create an empty table. lru enables the least-recently-used list; hashfunc
+ * may be NULL to select the built-in hash.
+ * Returns NULL when memory cannot be allocated.
+ */
+CDBHASHTABLE *cdb_ht_new(bool lru, CDBHASHFUNC hashfunc)
+{
+    CDBHASHTABLE *ht;
+
+    ht = (CDBHASHTABLE*)malloc(sizeof(CDBHASHTABLE));
+    if (ht == NULL)
+        return NULL;
+
+    ht->lru = lru;
+    ht->hash = hashfunc ? hashfunc : MurmurHash1;
+    ht->num = 0;
+    ht->size = sizeof(CDBHASHTABLE);
+    ht->tail = ht->head = NULL;
+    for(uint32_t i = 0; i < (1<<CDBHTBNUMPOW); i++) {
+        CDBHTBUCKET *bucket = &(ht->buckets[i]);
+        bucket->bnum = 2;
+        bucket->rnum = 0;
+        bucket->items = (CDBHTITEM **)calloc(bucket->bnum, sizeof(CDBHTITEM *));
+        if (bucket->items == NULL) {
+            /* undo the buckets allocated so far */
+            while (i > 0)
+                free(ht->buckets[--i].items);
+            free(ht);
+            return NULL;
+        }
+        ht->size += bucket->bnum * sizeof(CDBHTITEM *);
+    }
+
+    return ht;
+}
+
+/*
+ * Allocate an item with room for ksize key bytes and vsize value bytes
+ * (plus the two LRU pointers in LRU mode). The item is not inserted.
+ * Returns NULL when memory cannot be allocated.
+ */
+CDBHTITEM *cdb_ht_newitem(CDBHASHTABLE *ht, int ksize, int vsize)
+{
+    CDBHTITEM *item;
+    int hsize;
+
+    if (ht->lru)
+        hsize = sizeof(CDBHTITEM) + 2 * sizeof(void*);
+    else
+        hsize = sizeof(CDBHTITEM);
+
+    item = (CDBHTITEM*)malloc(hsize + ksize + vsize);
+    if (item == NULL)
+        return NULL;
+    item->ksize = ksize;
+    item->vsize = vsize;
+    if (ht->lru) {
+        LRUPREV(item) = NULL;
+        LRUNEXT(item) = NULL;
+    }
+    return item;
+}
+
+
+
+
+/*
+ * Insert an item obtained from cdb_ht_newitem() whose key/value bytes are
+ * already filled in. The table takes ownership of item. An existing item
+ * with the same key is unlinked and freed first. In LRU mode the new item
+ * becomes the head (newest) of the LRU list.
+ * item must not be NULL and must not already be stored in the table.
+ */
+void cdb_ht_insert(CDBHASHTABLE *ht, CDBHTITEM *item)
+{
+    uint32_t bid, hid;
+    CDBHTBUCKET *bucket;
+
+    item->hash = ht->hash(cdb_ht_itemkey(ht, item), item->ksize);
+    bid = item->hash & ((1<<CDBHTBNUMPOW)-1);
+    bucket = &(ht->buckets[bid]);
+    hid = (item->hash >> CDBHTBNUMPOW) & (bucket->bnum-1);
+
+    /* grow the slot array of this bucket once chains get long */
+    if (bucket->rnum > bucket->bnum * 2) {
+        uint32_t exp = 2;
+        if (bucket->bnum < 512)
+            exp = 4;
+        uint32_t newbnum = bucket->bnum * exp;
+        CDBHTITEM **ilist = (CDBHTITEM**)calloc(newbnum, sizeof(CDBHTITEM*));
+        /* growing is only an optimisation: without memory keep the current array */
+        if (ilist != NULL) {
+            for(uint32_t i = 0; i < bucket->bnum; i++) {
+                CDBHTITEM *curitem = bucket->items[i];
+                while(curitem != NULL) {
+                    CDBHTITEM *nextitem = curitem->hnext;
+                    uint32_t newhid = (curitem->hash>>CDBHTBNUMPOW)
+                        & (newbnum - 1);
+                    curitem->hnext = ilist[newhid];
+                    ilist[newhid] = curitem;
+                    curitem = nextitem;
+                }
+            }
+            free(bucket->items);
+            bucket->items = ilist;
+            ht->size += (newbnum - bucket->bnum) * sizeof(CDBHTITEM *);
+            bucket->bnum = newbnum;
+            hid = (item->hash >> CDBHTBNUMPOW) & (bucket->bnum - 1);
+        }
+    }
+
+    /* drop an existing item with the same key */
+    {
+        CDBHTITEM *curitem = bucket->items[hid];
+        CDBHTITEM *preitem = NULL;
+        while(curitem != NULL) {
+            if (curitem->hash == item->hash
+                && curitem->ksize == item->ksize
+                && memcmp(cdb_ht_itemkey(ht, curitem),
+                    cdb_ht_itemkey(ht, item) ,curitem->ksize) == 0) {
+                if (ht->lru) {
+                    if (LRUPREV(curitem))
+                        LRUNEXT(LRUPREV(curitem)) = LRUNEXT(curitem);
+                    if (LRUNEXT(curitem))
+                        LRUPREV(LRUNEXT(curitem)) = LRUPREV(curitem);
+                    if (ht->head == curitem)
+                        ht->head = LRUNEXT(curitem);
+                    if (ht->tail == curitem)
+                        ht->tail = LRUPREV(curitem);
+                }
+                if (preitem)
+                    preitem->hnext = curitem->hnext;
+                else
+                    bucket->items[hid] = curitem->hnext;
+                ht->size -= sizeof(CDBHTITEM) + curitem->ksize + curitem->vsize
+                    + (ht->lru > 0) * sizeof(CDBHTITEM*) * 2;
+                ht->num--;
+                bucket->rnum--;
+                free(curitem);
+                break;
+            }
+            preitem = curitem;
+            curitem = curitem->hnext;
+        }
+    }
+
+    item->hnext = bucket->items[hid];
+    bucket->items[hid] = item;
+
+    if (ht->lru) {
+        if (ht->head) LRUPREV(ht->head) = item;
+        LRUPREV(item) = NULL;
+        LRUNEXT(item) = ht->head;
+        ht->head = item;
+        if (ht->tail == NULL)
+            ht->tail = item;
+    }
+
+    bucket->rnum++;
+    ht->num++;
+    ht->size += sizeof(CDBHTITEM) + item->ksize + item->vsize
+        + ht->lru * sizeof(CDBHTITEM*) * 2;
+}
+
+
+/*
+ * Copy key/val into a freshly allocated item and insert it.
+ * Returns the address of the value inside the table, or NULL when the item
+ * could not be allocated (the table is then unchanged).
+ */
+void *cdb_ht_insert2(CDBHASHTABLE *ht, const void *key, int ksize, const void *val, int vsize)
+{
+    CDBHTITEM *item;
+
+    item = cdb_ht_newitem(ht, ksize, vsize);
+    if (item == NULL)
+        return NULL;
+    memcpy(cdb_ht_itemkey(ht, item), key, ksize);
+    memcpy(cdb_ht_itemval(ht, item), val, vsize);
+    cdb_ht_insert(ht, item);
+    return cdb_ht_itemval(ht, item);
+}
+
+/*
+ * Look up key. On a hit returns the value address and stores its size in
+ * *vsize; on a miss returns NULL and stores 0. vsize may be NULL.
+ * mtf: in LRU mode, move the item to the head of the LRU list.
+ */
+void *cdb_ht_get(CDBHASHTABLE *ht, const void *key, int ksize, int *vsize, bool mtf)
+{
+    CDBHTITEM *res;
+
+    res = cdb_ht_get3(ht, key, ksize, mtf);
+    if (res) {
+        if(vsize)
+            *vsize = res->vsize;
+        return cdb_ht_itemval(ht, res);
+    } else {
+        if(vsize)
+            *vsize = 0;
+        return NULL;
+    }
+}
+
+
+/* Like cdb_ht_get() without reporting the value size. */
+void *cdb_ht_get2(CDBHASHTABLE *ht, const void *key, int ksize, bool mtf)
+{
+    CDBHTITEM *res;
+
+    res = cdb_ht_get3(ht, key, ksize, mtf);
+    if (res)
+        return cdb_ht_itemval(ht, res);
+    else
+        return NULL;
+}
+
+
+/*
+ * Look up key and return the stored item itself (not a copy), or NULL.
+ * mtf: in LRU mode, move the item to the head of the LRU list.
+ */
+CDBHTITEM *cdb_ht_get3(CDBHASHTABLE *ht, const void *key, int ksize, bool mtf)
+{
+    uint32_t hash, bid, hid;
+    CDBHTBUCKET *bucket;
+    CDBHTITEM *curitem;
+
+    hash = ht->hash(key, ksize);
+    bid = hash & ((1<<CDBHTBNUMPOW)-1);
+    bucket = &(ht->buckets[bid]);
+    hid = (hash >> CDBHTBNUMPOW) & (bucket->bnum - 1);
+
+    curitem = bucket->items[hid];
+    while (curitem != NULL) {
+        if (curitem->hash == hash
+            && curitem->ksize == ksize
+            && memcmp(cdb_ht_itemkey(ht, curitem), key , ksize) == 0) {
+            if (ht->lru && mtf && ht->head != curitem) {
+                /* unlink, then relink in front of the current head */
+                if (LRUPREV(curitem))
+                    LRUNEXT(LRUPREV(curitem)) = LRUNEXT(curitem);
+                if (LRUNEXT(curitem))
+                    LRUPREV(LRUNEXT(curitem)) = LRUPREV(curitem);
+                if (ht->tail == curitem)
+                    ht->tail = LRUPREV(curitem);
+
+                LRUNEXT(curitem) = ht->head;
+                LRUPREV(ht->head) = curitem;
+                ht->head = curitem;
+                LRUPREV(curitem) = NULL;
+            }
+            return curitem;
+        }
+        curitem = curitem->hnext;
+    }
+    return NULL;
+}
+
+
+/* Return true when an item with this key is stored; does not touch LRU order. */
+bool cdb_ht_exist(CDBHASHTABLE *ht, const void *key, int ksize)
+{
+    int vsize;
+    return (cdb_ht_get(ht, key, ksize, &vsize, false) != NULL);
+}
+
+
+/* Remove and free the item with this key. Returns 0 on success, -1 if absent. */
+int cdb_ht_del2(CDBHASHTABLE *ht, const void *key, int ksize)
+{
+    CDBHTITEM *res = NULL;
+    res = cdb_ht_del(ht, key, ksize);
+    if (res) {
+        free(res);
+        return 0;
+    }
+    return -1;
+}
+
+
+/*
+ * Unlink the item with this key from the table (and the LRU list) and
+ * return it; the caller must free() it. Returns NULL if absent.
+ */
+CDBHTITEM *cdb_ht_del(CDBHASHTABLE *ht, const void *key, int ksize)
+{
+    uint32_t hash, bid, hid;
+    CDBHTBUCKET *bucket;
+    CDBHTITEM *curitem, *preitem;
+    CDBHTITEM *res = NULL;
+
+    hash = ht->hash(key, ksize);
+    bid = hash & ((1<<CDBHTBNUMPOW)-1);
+    bucket = &(ht->buckets[bid]);
+    hid = (hash >> CDBHTBNUMPOW) & (bucket->bnum - 1);
+
+    curitem = bucket->items[hid];
+    preitem = NULL;
+    while(curitem != NULL) {
+        if (curitem->hash == hash
+            && curitem->ksize == ksize
+            && memcmp(cdb_ht_itemkey(ht, curitem),
+                key, ksize) == 0) {
+            if (ht->lru) {
+                if (LRUPREV(curitem))
+                    LRUNEXT(LRUPREV(curitem)) = LRUNEXT(curitem);
+                if (LRUNEXT(curitem))
+                    LRUPREV(LRUNEXT(curitem)) = LRUPREV(curitem);
+                if (ht->head == curitem)
+                    ht->head = LRUNEXT(curitem);
+                if (ht->tail == curitem)
+                    ht->tail = LRUPREV(curitem);
+            }
+            if (preitem)
+                preitem->hnext = curitem->hnext;
+            else
+                bucket->items[hid] = curitem->hnext;
+            ht->size -= sizeof(CDBHTITEM) + curitem->ksize + curitem->vsize
+                + (ht->lru > 0) * sizeof(CDBHTITEM*) * 2;
+            ht->num--;
+            bucket->rnum--;
+            res = curitem;
+            break;
+        }
+        preitem = curitem;
+        curitem = curitem->hnext;
+    }
+
+    return res;
+}
+
+
+/* LRU mode: remove and free the oldest item, if any. */
+void cdb_ht_removetail(CDBHASHTABLE *ht)
+{
+    free(cdb_ht_poptail(ht));
+}
+
+
+/* LRU mode: return the oldest item without removing it (NULL when empty). */
+CDBHTITEM *cdb_ht_gettail(CDBHASHTABLE *ht)
+{
+    return ht->tail;
+}
+
+
+/*
+ * LRU mode: unlink the oldest item and return it; the caller must free() it.
+ * Returns NULL when the table is empty or not in LRU mode.
+ */
+CDBHTITEM *cdb_ht_poptail(CDBHASHTABLE *ht)
+{
+    CDBHTITEM *item = ht->tail, *curitem, *preitem;
+    CDBHTBUCKET *bucket;
+    uint32_t bid, hid;
+
+    if (!(ht->lru) || item == NULL)
+        return NULL;
+
+    bid = item->hash & ((1<<CDBHTBNUMPOW)-1);
+    bucket = &(ht->buckets[bid]);
+    hid = (item->hash >> CDBHTBNUMPOW) & (bucket->bnum - 1);
+
+    /* find the item in its slot chain to unlink it there too */
+    curitem = bucket->items[hid];
+    preitem = NULL;
+    while (curitem != NULL) {
+        if (curitem->hash == item->hash
+            && curitem->ksize == item->ksize
+            && memcmp(cdb_ht_itemkey(ht, curitem),
+                cdb_ht_itemkey(ht, item), item->ksize) == 0) {
+            if (preitem) {
+                preitem->hnext = curitem->hnext;
+            } else {
+                bucket->items[hid] = curitem->hnext;
+            }
+            break;
+        }
+        preitem = curitem;
+        curitem = curitem->hnext;
+    }
+
+    if (LRUPREV(item))
+        LRUNEXT(LRUPREV(item)) = NULL;
+    if (ht->head == item)
+        ht->head = NULL;
+    ht->tail = LRUPREV(item);
+    bucket->rnum--;
+    ht->num--;
+    ht->size -= sizeof(CDBHTITEM) + item->ksize + item->vsize
+        + sizeof(CDBHTITEM*) * 2;
+    return item;
+}
+
+/*
+ * Free every item but keep the table (and its slot arrays) usable.
+ * The LRU list ends and the memory accounting are reset as well, so no
+ * pointer to a freed item survives and ht->size again covers only the
+ * table structure and its slot arrays.
+ */
+void cdb_ht_clean(CDBHASHTABLE *ht)
+{
+    uint64_t size = sizeof(CDBHASHTABLE);
+
+    for(uint32_t i = 0; i < (1<<CDBHTBNUMPOW); i++) {
+        CDBHTBUCKET *bucket = &(ht->buckets[i]);
+        for(uint32_t j = 0; j < bucket->bnum; j++) {
+            CDBHTITEM *curitem = bucket->items[j];
+            while(curitem != NULL) {
+                CDBHTITEM *tmp = curitem->hnext;
+                free(curitem);
+                curitem = tmp;
+            }
+            bucket->items[j] = NULL;
+        }
+        bucket->rnum = 0;
+        size += bucket->bnum * sizeof(CDBHTITEM *);
+    }
+    ht->num = 0;
+    ht->size = size;
+    ht->head = ht->tail = NULL;
+}
+
+
+/* Free all items, the slot arrays and the table itself. NULL is ignored. */
+void cdb_ht_destroy(CDBHASHTABLE *ht)
+{
+    if (ht == NULL)
+        return;
+
+    /* in LRU mode every item is on the LRU list, so free them through it */
+    if (ht->lru) {
+        CDBHTITEM *curitem = ht->head;
+        while(curitem) {
+            CDBHTITEM *nextitem = LRUNEXT(curitem);
+            free(curitem);
+            curitem = nextitem;
+        }
+    }
+
+    for(uint32_t i = 0; i < (1<<CDBHTBNUMPOW); i++) {
+        CDBHTBUCKET *bucket = &(ht->buckets[i]);
+
+        for(uint32_t j = 0; j < bucket->bnum && (!ht->lru); j++) {
+            CDBHTITEM *curitem = bucket->items[j];
+            while(curitem != NULL) {
+                CDBHTITEM *tmp = curitem->hnext;
+                free(curitem);
+                curitem = tmp;
+            }
+        }
+        free(bucket->items);
+    }
+    free(ht);
+}
+
+
+/* Return the first item in bucket/slot order, or NULL when the table is empty. */
+CDBHTITEM *cdb_ht_iterbegin(CDBHASHTABLE *ht)
+{
+    for(uint32_t i = 0; i < (1<<CDBHTBNUMPOW); i++) {
+        CDBHTBUCKET *bucket = &(ht->buckets[i]);
+        if (!bucket->rnum)
+            continue;
+        for(uint32_t j = 0; j < bucket->bnum; j++)
+            if (bucket->items[j])
+                return bucket->items[j];
+    }
+
+    return NULL;
+}
+
+
+/*
+ * Return the item following cur: the rest of cur's chain first, then the
+ * later slots of the same bucket, then the later buckets. NULL at the end.
+ */
+CDBHTITEM *cdb_ht_iternext(CDBHASHTABLE *ht, CDBHTITEM *cur)
+{
+    if (cur == NULL)
+        return NULL;
+
+    if (cur->hnext)
+        return cur->hnext;
+
+    uint32_t bid = cur->hash & ((1<<CDBHTBNUMPOW)-1);
+    CDBHTBUCKET *bucket = &(ht->buckets[bid]);
+    uint32_t hid = (cur->hash >> CDBHTBNUMPOW) & (bucket->bnum - 1);
+
+    for(uint32_t i = hid + 1; i < bucket->bnum; i++) {
+        if (bucket->items[i])
+            return bucket->items[i];
+    }
+
+    for(uint32_t i = bid + 1; i < (1<<CDBHTBNUMPOW); i++) {
+        CDBHTBUCKET *next = &(ht->buckets[i]);
+        if (!next->rnum)
+            continue;
+        for(uint32_t j = 0; j < next->bnum; j++)
+            if (next->items[j])
+                return next->items[j];
+    }
+
+    return NULL;
+}
+
+
+#ifdef _UT_
+#include <inttypes.h>
+#include <stdio.h>
+#include <time.h>
+int main(int argc, char *argv[])
+{
+    CDBHASHTABLE *ht;
+    long k, v;
+    ht = cdb_ht_new(true, NULL);
+    if (ht == NULL)
+        return 1;
+    for(int i = 0; i < 1000; i++) {
+        k = i;
+        v = i * 1000;
+        cdb_ht_insert2(ht, &k, sizeof(long), &v, sizeof(long));
+    }
+
+    srand(time(NULL));
+
+    for(int i = 0; i < 1000; i++) {
+        long *v, k = rand() % 1000;
+        int vsize;
+        v = (long*)cdb_ht_get(ht, &k, sizeof(long), &vsize, true);
+        if (v)
+            printf("get: %ld -> %ld (%d)\n", k, *v, vsize);
+    }
+
+    /* size and num are uint64_t, so they need the matching conversion */
+    printf("total size: %" PRIu64 " num: %" PRIu64 "\n", ht->size, ht->num);
+
+    CDBHTITEM *item;
+    item = cdb_ht_poptail(ht);
+    printf("tail: %ld - %ld\n", *(long*)cdb_ht_itemkey(ht, item), *(long*)cdb_ht_itemval(ht, item));
+    free(item);
+    item = cdb_ht_poptail(ht);
+    printf("tail: %ld - %ld\n", *(long*)cdb_ht_itemkey(ht, item), *(long*)cdb_ht_itemval(ht, item));
+    free(item);
+    cdb_ht_destroy(ht);
+    return 0;
+}
+#endif
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_hashtable.h b/libdap-chain-global-db/libdap-cuttdb/src/cdb_hashtable.h
new file mode 100644
index 0000000000000000000000000000000000000000..1f35b376dae7dd7618c24500dc729ab71577ad45
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_hashtable.h
@@ -0,0 +1,139 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+
+#ifndef _CDB_HASHTABLE_H_
+#define _CDB_HASHTABLE_H_
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef uint32_t (*CDBHASHFUNC)(const void *, int);
+
+/* by default there are 1<<8 level-1 buckets; each bucket grows on its own,
+   which makes the table expand more smoothly */
+#define CDBHTBNUMPOW 8
+
+
+typedef struct CDBHTITEM
+{
+    int ksize;
+    int vsize;
+    uint32_t hash;
+    /* next item chained in the same slot of the bucket */
+    struct CDBHTITEM *hnext;
+    /* in LRU mode, the first bytes of the payload hold two pointers: the prev/next
+       item of the LRU list (lruptr and buf are zero-length arrays and name the same
+       location) */
+    struct CDBHTITEM *lruptr[0];
+    char buf[0];
+} __attribute__((packed)) CDBHTITEM;
+
+
+typedef struct {
+    /* array of slots, each the head of a chain of items */
+    CDBHTITEM **items;
+    /* number of allocated slots in the bucket */
+    uint32_t bnum;
+    /* number of items that exist in the bucket */
+    uint32_t rnum;
+} CDBHTBUCKET;
+
+
+typedef struct CDBHASHTABLE {
+    /* is the table in LRU mode? */
+    bool lru;
+    /* hash function in use (user specified or the built-in default) */
+    CDBHASHFUNC hash;
+    /* fixed number of level-1 buckets */
+    CDBHTBUCKET buckets[1<<CDBHTBNUMPOW];
+    /* memory usage in bytes */
+    uint64_t size;
+    /* number of items */
+    uint64_t num;
+    /* in LRU mode, the newest item */
+    CDBHTITEM *head;
+    /* in LRU mode, the oldest item */
+    CDBHTITEM *tail;
+} CDBHASHTABLE;
+
+
+/* get the pointer to the key stored in the item */
+/* #define cdb_ht_itemkey(ht, item) (item->buf + ht->lru * 2 * sizeof(void*)) */
+void *cdb_ht_itemkey(CDBHASHTABLE *ht, CDBHTITEM *item);
+
+/* get the pointer to the value stored in the item */
+/* #define cdb_ht_itemval(ht, item) (item->buf + ht->lru * 2 * sizeof(void*) + item->ksize) */
+void *cdb_ht_itemval(CDBHASHTABLE *ht, CDBHTITEM *item);
+
+/* create a hashtable; it can be a plain hashtable or one with a least-recently-used list
+   The LRU mode needs space for two extra pointers in every element
+   the hash function can be specified by the user */
+CDBHASHTABLE *cdb_ht_new(bool lru, CDBHASHFUNC hashfunc);
+
+/* free all elements and the hashtable itself */
+void cdb_ht_destroy(CDBHASHTABLE *ht);
+
+/* allocate a new item with the specified sizes, but do not insert it into the table */
+CDBHTITEM *cdb_ht_newitem(CDBHASHTABLE *ht, int ksize, int vsize);
+
+/* insert an already allocated item (see cdb_ht_newitem) into the table */
+void cdb_ht_insert(CDBHASHTABLE *ht, CDBHTITEM *item);
+
+/* allocate and insert an item into the table by key and value, return the pointer to the value in the table */
+void *cdb_ht_insert2(CDBHASHTABLE *ht, const void *key, int ksize, const void *val, int vsize);
+
+/* get the value of an item and its size, move the item to the front if mtf == true */
+void *cdb_ht_get(CDBHASHTABLE *ht, const void *key, int ksize, int *vsize, bool mtf);
+
+/* get the value of an item, assuming the size is known, move the item to the front if mtf == true */
+void *cdb_ht_get2(CDBHASHTABLE *ht, const void *key, int ksize, bool mtf);
+
+/* get the pointer to the stored item itself; it is not a copy */
+CDBHTITEM *cdb_ht_get3(CDBHASHTABLE *ht, const void *key, int ksize, bool mtf);
+
+/* check if an item with the key exists */
+bool cdb_ht_exist(CDBHASHTABLE *ht, const void *key, int ksize);
+
+/* delete and free an item from the table by its key */
+int cdb_ht_del2(CDBHASHTABLE *ht, const void *key, int ksize);
+
+/* remove an item from the table and return it; the item must be freed by the user */
+CDBHTITEM *cdb_ht_del(CDBHASHTABLE *ht, const void *key, int ksize);
+
+/* delete and free the last (oldest) item in the table */
+void cdb_ht_removetail(CDBHASHTABLE *ht);
+
+/* return the last item in the table, neither removing nor freeing it */
+CDBHTITEM *cdb_ht_gettail(CDBHASHTABLE *ht);
+
+/* remove the last item from the table and return it; the item must be freed by the user */
+CDBHTITEM *cdb_ht_poptail(CDBHASHTABLE *ht);
+
+/* free all elements in the table, keeping the table itself */
+void cdb_ht_clean(CDBHASHTABLE *ht);
+
+/* start iterating over the table: get the first item */
+CDBHTITEM *cdb_ht_iterbegin(CDBHASHTABLE *ht);
+
+/* get the item that follows the current one */
+CDBHTITEM *cdb_ht_iternext(CDBHASHTABLE *ht, CDBHTITEM *cur);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
+
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_lock.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_lock.c
new file mode 100644
index 0000000000000000000000000000000000000000..54b91071cba0e9ac124c616a54b93fdbe2e29894
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_lock.c
@@ -0,0 +1,75 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#include "cdb_lock.h" +#include <stdlib.h> +#include <pthread.h> +#include <sched.h> + + +CDBLOCK *cdb_lock_new(int ltype) +{ + CDBLOCK *lock = NULL; + if (ltype == CDB_LOCKSPIN) { + lock = (CDBLOCK *)malloc(sizeof(CDBLOCK) + sizeof(pthread_spinlock_t)); + pthread_spin_init((pthread_spinlock_t*)&lock->lock, PTHREAD_PROCESS_PRIVATE); + } else if (ltype == CDB_LOCKMUTEX) { + lock = (CDBLOCK *)malloc(sizeof(CDBLOCK) + sizeof(pthread_mutex_t)); + pthread_mutex_init((pthread_mutex_t*)&lock->lock, NULL); + } + lock->ltype = ltype; + + return lock; +} + + +void cdb_lock_lock(CDBLOCK *lock) +{ + if (lock->ltype == CDB_LOCKSPIN) + pthread_spin_lock((pthread_spinlock_t*)&lock->lock); + else if (lock->ltype == CDB_LOCKMUTEX) + pthread_mutex_lock((pthread_mutex_t*)&lock->lock); +} + + +void cdb_lock_unlock(CDBLOCK *lock) +{ + if (lock->ltype == CDB_LOCKSPIN) + pthread_spin_unlock((pthread_spinlock_t*)&lock->lock); + else if (lock->ltype == CDB_LOCKMUTEX) + pthread_mutex_unlock((pthread_mutex_t*)&lock->lock); +} + + +void cdb_lock_destory(CDBLOCK *lock) +{ + if (lock->ltype == CDB_LOCKSPIN) + pthread_spin_destroy((pthread_spinlock_t*)&lock->lock); + else if (lock->ltype == CDB_LOCKMUTEX) + pthread_mutex_destroy((pthread_mutex_t*)&lock->lock); + + free(lock); +} + + +int cdb_lock_trylock(CDBLOCK *lock) +{ + if (lock->ltype == CDB_LOCKSPIN) + return pthread_spin_trylock((pthread_spinlock_t*)&lock->lock); + else if (lock->ltype == CDB_LOCKMUTEX) + return pthread_mutex_trylock((pthread_mutex_t*)&lock->lock); + return 0; +} + diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_lock.h b/libdap-chain-global-db/libdap-cuttdb/src/cdb_lock.h new file mode 100644 index 0000000000000000000000000000000000000000..587fcdb18b40722da27f0eebff9fdb0e05934ce3 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_lock.h @@ -0,0 +1,49 @@ +/* + * CuttDB - a fast key-value storage 
engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. + * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#ifndef _CDB_LOCK_H_ +#define _CDB_LOCK_H_ + + +enum { + /* spinlock */ + CDB_LOCKSPIN, + /* mutex, which may cause OS context switch, mainly used in where Disk IO happens */ + CDB_LOCKMUTEX, +}; + +/* may be used to indicated whether the area is protected */ +enum { + CDB_LOCKED, + CDB_NOTLOCKED, +}; + +typedef struct CDBLOCK +{ + int ltype; + char lock[0]; +} CDBLOCK; + + +CDBLOCK *cdb_lock_new(int ltype); +void cdb_lock_lock(CDBLOCK *lock); +void cdb_lock_unlock(CDBLOCK *lock); +void cdb_lock_destory(CDBLOCK *lock); +int cdb_lock_trylock(CDBLOCK *lock); + + + +#endif + diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_types.h b/libdap-chain-global-db/libdap-cuttdb/src/cdb_types.h new file mode 100644 index 0000000000000000000000000000000000000000..cfb6e6b8c7b7be2940d25e4ff9f8c098d3bc48c4 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_types.h @@ -0,0 +1,144 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. 
+ * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#ifndef _CDB_TYPES_H_ +#define _CDB_TYPES_H_ +#include <stdint.h> + +#define KB 1024 +#define MB 1048576 +#define CDBMIN(a, b) ((a)<(b)?(a):(b)) +#define CDBMAX(a, b) ((a)>(b)?(a):(b)) + +#define SI8 8 +#define SI4 4 +/* space reserved in stack for i/o, avoid some malloc/free */ +#define SBUFSIZE (64 * KB) + +/* a default disk read size for index page, 3KB is enough(a page with 300 items) */ +#define PAGEAREADSIZE (3 * KB) + +/* reserved in stack for matched items in a hash index page */ +#define SFOFFNUM 8 + +/* a valid virtual offset */ +#define OFFNOTNULL(o) (((o).i4)||((o).i2)) +/* a null virtual offset */ +#define OFFNULL(o) (((o).i4==0)&&((o).i2==0)) +/* nullify an offset */ +#define OFFZERO(o) do{(o).i4=0;(o).i2=0;}while(0) +/* offset is equal ? */ +#define OFFEQ(a,b) (((a).i4==(b).i4)&&((a).i2==(b).i2)) +/* hash in page is equal ? */ +#define PHASHEQ(a,b) (((a).i2==(b).i2)&&((a).i1==(b).i1)) +/* page size increment */ +#define CDB_PAGEINCR 4 + + +/* if page cache size exceeds the limit */ +#define PCOVERFLOW(db) ((db)->dpcache && (db)->dpcache->size + (db)->pcache->size > (db)->pclimit) +/* if record cache size exceeds the limit */ +#define RCOVERFLOW(db) ((db)->rcache && (db)->rcache->size > (db)->rclimit) + +/* timeout for a dirty index page stays since last modify */ +#define DPAGETIMEOUT 40 +/* operation on main table are isolated by these locks */ +#define MLOCKNUM 256 + +#define CDBHASH64(a, b) cdb_crc64(a, b) + +/* all virtual offsets are 48-bits */ +typedef struct FOFF +{ + uint32_t i4; + uint16_t i2; +} __attribute__((packed)) FOFF; + + + +#define SFOFF (sizeof(FOFF)) + + +/* all hash value in index page are 24-bits + range 0..16M guarantee very low collision + with less than a hundred records in a page */ +typedef struct PHASH +{ + uint16_t i2; + uint8_t i1; +} __attribute__((packed)) PHASH; + + +/* an item in index page contains a hash and an 
offset */ +typedef struct PITEM +{ + FOFF off; + PHASH hash; +} __attribute__((packed)) PITEM; + + +/* data record */ +typedef struct CDBREC{ + /* where the data come from */ + FOFF ooff; + uint32_t osize; + + /* access convenient*/ + void *key; + void *val; + + /* disk store starts at following field */ + uint32_t magic; + uint32_t ksize; + uint32_t vsize; + uint32_t expire; + uint64_t oid; + char buf[0]; +} __attribute__((packed)) CDBREC; + +/* real size of a record header when stored on disk */ +#define RECHSIZE (SI4 * 4 + SI8) +/* real size of a record when stored on disk */ +#define RECSIZE(r) (RECHSIZE + (r)->ksize + (r)->vsize) + + +/* index page */ +typedef struct CDBPAGE{ + FOFF ooff; + uint32_t osize; + uint32_t cap; + + union { + /* what it be on disk */ + uint32_t magic; + /* what it be in memory */ + uint32_t mtime; + }; + /* which bucket it belongs to */ + uint32_t bid; + uint32_t num; + uint64_t oid; + PITEM items[0]; +} __attribute__((packed)) CDBPAGE; + +/* real size of a page header when stored on disk */ +#define PAGEHSIZE (SI4 * 3 + SI8) +/* real size of a page when stored on disk */ +#define PAGESIZE(p) (PAGEHSIZE + sizeof(PITEM) * (p)->num) +/* in-memory size of an record structure */ +#define MPAGESIZE(p) (sizeof(CDBPAGE) + sizeof(PITEM) * (p)->cap) + +#endif + diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_vio.c b/libdap-chain-global-db/libdap-cuttdb/src/cdb_vio.c new file mode 100644 index 0000000000000000000000000000000000000000..c0da6d1572812cbda4afe795cf24e25a733e375b --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_vio.c @@ -0,0 +1,42 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. 
+ * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#include "cdb_vio.h" +#include "cdb_types.h" +#include "vio_apnd2.h" +#include "stdlib.h" + + +CDBVIO *cdb_vio_new(int type) +{ + CDBVIO *res; + res = (CDBVIO *)malloc(sizeof(CDBVIO)); + switch(type) { + case CDBVIOAPND2: + vio_apnd2_init(res); + break; + default: + vio_apnd2_init(res); + break; + } + return res; +} + +int cdb_vio_destroy(CDBVIO *vio) +{ + free(vio); + return 0; +} + diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cdb_vio.h b/libdap-chain-global-db/libdap-cuttdb/src/cdb_vio.h new file mode 100644 index 0000000000000000000000000000000000000000..5c6e7e205813f11d84d45f0fa8916cff1edc3bbe --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cdb_vio.h @@ -0,0 +1,101 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. 
+ * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#ifndef _CDB_VIO_H_ +#define _CDB_VIO_H_ +#include "cdb_types.h" +#include "cuttdb.h" +#include <stdint.h> +#include <stdbool.h> + +enum { + /* obsoleted */ + CDBVIOAPPEND, + /* append only format storage */ + CDBVIOAPND2, +}; + +typedef struct CDBVIO CDBVIO; + +/* write a record, returns virtual offset at 3rd parameter */ +typedef int (*VIOWRITEREC)(CDBVIO*, CDBREC*, FOFF*); +/* delete a record, pass in the current offset at 3rd parameter */ +typedef int (*VIODELETEREC)(CDBVIO*, CDBREC*, FOFF); +/* read a record, 2nd parameter default points to stack buffer, if its real size +greater than the stack buffer size, it will be changed to points to a space in heap, +the last parameter decides whether read the whole record or just read key for comparsion */ +typedef int (*VIOREADREC)(CDBVIO*, CDBREC**, FOFF, bool); +/* close the storage */ +typedef int (*VIOCLOSE)(CDBVIO*); +/* open the storage, pass in the storage path and open mode */ +typedef int (*VIOOPEN)(CDBVIO*, const char*, int); +/* write an index page, return its virtual offset at 3rd parameter */ +typedef int (*VIOWRITEPAGE)(CDBVIO*, CDBPAGE *, FOFF*); +/* read an index page, 2nd parameter default points to stack buffer, if its real size +greater than the stack buffer size, it will be changed to points to a space in heap */ +typedef int (*VIOREADPAGE)(CDBVIO*, CDBPAGE **, FOFF); +/* make the storage do an sync operation */ +typedef int (*VIOSYNC)(CDBVIO*); +/* write db header, which contains main-index */ +typedef int (*VIOWRITEHEAD)(CDBVIO*); +/* read db header, which contains main-index */ +typedef int (*VIOREADHEAD)(CDBVIO*); +/* tell that no dirty page exists */ +typedef void (*VIOCLEANPOINT)(CDBVIO*); +/* get the record/page iterator at oid */ +typedef void* (*VIOITFIRST)(CDBVIO *, uint64_t oid); +/* get the next index page by iterator */ +typedef int (*VIOPAGEITNEXT)(CDBVIO *, CDBPAGE **, void *); +/* 
get the next record by iterator */ +typedef int (*VIORECITNEXT)(CDBVIO *, CDBREC **, void *); +/* destroy and free the iterator */ +typedef void (*VIOITDESTROY)(CDBVIO *, void *); + +struct CDBVIO +{ + VIOOPEN open; + VIOCLOSE close; + + VIOWRITEREC wrec; + VIODELETEREC drec; + VIOREADREC rrec; + + VIOWRITEPAGE wpage; + VIOREADPAGE rpage; + + VIOSYNC sync; + VIOWRITEHEAD whead; + VIOREADHEAD rhead; + + VIOCLEANPOINT cleanpoint; + + VIOITFIRST pageitfirst; + VIOPAGEITNEXT pageitnext; + VIOITDESTROY pageitdestroy; + + VIOITFIRST recitfirst; + VIORECITNEXT recitnext; + VIOITDESTROY recitdestroy; + + CDB *db; + void *iometa; +}; + + +CDBVIO *cdb_vio_new(int type); +int cdb_vio_destroy(CDBVIO *vio); + + +#endif diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cuttdb-server.c b/libdap-chain-global-db/libdap-cuttdb/src/cuttdb-server.c new file mode 100644 index 0000000000000000000000000000000000000000..9b09a2863a28eeb88b74eebd1f23ebf6f6bfaa71 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cuttdb-server.c @@ -0,0 +1,2152 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * The server&network part of CuttDB is based on Beansdb: + * + * http://beansdb.googlecode.com + * + * Beansdb is most based on Memcachedb and Memcached: + * + * http://memcachedb.org/ + * http://danga.com/memcached/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. + * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#include "cuttdb-server.h" +#include "cuttdb.h" +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <signal.h> +#include <sys/resource.h> +#include <sys/uio.h> +#include <unistd.h> + +/* need this to get IOV_MAX on some platforms. 
*/ +#ifndef __need_IOV_MAX +#define __need_IOV_MAX +#endif +#include <pwd.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> +#include <assert.h> +#include <limits.h> +#include <inttypes.h> +#include <ctype.h> + + +#ifdef HAVE_READPROC +#include <proc/readproc.h> +#endif + +#ifdef HAVE_MALLOC_H +/* OpenBSD has a malloc.h, but warns to use stdlib.h instead */ +#ifndef __OpenBSD__ +#include <malloc.h> +#endif +#endif + +/* FreeBSD 4.x doesn't have IOV_MAX exposed. */ +#ifndef IOV_MAX +#if defined(__FreeBSD__) || defined(__APPLE__) +# define IOV_MAX 1024 +#endif +#endif + +#ifndef IOV_MAX +# define IOV_MAX 1024 +#endif + +#ifndef CLOCK_MONOTONIC +#include "clock_gettime_stub.c" +#endif + +/* + * forward declarations + */ +static int new_socket(struct addrinfo *ai); +static int server_socket(const int port, const bool is_udp); +static int try_read_command(conn *c); +static int try_read_network(conn *c); + +/* stats */ +static void stats_reset(void); +static void stats_init(void); + +/* defaults */ +static void settings_init(void); + +/* event handling, network IO */ +static void conn_close(conn *c); +static void conn_init(void); +static bool update_event(conn *c, const int new_flags); +int delete_event(int fd); +static void complete_nread(conn *c); +static void process_command(conn *c, char *command); +static int transmit(conn *c); +static int ensure_iov_space(conn *c); +static int add_iov(conn *c, const void *buf, int len); +static int add_msghdr(conn *c); +static void conn_free(conn *c); + + +static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes, + char *suffix, uint8_t *nsuffix); +static int item_free(item *it); +static item *item_get(char *key, size_t nkey); +static item *item_alloc1(char *key, const size_t nkey, const int flags, const int nbytes); + +/** exported globals **/ +struct stats 
stats; +struct settings settings; + +CDB *db = NULL; +FILE *access_log = NULL; +int daemon_quit = 0; + +/** file scope variables **/ +static int stub_fd = 0; + +#define TRANSMIT_COMPLETE 0 +#define TRANSMIT_INCOMPLETE 1 +#define TRANSMIT_SOFT_ERROR 2 +#define TRANSMIT_HARD_ERROR 3 + + +void item_init(void) { + /*freeitemtotal = INIT_ITEM_FREELIST_LENGTH; + freeitemcurr = 0; + + freeitem = (item **)malloc( sizeof(item *) * freeitemtotal ); + if (freeitem == NULL) { + perror("malloc()"); + }*/ + return; +} + +static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes, + char *suffix, uint8_t *nsuffix) { + /* suffix is defined at 40 chars elsewhere.. */ + *nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2); + return sizeof(item) + nkey + *nsuffix + nbytes; +} + +static int item_free(item *it) +{ + free(it); + return 0; +} + +static item *item_get(char *key, size_t nkey) +{ + item *it = NULL; + int vlen; + uint32_t flag; + void *value; + int ret = cdb_get(db, key, nkey, &value, &vlen); + flag = 0; + if (ret == 0){ + it = item_alloc1(key, nkey, flag, vlen + 2); + if (it){ + memcpy(ITEM_data(it), value, vlen); + memcpy(ITEM_data(it) + vlen, "\r\n", 2); + } + cdb_free_val(&value); + } + return it; + +} + +static item *item_alloc1(char *key, const size_t nkey, const int flags, const int nbytes) +{ + uint8_t nsuffix; + item *it; + char suffix[40]; + size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); + + it = (item *)malloc(ntotal); + if (it == NULL){ + return NULL; + } + memset(it, 0, ntotal); + + it->nkey = nkey; + it->nbytes = nbytes; + strcpy(ITEM_key(it), key); + memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); + it->nsuffix = nsuffix; + return it; +} + + +static void stats_init(void) { + stats.curr_conns = stats.total_conns = stats.conn_structs = 0; + stats.get_cmds = stats.set_cmds = stats.delete_cmds = 0; + stats.slow_cmds = stats.get_hits = stats.get_misses = 0; + stats.bytes_read = 
stats.bytes_written = 0; + + /* make the time we started always be 2 seconds before we really + did, so time(0) - time.started is never zero. if so, things + like 'settings.oldest_live' which act as booleans as well as + values are now false in boolean context... */ + stats.started = time(0) - 2; +} + +static void stats_reset(void) { + STATS_LOCK(); + stats.total_conns = 0; + stats.get_cmds = stats.set_cmds = stats.delete_cmds = 0; + stats.slow_cmds = stats.get_hits = stats.get_misses = 0; + stats.bytes_read = stats.bytes_written = 0; + STATS_UNLOCK(); +} + +static void settings_init(void) { + settings.port = 8964; + /* By default this string should be NULL for getaddrinfo() */ + settings.inter = NULL; + settings.item_buf_size = 4 * 1024; /* default is 4KB */ + settings.maxconns = 1024; /* to limit connections-related memory to about 5MB */ + settings.verbose = 0; + settings.num_threads = 16; + settings.flush_period = 1; // 1 secs + settings.slow_cmd_time = 0.1; // 100ms +} + +/* + * Adds a message header to a connection. + * + * Returns 0 on success, -1 on out-of-memory. + */ +static int add_msghdr(conn *c) +{ + struct msghdr *msg; + + assert(c != NULL); + + if (c->msgsize == c->msgused) { + msg = realloc(c->msglist, c->msgsize * 2 * sizeof(struct msghdr)); + if (! msg) + return -1; + c->msglist = msg; + c->msgsize *= 2; + } + + msg = c->msglist + c->msgused; + + /* this wipes msg_iovlen, msg_control, msg_controllen, and + msg_flags, the last 3 of which aren't defined on solaris: */ + memset(msg, 0, sizeof(struct msghdr)); + + msg->msg_iov = &c->iov[c->iovused]; + + c->msgbytes = 0; + c->msgused++; + + return 0; +} + + +/* + * Free list management for connections. 
+ */ + +static conn **freeconns; +static int freetotal; +static int freecurr; + + +static void conn_init(void) { + freetotal = 200; + freecurr = 0; + if ((freeconns = (conn **)malloc(sizeof(conn *) * freetotal)) == NULL) { + fprintf(stderr, "malloc()\n"); + } + return; +} + +/* + * Returns a connection from the freelist, if any. Should call this using + * conn_from_freelist() for thread safety. + */ +conn *do_conn_from_freelist() { + conn *c; + + if (freecurr > 0) { + c = freeconns[--freecurr]; + } else { + c = NULL; + } + + return c; +} + +/* + * Adds a connection to the freelist. 0 = success. Should call this using + * conn_add_to_freelist() for thread safety. + */ +bool do_conn_add_to_freelist(conn *c) { + if (freecurr < freetotal) { + freeconns[freecurr++] = c; + return false; + } else { + /* try to enlarge free connections array */ + conn **new_freeconns = realloc(freeconns, sizeof(conn *) * freetotal * 2); + if (new_freeconns) { + freetotal *= 2; + freeconns = new_freeconns; + freeconns[freecurr++] = c; + return false; + } + } + return true; +} + +conn *conn_new(const int sfd, const int init_state, const int read_buffer_size) { + conn *c = conn_from_freelist(); + + if (NULL == c) { + if (!(c = (conn *)calloc(1, sizeof(conn)))) { + fprintf(stderr, "calloc()\n"); + return NULL; + } + c->rbuf = c->wbuf = 0; + c->ilist = 0; + c->iov = 0; + c->msglist = 0; + + c->rsize = read_buffer_size; + c->wsize = DATA_BUFFER_SIZE; + c->isize = ITEM_LIST_INITIAL; + c->iovsize = IOV_LIST_INITIAL; + c->msgsize = MSG_LIST_INITIAL; + + c->rbuf = (char *)malloc((size_t)c->rsize); + c->wbuf = (char *)malloc((size_t)c->wsize); + c->ilist = (item **)malloc(sizeof(item *) * c->isize); + c->iov = (struct iovec *)malloc(sizeof(struct iovec) * c->iovsize); + c->msglist = (struct msghdr *)malloc(sizeof(struct msghdr) * c->msgsize); + + if (c->rbuf == 0 || c->wbuf == 0 || c->ilist == 0 || c->iov == 0 || + c->msglist == 0) { + conn_free(c); + fprintf(stderr, "malloc()\n"); + return NULL; + } 
+ + STATS_LOCK(); + stats.conn_structs++; + STATS_UNLOCK(); + } + + if (settings.verbose > 1) { + if (init_state == conn_listening) + fprintf(stderr, "<%d server listening\n", sfd); + else + fprintf(stderr, "<%d new client connection\n", sfd); + } + + c->sfd = sfd; + c->state = init_state; + c->rlbytes = 0; + c->rbytes = c->wbytes = 0; + c->wcurr = c->wbuf; + c->rcurr = c->rbuf; + c->ritem = 0; + c->icurr = c->ilist; + c->ileft = 0; + c->iovused = 0; + c->msgcurr = 0; + c->msgused = 0; + + c->write_and_go = conn_read; + c->write_and_free = 0; + c->item = 0; + c->noreply = false; + + update_event(c, AE_READABLE); + if (add_event(sfd, AE_READABLE, c) == -1) { + if (conn_add_to_freelist(c)) { + conn_free(c); + } + perror("event_add"); + return NULL; + } + + STATS_LOCK(); + stats.curr_conns++; + stats.total_conns++; + STATS_UNLOCK(); + + return c; +} + +static void conn_cleanup(conn *c) { + assert(c != NULL); + + if (c->item) { + item_free(c->item); + c->item = 0; + } + + if (c->ileft != 0) { + for (; c->ileft > 0; c->ileft--,c->icurr++) { + item_free(*(c->icurr)); + } + } + + if (c->write_and_free) { + free(c->write_and_free); + c->write_and_free = 0; + } +} + +/* + * Frees a connection. + */ +void conn_free(conn *c) { + if (c) { + if (c->msglist) + free(c->msglist); + if (c->rbuf) + free(c->rbuf); + if (c->wbuf) + free(c->wbuf); + if (c->ilist) + free(c->ilist); + if (c->iov) + free(c->iov); + free(c); + } +} + +static void conn_close(conn *c) { + assert(c != NULL); + + if (settings.verbose > 1) + fprintf(stderr, "<%d connection closed.\n", c->sfd); + + delete_event(c->sfd); + close(c->sfd); + c->sfd = -1; + update_event(c, 0); + conn_cleanup(c); + + /* if the connection has big buffers, just free it */ + if (c->rsize > READ_BUFFER_HIGHWAT || conn_add_to_freelist(c)) { + conn_free(c); + } + + STATS_LOCK(); + stats.curr_conns--; + STATS_UNLOCK(); + + return; +} + + +/* + * Shrinks a connection's buffers if they're too big. 
This prevents + * periodic large "get" requests from permanently chewing lots of server + * memory. + * + * This should only be called in between requests since it can wipe output + * buffers! + */ +static void conn_shrink(conn *c) { + assert(c != NULL); + + if (c->rsize > READ_BUFFER_HIGHWAT && c->rbytes < DATA_BUFFER_SIZE) { + char *newbuf; + + if (c->rcurr != c->rbuf) + memmove(c->rbuf, c->rcurr, (size_t)c->rbytes); + + newbuf = (char *)realloc((void *)c->rbuf, DATA_BUFFER_SIZE); + + if (newbuf) { + c->rbuf = newbuf; + c->rsize = DATA_BUFFER_SIZE; + } + /* TODO check other branch... */ + c->rcurr = c->rbuf; + } + + if (c->isize > ITEM_LIST_HIGHWAT) { + item **newbuf = (item**) realloc((void *)c->ilist, ITEM_LIST_INITIAL * sizeof(c->ilist[0])); + if (newbuf) { + c->ilist = newbuf; + c->isize = ITEM_LIST_INITIAL; + } + /* TODO check error condition? */ + } + + if (c->msgsize > MSG_LIST_HIGHWAT) { + struct msghdr *newbuf = (struct msghdr *) realloc((void *)c->msglist, MSG_LIST_INITIAL * sizeof(c->msglist[0])); + if (newbuf) { + c->msglist = newbuf; + c->msgsize = MSG_LIST_INITIAL; + } + /* TODO check error condition? */ + } + + if (c->iovsize > IOV_LIST_HIGHWAT) { + struct iovec *newbuf = (struct iovec *) realloc((void *)c->iov, IOV_LIST_INITIAL * sizeof(c->iov[0])); + if (newbuf) { + c->iov = newbuf; + c->iovsize = IOV_LIST_INITIAL; + } + /* TODO check return value */ + } +} + +/* + * Sets a connection's current state in the state machine. Any special + * processing that needs to happen on certain state transitions can + * happen here. + */ +static void conn_set_state(conn *c, int state) { + assert(c != NULL); + + if (state != c->state) { + if (state == conn_read) { + conn_shrink(c); + } + c->state = state; + } +} + + +/* + * Ensures that there is room for another struct iovec in a connection's + * iov list. + * + * Returns 0 on success, -1 on out-of-memory. 
+ */ +static int ensure_iov_space(conn *c) { + assert(c != NULL); + + if (c->iovused >= c->iovsize) { + int i, iovnum; + struct iovec *new_iov = (struct iovec *)realloc(c->iov, + (c->iovsize * 2) * sizeof(struct iovec)); + if (! new_iov) + return -1; + c->iov = new_iov; + c->iovsize *= 2; + + /* Point all the msghdr structures at the new list. */ + for (i = 0, iovnum = 0; i < c->msgused; i++) { + c->msglist[i].msg_iov = &c->iov[iovnum]; + iovnum += c->msglist[i].msg_iovlen; + } + } + + return 0; +} + + +/* + * Adds data to the list of pending data that will be written out to a + * connection. + * + * Returns 0 on success, -1 on out-of-memory. + */ + +static int add_iov(conn *c, const void *buf, int len) { + struct msghdr *m; + int leftover; + bool limit_to_mtu; + + assert(c != NULL); + + do { + m = &c->msglist[c->msgused - 1]; + + /* + * Limit the first payloads of TCP replies, to + * MAX_PAYLOAD_SIZE bytes. + */ + limit_to_mtu = (1 == c->msgused); + + /* We may need to start a new msghdr if this one is full. 
*/ + if (m->msg_iovlen == IOV_MAX || + (limit_to_mtu && c->msgbytes >= MAX_PAYLOAD_SIZE)) { + add_msghdr(c); + m = &c->msglist[c->msgused - 1]; + } + + if (ensure_iov_space(c) != 0) + return -1; + + /* If the fragment is too big to fit in the datagram, split it up */ + if (limit_to_mtu && len + c->msgbytes > MAX_PAYLOAD_SIZE) { + leftover = len + c->msgbytes - MAX_PAYLOAD_SIZE; + len -= leftover; + } else { + leftover = 0; + } + + m = &c->msglist[c->msgused - 1]; + m->msg_iov[m->msg_iovlen].iov_base = (void *)buf; + m->msg_iov[m->msg_iovlen].iov_len = len; + + c->msgbytes += len; + c->iovused++; + m->msg_iovlen++; + + buf = ((char *)buf) + len; + len = leftover; + } while (leftover > 0); + + return 0; +} + + +static void out_string(conn *c, const char *str) { + size_t len; + + assert(c != NULL); + + if (c->noreply) { + if (settings.verbose > 1) + fprintf(stderr, ">%d %s\n", c->sfd, str); + c->noreply = false; + conn_set_state(c, conn_read); + return; + } + + len = strlen(str); + if ((len + 2) > c->wsize) { + /* ought to be always enough. just fail for simplicity */ + str = "SERVER_ERROR output line too long"; + len = strlen(str); + } + + memcpy(c->wbuf, str, len); + memcpy(c->wbuf + len, "\r\n", 2); + c->wbytes = len + 2; + c->wcurr = c->wbuf; + + conn_set_state(c, conn_write); + c->write_and_go = conn_read; + return; +} + +/* + * we get here after reading the value in set/add/replace commands. The command + * has been stored in c->item_comm, and the item is ready in c->item. 
+ */ + +static void complete_nread(conn *c) { + assert(c != NULL); + + item *it = c->item; + int comm = c->item_comm; + int ret; + + STATS_LOCK(); + stats.set_cmds++; + STATS_UNLOCK(); + + if (strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) != 0) { + out_string(c, "CLIENT_ERROR bad data chunk"); + } else { + ret = store_item(it, comm); + if (ret == 0) + out_string(c, "STORED"); + else if(ret == -2) + out_string(c, "EXISTS"); + else if(ret == -3) + out_string(c, "NOT_FOUND"); + else + out_string(c, "NOT_STORED"); + } + + item_free(c->item); + c->item = 0; +} + +/* + * Stores an item in the cache according to the semantics of one of the set + * commands. In threaded mode, this is protected by the cache lock. + * + * Returns true if the item was stored. + */ +int store_item(item *it, int comm) { + char *key = ITEM_key(it); + + switch (comm) { + case NREAD_SET: + return cdb_set2(db, key, it->nkey, ITEM_data(it), it->nbytes - 2, CDB_INSERTCACHE | CDB_OVERWRITE, it->expire); + case NREAD_ADD: + return cdb_set2(db, key, it->nkey, ITEM_data(it), it->nbytes - 2, CDB_INSERTCACHE | CDB_INSERTIFNOEXIST, it->expire); + case NREAD_REPLACE: + return cdb_set2(db, key, it->nkey, ITEM_data(it), it->nbytes - 2, CDB_INSERTCACHE | CDB_INSERTIFEXIST, it->expire); + } + return 0; +} + +/* + * adds a delta value to a numeric item. + */ +/* + +int add_delta(char *key, size_t nkey, int64_t delta, char *buf); +int add_delta(char* key, size_t nkey, int64_t delta, char *buf) { + uint64_t value = hs_incr(db, key, delta); + snprintf(buf, INCR_MAX_STORAGE_LEN, "%llu", (unsigned long long)value); + return 0; +} +*/ + +typedef struct token_s { + char *value; + size_t length; +} token_t; + +#define COMMAND_TOKEN 0 +#define SUBCOMMAND_TOKEN 1 +#define KEY_TOKEN 1 +#define KEY_MAX_LENGTH 250 + +#define MAX_TOKENS 8 + +/* + * Tokenize the command string by replacing whitespace with '\0' and update + * the token array tokens with pointer to start of each token and length. 
+ * Returns total number of tokens. The last valid token is the terminal + * token (value points to the first unprocessed character of the string and + * length zero). + * + * Usage example: + * + * while(tokenize_command(command, ncommand, tokens, max_tokens) > 0) { + * for(int ix = 0; tokens[ix].length != 0; ix++) { + * ... + * } + * ncommand = tokens[ix].value - command; + * command = tokens[ix].value; + * } + */ +static size_t tokenize_command(char *command, token_t *tokens, const size_t max_tokens) { + char *s, *e; + size_t ntokens = 0; + + assert(command != NULL && tokens != NULL && max_tokens > 1); + + for (s = e = command; ntokens < max_tokens - 1; ++e) { + if (*e == ' ') { + if (s != e) { + tokens[ntokens].value = s; + tokens[ntokens].length = e - s; + ntokens++; + *e = '\0'; + } + s = e + 1; + } + else if (*e == '\0') { + if (s != e) { + tokens[ntokens].value = s; + tokens[ntokens].length = e - s; + ntokens++; + } + + break; /* string end */ + } + } + + /* + * If we scanned the whole string, the terminal value pointer is null, + * otherwise it is the first unprocessed character. + */ + tokens[ntokens].value = *e == '\0' ? NULL : e; + tokens[ntokens].length = 0; + ntokens++; + + return ntokens; +} + +static inline bool set_noreply_maybe(conn *c, token_t *tokens, size_t ntokens) +{ + int noreply_index = ntokens - 2; + + /* + NOTE: this function is not the first place where we are going to + send the reply. We could send it instead from process_command() + if the request line has wrong number of tokens. However parsing + malformed line for "noreply" option is not reliable anyway, so + it can't be helped. 
+ */ + if (tokens[noreply_index].value + && strcmp(tokens[noreply_index].value, "noreply") == 0) { + c->noreply = true; + } + return c->noreply; +} + +static void process_stat(conn *c, token_t *tokens, const size_t ntokens) { + time_t now = time(0); + char *command; + char *subcommand; + + assert(c != NULL); + + if(ntokens < 2) { + out_string(c, "CLIENT_ERROR bad command line"); + return; + } + + command = tokens[COMMAND_TOKEN].value; + + if (ntokens == 2 && strcmp(command, "stats") == 0) { + char temp[1024]; + pid_t pid = getpid(); + uint64_t total = 0, curr = 0; + CDBSTAT db_stat; + cdb_stat(db, &db_stat); + total = db_stat.rnum; + char *pos = temp; + +#ifndef WIN32 + struct rusage usage; + getrusage(RUSAGE_SELF, &usage); +#endif /* !WIN32 */ + + STATS_LOCK(); + pos += sprintf(pos, "STAT pid %ld\r\n", (long)pid); + pos += sprintf(pos, "STAT uptime %"PRIuS"\r\n", now - stats.started); + pos += sprintf(pos, "STAT time %"PRIuS"\r\n", now); + pos += sprintf(pos, "STAT version " VERSION "\r\n"); + pos += sprintf(pos, "STAT pointer_size %"PRIuS"\r\n", 8 * sizeof(void *)); +#ifndef WIN32 + pos += sprintf(pos, "STAT rusage_user %ld.%06ld\r\n", usage.ru_utime.tv_sec, usage.ru_utime.tv_usec); + pos += sprintf(pos, "STAT rusage_system %ld.%06ld\r\n", usage.ru_stime.tv_sec, usage.ru_stime.tv_usec); + pos += sprintf(pos, "STAT rusage_minflt %"PRIu64"\r\n", usage.ru_minflt); + pos += sprintf(pos, "STAT rusage_majflt %"PRIu64"\r\n", usage.ru_majflt); + pos += sprintf(pos, "STAT rusage_nswap %"PRIu64"\r\n", usage.ru_nswap); + pos += sprintf(pos, "STAT rusage_inblock %"PRIu64"\r\n", usage.ru_inblock); + pos += sprintf(pos, "STAT rusage_oublock %"PRIu64"\r\n", usage.ru_oublock); + pos += sprintf(pos, "STAT rusage_nvcsw %"PRIu64"\r\n", usage.ru_nvcsw); + pos += sprintf(pos, "STAT rusage_nivcsw %"PRIu64"\r\n", usage.ru_nivcsw); +#endif /* !WIN32 */ +#ifdef HAVE_READPROC + proc_t p; + get_proc_stats(getpid(), &p); + pos += sprintf(pos, "STAT rusage_maxrss %"PRIu64"\r\n", p.vm_rss); 
+#endif + pos += sprintf(pos, "STAT item_buf_size %"PRIuS"\r\n", settings.item_buf_size); + pos += sprintf(pos, "STAT curr_connections %"PRIu32"\r\n", stats.curr_conns - 1); /* ignore listening conn */ + pos += sprintf(pos, "STAT total_connections %"PRIu32"\r\n", stats.total_conns); + pos += sprintf(pos, "STAT connection_structures %"PRIu32"\r\n", stats.conn_structs); + pos += sprintf(pos, "STAT cmd_get %"PRIu64"\r\n", stats.get_cmds); + pos += sprintf(pos, "STAT cmd_set %"PRIu64"\r\n", stats.set_cmds); + pos += sprintf(pos, "STAT cmd_delete %"PRIu64"\r\n", stats.delete_cmds); + pos += sprintf(pos, "STAT slow_cmd %"PRIu64"\r\n", stats.slow_cmds); + pos += sprintf(pos, "STAT get_hits %"PRIu64"\r\n", stats.get_hits); + pos += sprintf(pos, "STAT get_misses %"PRIu64"\r\n", stats.get_misses); + pos += sprintf(pos, "STAT curr_items %"PRIu64"\r\n", curr); + pos += sprintf(pos, "STAT total_items %"PRIu64"\r\n", total); + pos += sprintf(pos, "STAT bytes_read %"PRIu64"\r\n", stats.bytes_read); + pos += sprintf(pos, "STAT bytes_written %"PRIu64"\r\n", stats.bytes_written); + pos += sprintf(pos, "STAT threads %d\r\n", settings.num_threads); + pos += sprintf(pos, "STAT records_in_cache %lu\r\n", db_stat.rcnum); + pos += sprintf(pos, "STAT pages_total %lu\r\n", db_stat.pnum); + pos += sprintf(pos, "STAT pages_in_cache %lu\r\n", db_stat.pcnum); + pos += sprintf(pos, "STAT record_cache_hits %lu\r\n", db_stat.rchit); + pos += sprintf(pos, "STAT record_cache_misses %lu\r\n", db_stat.rcmiss); + pos += sprintf(pos, "STAT page_cache_hits %lu\r\n", db_stat.pchit); + pos += sprintf(pos, "STAT page_cache_misses %lu\r\n", db_stat.pcmiss); + pos += sprintf(pos, "STAT read_latency_avg %u\r\n", db_stat.rlatcy); + pos += sprintf(pos, "STAT write_latency_avg %u\r\n", db_stat.wlatcy); + pos += sprintf(pos, "END"); + STATS_UNLOCK(); + out_string(c, temp); + return; + } + + subcommand = tokens[SUBCOMMAND_TOKEN].value; + + if (strcmp(subcommand, "reset") == 0) { + stats_reset(); + out_string(c, 
"RESET"); + return; + } + + out_string(c, "ERROR"); +} + +/* ntokens is overwritten here... shrug.. */ +static inline void process_get_command(conn *c, token_t *tokens, size_t ntokens) { + char *key; + size_t nkey; + int i = 0; + item *it = NULL; + token_t *key_token = &tokens[KEY_TOKEN]; + int stats_get_cmds = 0; + int stats_get_hits = 0; + int stats_get_misses = 0; + assert(c != NULL); + + do { + while(key_token->length != 0) { + + key = key_token->value; + nkey = key_token->length; + + if(nkey > KEY_MAX_LENGTH) { + STATS_LOCK(); + stats.get_cmds += stats_get_cmds; + stats.get_hits += stats_get_hits; + stats.get_misses += stats_get_misses; + STATS_UNLOCK(); + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + stats_get_cmds++; + + it = item_get(key, nkey); + + if (it) { + if (i >= c->isize) { + item **new_list = realloc(c->ilist, sizeof(item *) * c->isize * 2); + if (new_list) { + c->isize *= 2; + c->ilist = new_list; + } else { + item_free(it); + it = NULL; + break; + } + } + + /* + * Construct the response. Each hit adds three elements to the + * outgoing data list: + * "VALUE " + * key + * " " + flags + " " + data length + "\r\n" + data (with \r\n) + */ + + if (add_iov(c, "VALUE ", 6) != 0 || + add_iov(c, ITEM_key(it), it->nkey) != 0 || + add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes) != 0) + { + item_free(it); + it = NULL; + break; + } + + if (settings.verbose > 1) + fprintf(stderr, ">%d sending key %s\n", c->sfd, ITEM_key(it)); + + stats_get_hits++; + *(c->ilist + i) = it; + i++; + + } else { + stats_get_misses++; + } + + key_token++; + } + + /* + * If the command string hasn't been fully processed, get the next set + * of tokens. 
+ */ + if(key_token->value != NULL) { + ntokens = tokenize_command(key_token->value, tokens, MAX_TOKENS); + key_token = tokens; + } + + } while(key_token->value != NULL); + + c->icurr = c->ilist; + c->ileft = i; + + if (settings.verbose > 1) + fprintf(stderr, ">%d END\n", c->sfd); + + /* + If the loop was terminated because of out-of-memory, it is not + reliable to add END\r\n to the buffer, because it might not end + in \r\n. So we send SERVER_ERROR instead. + */ + if (key_token->value != NULL || add_iov(c, "END\r\n", 5) != 0) { + out_string(c, "SERVER_ERROR out of memory writing get response"); + } + else { + conn_set_state(c, conn_mwrite); + c->msgcurr = 0; + } + + STATS_LOCK(); + stats.get_cmds += stats_get_cmds; + stats.get_hits += stats_get_hits; + stats.get_misses += stats_get_misses; + STATS_UNLOCK(); + + return; +} + +static void process_update_command(conn *c, token_t *tokens, const size_t ntokens, int comm) { + char *key; + size_t nkey; + int flags; + time_t exptime; + int vlen; + item *it = NULL; + + assert(c != NULL); + + set_noreply_maybe(c, tokens, ntokens); + + if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + key = tokens[KEY_TOKEN].value; + nkey = tokens[KEY_TOKEN].length; + + flags = strtoul(tokens[2].value, NULL, 10); + exptime = strtol(tokens[3].value, NULL, 10); + vlen = strtol(tokens[4].value, NULL, 10); + + if(errno == ERANGE || ((flags == 0 || exptime == 0) && errno == EINVAL) + || vlen < 0) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + it = item_alloc1(key, nkey, flags, vlen+2); + it->expire = exptime; + it->flag = flags; + + if (it == NULL) { + out_string(c, "SERVER_ERROR out of memory storing object"); + /* swallow the data line */ + c->write_and_go = conn_swallow; + c->sbytes = vlen + 2; + return; + } + + c->item = it; + c->ritem = ITEM_data(it); + c->rlbytes = it->nbytes; + c->item_comm = comm; + conn_set_state(c, conn_nread); +} + 
+bool safe_strtoull(const char *str, uint64_t *out) { + assert(out != NULL); + errno = 0; + *out = 0; + char *endptr; + unsigned long long ull = strtoull(str, &endptr, 10); + if (errno == ERANGE) + return false; + if (isspace(*endptr) || (*endptr == '\0' && endptr != str)) { + *out = ull; + return true; + } + return false; +} + +/* + + +static void process_arithmetic_command(conn *c, token_t *tokens, const size_t ntokens, const bool incr) { + char temp[INCR_MAX_STORAGE_LEN]; + uint64_t delta; + char *key; + size_t nkey; + + assert(c != NULL); + + set_noreply_maybe(c, tokens, ntokens); + + STATS_LOCK(); + stats.set_cmds++; + STATS_UNLOCK(); + + if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + key = tokens[KEY_TOKEN].value; + nkey = tokens[KEY_TOKEN].length; + + if (!safe_strtoull(tokens[2].value, &delta)) { + out_string(c, "CLIENT_ERROR invalid numeric delta argument"); + return; + } + + switch(add_delta(key, nkey, delta, temp)) { + case 0: + out_string(c, temp); + break; +// case NON_NUMERIC: +// out_string(c, "CLIENT_ERROR cannot increment or decrement non-numeric value"); +// break; +// case EOM: +// out_string(c, "SERVER_ERROR out of memory"); +// break; + } +} +*/ + + +static void process_delete_command(conn *c, token_t *tokens, const size_t ntokens) { + char *key; + size_t nkey; + assert(c != NULL); + + set_noreply_maybe(c, tokens, ntokens); + + STATS_LOCK(); + stats.delete_cmds++; + STATS_UNLOCK(); + + key = tokens[KEY_TOKEN].value; + nkey = tokens[KEY_TOKEN].length; + if(nkey > KEY_MAX_LENGTH) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + + switch (cdb_del(db, key, nkey)) { + case 0: + out_string(c, "DELETED"); + break; + case -3: + out_string(c, "NOT_FOUND"); + break; +// case -1: +// out_string(c, "SERVER_ERROR while delete a item"); +// break; +// default: +// out_string(c, "SERVER_ERROR nothing to do"); + } + return; +} + +static void 
process_verbosity_command(conn *c, token_t *tokens, const size_t ntokens) { + unsigned int level; + + assert(c != NULL); + + set_noreply_maybe(c, tokens, ntokens); + + level = strtoul(tokens[1].value, NULL, 10); + if(errno == ERANGE) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + settings.verbose = level > MAX_VERBOSITY_LEVEL ? MAX_VERBOSITY_LEVEL : level; + out_string(c, "OK"); + return; +} + +static void process_command(conn *c, char *command) { + + token_t tokens[MAX_TOKENS]; + size_t ntokens; + int comm; + struct timespec start, end; + + assert(c != NULL); + + if (settings.verbose > 1) + fprintf(stderr, "<%d %s\n", c->sfd, command); + + /* + * for commands set/add/replace, we build an item and read the data + * directly into it, then continue in nread_complete(). + */ + + c->msgcurr = 0; + c->msgused = 0; + c->iovused = 0; + if (add_msghdr(c) != 0) { + out_string(c, "SERVER_ERROR out of memory preparing response"); + return; + } + + clock_gettime(CLOCK_MONOTONIC, &start); + + ntokens = tokenize_command(command, tokens, MAX_TOKENS); + if (ntokens >= 3 && + (strcmp(tokens[COMMAND_TOKEN].value, "get") == 0) ) { + + process_get_command(c, tokens, ntokens); + + } else if ((ntokens == 6 || ntokens == 7) && + ((strcmp(tokens[COMMAND_TOKEN].value, "set") == 0 && (comm = NREAD_SET)) || + (strcmp(tokens[COMMAND_TOKEN].value, "add") == 0 && (comm = NREAD_ADD)) || + (strcmp(tokens[COMMAND_TOKEN].value, "replace") == 0 && (comm = NREAD_REPLACE)))) { + + process_update_command(c, tokens, ntokens, comm); + +// } else if ((ntokens == 4 || ntokens == 5) && (strcmp(tokens[COMMAND_TOKEN].value, "incr") == 0)) { + +// process_arithmetic_command(c, tokens, ntokens, 1); + + } else if (ntokens >= 3 && ntokens <= 4 && (strcmp(tokens[COMMAND_TOKEN].value, "delete") == 0)) { + + process_delete_command(c, tokens, ntokens); + + } else if (ntokens >= 2 && (strcmp(tokens[COMMAND_TOKEN].value, "stats") == 0)) { + + process_stat(c, tokens, ntokens); + + } else if 
(ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "version") == 0)) { + + out_string(c, "VERSION " VERSION); + + } else if (ntokens == 2 && (strcmp(tokens[COMMAND_TOKEN].value, "quit") == 0)) { + + conn_set_state(c, conn_closing); + + } else if (ntokens == 3 && (strcmp(tokens[COMMAND_TOKEN].value, "verbosity") == 0)) { + + process_verbosity_command(c, tokens, ntokens); + +/* } else if (ntokens >= 2 && ntokens <= 4 && (strcmp(tokens[COMMAND_TOKEN].value, "flush_all") == 0)) { + + set_noreply_maybe(c, tokens, ntokens); + + int limit = 10000; + if (ntokens == (c->noreply ? 4 : 3)) { + limit = strtol(tokens[1].value, NULL, 10); + if(errno == ERANGE) { + out_string(c, "CLIENT_ERROR bad command line format"); + return; + } + } + + hs_optimize(db, limit); + out_string(c, "OK"); + return; +*/ + } else { + out_string(c, "ERROR"); + return; + } + + clock_gettime(CLOCK_MONOTONIC, &end); + float secs = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) / 1e9; + if (secs > settings.slow_cmd_time) { + STATS_LOCK(); + stats.slow_cmds ++; + STATS_UNLOCK(); + } + + // access logging + if (NULL != access_log && ntokens >= 3) { + char now[255]; + time_t t = time(NULL); + strftime(now, 200, "%Y-%m-%d %H:%M:%S", localtime(&t)); + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + getpeername(c->sfd, (struct sockaddr*)&addr, &addrlen); + char host[NI_MAXHOST], serv[NI_MAXSERV]; + getnameinfo((struct sockaddr*)&addr, addrlen, host, sizeof(host), serv, sizeof(serv), + NI_NUMERICSERV); + fprintf(access_log, "%s %s:%s %s %s %.3f\n", now, host, serv, + command, tokens[1].value, secs*1000); + } + + return; +} + +/* + * if we have a complete line in the buffer, process it. 
+ */ +static int try_read_command(conn *c) { + char *el, *cont; + + assert(c != NULL); + assert(c->rcurr <= (c->rbuf + c->rsize)); + + if (c->rbytes == 0) + return 0; + el = memchr(c->rcurr, '\n', c->rbytes); + if (!el) + return 0; + cont = el + 1; + if ((el - c->rcurr) > 1 && *(el - 1) == '\r') { + el--; + } + *el = '\0'; + + assert(cont <= (c->rcurr + c->rbytes)); + + process_command(c, c->rcurr); + + c->rbytes -= (cont - c->rcurr); + c->rcurr = cont; + + assert(c->rcurr <= (c->rbuf + c->rsize)); + + return 1; +} + +/* + * read from network as much as we can, handle buffer overflow and connection + * close. + * before reading, move the remaining incomplete fragment of a command + * (if any) to the beginning of the buffer. + * return 0 if there's nothing to read on the first read. + */ +static int try_read_network(conn *c) { + int gotdata = 0; + int res; + + assert(c != NULL); + + if (c->rcurr != c->rbuf) { + if (c->rbytes != 0) /* otherwise there's nothing to copy */ + memmove(c->rbuf, c->rcurr, c->rbytes); + c->rcurr = c->rbuf; + } + + while (1) { + if (c->rbytes >= c->rsize) { + char *new_rbuf = realloc(c->rbuf, c->rsize * 2); + if (!new_rbuf) { + if (settings.verbose > 0) + fprintf(stderr, "Couldn't realloc input buffer\n"); + c->rbytes = 0; /* ignore what we read */ + out_string(c, "SERVER_ERROR out of memory reading request"); + c->write_and_go = conn_closing; + return 1; + } + c->rcurr = c->rbuf = new_rbuf; + c->rsize *= 2; + } + + + int avail = c->rsize - c->rbytes; + res = read(c->sfd, c->rbuf + c->rbytes, avail); + if (res > 0) { + STATS_LOCK(); + stats.bytes_read += res; + STATS_UNLOCK(); + gotdata = 1; + c->rbytes += res; + if (res == avail) { + continue; + } else { + break; + } + } + if (res == 0) { + /* connection closed */ + conn_set_state(c, conn_closing); + return 1; + } + if (res == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) break; + /* Should close on unhandled errors. 
*/ + conn_set_state(c, conn_closing); + return 1; + } + } + return gotdata; +} + +static bool update_event(conn *c, const int new_flags) { + c->ev_flags = new_flags; + return true; +} + +/* + * Transmit the next chunk of data from our list of msgbuf structures. + * + * Returns: + * TRANSMIT_COMPLETE All done writing. + * TRANSMIT_INCOMPLETE More data remaining to write. + * TRANSMIT_SOFT_ERROR Can't write any more right now. + * TRANSMIT_HARD_ERROR Can't write (c->state is set to conn_closing) + */ +static int transmit(conn *c) { + assert(c != NULL); + + if (c->msgcurr < c->msgused && + c->msglist[c->msgcurr].msg_iovlen == 0) { + /* Finished writing the current msg; advance to the next. */ + c->msgcurr++; + } + if (c->msgcurr < c->msgused) { + ssize_t res; + struct msghdr *m = &c->msglist[c->msgcurr]; + + res = sendmsg(c->sfd, m, 0); + if (res > 0) { + STATS_LOCK(); + stats.bytes_written += res; + STATS_UNLOCK(); + + /* We've written some of the data. Remove the completed + iovec entries from the list of pending writes. */ + while (m->msg_iovlen > 0 && res >= m->msg_iov->iov_len) { + res -= m->msg_iov->iov_len; + m->msg_iovlen--; + m->msg_iov++; + } + + /* Might have written just part of the last iovec entry; + adjust it so the next write will do the rest. 
*/ + if (res > 0) { + m->msg_iov->iov_base += res; + m->msg_iov->iov_len -= res; + } + return TRANSMIT_INCOMPLETE; + } + if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { + update_event(c, AE_WRITABLE); + return TRANSMIT_SOFT_ERROR; + } + /* if res==0 or res==-1 and error is not EAGAIN or EWOULDBLOCK, + we have a real error, on which we close the connection */ + if (settings.verbose > 0) + perror("Failed to write, and not due to blocking"); + + conn_set_state(c, conn_closing); + return TRANSMIT_HARD_ERROR; + } else { + return TRANSMIT_COMPLETE; + } +} + +void drive_machine(conn *c) { + bool stop = false; + int sfd, flags = 1; + socklen_t addrlen; + struct sockaddr_storage addr; + int res; + + assert(c != NULL); + + while (!stop) { + + switch(c->state) { + case conn_listening: + addrlen = sizeof(addr); + if ((sfd = accept(c->sfd, (struct sockaddr *)&addr, &addrlen)) == -1) { + stop = true; + if (errno == EAGAIN || errno == EWOULDBLOCK) { + /* these are transient, so don't log anything */ + } else if (errno == EMFILE) { + if (settings.verbose > 0) + fprintf(stderr, "Too many open connections\n"); + if (stub_fd > 0){ + close(stub_fd); + if ((sfd = accept(c->sfd, (struct sockaddr *)&addr, &addrlen)) != -1) { + close(sfd); + stub_fd = open("/dev/null", O_RDONLY); + stop = false; + }else{ + if (settings.verbose > 0) + fprintf(stderr, "Too many open connections 2\n"); + } + } + } else { + perror("accept()"); + } + if (stop) break; + } + if ((flags = fcntl(sfd, F_GETFL, 0)) < 0 || + fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) { + perror("setting O_NONBLOCK"); + close(sfd); + break; + } + if (NULL == conn_new(sfd, conn_read, DATA_BUFFER_SIZE)) { + if (settings.verbose > 0) { + fprintf(stderr, "Can't listen for events on fd %d\n", sfd); + } + close(sfd); + } + break; + + case conn_read: + if (try_read_command(c) != 0) { + continue; + } + if (try_read_network(c) != 0) { + continue; + } + /* we have no command line and no data to read from network */ + 
update_event(c, AE_READABLE); + stop = true; + break; + + case conn_nread: + /* we are reading rlbytes into ritem; */ + if (c->rlbytes == 0) { + complete_nread(c); + break; + } + /* first check if we have leftovers in the conn_read buffer */ + if (c->rbytes > 0) { + int tocopy = c->rbytes > c->rlbytes ? c->rlbytes : c->rbytes; + memcpy(c->ritem, c->rcurr, tocopy); + c->ritem += tocopy; + c->rlbytes -= tocopy; + c->rcurr += tocopy; + c->rbytes -= tocopy; + break; + } + + /* now try reading from the socket */ + res = read(c->sfd, c->ritem, c->rlbytes); + if (res > 0) { + STATS_LOCK(); + stats.bytes_read += res; + STATS_UNLOCK(); + c->ritem += res; + c->rlbytes -= res; + break; + } + if (res == 0) { /* end of stream */ + conn_set_state(c, conn_closing); + break; + } + if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { + update_event(c, AE_READABLE); + stop = true; + break; + } + /* otherwise we have a real error, on which we close the connection */ + if (settings.verbose > 0) + fprintf(stderr, "Failed to read, and not due to blocking\n"); + conn_set_state(c, conn_closing); + break; + + case conn_swallow: + /* we are reading sbytes and throwing them away */ + if (c->sbytes == 0) { + conn_set_state(c, conn_read); + break; + } + + /* first check if we have leftovers in the conn_read buffer */ + if (c->rbytes > 0) { + int tocopy = c->rbytes > c->sbytes ? c->sbytes : c->rbytes; + c->sbytes -= tocopy; + c->rcurr += tocopy; + c->rbytes -= tocopy; + break; + } + + /* now try reading from the socket */ + res = read(c->sfd, c->rbuf, c->rsize > c->sbytes ? 
c->sbytes : c->rsize); + if (res > 0) { + STATS_LOCK(); + stats.bytes_read += res; + STATS_UNLOCK(); + c->sbytes -= res; + break; + } + if (res == 0) { /* end of stream */ + conn_set_state(c, conn_closing); + break; + } + if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { + update_event(c, AE_READABLE); + stop = true; + break; + } + /* otherwise we have a real error, on which we close the connection */ + if (settings.verbose > 0) + fprintf(stderr, "Failed to read, and not due to blocking\n"); + conn_set_state(c, conn_closing); + break; + + case conn_write: + /* + * We want to write out a simple response. If we haven't already, + * assemble it into a msgbuf list (this will be a single-entry + * list for TCP or a two-entry list for UDP). + */ + if (c->iovused == 0) { + if (add_iov(c, c->wcurr, c->wbytes) != 0) { + if (settings.verbose > 0) + fprintf(stderr, "Couldn't build response\n"); + conn_set_state(c, conn_closing); + break; + } + } + + /* fall through... */ + + case conn_mwrite: + switch (transmit(c)) { + case TRANSMIT_COMPLETE: + if (c->state == conn_mwrite) { + while (c->ileft > 0) { + item *it = *(c->icurr); + item_free(it); + c->icurr++; + c->ileft--; + } + conn_set_state(c, conn_read); + } else if (c->state == conn_write) { + if (c->write_and_free) { + free(c->write_and_free); + c->write_and_free = 0; + } + conn_set_state(c, c->write_and_go); + } else { + if (settings.verbose > 0) + fprintf(stderr, "Unexpected state %d\n", c->state); + conn_set_state(c, conn_closing); + } + break; + + case TRANSMIT_INCOMPLETE: + case TRANSMIT_HARD_ERROR: + break; /* Continue in state machine. 
*/ + + case TRANSMIT_SOFT_ERROR: + stop = true; + break; + } + break; + + case conn_closing: + conn_close(c); + stop = true; + break; + } + } + + return; +} + +static int new_socket(struct addrinfo *ai) { + int sfd; + int flags; + + if ((sfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) == -1) { + perror("socket()"); + return -1; + } + + if ((flags = fcntl(sfd, F_GETFL, 0)) < 0 || + fcntl(sfd, F_SETFL, flags | O_NONBLOCK) < 0) { + perror("setting O_NONBLOCK"); + close(sfd); + return -1; + } + return sfd; +} + +static int server_socket(const int port, const bool is_udp) { + int sfd; + struct linger ling = {0, 0}; + struct addrinfo *ai; + struct addrinfo *next; + struct addrinfo hints; + char port_buf[NI_MAXSERV]; + int error; + int success = 0; + + int flags =1; + + /* + * the memset call clears nonstandard fields in some impementations + * that otherwise mess things up. + */ + memset(&hints, 0, sizeof (hints)); + hints.ai_flags = AI_PASSIVE|AI_ADDRCONFIG; + hints.ai_family = AF_UNSPEC; + hints.ai_protocol = IPPROTO_TCP; + hints.ai_socktype = SOCK_STREAM; + + snprintf(port_buf, NI_MAXSERV, "%d", port); + error= getaddrinfo(settings.inter, port_buf, &hints, &ai); + if (error != 0) { + if (error != EAI_SYSTEM) + fprintf(stderr, "getaddrinfo(): %s\n", gai_strerror(error)); + else + perror("getaddrinfo()"); + + return 1; + } + + for (next= ai; next; next= next->ai_next) { + conn *listen_conn_add; + if ((sfd = new_socket(next)) == -1) { + freeaddrinfo(ai); + return 1; + } + + setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags)); + setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags)); + setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling)); + setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&flags, sizeof(flags)); + + if (bind(sfd, next->ai_addr, next->ai_addrlen) == -1) { + if (errno != EADDRINUSE) { + perror("bind()"); + close(sfd); + freeaddrinfo(ai); + return 1; + } + close(sfd); + continue; + } else 
{ + success++; + if (listen(sfd, 1024) == -1) { + perror("listen()"); + close(sfd); + freeaddrinfo(ai); + return 1; + } + } + + if (!(listen_conn_add = conn_new(sfd, conn_listening, 1))) { + fprintf(stderr, "failed to create listening connection\n"); + exit(EXIT_FAILURE); + } + } + + freeaddrinfo(ai); + + /* Return zero iff we detected no errors in starting up connections */ + return success == 0; +} + +static void usage(void) { + printf(PACKAGE " " VERSION "\n"); + printf("-p <num> TCP port number to listen on (default: 8964)\n" + "-l <ip_addr> interface to listen on, default is INDRR_ANY\n" + "-d run as a daemon\n" + "-P <num> page cache limit(MB), default 256(MB)\n" + "-r <num> record cache limit(MB), default 256(MB)\n" + "-R <num> bytes for a disk read operation, must be between[1024,65535), recommend to be larger than most small records, default is 4096(Bytes)\n" + "-L <file> log file\n" + "-u <username> assume identity of <username> (only when run as root)\n" + "-c <num> max simultaneous connections, default is 1024\n" + "-t <num> number of threads to use, default 16\n" + "-H <dir> home of database, default is 'testdb', keep sure the directory exists\n" + "-s <num> slow command time limit, in ms, default is 100ms\n" + "-n main hash table size, recommend to be 1%% - 10%% of maximum record num, default is 1000000\n" + "-v verbose (print errors/warnings while in event loop)\n" + "-vv very verbose (also print client commands/reponses)\n" + "-h print this help and exit\n" + "-i print license info\n" + ); + + return; +} + +static void usage_license(void) { + printf(PACKAGE " " VERSION "\n\n"); + printf( + "Copyright (c) 2012, Siyuan Fu. <fusiyuan2010@gmail.com>\n" + "All rights reserved.\n" + "\n" + "\n" + "This product includes software developed by Douban Inc.\n" + "\n" + "[ Beansdb ]\n" + "\n" + "Copyright (c) 2009, Douban Inc. 
<http://www.douban.com/>\n" + "All rights reserved.\n" + "\n" + "Redistribution and use in source and binary forms, with or without\n" + "modification, are permitted provided that the following conditions are\n" + "met:\n" + "\n" + " * Redistributions of source code must retain the above copyright\n" + "notice, this list of conditions and the following disclaimer.\n" + "\n" + " * Redistributions in binary form must reproduce the above\n" + "copyright notice, this list of conditions and the following disclaimer\n" + "in the documentation and/or other materials provided with the\n" + "distribution.\n" + "\n" + " * Neither the name of the Douban Inc. nor the names of its\n" + "contributors may be used to endorse or promote products derived from\n" + "this software without specific prior written permission.\n" + "\n" + "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n" + "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n" + "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n" + "A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n" + "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n" + "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n" + "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n" + "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n" + "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n" + "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n" + "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n" + "\n" + "\n" + "This product includes software developed by Douban Inc.\n" + "\n" + "[ memcachedb ]\n" + "\n" + "Copyright (c) 2008, Steve Chu. 
<stvchu@gmail.com>\n" + "All rights reserved.\n" + "\n" + "Redistribution and use in source and binary forms, with or without\n" + "modification, are permitted provided that the following conditions are\n" + "met:\n" + "\n" + " * Redistributions of source code must retain the above copyright\n" + "notice, this list of conditions and the following disclaimer.\n" + "\n" + " * Redistributions in binary form must reproduce the above\n" + "copyright notice, this list of conditions and the following disclaimer\n" + "in the documentation and/or other materials provided with the\n" + "distribution.\n" + "\n" + " * Neither the name of the Danga Interactive nor the names of its\n" + "contributors may be used to endorse or promote products derived from\n" + "this software without specific prior written permission.\n" + "\n" + "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n" + "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n" + "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n" + "A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n" + "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n" + "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n" + "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n" + "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n" + "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n" + "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n" + "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n" + "\n" + "\n" + "This product includes software developed by Danga Interactive, Inc.\n" + "\n" + "[ memcached ]\n" + "\n" + "Copyright (c) 2003, Danga Interactive, Inc. 
<http://www.danga.com/>\n" + "All rights reserved.\n" + "\n" + "Redistribution and use in source and binary forms, with or without\n" + "modification, are permitted provided that the following conditions are\n" + "met:\n" + "\n" + " * Redistributions of source code must retain the above copyright\n" + "notice, this list of conditions and the following disclaimer.\n" + "\n" + " * Redistributions in binary form must reproduce the above\n" + "copyright notice, this list of conditions and the following disclaimer\n" + "in the documentation and/or other materials provided with the\n" + "distribution.\n" + "\n" + " * Neither the name of the Danga Interactive nor the names of its\n" + "contributors may be used to endorse or promote products derived from\n" + "this software without specific prior written permission.\n" + "\n" + "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n" + "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n" + "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n" + "A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n" + "OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n" + "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n" + "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n" + "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n" + "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n" + "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n" + "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n" + "\n" + "\n" + "This product includes software developed by Niels Provos.\n" + "\n" + "[ libevent ]\n" + "\n" + "Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>\n" + "All rights reserved.\n" + "\n" + "Redistribution and use in source and binary forms, with or without\n" + "modification, are permitted provided that the following conditions\n" + "are met:\n" + "1. Redistributions of source code must retain the above copyright\n" + " notice, this list of conditions and the following disclaimer.\n" + "2. Redistributions in binary form must reproduce the above copyright\n" + " notice, this list of conditions and the following disclaimer in the\n" + " documentation and/or other materials provided with the distribution.\n" + "3. All advertising materials mentioning features or use of this software\n" + " must display the following acknowledgement:\n" + " This product includes software developed by Niels Provos.\n" + "4. 
The name of the author may not be used to endorse or promote products\n"
+    " derived from this software without specific prior written permission.\n"
+    "\n"
+    "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
+    "IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
+    "OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
+    "IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
+    "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\n"
+    "NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n"
+    "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n"
+    "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n"
+    "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\n"
+    "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
+    );
+
+    return;
+}
+
+
+/*
+ * Handler for SIGTERM / SIGQUIT / SIGINT: raises daemon_quit once so the
+ * worker loops can stop. Any other signal, or a repeated request, is ignored.
+ *
+ * NOTE(review): fprintf() is not async-signal-safe. It is kept because stderr
+ * may have been redirected to the -L log file, which a raw write(2) to fd 2
+ * would not reach.
+ */
+static void sig_handler(const int sig)
+{
+    if (sig != SIGTERM && sig != SIGQUIT && sig != SIGINT) {
+        return;
+    }
+    if (daemon_quit == 1){
+        return;
+    }
+    daemon_quit = 1;
+    fprintf(stderr, "Signal(%d) received, try to exit daemon gracefully..\n", sig);
+}
+
+
+/*
+ * Server entry point: parses the command line, raises RLIMIT_NOFILE to fit
+ * maxconns, optionally daemonizes and drops root, opens the database, starts
+ * listening, installs the quit signal handlers and runs the event loop until
+ * daemon_quit is set. Returns 0 after a clean shutdown; setup failures end
+ * the process with a non-zero status.
+ */
+int main (int argc, char **argv) {
+    int c;
+    //struct in_addr addr;
+    char *dbhome = "testdb";
+    bool daemonize = false;
+    char *username = NULL;
+    FILE *log_file = NULL;
+    struct passwd *pw;
+    struct sigaction sa;
+    struct rlimit rlim;
+    int rcache = 256, pcache = 256;
+    /* recommend for 100,000,000 records*/
+    int db_hsize = 1000000;
+    int areadsize = 4096;
+
+    /* init settings */
+    settings_init();
+
+    /* set stderr non-buffering (for running under, say, daemontools) */
+    setbuf(stderr, NULL);
+
+    /* process arguments */
+    while ((c = getopt(argc, argv, "a:p:c:hivl:dr:u:P:L:t:b:H:s:n:R:")) != -1) {
+        switch (c) {
+        case 'a':
+            if (strcmp(optarg, "-") == 0) {
+                access_log = stdout;
+            }else{
+                access_log = fopen(optarg, "a");
+                if (NULL == access_log) {
+                    fprintf(stderr, "open access_log %s failed\n", optarg);
+                    exit(1);
+                }
+            }
+            break;
+        case 'p':
+            settings.port = atoi(optarg);
+            break;
+        case 'c':
+            settings.maxconns = atoi(optarg);
+            break;
+        case 'h':
+            usage();
+            exit(EXIT_SUCCESS);
+        case 'i':
+            usage_license();
+            exit(EXIT_SUCCESS);
+        case 'v':
+            settings.verbose++;
+            break;
+        case 'l':
+            settings.inter = strdup(optarg);
+            if (settings.inter == NULL) {
+                fprintf(stderr, "out of memory while copying the -l argument\n");
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'd':
+            daemonize = true;
+            break;
+        case 'r':
+            rcache = atoi(optarg);
+            break;
+        case 'R':
+            areadsize = atoi(optarg);
+            break;
+        case 'u':
+            username = optarg;
+            break;
+        case 'P':
+            pcache = atoi(optarg);
+            break;
+        case 'L':
+            /* NOTE(review): assigning to stdout/stderr is not portable C;
+             * it relies on the C library exposing them as lvalues. */
+            if ((log_file = fopen(optarg, "a")) != NULL){
+                setlinebuf(log_file);
+                fclose(stdout);
+                fclose(stderr);
+                stdout = stderr = log_file;
+            }else{
+                fprintf(stderr, "open log file %s failed\n", optarg);
+            }
+            break;
+        case 't':
+            settings.num_threads = atoi(optarg);
+            /* reject negative counts too, as the message promises */
+            if (settings.num_threads <= 0) {
+                fprintf(stderr, "Number of threads must be greater than 0\n");
+                exit(EXIT_FAILURE);
+            }
+            break;
+        case 'b': {
+            /* Parse into a signed int first: item_buf_size is a size_t, so a
+             * negative argument would otherwise wrap to a huge value and slip
+             * past the minimum-size check. */
+            int buf_size = atoi(optarg);
+            if(buf_size < 512){
+                fprintf(stderr, "item buf size must be larger than 512 bytes\n");
+                exit(EXIT_FAILURE);
+            }
+            settings.item_buf_size = (size_t)buf_size;
+            if(settings.item_buf_size > 256 * 1024){
+                fprintf(stderr, "Warning: item buffer size(-b) larger than 256KB may cause performance issue\n");
+            }
+            break;
+        }
+        case 'H':
+            dbhome = optarg;
+            break;
+        case 's':
+            settings.slow_cmd_time = atoi(optarg) / 1000.0;
+            break;
+        case 'n':
+            db_hsize = atoi(optarg);
+            break;
+        default:
+            fprintf(stderr, "Illegal argument \"%c\"\n", c);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    /*
+     * If needed, increase rlimits to allow as many connections
+     * as needed.
+     */
+
+    if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
+        fprintf(stderr, "failed to getrlimit number of files\n");
+        exit(EXIT_FAILURE);
+    } else {
+        int maxfiles = settings.maxconns;
+        if (rlim.rlim_cur < maxfiles)
+            rlim.rlim_cur = maxfiles + 3;
+        if (rlim.rlim_max < rlim.rlim_cur)
+            rlim.rlim_max = rlim.rlim_cur;
+        if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) {
+            fprintf(stderr, "failed to set rlimit for open files. Try running as root or requesting smaller maxconns value.\n");
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    /* daemonize if requested */
+    /* if we want to ensure our ability to dump core, don't chdir to / */
+    if (daemonize) {
+        int res;
+        res = daemon(1, settings.verbose || log_file);
+        if (res == -1) {
+            fprintf(stderr, "failed to daemon() in order to daemonize\n");
+            return 1;
+        }
+    }
+
+    /* lose root privileges if we have them */
+    if (getuid() == 0 || geteuid() == 0) {
+        if (username == 0 || *username == '\0') {
+            fprintf(stderr, "can't run as root without the -u switch\n");
+            return 1;
+        }
+        if ((pw = getpwnam(username)) == 0) {
+            fprintf(stderr, "can't find the user %s to switch to\n", username);
+            return 1;
+        }
+        if (setgid(pw->pw_gid) < 0 || setuid(pw->pw_uid) < 0) {
+            fprintf(stderr, "failed to assume identity of user %s\n", username);
+            return 1;
+        }
+    }
+
+    /* initialize other stuff */
+    item_init();
+    stats_init();
+    conn_init();
+
+    /*
+     * ignore SIGPIPE signals; we can use errno==EPIPE if we
+     * need that information
+     */
+    sa.sa_handler = SIG_IGN;
+    sa.sa_flags = 0;
+    if (sigemptyset(&sa.sa_mask) == -1 ||
+        sigaction(SIGPIPE, &sa, 0) == -1) {
+        perror("failed to ignore SIGPIPE; sigaction");
+        exit(EXIT_FAILURE);
+    }
+
+    /* open db */
+    db = cdb_new();
+    cdb_option(db, db_hsize, rcache, pcache);
+    cdb_option_areadsize(db, areadsize);
+
+    if (cdb_open(db, dbhome, CDB_CREAT | CDB_PAGEWARMUP) < 0) {
+        fprintf(stderr, "failed to open db %s\n", dbhome);
+        exit(1);
+    }
+
+    if ((stub_fd = open("/dev/null", O_RDONLY)) == -1) {
+        perror("open stub file failed");
+        exit(1);
+    }
+    thread_init(settings.num_threads);
+
+    /* create the listening socket, bind it, and init */
+    if (server_socket(settings.port, false)) {
+        fprintf(stderr, "failed to listen\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* register signal callback */
+    if (signal(SIGTERM, sig_handler) == SIG_ERR)
+        fprintf(stderr, "can not catch SIGTERM\n");
+    if (signal(SIGQUIT, sig_handler) == SIG_ERR)
+        fprintf(stderr, "can not catch SIGQUIT\n");
+    if (signal(SIGINT, sig_handler) == SIG_ERR)
+        fprintf(stderr, "can not catch SIGINT\n");
+
+    /* enter the event loop */
+    printf("all ready.\n");
+    loop_run(settings.num_threads);
+
+    /* wait other thread to ends */
+    fprintf(stderr, "waiting for close ... \n");
+    cdb_destroy(db);
+    fprintf(stderr, "done.\n");
+
+    if (log_file) {
+        fclose(log_file);
+    }
+
+    return 0;
+}
+
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cuttdb-server.h b/libdap-chain-global-db/libdap-cuttdb/src/cuttdb-server.h
new file mode 100644
index 0000000000000000000000000000000000000000..90cc9b6271683c058dfacd007fad43ffef239c55
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/cuttdb-server.h
@@ -0,0 +1,270 @@
+/*
+ * Beansdb - A high available distributed key-value storage system:
+ *
+ * http://beansdb.googlecode.com
+ *
+ * The source code of Beansdb is most based on Memcachedb and Memcached:
+ *
+ * http://memcachedb.org/
+ * http://danga.com/memcached/
+ *
+ * Copyright 2009 Douban Inc. All rights reserved.
+ *
+ * Use and distribution licensed under the BSD license. See
+ * the LICENSE file for full text.
+ *
+ * Authors:
+ * Davies Liu <davies.liu@gmail.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#define DATA_BUFFER_SIZE 2048
+#define MAX_PAYLOAD_SIZE 1400
+#define MAX_SENDBUF_SIZE (256 * 1024 * 1024)
+/* I'm told the max length of a 64-bit num converted to string is 20 bytes.
+ * Plus a few for spaces, \r\n, \0 */ +#define SUFFIX_SIZE 24 +#define INCR_MAX_STORAGE_LEN 24 + +/** Initial size of list of items being returned by "get". */ +#define ITEM_LIST_INITIAL 200 + +/** Initial size of the sendmsg() scatter/gather array. */ +#define IOV_LIST_INITIAL 400 + +/** Initial number of sendmsg() argument structures to allocate. */ +#define MSG_LIST_INITIAL 10 + +/** High water marks for buffer shrinking */ +#define READ_BUFFER_HIGHWAT 8192 +#define ITEM_LIST_HIGHWAT 400 +#define IOV_LIST_HIGHWAT 600 +#define MSG_LIST_HIGHWAT 100 + +#define MAX_REP_PRIORITY 1000000 +#define MAX_REP_ACK_POLICY 6 +#define MAX_REP_NSITES 1000 + + +#define RGET_MAX_ITEMS 100 +#define PACKAGE "CuttDB" +#define VERSION "0.1.0" + +/* Get a consistent bool type */ +#include <stdbool.h> + +#if HAVE_STDINT_H +# include <stdint.h> +#else + typedef unsigned char uint8_t; +#endif + +/* unistd.h is here */ +#if HAVE_UNISTD_H +# include <unistd.h> +#endif + +/* 64-bit Portable printf */ +/* printf macros for size_t, in the style of inttypes.h */ +#ifdef _LP64 +#define __PRIS_PREFIX "z" +#else +#define __PRIS_PREFIX +#endif + +#define AE_SETSIZE (1024*60) /* Max number of fd supported */ + +#define AE_OK 0 +#define AE_ERR -1 + +#define AE_NONE 0 +#define AE_READABLE 1 +#define AE_WRITABLE 2 + +/* Use these macros after a % in a printf format string + to get correct 32/64 bit behavior, like this: + size_t size = records.size(); + printf("%"PRIuS"\n", size); */ + +#define PRIdS __PRIS_PREFIX "d" +#define PRIxS __PRIS_PREFIX "x" +#define PRIuS __PRIS_PREFIX "u" +#define PRIXS __PRIS_PREFIX "X" +#define PRIoS __PRIS_PREFIX "o" + +struct stats { + uint32_t curr_conns; + uint32_t total_conns; + uint32_t conn_structs; + uint64_t get_cmds; + uint64_t set_cmds; + uint64_t delete_cmds; + uint64_t slow_cmds; + uint64_t get_hits; + uint64_t get_misses; + time_t started; /* when the process was started */ + uint64_t bytes_read; + uint64_t bytes_written; +}; + +#define MAX_VERBOSITY_LEVEL 2 
+ +struct settings { + size_t item_buf_size; + int maxconns; + int port; + char *inter; + int verbose; + float slow_cmd_time; + int flush_period; + int flush_limit; + int num_threads; /* number of libevent threads to run */ +}; + +extern struct stats stats; +extern struct settings settings; + +typedef struct _stritem { + int expire; /* expire time */ + uint32_t flag; /* flag of item */ + int nbytes; /* size of data */ + uint8_t nsuffix; /* length of flags-and-length string */ + uint8_t nkey; /* key length, w/terminating null and padding */ + void * end[]; + /* then null-terminated key */ + /* then " flags length\r\n" (no terminating null) */ + /* then data with terminating \r\n (no terminating null; it's binary!) */ +} item; + +#define ITEM_key(item) ((char*)&((item)->end[0])) + +/* warning: don't use these macros with a function, as it evals its arg twice */ +#define ITEM_suffix(item) ((char*) &((item)->end[0]) + (item)->nkey + 1) +#define ITEM_data(item) ((char*) &((item)->end[0]) + (item)->nkey + 1 + (item)->nsuffix) +#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 + (item)->nsuffix + (item)->nbytes) + +enum conn_states { + conn_listening, /** the socket which listens for connections */ + conn_read, /** reading in a command line */ + conn_write, /** writing out a simple response */ + conn_nread, /** reading in a fixed number of bytes */ + conn_swallow, /** swallowing unnecessary bytes w/o storing */ + conn_closing, /** closing this connection */ + conn_mwrite, /** writing out many items sequentially */ +}; + +#define NREAD_ADD 1 +#define NREAD_SET 2 +#define NREAD_REPLACE 3 +#define NREAD_APPEND 4 +#define NREAD_PREPEND 5 + +typedef struct conn conn; +struct conn { + int sfd; + int state; + short ev_flags; + + char *rbuf; /** buffer to read commands into */ + char *rcurr; /** but if we parsed some already, this is where we stopped */ + int rsize; /** total allocated size of rbuf */ + int rbytes; /** how much data, starting from rcur, do we 
have unparsed */ + + char *wbuf; + char *wcurr; + int wsize; + int wbytes; + int write_and_go; /** which state to go into after finishing current write */ + void *write_and_free; /** free this memory after finishing writing */ + bool noreply; /* True if the reply should not be sent. */ + + char *ritem; /** when we read in an item's value, it goes here */ + int rlbytes; + + /* data for the nread state */ + + /** + * item is used to hold an item structure created after reading the command + * line of set/add/replace commands, but before we finished reading the actual + * data. The data is read into ITEM_data(item) to avoid extra copying. + */ + + void *item; /* for commands set/add/replace */ + int item_comm; /* which one is it: set/add/replace */ + + /* data for the swallow state */ + int sbytes; /* how many bytes to swallow */ + + /* data for the mwrite state */ + struct iovec *iov; + int iovsize; /* number of elements allocated in iov[] */ + int iovused; /* number of elements used in iov[] */ + + struct msghdr *msglist; + int msgsize; /* number of elements allocated in msglist[] */ + int msgused; /* number of elements used in msglist[] */ + int msgcurr; /* element in msglist[] being transmitted now */ + int msgbytes; /* number of bytes in current msg */ + + item **ilist; /* list of items to write out */ + int isize; + item **icurr; + int ileft; + + conn *next; /* Used for generating a list of conn structures */ +}; + +/* + * Functions + */ + +/* item management */ +/* +void item_init(void); +item *do_item_from_freelist(void); +int do_item_add_to_freelist(item *it); +item *item_alloc1(char *key, const size_t nkey, const int flags, const int nbytes); +int item_free(item *it); +item *item_get(char *key, size_t nkey); +*/ + +/* conn management */ +conn *do_conn_from_freelist(); +bool do_conn_add_to_freelist(conn *c); +conn *conn_new(const int sfd, const int init_state, const int read_buffer_size); + +int store_item(item *item, int comm); + +void thread_init(int 
nthreads); +int add_event(int fd, int mask, conn *c); +void loop_run(int nthreads); + +void drive_machine(conn *c); + +/* Lock wrappers for cache functions that are called from main loop. */ +conn *mt_conn_from_freelist(void); +bool mt_conn_add_to_freelist(conn *c); +item *mt_item_from_freelist(void); +int mt_item_add_to_freelist(item *it); +void mt_stats_lock(void); +void mt_stats_unlock(void); + +#define conn_from_freelist() mt_conn_from_freelist() +#define conn_add_to_freelist(x) mt_conn_add_to_freelist(x) +#define item_from_freelist() mt_item_from_freelist() +#define item_add_to_freelist(x) mt_item_add_to_freelist(x) +#define STATS_LOCK() mt_stats_lock() +#define STATS_UNLOCK() mt_stats_unlock() + +extern int daemon_quit; + diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cuttdb.c b/libdap-chain-global-db/libdap-cuttdb/src/cuttdb.c new file mode 100644 index 0000000000000000000000000000000000000000..74e342623a5308fd4275868263eaac87c1c726e7 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cuttdb.c @@ -0,0 +1,21 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. + * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#include "cuttdb.h" +#include "cdb_types.h" +#include "cdb_vio.h" + + +/* nothing here */ diff --git a/libdap-chain-global-db/libdap-cuttdb/src/cuttdb.h b/libdap-chain-global-db/libdap-cuttdb/src/cuttdb.h new file mode 100644 index 0000000000000000000000000000000000000000..1814028f51f0ebd8300adc9e2eb34ae684e7218e --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/cuttdb.h @@ -0,0 +1,217 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. 
+ * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#ifndef _CUTTDB_H_ +#define _CUTTDB_H_ +#include <stdint.h> +#include <stdbool.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef struct CDB CDB; +typedef void (*CDB_ERRCALLBACK)(void *, int, const char *, int); +typedef bool (*CDB_ITERCALLBACK)(void *, const char *, int, const char *, int, uint32_t, uint64_t); + +/* performance statistical information of an database instance */ +typedef struct { + /* number of records in db */ + uint64_t rnum; + /* number of records in cache */ + uint64_t rcnum; + /* number of pages in db */ + uint64_t pnum; + /* number of pages in cache */ + uint64_t pcnum; + /* cache hit of record cache */ + uint64_t rchit; + /* cache miss of record cache */ + uint64_t rcmiss; + /* cache hit of page cache */ + uint64_t pchit; + /* cache miss of page cache */ + uint64_t pcmiss; + /* average disk read latency */ + uint32_t rlatcy; + /* average disk write latency */ + uint32_t wlatcy; +} CDBSTAT; + +/* options to open a database*/ +enum { + /* create an database if not exist */ + CDB_CREAT = 0x1, + /* clean the database if already exist */ + CDB_TRUNC = 0x2, + /* fill the cache when start up */ + CDB_PAGEWARMUP = 0x4, +}; + +/* error codes */ +enum { + CDB_SUCCESS = 0, + CDB_NOTFOUND, + CDB_EXIST, + CDB_DIRNOEXIST, + CDB_OPENERR, + CDB_PIDEXIST, + CDB_DATAERRDAT, + CDB_DATAERRIDX, + CDB_WRITEERR, + CDB_READERR, + CDB_NOFID, + CDB_INTERNALERR, + CDB_DATAERRMETA, + CDB_MEMDBNOCACHE, +}; + +/* record insertion options */ +enum { + CDB_OVERWRITE = 0, + CDB_INSERTIFEXIST = 0x1, + CDB_INSERTIFNOEXIST = 0x2, + CDB_INSERTCACHE = 0x8, +}; + +/* if database path is CDB_MEMDB, records are never written to disk, they stay in cache only */ +#define CDB_MEMDB ":memory:" + +/* + WARNING: + + the library has auxiliary thread, which means do fork() after open a database will cause + unpredictable situation. 
+*/ + +/* create an cuttdb object, which should be freed by cdb_destory() */ +CDB *cdb_new(); + +/* cdb_option() must be called before cdb_open() + + the second parameter 'hsize' indicates the size of main hash table, which can't be + modified after the database be created. To get better performance, it is suggest to + set the 'hsize' to 10% - 1% of the total number of records. The default value 1 million + should be proper for about 100 million records. Too large or small of the value would + lead to drop in speed or waste of memory + + the third parameter 'rcacheMB' indicates the size limit of record cache (measured by + MegaBytes), every record in cache would have about 40 bytes overhead. + + the fourth parameter 'pcacheMB' indicates the size limit of index page cache (measured + by MegaBytes). If a record is not in record cache, it will be read by only 1 disk seek + with enough page cache, or it have to make an extra disk seek to load the page. + cuttdb will use about {10 * number of records} bytes to cache all index pages, which + ensures fastest 'set' operation. + + the default parameter is (_db, 1000000, 128, 1024) + + return 0 if success, or -1 at failure. */ +int cdb_option(CDB *db, int hsize, int rcacheMB, int pcacheMB); + +/* Enable bloomfilter, size should be the estimated number of records in database + must be called before cdb_open(), + The value is 100000 at minimum. Memory cost of bloomfilter is size/8 bytes */ +void cdb_option_bloomfilter(CDB *db, uint64_t size); + +/* this is an advanced parameter. It is the size for cuttdb making a read from disk. + CuttDB do not know the record size even if the index is in memory, + so at least a read with default size will performed while in cdb_get(). + The value is recommended to be larger than the size of most records in database, + unless the records are mostly larger than tens of KB. + If the value is much larger than recommended, it will be a waste of computing. 
+ The value can only be 65536 at maximum, 1024 at minimum */ +void cdb_option_areadsize(CDB *db, uint32_t size); + +/* open an database, 'file' should be an existing directory, or CDB_MEMDB for temporary store, + 'mode' should be combination of CDB_CREAT / CDB_TRUNC / CDB_PAGEWARMUP + CDB_PAGEWARMUP means to warm up page cache while opening + If there is a file called 'force_recovery' in the data directory, even if it might be made by 'touch force_recovery', + a force recovery will happen to rebuild the index (be aware that some deleted records would reappear after this) + */ +int cdb_open(CDB *db, const char *file, int mode); + + +/* simplified cdb_set2, insert a record with CDB_OVERWRITE and never expire */ +int cdb_set(CDB *db, const char *key, int ksize, const char *val, int vsize); + +/* set a record by 'key' and 'value', + opt could be bit combination of CDB_INSERTCACHE and one in {CDB_INSERTIFEXIST, CDB_INSERTNOEXIST, + CDB_OVERWRITE} + expire is the time for the record be valid, measured by second. 0 means never expire. + return 0 if success, or -1 at failure. */ +int cdb_set2(CDB *db, const char *key, int ksize, const char *val, int vsize, int opt, int expire); + + +int cdb_is(CDB *db, const char *key, int ksize); + +/* get an record by 'key', the value will be allocated and passed out by 'val', its size is + 'vsize'. return 0 if success, or -1 at failure. */ +int cdb_get(CDB *db, const char *key, int ksize, void **val, int *vsize); + + +/* the val got by cdb_get should be freed by this for safety. + If there is more than one memory allocator */ +void cdb_free_val(void **val); + + +/* delete an record by 'key'. However ,the space of the record would not be recycled. + 'vsize'. return 0 if success, or -1 at failure. 
*/ +int cdb_del(CDB *db, const char *key, int ksize); + + +/* create a new iterator begins at given operation id */ +void *cdb_iterate_new(CDB *db, uint64_t oid); + +/* iterate through the database with a callback, the function would stop if callback returned false + The callback should accept key, ksize, value, vsize, expire time, oid + Returns the number of records have been visited */ +uint64_t cdb_iterate(CDB *db, CDB_ITERCALLBACK itcb, void *arg, void *iter); + +/* destroy the iterator */ +void cdb_iterate_destroy(CDB *db, void *iter); + +/* get the current statistic information of db. 'stat' should be the struct already allocated. + if 'stat' is NULL, the statistic will be reset to zero. */ +void cdb_stat(CDB *db, CDBSTAT *stat); + + +/* close the database. IT MUST BE CALLED BEFORE PROGRAM EXITS TO ENSURE DATA COMPLETION */ +int cdb_close(CDB *db); + + +/* close the database if it opened, and free the object */ +int cdb_destroy(CDB *db); + + +/* get last error number in current thread */ +int cdb_errno(CDB *db); + + +/* get the description of an error number */ +const char *cdb_errmsg(int ecode); + + +/* set callback when error happened, 'cdb_deferrorcb' is optional, which shows the error to stderr */ +void cdb_seterrcb(CDB *db, CDB_ERRCALLBACK errcb, void *arg); + +/* a possible error callback */ +void cdb_deferrorcb(void *arg, int errno, const char *file, int line); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/libdap-chain-global-db/libdap-cuttdb/src/mman.c b/libdap-chain-global-db/libdap-cuttdb/src/mman.c new file mode 100644 index 0000000000000000000000000000000000000000..ea5d358adc7768acd227507d7ac9e7853823729d --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/mman.c @@ -0,0 +1,172 @@ +/* + * mman-win32 library + * https://code.google.com/p/mman-win32/ + * reinterpreted by Konstantin Papizh <konstantin.papizh@demlabs.net> + * DeM Labs Inc. 
https://demlabs.net
+ */
+
+#include <windows.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include "mman.h"
+
+/* Translate POSIX PROT_* bits into the page-protection constant expected by
+ * CreateFileMapping / VirtualProtect. PROT_NONE yields 0. */
+static DWORD __map_mmap_prot_page(const int prot) {
+    DWORD protect = 0;
+
+    if (prot == PROT_NONE)
+        return protect;
+
+    if ((prot & PROT_EXEC) != 0) {
+        protect = ((prot & PROT_WRITE) != 0) ?
+                    PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
+    } else {
+        protect = ((prot & PROT_WRITE) != 0) ?
+                    PAGE_READWRITE : PAGE_READONLY;
+    }
+    return protect;
+}
+
+/* Translate POSIX PROT_* bits into the FILE_MAP_* access mask expected by
+ * MapViewOfFile. PROT_NONE yields 0. */
+static DWORD __map_mmap_prot_file(const int prot) {
+
+    DWORD desiredAccess = 0;
+    if (prot == PROT_NONE)
+        return desiredAccess;
+
+    if ((prot & PROT_READ) != 0)
+        desiredAccess |= FILE_MAP_READ;
+    if ((prot & PROT_WRITE) != 0)
+        desiredAccess |= FILE_MAP_WRITE;
+    if ((prot & PROT_EXEC) != 0)
+        desiredAccess |= FILE_MAP_EXECUTE;
+
+    return desiredAccess;
+}
+
+/*
+ * mmap() emulation built on CreateFileMapping + MapViewOfFile(Ex).
+ *
+ * addr is only used when MAP_FIXED is set; fildes is ignored when
+ * MAP_ANONYMOUS is set. Returns the mapped address, or MAP_FAILED with errno
+ * set: EINVAL / EBADF for argument errors, otherwise the raw GetLastError()
+ * value (a Win32 code, not a POSIX errno).
+ */
+void* mmap(void *addr, size_t len, int prot, int flags, int fildes, offset_t off)
+{
+    HANDLE fm, h;
+    void *map = MAP_FAILED;
+    DWORD map_err = 0;
+
+    /* Split offset and mapping size into DWORD halves through a 64-bit
+     * intermediate, so the high half keeps bits 32..63 whatever the width of
+     * offset_t. */
+    const uint64_t off64 = (uint64_t)off;
+    const DWORD dwFileOffsetLow = (DWORD)(off64 & 0xFFFFFFFFu);
+    const DWORD dwFileOffsetHigh = (DWORD)(off64 >> 32);
+    const DWORD protect = __map_mmap_prot_page(prot);
+    const DWORD desiredAccess = __map_mmap_prot_file(prot);
+
+    const uint64_t maxSize = off64 + (uint64_t)len;
+
+    const DWORD dwMaxSizeLow = (DWORD)(maxSize & 0xFFFFFFFFu);
+    const DWORD dwMaxSizeHigh = (DWORD)(maxSize >> 32);
+    _set_errno(0);
+
+    if (len == 0 || prot == PROT_EXEC) {
+        _set_errno(EINVAL);
+        return MAP_FAILED;
+    }
+
+    h = ((flags & MAP_ANONYMOUS) == 0) ?
+        (HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE;
+
+    if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE) {
+        _set_errno(EBADF);
+        return MAP_FAILED;
+    }
+
+    fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
+
+    if (fm == NULL) {
+        _set_errno(GetLastError());
+        return MAP_FAILED;
+    }
+
+    if ((flags & MAP_FIXED) == 0) {
+        map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
+    }
+    else {
+        map = MapViewOfFileEx(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len, addr);
+    }
+    /* Capture the mapping error before CloseHandle() can replace it. */
+    if (map == NULL)
+        map_err = GetLastError();
+    CloseHandle(fm);
+
+    if (map == NULL) {
+        _set_errno(map_err);
+        return MAP_FAILED;
+    }
+    return map;
+}
+
+/* Unmap a view created by mmap(). len is unused: UnmapViewOfFile always
+ * releases the whole view. Returns 0, or -1 with errno = GetLastError(). */
+int munmap(void *addr, size_t len) {
+    if (UnmapViewOfFile(addr))
+        return 0;
+
+    _set_errno(GetLastError());
+    return -1;
+}
+
+/* Change page protection through VirtualProtect. Returns 0, or -1 with
+ * errno = GetLastError(). */
+int _mprotect(void *addr, size_t len, int prot) {
+    DWORD newProtect = __map_mmap_prot_page(prot);
+    DWORD oldProtect = 0;
+
+    if (VirtualProtect(addr, len, newProtect, &oldProtect))
+        return 0;
+    _set_errno(GetLastError());
+    return -1;
+}
+
+/* Flush a mapped range through FlushViewOfFile; flags is ignored. Returns 0,
+ * or -1 with errno = GetLastError(). */
+int msync(void *addr, size_t len, int flags) {
+    if (FlushViewOfFile(addr, len))
+        return 0;
+    _set_errno(GetLastError());
+    return -1;
+}
+
+/* Lock a range into memory through VirtualLock. Returns 0, or -1 with
+ * errno = GetLastError(). */
+int mlock(const void *addr, size_t len) {
+    if (VirtualLock((LPVOID)addr, len))
+        return 0;
+    _set_errno(GetLastError());
+    return -1;
+}
+
+/* Undo mlock() through VirtualUnlock. Returns 0, or -1 with
+ * errno = GetLastError(). */
+int munlock(const void *addr, size_t len) {
+    if (VirtualUnlock((LPVOID)addr, len))
+        return 0;
+    _set_errno(GetLastError());
+    return -1;
+}
+
+/* Positioned read: ReadFile with the offset carried in an OVERLAPPED record.
+ * Returns the number of bytes read (ERROR_HANDLE_EOF is not an error), or -1
+ * with errno set. */
+ssize_t pread(int fd, void *buf, unsigned long count, offset_t offset) {
+    unsigned long len = 0;
+
+    OVERLAPPED overlapped;
+    memset(&overlapped, 0, sizeof(OVERLAPPED));
+    overlapped.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 32);
+    overlapped.Offset = (uint32_t)(offset & 0xFFFFFFFFLL);
+
+    HANDLE file = (HANDLE)_get_osfhandle(fd);
+    if (file == INVALID_HANDLE_VALUE) {
+        _set_errno(EBADF);
+        return -1;
+    }
+    if ((!ReadFile(file, buf, count, &len, &overlapped)) && GetLastError() != ERROR_HANDLE_EOF) {
+        _set_errno(GetLastError());
+        return -1;
+    }
+    return len;
+}
+
+/* Positioned write: WriteFile with the offset carried in an OVERLAPPED
+ * record. Returns the number of bytes written, or -1 with errno set. */
+ssize_t pwrite(int fd, const void *buf, unsigned long count, offset_t offset) {
+    long unsigned int len = 0;
+
+    OVERLAPPED overlapped;
+    memset(&overlapped, 0, sizeof(OVERLAPPED));
+    overlapped.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 32);
+    overlapped.Offset = (uint32_t)(offset & 0xFFFFFFFFLL);
+
+    HANDLE file = (HANDLE)_get_osfhandle(fd);
+    if (file == INVALID_HANDLE_VALUE) {
+        _set_errno(EBADF);
+        return -1;
+    }
+    if (!WriteFile(file, buf, count, &len, &overlapped)) {
+        _set_errno(GetLastError());
+        return -1;
+    }
+    return len;
+}
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/mman.h b/libdap-chain-global-db/libdap-cuttdb/src/mman.h
new file mode 100644
index 0000000000000000000000000000000000000000..7367f3d4cfa4a37ff423e6202cfb4d79d6058c8e
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/mman.h
@@ -0,0 +1,55 @@
+#ifndef _MMAN_H_
+#define _MMAN_H_
+
+#include <_mingw.h>
+#include <stdint.h>
+#include <io.h>
+
+#if defined(_WIN64)
+typedef int64_t offset_t;
+#else
+typedef uint32_t offset_t;
+#endif
+
+#include <sys/types.h>
+#include <stdbool.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PROT_NONE 0
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define PROT_EXEC 4
+
+#define MAP_FILE 0
+#define MAP_SHARED 1
+#define MAP_PRIVATE 2
+#define MAP_TYPE 0xf
+#define MAP_FIXED 0x10
+#define MAP_ANONYMOUS 0x20
+#define MAP_ANON MAP_ANONYMOUS
+
+#define MAP_FAILED ((void *)-1)
+
+#define MS_ASYNC 1
+#define MS_SYNC 2
+#define MS_INVALIDATE 4
+
+#define fdatasync(fd) _commit(fd)
+
+void* mmap(void *addr, size_t len, int prot, int flags, int fildes, offset_t offset);
+int munmap(void *addr, size_t len);
+int _mprotect(void *addr, size_t len, int prot);
+int msync(void *addr, size_t len, int flags);
+int mlock(const void *addr, size_t len);
+int munlock(const void *addr, size_t len);
+
+ssize_t pread(int fd, void *buf, unsigned long count, offset_t offset);
+ssize_t pwrite(int fd, const void *buf, unsigned long count, offset_t offset);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MMAN_H_ */
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/server-thread.c b/libdap-chain-global-db/libdap-cuttdb/src/server-thread.c
new file mode 100644
index 0000000000000000000000000000000000000000..c7a05c30319e63a177178ab43a84bc7b5435fb11
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/server-thread.c
@@ -0,0 +1,217 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * The server&network part of CuttDB is based on Beansdb:
+ *
+ * http://beansdb.googlecode.com
+ *
+ * Beansdb is most based on Memcachedb and Memcached:
+ *
+ * http://memcachedb.org/
+ * http://danga.com/memcached/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+#include "cuttdb-server.h"
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <assert.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+
+#include <pthread.h>
+
+/* Shared event-loop state: the conn registered for each fd, the fds reported
+ * ready by the last poll, how many of those are still unprocessed, and the
+ * multiplexing backend's private data. */
+typedef struct EventLoop {
+//    int maxfd;
+    conn* conns[AE_SETSIZE];
+    int fired[AE_SETSIZE];
+    int nready;
+    void *apidata;
+} EventLoop;
+
+/* Lock for connection freelist */
+static pthread_mutex_t conn_lock;
+
+/* Lock for item buffer freelist */
+static pthread_mutex_t ibuffer_lock;
+
+static EventLoop loop;
+/* Held by the one worker that is currently polling / dequeuing events */
+static pthread_mutex_t leader;
+
+/*
+ * Pulls a conn structure from the freelist, if one is available.
+ */
+conn *mt_conn_from_freelist() {
+    conn *c;
+    pthread_mutex_lock(&conn_lock);
+    c = do_conn_from_freelist();
+    pthread_mutex_unlock(&conn_lock);
+    return c;
+}
+
+/*
+ * Adds a conn structure to the freelist.
+ *
+ * Returns 0 on success, 1 if the structure couldn't be added.
+ */
+bool mt_conn_add_to_freelist(conn *c) {
+    bool result;
+
+    pthread_mutex_lock(&conn_lock);
+    result = do_conn_add_to_freelist(c);
+    pthread_mutex_unlock(&conn_lock);
+
+    return result;
+}
+
+
+/******************************* GLOBAL STATS ******************************/
+
+/* Intentionally empty: the stats counters are not locked in this build. */
+void mt_stats_lock() {
+}
+
+void mt_stats_unlock() {
+}
+
+/* Include the best multiplexing layer supported by this system.
+ * The following should be ordered by performances, descending. */
+#ifdef HAVE_EPOLL
+#include "ae_epoll.c"
+#else
+    #ifdef HAVE_KQUEUE
+    #include "ae_kqueue.c"
+    #else
+    #include "ae_select.c"
+    #endif
+#endif
+
+/*
+ * Initializes the thread subsystem: the three mutexes and the event loop
+ * backend. No thread is started here; loop_run() does that.
+ *
+ * nthreads   currently unused
+ */
+void thread_init(int nthreads) {
+    pthread_mutex_init(&ibuffer_lock, NULL);
+    pthread_mutex_init(&conn_lock, NULL);
+    pthread_mutex_init(&leader, NULL);
+
+    memset(&loop, 0, sizeof(loop));
+    if (aeApiCreate(&loop) == -1) {
+        exit(1);
+    }
+}
+
+/*
+ * Registers conn c for events `mask` on fd. The slot for fd must be empty.
+ * Returns AE_OK, or AE_ERR if fd is out of range or the backend refuses it.
+ */
+int add_event(int fd, int mask, conn *c)
+{
+    if (fd < 0 || fd >= AE_SETSIZE) {
+        fprintf(stderr, "fd is too large: %d\n", fd);
+        return AE_ERR;
+    }
+    assert(loop.conns[fd] == NULL);
+    loop.conns[fd] = c;
+    if (aeApiAddEvent(&loop, fd, mask) == -1){
+        loop.conns[fd] = NULL;
+        return AE_ERR;
+    }
+//    if (fd > loop.maxfd)
+//        loop.maxfd = fd;
+    return AE_OK;
+}
+
+/*
+ * Re-attaches conn c to fd and updates its event mask.
+ * Returns AE_OK, or AE_ERR if fd is out of range or the backend fails; on
+ * backend failure the slot is cleared again.
+ */
+int update_event(int fd, int mask, conn *c)
+{
+    /* same range guard as add_event(): loop.conns[] has AE_SETSIZE slots */
+    if (fd < 0 || fd >= AE_SETSIZE)
+        return AE_ERR;
+    loop.conns[fd] = c;
+    if (aeApiUpdateEvent(&loop, fd, mask) == -1){
+        loop.conns[fd] = NULL;
+        return AE_ERR;
+    }
+    return AE_OK;
+}
+
+/*
+ * Removes fd from the event loop. Returns 0 on success or when nothing was
+ * registered, -1 if fd is out of range or the backend fails.
+ */
+int delete_event(int fd)
+{
+    if (fd < 0 || fd >= AE_SETSIZE) return -1;
+    if (loop.conns[fd] == NULL) return 0;
+    if (aeApiDelEvent(&loop, fd) == -1)
+        return -1;
+    loop.conns[fd] = NULL;
+    return 0;
+}
+
+/*
+ * Worker loop. The thread holding `leader` polls until events are ready,
+ * takes one ready fd, detaches its conn from loop.conns, releases the lock
+ * and then runs drive_machine() on that conn. If the conn still has event
+ * flags afterwards, it is registered again. Ends when daemon_quit is set.
+ */
+static void *worker_main(void *arg) {
+    pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, 0);
+
+    struct timeval tv = {1, 0};
+    while (!daemon_quit) {
+        pthread_mutex_lock(&leader);
+
+AGAIN:
+        while(loop.nready == 0 && daemon_quit == 0) {
+            loop.nready = aeApiPoll(&loop, &tv);
+            /* Should the backend report failure with a negative value, do not
+             * keep it: it would end this loop and index loop.fired[] out of
+             * bounds below. */
+            if (loop.nready < 0)
+                loop.nready = 0;
+        }
+        if (daemon_quit) {
+            pthread_mutex_unlock(&leader);
+            break;
+        }
+
+        loop.nready --;
+        int fd = loop.fired[loop.nready];
+        conn *c = loop.conns[fd];
+        if (c == NULL){
+            fprintf(stderr, "Bug: conn %d should not be NULL\n", fd);
+            close(fd);
+            goto AGAIN;
+        }
+        loop.conns[fd] = NULL;
+        pthread_mutex_unlock(&leader);
+
+        drive_machine(c);
+        if (c->ev_flags > 0) {
+            update_event(fd, c->ev_flags, c);
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Runs the event loop on `nthread` threads: nthread - 1 are spawned and the
+ * calling thread acts as the last worker. Returns once every worker has
+ * stopped. Exits the process if a thread cannot be created.
+ */
+void loop_run(int nthread)
+{
+    int i, ret;
+    int nworkers = nthread > 1 ? nthread - 1 : 0;
+    pthread_attr_t attr;
+    pthread_t* tids = NULL;
+
+    pthread_attr_init(&attr);
+    if (nworkers > 0) {
+        tids = malloc(sizeof(*tids) * nworkers);
+        if (tids == NULL) {
+            fprintf(stderr, "Can't allocate thread table\n");
+            exit(1);
+        }
+    }
+
+    for (i=0; i<nworkers; i++) {
+        if ((ret = pthread_create(tids + i, &attr, worker_main, NULL)) != 0) {
+            fprintf(stderr, "Can't create thread: %s\n",
+                    strerror(ret));
+            exit(1);
+        }
+    }
+    /* the attribute object is only needed while creating threads */
+    pthread_attr_destroy(&attr);
+
+    worker_main(NULL);
+
+    // wait workers to stop
+    for (i=0; i<nworkers; i++) {
+        (void) pthread_join(tids[i], NULL);
+    }
+    free(tids);
+}
+
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/test_mt.c b/libdap-chain-global-db/libdap-cuttdb/src/test_mt.c
new file mode 100644
index 0000000000000000000000000000000000000000..de4d383731a4a66bc2e690f40dfc742ec022073e
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/test_mt.c
@@ -0,0 +1,149 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
+ * Use and distribution licensed under the BSD license.
+ * See the LICENSE file for full text
+ *
+ * Author: Siyuan Fu <fusiyuan2010@gmail.com>
+ *
+ */
+
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <pthread.h>
+#include "cuttdb.h"
+
+
+CDB *db;
+
+enum {
+    SETOP,
+    GETOP,
+    DELOP,
+};
+
+/* Operation mixes: a worker picks one of the 8 slots at random, so the count
+ * of each op in a table is its probability in eighths. */
+#if 1
+static int prob_table1[8] = {SETOP, SETOP, SETOP, SETOP, SETOP, SETOP, DELOP, GETOP};
+static int prob_table2[8] = {SETOP, SETOP, SETOP, SETOP, SETOP, DELOP, DELOP, GETOP};
+static int prob_table3[8] = {SETOP, SETOP, SETOP, DELOP, DELOP, DELOP, DELOP, GETOP};
+#else
+static int prob_table1[8] = {SETOP, SETOP, SETOP, SETOP, SETOP, SETOP, SETOP, GETOP};
+static int prob_table2[8] = {SETOP, SETOP, SETOP, SETOP, SETOP, SETOP, SETOP, GETOP};
+static int prob_table3[8] = {SETOP, SETOP, SETOP, SETOP, SETOP, SETOP, SETOP, GETOP};
+#endif
+/* table in use; switched by main() and read by the workers without locking */
+int *optable = NULL;
+
+
+/* Combine two rand() draws into one wider, non-negative value. The
+ * arithmetic is unsigned so it cannot overflow a signed long where long is
+ * 32 bits wide. */
+long get_rand()
+{
+    unsigned long wide = (unsigned long)rand() * ((unsigned long)RAND_MAX + 1UL)
+                       + (unsigned long)rand();
+    return (long)(wide & (unsigned long)LONG_MAX);
+}
+
+
+/*
+ * Worker: forever picks a random key below *arg and applies a random
+ * set/get/delete taken from the current optable. Never returns.
+ */
+void *test_thread(void *arg)
+{
+    char key[64];
+    char value[128];
+    void *v;
+    int knum = *(int*)arg;
+    while(1) {
+        int krand = get_rand() % knum;
+        /* krand is an int: the conversions must be %d, one per argument */
+        int ksize = snprintf(key, 64, "%d%d%d", krand, krand, krand);
+        int vsize = snprintf(value, 128, "%d%d%d%d%d%d%d%d",
+                krand, krand, krand, krand, krand, krand, krand, krand);
+        int op = optable[rand() & 0x07];
+        int expire = 600 + 20 * (rand() % 1000);
+        switch(op) {
+            case SETOP:
+                if (cdb_set2(db, key, ksize, value, vsize, CDB_OVERWRITE | CDB_INSERTCACHE, expire) < 0)
+                    printf("ERROR! %s:%d\n", __FILE__, __LINE__);
+                break;
+            case GETOP:
+                /* start from NULL so a failed lookup never frees garbage */
+                v = NULL;
+                if (cdb_get(db, key, ksize, &v, &vsize) == -1)
+                    printf("ERROR! %s:%d\n", __FILE__, __LINE__);
+                if (v)
+                    cdb_free_val(&v);
+                break;
+            case DELOP:
+                if (cdb_del(db, key, ksize) == -1)
+                    printf("ERROR! %s:%d\n", __FILE__, __LINE__);
+                break;
+            default:
+                break;
+        }
+    }
+}
+
+
+
+/*
+ * Stress driver: opens (and truncates) the database at argv[1], starts
+ * thread_num workers over record_num keys, then prints the statistics once a
+ * second, resetting them every 20 rounds and changing the operation mix as
+ * the record count crosses fixed fractions of record_num. Runs until killed.
+ */
+int main(int argc, char *argv[])
+{
+    int thread_num = 2;
+    int record_num = 10000000;
+    char *db_path = NULL;
+    printf("Usage: %s db_path [record_num] [thread_num]\n", argv[0]);
+    if (argc >= 2)
+        db_path = argv[1];
+    else
+        return -1;
+
+    if (argc >= 3)
+        record_num = atoi(argv[2]);
+    if (argc >= 4)
+        thread_num = atoi(argv[3]);
+
+    record_num = record_num < 100? 100: record_num;
+    thread_num = thread_num < 1? 1: thread_num;
+    srand(time(NULL));
+
+    db = cdb_new();
+    if (db == NULL) {
+        printf("DB Open err\n");
+        return -1;
+    }
+    cdb_option(db, record_num / 100, 0, 1024000);
+    if (cdb_open(db, db_path, CDB_CREAT | CDB_TRUNC) < 0) {
+        printf("DB Open err\n");
+        return -1;
+    }
+
+
+    optable = prob_table1;
+    pthread_t threads[thread_num];
+    for(int i = 0; i < thread_num; i++) {
+        if (pthread_create(&threads[i], NULL, test_thread, &record_num) != 0) {
+            printf("ERROR! %s:%d\n", __FILE__, __LINE__);
+            return -1;
+        }
+    }
+
+    int clear_interval = 0;
+    while(1) {
+        CDBSTAT st;
+        cdb_stat(db, &st);
+        /* the counters are uint64_t: print them as unsigned long long */
+        printf("rnum: %llu, rcnum: %llu, pnum: %llu, pcnum %llu, rlatcy: %u wlatcy: %u"
+                " rh/m: %llu/%llu ph/m: %llu/%llu\n",
+                (unsigned long long)st.rnum, (unsigned long long)st.rcnum,
+                (unsigned long long)st.pnum, (unsigned long long)st.pcnum,
+                st.rlatcy, st.wlatcy,
+                (unsigned long long)st.rchit, (unsigned long long)st.rcmiss,
+                (unsigned long long)st.pchit, (unsigned long long)st.pcmiss);
+        if (++clear_interval % 20 == 0)
+            cdb_stat(db, NULL);
+
+        if (st.rnum > 0.7 * record_num)
+            optable = prob_table2;
+        if (st.rnum > 0.9 * record_num)
+            optable = prob_table3;
+
+        if (st.rnum < 0.8 * record_num)
+            optable = prob_table2;
+
+        if (st.rnum < 0.6 * record_num)
+            optable = prob_table1;
+        fflush(stdout);
+        sleep(1);
+    }
+
+    return 0;
+}
+
+
+
diff --git a/libdap-chain-global-db/libdap-cuttdb/src/vio_apnd2.c b/libdap-chain-global-db/libdap-cuttdb/src/vio_apnd2.c
new file mode 100644
index 0000000000000000000000000000000000000000..3f093a6fb55150cc0c7ac927f042a9cf0fc404aa
--- /dev/null
+++ b/libdap-chain-global-db/libdap-cuttdb/src/vio_apnd2.c
@@ -0,0 +1,2647 @@
+/*
+ * CuttDB - a fast key-value storage engine
+ *
+ *
+ * http://code.google.com/p/cuttdb/
+ *
+ * Copyright (c) 2012, Siyuan Fu. All rights reserved.
/* record magic bytes */
#define RECMAGIC 0x19871022
/* obsoleted, but appeared in some code */
#define DELRECMAGIC 0x19871023
#define PAGEMAGIC 0x19890604

/* data buffered before pwrite to disk */
#define IOBUFSIZE (2 * MB)
/* structure of deletion buffer differs from the others, buffered DELBUFMAX records at most */
#define DELBUFMAX 10000

/* index(page) file size limit */
#define FIDXMAXSIZE (16 * MB)
/* data file size limit */
#define FDATMAXSIZE (128 * MB)
/* all meta information is regulated to a fixed size */
#define FILEMETASIZE 64
/* limit of simultaneously opened files, managed by LRU */
#define MAXFD 16384
#define MAX_PATH_LEN 255

#define FILEMAGICHEADER "CuTtDbFiLePaRtIaL"
/* length of the magic string without its terminator; a compile-time constant */
#define FILEMAGICLEN (sizeof(FILEMAGICHEADER) - 1)
/* page or data records are stored at aligned offset */
#define ALIGNBYTES 16

/*
 * virtual offset(48bits) transform into real offset(fid,offset):
 * the upper 24 bits of (off).i4 are the file id, the low 8 bits of (off).i4
 * together with (off).i2 form the offset in units of ALIGNBYTES.
 * `fid` and `roff` are output lvalues.
 */
#define VOFF2ROFF(off, fid, roff) do { \
    (fid) = (off).i4 >> 8; \
    (roff) = ((off).i4 & 0xff) << 16; \
    (roff) = ((roff) | (off).i2) * ALIGNBYTES; \
} while (0)

/* real offset transform into virtual offset; `off` is the output lvalue */
#define ROFF2VOFF(fid, roff, off) do { \
    (off).i4 = (fid) << 8; \
    (off).i4 |= ((roff) / ALIGNBYTES) >> 16; \
    (off).i2 = ((roff) / ALIGNBYTES) & 0xffff; \
} while (0)

/* round an offset up to the next multiple of ALIGNBYTES */
#define OFFALIGNED(off) ((((off)-1) | (ALIGNBYTES - 1)) + 1)
/*
 * Keys used in the fd LRU cache: index and data files share one cache, so
 * a file id is mapped to an even key for index files and an odd key for
 * data files. The argument is parenthesized so expressions expand safely.
 */
#define VFIDIDX(fid) ((fid) * 2)
#define VFIDDAT(fid) ((fid) * 2 + 1)

/* how often write out buffered data */
#define FLUSHTIMEOUT 5
/* how often to check if index file needs space recycle */
#define RCYLEPAGEINTERVAL 60
/* how often to check if data file needs space recycle */
#define RCYLEDATAINTERVAL 120
/* data file space recycle check interval factor (seconds per data file/128MB)*/
#define DATARCYLECHECKFACTOR 1800
/*
 * Iterator state shared by the index(page) and data(record) iterators.
 * One instance tracks the position inside a single file plus the file's
 * meta entry.
 */
typedef struct {
    /* descriptor of the file currently being iterated */
    int fd;
    /* current offset inside that file */
    uint32_t off;
    /* current operation id */
    uint64_t oid;
    /* size of the current file */
    uint64_t fsize;
    /* mapping of the current file (presumably obtained via mmap(); confirm in the iterator code) */
    char *mmap;
    /* points at the file's VIOAPND2FINFO meta entry */
    VIOAPND2FINFO *finfo;
} VIOAPND2ITOR;
FOFF off, bool readval); +static int _vio_apnd2_writepage(CDBVIO *vio, CDBPAGE *page, FOFF *off); +static int _vio_apnd2_readpage(CDBVIO *vio, CDBPAGE **page, FOFF off); +static int _vio_apnd2_sync(CDBVIO *vio); +static int _vio_apnd2_writehead2(CDBVIO *vio); +static int _vio_apnd2_writehead(CDBVIO *vio, bool wtable); +static int _vio_apnd2_readhead2(CDBVIO *vio); +static int _vio_apnd2_readhead(CDBVIO *vio, bool rtable); +static int _vio_apnd2_writefmeta(CDBVIO *vio, int fd, VIOAPND2FINFO *finfo); +static int _vio_apnd2_readfmeta(CDBVIO *vio, int fd, VIOAPND2FINFO *finfo); +static int _vio_apnd2_flushbuf(CDBVIO *vio, int dtype); +static void _vio_apnd2_flushtask(void *arg); +static void _vio_apnd2_rcyledataspacetask(void *arg); +static void _vio_apnd2_fixcachepageooff(CDB *db, uint32_t bit, FOFF off); +static void _vio_apnd2_rcylepagespacetask(void *arg); +static int _vio_apnd2_shiftnew(CDBVIO *vio, int dtype); +static int _vio_apnd2_recovery(CDBVIO *vio, bool force); +static void _vio_apnd2_unlink(CDBVIO *vio, VIOAPND2FINFO *finfo, int dtype); +static VIOAPND2FINFO* _vio_apnd2_fileiternext(CDBVIO *vio, int dtype, uint64_t oid); +static int _vio_apnd2_iterfirst(CDBVIO *vio, VIOAPND2ITOR *it, int dtype, int64_t oid); +static int _vio_apnd2_iterfree(CDBVIO *vio, int dtype, VIOAPND2ITOR *it); +static int _vio_apnd2_pageiternext(CDBVIO *vio, CDBPAGE **page, void *iter); +static int _vio_apnd2_reciternext(CDBVIO *vio, CDBREC **rec, void *iter); +static void* _vio_apnd2_reciterfirst(CDBVIO *vio, uint64_t oid); +static void* _vio_apnd2_pageiterfirst(CDBVIO *vio, uint64_t oid); +static void _vio_apnd2_reciterdestory(CDBVIO *vio, void *iter); +static void _vio_apnd2_pageiterdestory(CDBVIO *vio, void *iter); +static void _vio_apnd2_cleanpoint(CDBVIO *vio); +static int _vio_apnd2_cmpfuncsreorder(const void *p1, const void *p2); +static int _vio_apnd2_checkopensig(CDBVIO *vio); +static int _vio_apnd2_setopensig(CDBVIO *vio, int sig); +static int 
/*
 * Identity hash for the VIOAPND2 tables: the key already is a 32-bit id, so
 * its value is used directly instead of being rehashed.
 *
 * key  - points at (at least) 4 bytes holding the id
 * size - unused; part of the hash callback signature
 *
 * The bytes are copied out with memcpy so the key need not be aligned and
 * its const qualifier is not cast away.
 */
static uint32_t _directhash(const void *key, int size)
{
    uint32_t id;

    (void)size;
    memcpy(&id, key, sizeof id);
    return id;
}
NULL; + + vio->iometa = myio; +} + + +/* free a VIOAPND2 object, called when close db */ +static void _vio_apnd2_destroy(CDBVIO *vio) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + cdb_ht_destroy(myio->fdcache); + cdb_ht_destroy(myio->idxmeta); + cdb_ht_destroy(myio->datmeta); + cdb_lock_destory(myio->lock); + if (myio->filepath) + free(myio->filepath); + free(myio); + vio->iometa = NULL; +} + +/* check if another process has already open the current db */ +static int _vio_apnd2_checkpid(CDBVIO *vio) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + struct stat st; + char filename[MAX_PATH_LEN] = {0}; + char syspidpath[MAX_PATH_LEN] = {0}; + snprintf(filename, MAX_PATH_LEN, "%s/pid.cdb", myio->filepath); + + if (stat(filename, &st) == 0) { + /* pid file exist */ + FILE *f = fopen(filename, "rt"); + int pid = -1; + if (f == NULL) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + return -1; + } + + int ret = fscanf(f, "%d", &pid); + fclose(f); + if (ret != 1) { + cdb_seterrno(vio->db, CDB_PIDEXIST, __FILE__, __LINE__); + return -1; + } + + /* check if the process still alive */ + snprintf(syspidpath, MAX_PATH_LEN, "/proc/%d", pid); + if (stat(syspidpath, &st) == 0) { + cdb_seterrno(vio->db, CDB_PIDEXIST, __FILE__, __LINE__); + return -1; + } + } + + /* pid file non-exist or obsoleted */ + FILE *f = fopen(filename, "wt"); + if (f == NULL) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + return -1; + } + fprintf(f, "%d\n", getpid()); + fclose(f); + return 0; +} + +/* open an db by path and mode */ +static int _vio_apnd2_open(CDBVIO *vio, const char *filepath, int flags) +{ + int rflags = O_RDWR; + char filename[MAX_PATH_LEN] = {0}; + int fsize; + int sigstatus; + VIOAPND2 *myio; + + _vio_apnd2_new(vio); + myio = (VIOAPND2 *)vio->iometa; + myio->filepath = strdup(filepath); + + if (flags & CDB_CREAT) + rflags |= O_CREAT; + if (flags & CDB_TRUNC) + rflags |= O_TRUNC; + + if (_vio_apnd2_checkpid(vio) < 0) { + goto ERRRET; + } + + 
snprintf(filename, MAX_PATH_LEN, "%s/mainindex.cdb", myio->filepath); + myio->hfd = open(filename, rflags, 0644); + if (myio->hfd < 0 && errno == ENOENT && (rflags & O_CREAT)) { + /* try to create, but path not exists */ + cdb_seterrno(vio->db, CDB_DIRNOEXIST, __FILE__, __LINE__); + goto ERRRET; + } else if (myio->hfd < 0) { + /* other open error */ + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + goto ERRRET; + } + + fsize = lseek(myio->hfd, 0, SEEK_END); + if (fsize) { + myio->create = false; + sigstatus = _vio_apnd2_checkopensig(vio); + if (sigstatus < 0) { + /* main table read error */ + cdb_seterrno(vio->db, CDB_READERR, __FILE__, __LINE__); + goto ERRRET; + } + } else { + sigstatus = VIOAPND2_SIGCLOSED; + } + + /* */ + struct stat st; + snprintf(filename, MAX_PATH_LEN, "%s/force_recovery", myio->filepath); + if (stat(filename, &st) == 0) { + /* special file exist, force recovery to fix the database */ + _vio_apnd2_recovery(vio, true); + unlink(filename); + } else if (sigstatus == VIOAPND2_SIGOPEN) { + /* didn't properly closed last time */ + _vio_apnd2_recovery(vio, false); + } else if (sigstatus != VIOAPND2_SIGCLOSED) { + cdb_seterrno(vio->db, CDB_DATAERRMETA, __FILE__, __LINE__); + goto ERRRET; + } + + if (_vio_apnd2_setopensig(vio, VIOAPND2_SIGOPEN) < 0) { + cdb_seterrno(vio->db, CDB_WRITEERR, __FILE__, __LINE__); + goto ERRRET; + } + + snprintf(filename, MAX_PATH_LEN, "%s/mainmeta.cdb", myio->filepath); + myio->mfd = open(filename, rflags, 0644); + if (myio->mfd < 0) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + goto ERRRET; + } + + fsize = lseek(myio->mfd, 0, SEEK_END); + if (fsize) { + /* exist database */ + _vio_apnd2_readmeta(vio, false); + + /* open current data file and index file for buffer */ + snprintf(filename, MAX_PATH_LEN, "%s/idx%08d.cdb", myio->filepath, myio->ibuf.fid); + myio->ibuf.fd = open(filename, rflags, 0644); + myio->ibuf.limit = CDBMIN(IOBUFSIZE, FIDXMAXSIZE - myio->ibuf.off); + myio->ibuf.pos = 0; + + 
snprintf(filename, MAX_PATH_LEN, "%s/dat%08d.cdb", myio->filepath, myio->dbuf.fid); + myio->dbuf.fd = open(filename, rflags, 0644); + myio->dbuf.limit = CDBMIN(IOBUFSIZE, FDATMAXSIZE - myio->dbuf.off); + myio->dbuf.pos = 0; + } else { + /* new database */ + myio->create = true; + /* remember the bnum */ + _vio_apnd2_writehead(vio, false); + _vio_apnd2_shiftnew(vio, VIOAPND2_INDEX); + _vio_apnd2_shiftnew(vio, VIOAPND2_DATA); + } + + snprintf(filename, MAX_PATH_LEN, "%s/dellog.cdb", myio->filepath); + myio->dfd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0644); + if (myio->dfd < 0) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + goto ERRRET; + } + + /* set background tasks, flush buffer and recycle space */ + cdb_bgtask_add(vio->db->bgtask, _vio_apnd2_flushtask, vio, FLUSHTIMEOUT); + cdb_bgtask_add(vio->db->bgtask, _vio_apnd2_rcylepagespacetask, vio, RCYLEPAGEINTERVAL); + cdb_bgtask_add(vio->db->bgtask, _vio_apnd2_rcyledataspacetask, vio, RCYLEDATAINTERVAL); + return 0; + +ERRRET: + if (myio->mfd > 0) + close(myio->mfd); + if (myio->hfd > 0) + close(myio->hfd); + if (myio->dfd > 0) + close(myio->dfd); + _vio_apnd2_destroy(vio); + return -1; +} + + +/* task for flush buffer */ +static void _vio_apnd2_flushtask(void *arg) +{ + CDBVIO *vio = (CDBVIO *)arg; + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + cdb_lock_lock(myio->lock); + _vio_apnd2_flushbuf(vio, VIOAPND2_DATA); + _vio_apnd2_flushbuf(vio, VIOAPND2_INDEX); + _vio_apnd2_flushbuf(vio, VIOAPND2_DELLOG); + cdb_lock_unlock(myio->lock); +} + + +/* read information for db files, 'overwrite' indicates recovery */ +static int _vio_apnd2_readmeta(CDBVIO *vio, bool overwrite) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + char buf[FILEMETASIZE]; + char *hbuf; + int hbufsize; + int pos = 0; + + if (pread(myio->mfd, buf, FILEMETASIZE, 0) != FILEMETASIZE) { + if (overwrite) + return 0; + cdb_seterrno(vio->db, CDB_READERR, __FILE__, __LINE__); + return -1; + } + + if (memcmp(buf, FILEMAGICHEADER, FILEMAGICLEN) 
!= 0) { + cdb_seterrno(vio->db, CDB_DATAERRMETA, __FILE__, __LINE__); + return -1; + } + + pos += FILEMAGICLEN; + cdb_lock_lock(myio->lock); + if (!overwrite) + myio->ibuf.off = *(uint32_t*)(buf + pos); + pos += SI4; + myio->ibuf.limit = *(uint32_t*)(buf + pos); + pos += SI4; + if (!overwrite) + myio->dbuf.off = *(uint32_t*)(buf + pos); + pos += SI4; + myio->dbuf.limit = *(uint32_t*)(buf + pos); + pos += SI4; + if (!overwrite) + myio->ifnum = *(uint32_t*)(buf + pos); + pos += SI4; + if (!overwrite) + myio->dfnum = *(uint32_t*)(buf + pos); + pos += SI4; + if (!overwrite) + myio->ibuf.fid = *(uint32_t*)(buf + pos); + pos += SI4; + if (!overwrite) + myio->dbuf.fid = *(uint32_t*)(buf + pos); + pos += SI4; + + hbufsize = (SI4 + SI4 + SI4 + SI8 + SI8 + 1 + 1) * myio->ifnum; + hbufsize += (SI4 + SI4 + SI4 + SI4 + SI8 + SI8 + 1 + 1) * myio->dfnum; + hbuf = (char*)malloc(hbufsize); + pos = 0; + + if (pread(myio->mfd, hbuf, hbufsize, FILEMETASIZE) != hbufsize) { + cdb_lock_unlock(myio->lock); + free(hbuf); + if (overwrite) + return 0; + cdb_seterrno(vio->db, CDB_READERR, __FILE__, __LINE__); + return -1; + } + + for(int i = 0; i < myio->ifnum; i++) { + VIOAPND2FINFO finfo, *finfo2; + finfo.fid = *(uint32_t*)(hbuf + pos); + pos += SI4; + finfo.fsize = *(uint32_t*)(hbuf + pos); + pos += SI4; + finfo.rcyled = *(uint32_t*)(hbuf + pos); + pos += SI4;; + finfo.oidf = *(uint64_t*)(hbuf + pos); + pos += SI8; + finfo.oidl = *(uint64_t*)(hbuf + pos); + pos += SI8; + finfo.fstatus = *(uint8_t*)(hbuf + pos); + pos += 1; + finfo.ftype = *(uint8_t*)(hbuf + pos); + pos += 1; + finfo.ref = 0; + finfo.unlink = false; + if (overwrite) { + /* in recovery mode only fix 'recycled size' */ + /* But do nothing with index files */ + continue; + } + finfo2 = (VIOAPND2FINFO *)cdb_ht_insert2(myio->idxmeta, &finfo.fid, SI4, &finfo, sizeof(finfo)); + if (myio->idxfhead) { + finfo2->fprev = myio->idxftail; + myio->idxftail->fnext = finfo2; + finfo2->fnext = NULL; + myio->idxftail = finfo2; + } else { + 
myio->idxfhead = myio->idxftail = finfo2; + finfo2->fprev = finfo2->fnext = NULL; + } + } + + for(int i = 0; i < myio->dfnum; i++) { + VIOAPND2FINFO finfo, *finfo2; + finfo.fid = *(uint32_t*)(hbuf + pos); + pos += SI4; + finfo.fsize = *(uint32_t*)(hbuf + pos); + pos += SI4; + finfo.rcyled = *(uint32_t*)(hbuf + pos); + pos += SI4; + finfo.nexpire = *(uint32_t*)(hbuf + pos); + pos += SI4; + finfo.oidf = *(uint64_t*)(hbuf + pos); + pos += SI8; + finfo.oidl = *(uint64_t*)(hbuf + pos); + pos += SI8; + finfo.fstatus = *(uint8_t*)(hbuf + pos); + pos += 1; + finfo.ftype = *(uint8_t*)(hbuf + pos); + pos += 1; + finfo.ref = 0; + finfo.unlink = false; + finfo.lcktime = time(NULL); + if (overwrite) { + /* in recovery mode only fix 'recycled size' */ + finfo2 = (VIOAPND2FINFO *)cdb_ht_get2(myio->datmeta, &finfo.fid, SI4, false); + if (finfo2) { + finfo2->rcyled = finfo.rcyled; + finfo2->nexpire = finfo.nexpire; + } + continue; + } + finfo2 = (VIOAPND2FINFO *)cdb_ht_insert2(myio->datmeta, &finfo.fid, SI4, &finfo, sizeof(finfo)); + if (myio->datfhead) { + finfo2->fprev = myio->datftail; + myio->datftail->fnext = finfo2; + finfo2->fnext = NULL; + myio->datftail = finfo2; + } else { + myio->datfhead = myio->datftail = finfo2; + finfo2->fprev = finfo2->fnext = NULL; + } + } + cdb_lock_unlock(myio->lock); + free(hbuf); + + return 0; +} + + +/* flush i/o buffer */ +static int _vio_apnd2_flushbuf(CDBVIO *vio, int dtype) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + VIOAPND2FINFO *finfo; + VIOAPND2IOBUF *iobuf; + CDBHASHTABLE *ht; + uint32_t *fid; + uint32_t fsizemax; + + /* link to the proper operation object */ + if (dtype == VIOAPND2_INDEX) { + iobuf = &myio->ibuf; + ht = myio->idxmeta; + fsizemax = FIDXMAXSIZE; + } else if (dtype == VIOAPND2_DATA) { + iobuf = &myio->dbuf; + ht = myio->datmeta; + fsizemax = FDATMAXSIZE; + } else if (dtype == VIOAPND2_DELLOG) { + /* buffer for deletion is special */ + if (myio->delbufpos == 0) + return 0; + if (write(myio->dfd, myio->delbuf, 
sizeof(FOFF) * myio->delbufpos) + != sizeof(FOFF) * myio->delbufpos) { + cdb_seterrno(vio->db, CDB_WRITEERR, __FILE__, __LINE__); + return -1; + } + myio->delbufpos = 0; + return 0; + } else { + cdb_seterrno(vio->db, CDB_INTERNALERR, __FILE__, __LINE__); + return -1; + } + fid = &iobuf->fid; + + /* get information from table */ + finfo = (VIOAPND2FINFO *)cdb_ht_get2(ht, fid, SI4, false); + if (finfo == NULL) { + cdb_seterrno(vio->db, CDB_INTERNALERR, __FILE__, __LINE__); + return -1; + } + + /* write out if buffered */ + if (iobuf->pos > 0) { + if (pwrite(iobuf->fd, iobuf->buf, iobuf->pos, iobuf->off) != iobuf->pos) { + /* to avoid compile warning */ + if (ftruncate(iobuf->fd, iobuf->off) < 0) ; + cdb_seterrno(vio->db, CDB_WRITEERR, __FILE__, __LINE__); + return -1; + } + } + + /* mark the operation id */ + finfo->oidl = iobuf->oid; + + /* reset the buffer information */ + iobuf->pos = 0; + iobuf->off = lseek(iobuf->fd, 0, SEEK_END); + /* fix file size info whenever possible */ + finfo->fsize = iobuf->off; + iobuf->off = OFFALIGNED(iobuf->off); + + /* current writing file nearly full? 
open a new one */ + if (iobuf->off > fsizemax - 16 * KB) { + finfo->fstatus = VIOAPND2_FULL; + _vio_apnd2_writefmeta(vio, iobuf->fd, finfo); + close(iobuf->fd); + _vio_apnd2_shiftnew(vio, dtype); + } else + iobuf->limit = CDBMIN(IOBUFSIZE, fsizemax - iobuf->off) ; + + return 0; +} + +/* create a new file for buffer and writing */ +static int _vio_apnd2_shiftnew(CDBVIO *vio, int dtype) +{ + VIOAPND2 *myio = (VIOAPND2*)vio->iometa; + VIOAPND2IOBUF *iobuf; + CDBHASHTABLE *ht; + uint32_t *fnum; + uint32_t tryiter, curfid; + char filename[MAX_PATH_LEN]; + char ipfx[] = "idx"; + char dpfx[] = "dat"; + char *pfx; + + /* link to proper object by dtype */ + if (dtype == VIOAPND2_INDEX) { + iobuf = &myio->ibuf; + ht = myio->idxmeta; + fnum = &myio->ifnum; + pfx = ipfx; + } else if (dtype == VIOAPND2_DATA) { + iobuf = &myio->dbuf; + ht = myio->datmeta; + fnum = &myio->dfnum; + pfx = dpfx; + } else { + cdb_seterrno(vio->db, CDB_INTERNALERR, __FILE__, __LINE__); + return -1; + } + + curfid = iobuf->fid; + + /* reset invalid buffer, prevent for misuse */ + iobuf->fd = -1; + iobuf->fid = 0xffffff; + iobuf->limit = iobuf->pos = iobuf->off = 0xffffffff; + + /* find a valid fid, try 16M times at most */ + tryiter = 0; + while(cdb_ht_exist(ht, &curfid, SI4)) { + curfid++; + tryiter++; + if (tryiter == 0xffffff) { + cdb_seterrno(vio->db, CDB_NOFID, __FILE__, __LINE__); + return -1; + } + if (curfid == 0xffffff) + curfid = 0; + } + + /* open new file */ + snprintf(filename, MAX_PATH_LEN, "%s/%s%08d.cdb", myio->filepath, pfx, curfid); + iobuf->fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0644); + if (iobuf->fd < 0) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + return -1; + } + iobuf->limit = IOBUFSIZE; + iobuf->fid = curfid; + iobuf->off = FILEMETASIZE; + iobuf->pos = 0; + + /* set meta information for new file */ + VIOAPND2FINFO finfo, *finfo2; + finfo.fsize = lseek(iobuf->fd, 0, SEEK_END); + finfo.oidf = iobuf->oid; + finfo.oidl = iobuf->oid; + finfo.rcyled = 0; + 
finfo.lcktime = time(NULL); + finfo.fstatus = VIOAPND2_WRITING; + finfo.ftype = dtype; + finfo.fid = curfid; + finfo.unlink = false; + finfo.nexpire = 0xffffffff; + finfo.ref = 0; + /* meta information also be written to disk immediately */ + if (_vio_apnd2_writefmeta(vio, iobuf->fd, &finfo) < 0) { + close(iobuf->fd); + iobuf->fd = -1; + iobuf->fid = 0xffffff; + iobuf->limit = iobuf->pos = iobuf->off = 0xffffffff; + return -1; + } + (*fnum)++; + finfo2 = cdb_ht_insert2(ht, &curfid, SI4, &finfo, sizeof(VIOAPND2FINFO)); + if (dtype == VIOAPND2_INDEX) { + if (myio->idxfhead) { + finfo2->fprev = myio->idxftail; + myio->idxftail->fnext = finfo2; + finfo2->fnext = NULL; + myio->idxftail = finfo2; + } else { + myio->idxfhead = myio->idxftail = finfo2; + finfo2->fprev = finfo2->fnext = NULL; + } + } else if (dtype == VIOAPND2_DATA) { + if (myio->datfhead) { + finfo2->fprev = myio->datftail; + myio->datftail->fnext = finfo2; + finfo2->fnext = NULL; + myio->datftail = finfo2; + } else { + myio->datfhead = myio->datftail = finfo2; + finfo2->fprev = finfo2->fnext = NULL; + } + } + + return 0; +} + + +/* write a single file's meta information */ +static int _vio_apnd2_writefmeta(CDBVIO *vio, int fd, VIOAPND2FINFO *finfo) +{ + char buf[FILEMETASIZE]; + int pos = 0; + + memset(buf, 'X', FILEMETASIZE); + memcpy(buf, FILEMAGICHEADER, FILEMAGICLEN); + pos += FILEMAGICLEN; + *(uint64_t*)(buf + pos) = finfo->oidf; + pos += SI8; + *(uint64_t*)(buf + pos) = finfo->oidl; + pos += SI8; + *(uint32_t*)(buf + pos) = finfo->fsize; + pos += SI4; + *(uint32_t*)(buf + pos) = finfo->fid; + pos += SI4; + *(uint8_t*)(buf + pos) = finfo->fstatus; + pos++; + *(uint8_t*)(buf + pos) = finfo->ftype; + pos++; + + if (pwrite(fd, buf, FILEMETASIZE, 0) != FILEMETASIZE) { + cdb_seterrno(vio->db, CDB_WRITEERR, __FILE__, __LINE__); + return -1; + } + return 0; +} + +/* read a single file's meta information */ +static int _vio_apnd2_readfmeta(CDBVIO *vio, int fd, VIOAPND2FINFO *finfo) +{ + char 
buf[FILEMETASIZE]; + int pos = 0; + + memset(buf, 'X', FILEMETASIZE); + if (pread(fd, buf, FILEMETASIZE, 0) != FILEMETASIZE) { + cdb_seterrno(vio->db, CDB_READERR, __FILE__, __LINE__); + return -1; + } + + if (memcmp(buf, FILEMAGICHEADER, FILEMAGICLEN)) { + cdb_seterrno(vio->db, CDB_DATAERRMETA, __FILE__, __LINE__); + return -1; + } + + pos += FILEMAGICLEN; + finfo->oidf = *(uint64_t*)(buf + pos); + pos += SI8; + finfo->oidl = *(uint64_t*)(buf + pos); + pos += SI8; + finfo->fsize = *(uint32_t*)(buf + pos); + pos += SI4; + finfo->fid = *(uint32_t*)(buf + pos); + pos += SI4; + finfo->fstatus = *(uint8_t*)(buf + pos); + pos++; + finfo->ftype = *(uint8_t*)(buf + pos); + pos++; + return 0; +} + + +/* write to disk directly instead of using buffer(Only Appends) */ +static int _vio_apnd2_write(CDBVIO *vio, int fd, void *buf, uint32_t size, bool aligned) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + uint32_t off; + + if (size == 0) + return 0; + + off = lseek(fd, 0, SEEK_END); + if (aligned) + off = OFFALIGNED(off); + if (pwrite(fd, buf, size, off) != size) { + /* to avoid compile warning */ + if (ftruncate(myio->ibuf.fd, off) < 0) ; + cdb_seterrno(vio->db, CDB_WRITEERR, __FILE__, __LINE__); + return -1; + } + + return size; +} + + +/* read from disk; if data has not been written, read from buffer */ +static int _vio_apnd2_read(CDBVIO *vio, int fd, void *buf, uint32_t size, uint64_t off) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + int ret; + + /* in buffer? 
*/ + if (fd == myio->dbuf.fd && off >= myio->dbuf.off) { + uint64_t boff = off - myio->dbuf.off; + ret = CDBMIN(size, myio->dbuf.pos - boff); + memcpy(buf, myio->dbuf.buf + boff, ret); + } else if (fd == myio->ibuf.fd && off >= myio->ibuf.off) { + uint64_t boff = off - myio->ibuf.off; + ret = CDBMIN(size, myio->ibuf.pos - boff); + memcpy(buf, myio->ibuf.buf + boff, ret); + } else { + /* not in buffer */ + ret = pread(fd, buf, size, off); + if (ret < 0) { + cdb_seterrno(vio->db, CDB_READERR, __FILE__, __LINE__); + return -1; + } + } + return ret; +} + + +/* write all files meta information into a file */ +static int _vio_apnd2_writemeta(CDBVIO *vio) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + char buf[FILEMETASIZE]; + char *hbuf; + int hbufsize; + int pos = 0; + + memset(buf, 'X', FILEMETASIZE); + memcpy(buf, FILEMAGICHEADER, FILEMAGICLEN); + pos += FILEMAGICLEN; + cdb_lock_lock(myio->lock); + *(uint32_t*)(buf + pos) = myio->ibuf.off; + pos += SI4; + *(uint32_t*)(buf + pos) = myio->ibuf.limit; + pos += SI4; + *(uint32_t*)(buf + pos) = myio->dbuf.off; + pos += SI4; + *(uint32_t*)(buf + pos) = myio->dbuf.limit; + pos += SI4; + *(uint32_t*)(buf + pos) = myio->ifnum; + pos += SI4; + *(uint32_t*)(buf + pos) = myio->dfnum; + pos += SI4; + *(uint32_t*)(buf + pos) = myio->ibuf.fid; + pos += SI4; + *(uint32_t*)(buf + pos) = myio->dbuf.fid; + pos += SI4; + + hbufsize = (SI4 + SI4 + SI4 + SI8 + SI8 + 1 + 1) * myio->ifnum; + hbufsize += (SI4 + SI4 + SI4 + SI4 + SI8 + SI8 + 1 + 1) * myio->dfnum; + hbuf = (char*)malloc(hbufsize); + memset(hbuf, 'X', hbufsize); + pos = 0; + /* iterate all the index files order by oid */ + VIOAPND2FINFO *finfo = myio->idxfhead; + while(finfo != NULL) { + *(uint32_t*)(hbuf + pos) = finfo->fid; + pos += 4; + *(uint32_t*)(hbuf + pos) = finfo->fsize; + pos += 4; + *(uint32_t*)(hbuf + pos) = finfo->rcyled; + pos += 4; + *(uint64_t*)(hbuf + pos) = finfo->oidf; + pos += 8; + *(uint64_t*)(hbuf + pos) = finfo->oidl; + pos += 8; + *(uint8_t*)(hbuf + 
pos) = finfo->fstatus; + pos += 1; + *(uint8_t*)(hbuf + pos) = finfo->ftype; + pos += 1; + finfo = finfo->fnext; + } + + /* iterate all the data files order by oid */ + finfo = myio->datfhead; + while(finfo != NULL) { + *(uint32_t*)(hbuf + pos) = finfo->fid; + pos += 4; + *(uint32_t*)(hbuf + pos) = finfo->fsize; + pos += 4; + *(uint32_t*)(hbuf + pos) = finfo->rcyled; + pos += 4; + *(uint32_t*)(hbuf + pos) = finfo->nexpire; + pos += 4; + *(uint64_t*)(hbuf + pos) = finfo->oidf; + pos += 8; + *(uint64_t*)(hbuf + pos) = finfo->oidl; + pos += 8; + *(uint8_t*)(hbuf + pos) = finfo->fstatus; + pos += 1; + *(uint8_t*)(hbuf + pos) = finfo->ftype; + pos += 1; + finfo = finfo->fnext; + } + cdb_lock_unlock(myio->lock); + + if (pwrite(myio->mfd, buf, FILEMETASIZE, 0) != FILEMETASIZE) { + cdb_seterrno(vio->db, CDB_WRITEERR, __FILE__, __LINE__); + free(hbuf); + return -1; + } + + if (pwrite(myio->mfd, hbuf, hbufsize, FILEMETASIZE) != hbufsize) { + cdb_seterrno(vio->db, CDB_WRITEERR, __FILE__, __LINE__); + free(hbuf); + return -1; + } + free(hbuf); + + return 0; +} + + +/* close db */ +static int _vio_apnd2_close(CDBVIO *vio) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + CDBHTITEM *item; + char filename[MAX_PATH_LEN] = {0}; + VIOAPND2FINFO *finfo; + + /* flush buffer */ + _vio_apnd2_flushbuf(vio, VIOAPND2_INDEX); + finfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->idxmeta, &myio->ibuf.fid, SI4, false); + if (finfo) + _vio_apnd2_writefmeta(vio, myio->ibuf.fd, finfo); + _vio_apnd2_flushbuf(vio, VIOAPND2_DATA); + finfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->datmeta, &myio->dbuf.fid, SI4, false); + if (finfo) + _vio_apnd2_writefmeta(vio, myio->dbuf.fd, finfo); + + /* iterate and close the fd cache */ + item = cdb_ht_iterbegin(myio->fdcache); + while(item != NULL) { + close(*(int*)cdb_ht_itemval(myio->fdcache, item)); + item = cdb_ht_iternext(myio->fdcache, item); + } + + if (myio->dbuf.fd > 0) + close(myio->dbuf.fd); + if (myio->ibuf.fd > 0) + close(myio->ibuf.fd); + + /* rewrite the 
metafile */ + _vio_apnd2_writemeta(vio); + /* close all open files */ + snprintf(filename, MAX_PATH_LEN, "%s/pid.cdb", myio->filepath); + unlink(filename); + /* dellog only be useful for recovery of database unsafety close */ + snprintf(filename, MAX_PATH_LEN, "%s/dellog.cdb", myio->filepath); + unlink(filename); + _vio_apnd2_setopensig(vio, VIOAPND2_SIGCLOSED); + if (myio->hfd > 0) + close(myio->hfd); + if (myio->mfd > 0) + close(myio->mfd); + if (myio->dfd > 0) + close(myio->dfd); + _vio_apnd2_destroy(vio); + return 0; +} + + +/* open a file, and remember its fd. The function runs under lock protection */ +static int _vio_apnd2_loadfd(CDBVIO *vio, uint32_t fid, int dtype) +{ + VIOAPND2 *myio = (VIOAPND2*)vio->iometa; + int fd; + char filename[MAX_PATH_LEN]; + char ipfx[] = "idx"; + char dpfx[] = "dat"; + char *pfx; + uint32_t vfid; + + if (dtype == VIOAPND2_INDEX) { + pfx = ipfx; + vfid = VFIDIDX(fid); + } else if (dtype == VIOAPND2_DATA) { + pfx = dpfx; + vfid = VFIDDAT(fid); + } else { + cdb_seterrno(vio->db, CDB_INTERNALERR, __FILE__, __LINE__); + return -1; + } + + snprintf(filename, MAX_PATH_LEN, "%s/%s%08d.cdb", myio->filepath, pfx, fid); + fd = open(filename, O_RDONLY, 0644); + if (fd < 0) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + return -1; + } + + /* cache the fd, close the oldest file not touched */ + cdb_ht_insert2(myio->fdcache, &vfid, SI4, &fd, sizeof(int)); + while(myio->fdcache->num > myio->maxfds) { + CDBHTITEM *item = cdb_ht_poptail(myio->fdcache); + close(*(int*)cdb_ht_itemval(myio->fdcache, item)); + free(item); + } + + return fd; +} + +/* read a index page */ +static int _vio_apnd2_readpage(CDBVIO *vio, CDBPAGE **page, FOFF off) +{ + VIOAPND2 *myio = (VIOAPND2*)vio->iometa; + int ret, fd; + uint32_t psize; + uint32_t fid, roff; + uint32_t fixbufsize = SBUFSIZE - (sizeof(CDBPAGE) - PAGEHSIZE); + uint32_t areadsize = PAGEAREADSIZE; //vio->db->areadsize; + + VOFF2ROFF(off, fid, roff); + /* avoid dirty memory */ + 
(*page)->magic = 0; + + cdb_lock_lock(myio->lock); + if (fid == myio->ibuf.fid) + /* read from current writing file? */ + fd = myio->ibuf.fd; + else { + /* old index file */ + int vfid, *fdret; + vfid = VFIDIDX(fid); + /* in cache? */ + fdret = cdb_ht_get2(myio->fdcache, &vfid, sizeof(vfid), true); + if (fdret == NULL) { + fd = _vio_apnd2_loadfd(vio, fid, VIOAPND2_INDEX); + if (fd < 0) { + cdb_lock_unlock(myio->lock); + return -1; + } + } else + fd = *fdret; + } + + /* NOTICE: the data on disk actually starts at 'magic' field in structure */ + ret = _vio_apnd2_read(vio, fd, &(*page)->magic, areadsize, roff); + if (ret <= 0) { + cdb_lock_unlock(myio->lock); + return -1; + } + + if ((*page)->magic != PAGEMAGIC) { + cdb_lock_unlock(myio->lock); + cdb_seterrno(vio->db, CDB_DATAERRIDX, __FILE__, __LINE__); + return -1; + } + + psize = PAGESIZE(*page); + if (ret < areadsize && ret < psize) { + cdb_lock_unlock(myio->lock); + cdb_seterrno(vio->db, CDB_DATAERRIDX, __FILE__, __LINE__); + return ret; + } else if (psize > areadsize) { + /* need another read operation since the page is a large than default read size */ + if (psize > fixbufsize) { + /* record is larger the stack size */ + CDBPAGE *npage = (CDBPAGE *)malloc(sizeof(CDBPAGE) + (*page)->num * sizeof(PITEM)); + memcpy(&npage->magic, &(*page)->magic, areadsize); + *page = npage; + } + + ret = _vio_apnd2_read(vio, fd, (char*)&(*page)->magic + areadsize, + psize - areadsize, roff + areadsize); + if (ret < psize - areadsize) { + cdb_lock_unlock(myio->lock); + cdb_seterrno(vio->db, CDB_DATAERRIDX, __FILE__, __LINE__); + return -1; + } + } + + cdb_lock_unlock(myio->lock); + + /* remember where i got the page, calculate into junk space if page is discarded */ + (*page)->osize = OFFALIGNED(psize); + (*page)->ooff = off; + (*page)->cap = (*page)->num; + return 0; +} + +/* read a data record */ +static int _vio_apnd2_readrec(CDBVIO *vio, CDBREC** rec, FOFF off, bool readval) +{ + VIOAPND2 *myio = (VIOAPND2*)vio->iometa; + int 
ret, fd; + uint32_t rsize; + uint32_t fid, roff; + /* the 'rec' is hoped to be fit in stack, the actually size is a little smaller */ + /* because some fields in CDBREC structure are not on disk */ + uint32_t fixbufsize = SBUFSIZE - (sizeof(CDBREC) - RECHSIZE); + uint32_t areadsize = vio->db->areadsize; + + VOFF2ROFF(off, fid, roff); + /* avoid dirty memory */ + (*rec)->magic = 0; + + cdb_lock_lock(myio->lock); + if (fid == myio->dbuf.fid) + /* read from current writing file? */ + fd = myio->dbuf.fd; + else { + /* read from old data file */ + int vfid, *fdret; + vfid = VFIDDAT(fid); + fdret = cdb_ht_get2(myio->fdcache, &vfid, sizeof(vfid), true); + if (fdret == NULL) { + fd = _vio_apnd2_loadfd(vio, fid, VIOAPND2_DATA); + if (fd < 0) { + cdb_lock_unlock(myio->lock); + return -1; + } + } else + fd = *fdret; + } + + /* NOTICE: the data on disk actually starts at 'magic' field in structure */ + ret = _vio_apnd2_read(vio, fd, &(*rec)->magic, areadsize, roff); + if (ret <= 0) { + cdb_lock_unlock(myio->lock); + return -1; + } + + if ((*rec)->magic != RECMAGIC) { + cdb_lock_unlock(myio->lock); + cdb_seterrno(vio->db, CDB_DATAERRDAT, __FILE__, __LINE__); + return -1; + } + + uint32_t ovsize = (*rec)->vsize; + if (!readval) + /* read key only */ + (*rec)->vsize = 0; + rsize = RECSIZE(*rec); + + if (ret < areadsize && ret < rsize) { + cdb_lock_unlock(myio->lock); + cdb_seterrno(vio->db, CDB_DATAERRDAT, __FILE__, __LINE__); + return -1; + } else if (rsize > areadsize) { + /* need another read */ + if (rsize > fixbufsize) { + /* record is larger the stack size */ + CDBREC *nrec = (CDBREC *)malloc(sizeof(CDBREC)+(*rec)->ksize+(*rec)->vsize); + memcpy(&nrec->magic, &(*rec)->magic, areadsize); + *rec = nrec; + } + ret = _vio_apnd2_read(vio, fd, (char*)&(*rec)->magic + areadsize, + rsize - areadsize, roff + areadsize); + if (ret != rsize - areadsize) { + cdb_lock_unlock(myio->lock); + cdb_seterrno(vio->db, CDB_DATAERRDAT, __FILE__, __LINE__); + return -1; + } + } + 
cdb_lock_unlock(myio->lock); + + /* fix pointer */ + (*rec)->key = (*rec)->buf; + (*rec)->val = (*rec)->buf + (*rec)->ksize; + + /* even if didn't read the value, still keep the complete (old) size */ + if (!readval) + (*rec)->osize = OFFALIGNED(rsize + ovsize); + else + (*rec)->osize = OFFALIGNED(rsize); + + (*rec)->ooff = off; + return 0; +} + + +/* write a index page, return the written virtual offset */ +static int _vio_apnd2_writepage(CDBVIO *vio, CDBPAGE *page, FOFF *off) +{ + VIOAPND2 *myio = (VIOAPND2*)vio->iometa; + VIOAPND2FINFO *finfo; + uint32_t psize = PAGESIZE(page); + uint32_t fid, roff; + uint32_t ofid; + + page->magic = PAGEMAGIC; + page->oid = cdb_genoid(vio->db); + + cdb_lock_lock(myio->lock); + /* buffer ready? */ + if (myio->ibuf.fd < 0) { + if (_vio_apnd2_shiftnew(vio, VIOAPND2_INDEX) < 0) { + cdb_lock_unlock(myio->lock); + return -1; + } + } + + /* if it was modified from existing page, remember the wasted space */ + if (OFFNOTNULL(page->ooff)) { + VOFF2ROFF(page->ooff, ofid, roff); + finfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->idxmeta, &ofid, SI4, false); + if (finfo) + finfo->rcyled += page->osize; + } + + if (psize > myio->ibuf.limit) { + /* page too large */ + _vio_apnd2_flushbuf(vio, VIOAPND2_INDEX); + fid = myio->ibuf.fid; + roff = myio->ibuf.off; + _vio_apnd2_write(vio, myio->ibuf.fd, &page->magic, psize, true); + myio->ibuf.oid = page->oid; + _vio_apnd2_flushbuf(vio, VIOAPND2_INDEX); + cdb_lock_unlock(myio->lock); + + /* remember last wrote offset */ + ROFF2VOFF(fid, roff, *off); + page->ooff = *off; + page->osize = OFFALIGNED(psize); + return 0; + } else if (psize + myio->ibuf.pos > myio->ibuf.limit) + /* buffer is full */ + _vio_apnd2_flushbuf(vio, VIOAPND2_INDEX); + + /* copy to buffer */ + fid = myio->ibuf.fid; + roff = myio->ibuf.off + myio->ibuf.pos; + memcpy(myio->ibuf.buf + myio->ibuf.pos, &page->magic, psize); + myio->ibuf.pos += psize; + myio->ibuf.pos = OFFALIGNED(myio->ibuf.pos); + myio->ibuf.oid = page->oid; + 
cdb_lock_unlock(myio->lock); + ROFF2VOFF(fid, roff, *off); + + /* remember last wrote offset */ + page->ooff = *off; + page->osize = OFFALIGNED(psize); + return 0; +} + + +/* delete a record */ +static int _vio_apnd2_deleterec(CDBVIO *vio, CDBREC *rec, FOFF off) +{ + VIOAPND2 *myio = (VIOAPND2*)vio->iometa; + uint32_t ofid, roff; + + cdb_lock_lock(myio->lock); + myio->delbuf[myio->delbufpos] = off; + if (++myio->delbufpos == DELBUFMAX) { + if (_vio_apnd2_flushbuf(vio, VIOAPND2_DELLOG) < 0) + return -1; + } + + /* it is an deleted record, remember the space to be recycled */ + VOFF2ROFF(off, ofid, roff); + if (OFFNOTNULL(rec->ooff)) { + VIOAPND2FINFO *finfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->datmeta, &ofid, SI4, false); + if (finfo) { + finfo->rcyled += rec->osize; + } + } + cdb_lock_unlock(myio->lock); + return 0; +} + + + +/* write a data record, return the written virtual offset */ +static int _vio_apnd2_writerec(CDBVIO *vio, CDBREC *rec, FOFF *off, int ptrtype) { + VIOAPND2 *myio = (VIOAPND2*)vio->iometa; + uint32_t rsize = RECSIZE(rec); + uint32_t fid, roff, ofid; + if (ptrtype == VIOAPND2_RECEXTERNAL) + rec->magic = RECMAGIC; + + /* oid always are increment, even if it is a record moved from an old data file */ + rec->oid = cdb_genoid(vio->db); + cdb_lock_lock(myio->lock); + /* buffer ready? 
*/ + if (myio->dbuf.fd < 0) { + if (_vio_apnd2_shiftnew(vio, VIOAPND2_DATA) < 0) { + cdb_lock_unlock(myio->lock); + return -1; + } + } + /* it is an overwritten record, remember the space to be recycled */ + if (OFFNOTNULL(rec->ooff)) { + VOFF2ROFF(rec->ooff, ofid, roff); + VIOAPND2FINFO *finfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->datmeta, &ofid, SI4, false); + if (finfo) + finfo->rcyled += rec->osize; + } + if (rsize > myio->dbuf.limit) { + /* record too large */ + _vio_apnd2_flushbuf(vio, VIOAPND2_DATA); + fid = myio->dbuf.fid; + roff = myio->dbuf.off; + _vio_apnd2_write(vio, myio->dbuf.fd, &rec->magic, RECHSIZE, true); + if (ptrtype == VIOAPND2_RECINTERNAL) + _vio_apnd2_write(vio, myio->dbuf.fd, rec->buf, rec->ksize + rec->vsize, false); + else { + _vio_apnd2_write(vio, myio->dbuf.fd, rec->key, rec->ksize, false); + _vio_apnd2_write(vio, myio->dbuf.fd, rec->val, rec->vsize, false); + } + /* reset the buffer */ + myio->dbuf.oid = rec->oid; + _vio_apnd2_flushbuf(vio, VIOAPND2_DATA); + if (rec->expire) { + VIOAPND2FINFO *finfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->datmeta, &fid, SI4, false); + if (finfo) { + if (finfo->nexpire == 0) { + finfo->lcktime = time(NULL); + finfo->nexpire = rec->expire; + } else if (finfo->nexpire > rec->expire) { + finfo->nexpire = rec->expire; + } + } + } + cdb_lock_unlock(myio->lock); + ROFF2VOFF(fid, roff, *off); + return 0; + } else if (rsize + myio->dbuf.pos > myio->dbuf.limit) + /* buffer is full */ + _vio_apnd2_flushbuf(vio, VIOAPND2_DATA); + /* copy to buffer */ + fid = myio->dbuf.fid; + roff = myio->dbuf.off + myio->dbuf.pos; + memcpy(myio->dbuf.buf + myio->dbuf.pos, &rec->magic, RECHSIZE); + myio->dbuf.pos += RECHSIZE; + if (ptrtype == VIOAPND2_RECINTERNAL) { + memcpy(myio->dbuf.buf + myio->dbuf.pos, rec->buf, rec->ksize + rec->vsize); + myio->dbuf.pos += rec->ksize + rec->vsize; + } else { + memcpy(myio->dbuf.buf + myio->dbuf.pos, rec->key, rec->ksize); + myio->dbuf.pos += rec->ksize; + memcpy(myio->dbuf.buf + myio->dbuf.pos, 
rec->val, rec->vsize); + myio->dbuf.pos += rec->vsize; + } + myio->dbuf.pos = OFFALIGNED(myio->dbuf.pos); + myio->dbuf.oid = rec->oid; + if (rec->expire) { + VIOAPND2FINFO *finfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->datmeta, &fid, SI4, false); + if (finfo) { + if (finfo->nexpire == 0) { + finfo->lcktime = time(NULL); + finfo->nexpire = rec->expire; + } else if (finfo->nexpire > rec->expire) { + finfo->nexpire = rec->expire; + } + } + } + ROFF2VOFF(fid, roff, *off); + cdb_lock_unlock(myio->lock); + rec->osize = rsize; + rec->ooff = *off; + return 0; +} + +static int _vio_apnd2_writerecexternal(CDBVIO *vio, CDBREC *rec, FOFF *off) +{ + return _vio_apnd2_writerec(vio, rec, off, VIOAPND2_RECEXTERNAL); +} + +static int _vio_apnd2_writerecinternal(CDBVIO *vio, CDBREC *rec, FOFF *off) +{ + return _vio_apnd2_writerec(vio, rec, off, VIOAPND2_RECINTERNAL); +} + + +/* flush buffers, and sync data to disk from OS cache */ +static int _vio_apnd2_sync(CDBVIO *vio) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + cdb_lock_lock(myio->lock); + _vio_apnd2_flushbuf(vio, VIOAPND2_DATA); + _vio_apnd2_flushbuf(vio, VIOAPND2_INDEX); + if (myio->dbuf.fd > 0) + fdatasync(myio->dbuf.fd); + if (myio->ibuf.fd > 0) + fdatasync(myio->ibuf.fd); + + _vio_apnd2_writehead(vio, false); + cdb_lock_unlock(myio->lock); + return 0; +} + + +/* write db information and main index table into a single file */ +static int _vio_apnd2_writehead(CDBVIO *vio, bool wtable) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + CDB *db = vio->db; + char buf[FILEMETASIZE]; + int pos = 0; + + memset(buf, 'X', FILEMETASIZE); + memcpy(buf, FILEMAGICHEADER, FILEMAGICLEN); + pos += FILEMAGICLEN; + *(uint32_t*)(buf + pos) = db->hsize; + pos += SI4; + *(uint64_t*)(buf + pos) = db->oid; + pos += SI8; + *(uint64_t*)(buf + pos) = db->roid; + pos += SI8; + *(uint64_t*)(buf + pos) = db->rnum; + pos += SI8; + *(uint32_t*)(buf + pos) = VIOAPND2_SIGOPEN; + pos += SI4; + + if (pwrite(myio->hfd, buf, FILEMETASIZE, 0) != FILEMETASIZE) { + 
cdb_seterrno(vio->db, CDB_WRITEERR, __FILE__, __LINE__); + return -1; + } + + if (wtable && pwrite(myio->hfd, db->mtable, sizeof(FOFF) * db->hsize, FILEMETASIZE) + != sizeof(FOFF) * db->hsize) { + cdb_seterrno(vio->db, CDB_WRITEERR, __FILE__, __LINE__); + return -1; + } + return 0; +} + + +/* wrapped for upper layer */ +static int _vio_apnd2_writehead2(CDBVIO *vio) +{ + return _vio_apnd2_writehead(vio, true); +} + + +/* read db information and main index table from a single file */ +static int _vio_apnd2_readhead(CDBVIO *vio, bool rtable) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + CDB *db = vio->db; + char buf[FILEMETASIZE]; + int pos = 0; + + if (myio->create) { + /* the db is just created, allocate a empty main index table for db */ + db->mtable = (FOFF *)malloc(sizeof(FOFF) * db->hsize); + memset(db->mtable, 0, sizeof(FOFF) * db->hsize); + _vio_apnd2_writehead(vio, false); + return 0; + } + + if (pread(myio->hfd, buf, FILEMETASIZE, 0) != FILEMETASIZE) { + cdb_seterrno(db, CDB_READERR, __FILE__, __LINE__); + return -1; + } + + if (memcmp(buf, FILEMAGICHEADER, FILEMAGICLEN)) { + cdb_seterrno(db, CDB_DATAERRMETA, __FILE__, __LINE__); + return -1; + } + + pos += FILEMAGICLEN; + db->hsize = *(uint32_t*)(buf + pos); + pos += SI4; + db->oid = *(uint64_t*)(buf + pos); + pos += SI8; + db->roid = *(uint64_t*)(buf + pos); + pos += SI8; + db->rnum = *(uint64_t*)(buf + pos); + pos += SI8; + /* 4 bytes reserved for open status */ + pos += SI4; + + if (!rtable) + return 0; + + if (db->mtable) + free(db->mtable); + db->mtable = (FOFF *)malloc(sizeof(FOFF) * db->hsize); + if (pread(myio->hfd, db->mtable, sizeof(FOFF) * db->hsize, FILEMETASIZE) != + sizeof(FOFF) * db->hsize) { + free(db->mtable); + cdb_seterrno(db, CDB_READERR, __FILE__, __LINE__); + return -1; + } + return 0; +} + + +/* wrapped for upper layer */ +static int _vio_apnd2_readhead2(CDBVIO *vio) +{ + return _vio_apnd2_readhead(vio, true); +} + + +/* check if some dat file has too large junk space */ +static 
void _vio_apnd2_rcyledataspacetask(void *arg) +{ + CDBVIO *vio = (CDBVIO *)arg; + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + CDBHTITEM *item; + uint32_t now = time(NULL); + uint32_t posblexpnum = 0; + cdb_lock_lock(myio->lock); + item = cdb_ht_iterbegin(myio->datmeta); + while(item != NULL) { + VIOAPND2FINFO *finfo = (VIOAPND2FINFO*)cdb_ht_itemval(myio->datmeta, item); + if (finfo->nexpire && finfo->nexpire <= now) + posblexpnum++; + item = cdb_ht_iternext(myio->datmeta, item); + } + + item = cdb_ht_iterbegin(myio->datmeta); + while(item != NULL) { + VIOAPND2FINFO *finfo = (VIOAPND2FINFO*)cdb_ht_itemval(myio->datmeta, item); + uint32_t fid = finfo->fid; + /* rcyled space size is inaccurate */ + if (finfo->rcyled * 2 < finfo->fsize + /* no data file possibly has expire record */ + && (posblexpnum == 0 + /* long enough time passed since last check on this file */ + || finfo->lcktime + posblexpnum * DATARCYLECHECKFACTOR > now + /* check the data file most recent expire record */ + || finfo->nexpire > now + /* no expire record */ + || finfo->nexpire == 0)) { + item = cdb_ht_iternext(myio->datmeta, item); + continue; + } + + /* do not work on the writing file or file to be deleted */ + if (finfo->fstatus != VIOAPND2_FULL || finfo->unlink) { + item = cdb_ht_iternext(myio->datmeta, item); + continue; + } + + /* have to iterate and calculate recycle space */ + finfo->ref++; + /* operation on this file should not in lock protection */ + cdb_lock_unlock(myio->lock); + + if (finfo->rcyled * 2 < finfo->fsize) { + _vio_apnd2_rcyledatafile(vio, finfo, false); + finfo->lcktime = now; + } + + if (finfo->rcyled * 2 >= finfo->fsize) { + _vio_apnd2_rcyledatafile(vio, finfo, true); + } + + cdb_lock_lock(myio->lock); + finfo->ref--; + if (finfo->ref == 0 && finfo->unlink) { + /* unlink the file */ + _vio_apnd2_unlink(vio, finfo, VIOAPND2_DATA); + cdb_ht_del2(myio->datmeta, &fid, SI4); + } + item = cdb_ht_iterbegin(myio->datmeta); + } + cdb_lock_unlock(myio->lock); +} + +/* only be 
called in _vio_apnd2_rcylepagespacetask; when a page is moved into a new + index file, its ooff should be changed, also its copy in cache should be updated */ +static void _vio_apnd2_fixcachepageooff(CDB *db, uint32_t bid, FOFF off) +{ + CDBPAGE *page = NULL; + + if (db->pcache) { + cdb_lock_lock(db->pclock); + page = cdb_ht_get2(db->pcache, &bid, SI4, true); + cdb_lock_unlock(db->pclock); + } + + /* not in pcache, exists in dirty page cache? */ + if (page == NULL && db->dpcache) { + cdb_lock_lock(db->dpclock); + page = cdb_ht_get2(db->dpcache, &bid, SI4, true); + cdb_lock_unlock(db->dpclock); + } + + if (page) + page->ooff = off; +} + +/* check if some index file has too large junk space */ +static void _vio_apnd2_rcylepagespacetask(void *arg) +{ + CDBVIO *vio = (CDBVIO *)arg; + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + CDBHTITEM *item; + + cdb_lock_lock(myio->lock); + item = cdb_ht_iterbegin(myio->idxmeta); + while(item != NULL) { + VIOAPND2FINFO *finfo = (VIOAPND2FINFO*)cdb_ht_itemval(myio->idxmeta, item); + uint32_t fid = finfo->fid; + + /* do not work on the writing file or file to be deleted */ + if (finfo->fstatus != VIOAPND2_FULL || finfo->unlink) { + item = cdb_ht_iternext(myio->idxmeta, item); + continue; + } + + /* junk space too large? 
*/ + if (finfo->rcyled * 2 > finfo->fsize) { + int fd; + char filename[MAX_PATH_LEN]; + snprintf(filename, MAX_PATH_LEN, "%s/idx%08d.cdb", myio->filepath, fid); + fd = open(filename, O_RDONLY, 0644); + if (fd < 0) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + item = cdb_ht_iternext(myio->idxmeta, item); + continue; + } + finfo->ref++; + /* I/O should not block the lock */ + cdb_lock_unlock(myio->lock); + + uint32_t fsize = lseek(fd, 0, SEEK_END); + uint32_t pos = FILEMETASIZE; + char *map = mmap(NULL, fsize, PROT_READ, MAP_PRIVATE, fd, 0); + while(pos < fsize) { + CDBPAGE *page = (CDBPAGE *)&map[pos-(sizeof(CDBPAGE) - PAGEHSIZE)]; + FOFF off; + + if (page->magic != PAGEMAGIC) { + pos += ALIGNBYTES; + continue; + } + + ROFF2VOFF(fid, pos, off); + page->ooff = off; + page->osize = OFFALIGNED(PAGESIZE(page)); + if (OFFEQ(vio->db->mtable[page->bid], off)) { + FOFF noff; + _vio_apnd2_writepage(vio, page, &noff); + /* lock and double check */ + cdb_lock_lock(vio->db->mlock[page->bid % MLOCKNUM]); + if (OFFEQ(vio->db->mtable[page->bid], off)) { + vio->db->mtable[page->bid] = noff; + _vio_apnd2_fixcachepageooff(vio->db, page->bid, noff); + } + cdb_lock_unlock(vio->db->mlock[page->bid % MLOCKNUM]); + } + pos += OFFALIGNED(PAGESIZE(page)); + } + munmap(map, fsize); + close(fd); + + cdb_lock_lock(myio->lock); + /* drop information for the file */ + finfo->ref--; + finfo->unlink = true; + if (finfo->ref == 0) { + /* unlink the file */ + _vio_apnd2_unlink(vio, finfo, VIOAPND2_INDEX); + cdb_ht_del2(myio->idxmeta, &fid, SI4); + } + /* reset the iterator */ + item = cdb_ht_iterbegin(myio->idxmeta); + continue; + } + item = cdb_ht_iternext(myio->idxmeta, item); + } + cdb_lock_unlock(myio->lock); +} + + +/* unlink a file and remove fd from fdcache. 
The function runs under lock protection */ +static void _vio_apnd2_unlink(CDBVIO *vio, VIOAPND2FINFO *finfo, int dtype) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + char filename[MAX_PATH_LEN]; + char ipfx[] = "idx"; + char dpfx[] = "dat"; + char *pfx; + uint32_t *fnum; + uint32_t vfid, fid = finfo->fid; + VIOAPND2FINFO **fhead, **ftail; + CDBHTITEM *fditem = NULL; + + if (dtype == VIOAPND2_INDEX) { + pfx = ipfx; + vfid = VFIDIDX(fid); + fnum = &myio->ifnum; + fhead = &myio->idxfhead; + ftail = &myio->idxftail; + } else if (dtype == VIOAPND2_DATA) { + pfx = dpfx; + vfid = VFIDDAT(fid); + fnum = &myio->dfnum; + fhead = &myio->datfhead; + ftail = &myio->datftail; + } else + return; + + snprintf(filename, MAX_PATH_LEN, "%s/%s%08d.cdb", myio->filepath, pfx, fid); + fditem = cdb_ht_del(myio->fdcache, &vfid, SI4); + if (fditem != NULL) { + close(*(int*)cdb_ht_itemval(myio->fdcache, fditem)); + free(fditem); + } + (*fnum)--; + unlink(filename); + + /* fix linked list of data/index files after remove a finfo from meta table */ + if (finfo->fprev) + finfo->fprev->fnext = finfo->fnext; + if (finfo->fnext) + finfo->fnext->fprev = finfo->fprev; + if (*fhead == finfo) + *fhead = finfo->fnext; + if (*ftail == finfo) + *ftail = finfo->fprev; +} + + +/* only be used for sorting files at recovery */ +typedef struct { + uint32_t fid; + uint64_t oidf; +} VIOAPND2SREORDER; + + +static int _vio_apnd2_cmpfuncsreorder(const void *p1, const void *p2) +{ + VIOAPND2SREORDER *s1, *s2; + s1 = (VIOAPND2SREORDER *)p1; + s2 = (VIOAPND2SREORDER *)p2; + return s1->oidf - s2->oidf; +} + + +/* recovery the database if it was not close properly + * or force recovery from roid = 0 + * the procedure runs with no lock protection */ +static int _vio_apnd2_recovery(CDBVIO *vio, bool force) +{ + VIOAPND2 *myio = (VIOAPND2 *)vio->iometa; + CDB *db = vio->db; + char filename[MAX_PATH_LEN]; + struct dirent *filelist; + VIOAPND2SREORDER *idxorders; + int idxpos, idxlimit; + VIOAPND2SREORDER *datorders; + 
int datpos, datlimit; + uint32_t imaxfid = 0, dmaxfid = 0; + bool gotmindex = false; + + + idxpos = datpos = 0; + idxlimit = datlimit = 256; + idxorders = (VIOAPND2SREORDER *)malloc(idxlimit * sizeof(VIOAPND2SREORDER)); + datorders = (VIOAPND2SREORDER *)malloc(datlimit * sizeof(VIOAPND2SREORDER)); + DIR *dir = opendir(myio->filepath); + myio->dfnum = myio->ifnum = 0; + myio->datfhead = myio->datftail = myio->idxfhead = myio->idxftail = NULL; + /* special value to mark if found current writing file */ + myio->ibuf.fid = myio->dbuf.fid = -1; + for (filelist = readdir(dir); filelist; filelist = readdir(dir)) { + // Check file name/type + const char *cstr = filelist->d_name; + if (strncmp(cstr + strlen(cstr) - 4, ".cdb", 4) != 0) + /* not a cuttdb file*/ + continue; + if (strcmp(cstr, "dellog.cdb") == 0) { + snprintf(filename, MAX_PATH_LEN, "%s/%s", myio->filepath, cstr); + myio->dfd = open(filename, O_RDONLY, 0644); + } else if (strcmp(cstr, "mainindex.cdb") == 0) { + gotmindex = true; +// snprintf(filename, MAX_PATH_LEN, "%s/%s", myio->filepath, cstr); +// myio->hfd = open(filename, O_RDONLY, 0644); +// if (_vio_apnd2_readhead(vio, false) < 0 || db->hsize == 0) { +// goto ERRRET; +// } +// db->mtable = (FOFF *)malloc(sizeof(FOFF) * db->hsize); +// gotmindex = true; +// memset(db->mtable, 0, sizeof(FOFF) * db->hsize); + } else if (strcmp(cstr, "mainmeta.cdb") == 0) { + snprintf(filename, MAX_PATH_LEN, "%s/%s", myio->filepath, cstr); + myio->mfd = open(filename, O_RDWR, 0644); + if (myio->mfd < 0) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + continue; + } + } else if (strlen(cstr) == 15 + && (strncmp(cstr, "dat", 3) == 0 || strncmp(cstr, "idx", 3) == 0)) { + VIOAPND2FINFO finfo; + uint64_t fsize = 0; + + snprintf(filename, MAX_PATH_LEN, "%s/%s", myio->filepath, cstr); + int fd = open(filename, O_RDWR, 0644); + if (fd < 0) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + continue; + } + if (_vio_apnd2_readfmeta(vio, fd, &finfo) < 0) { + 
close(fd); + continue; + } + fsize = lseek(fd, 0, SEEK_END); + finfo.rcyled = 0; + finfo.ref = 0; + finfo.unlink = false; + finfo.fprev = finfo.fnext = NULL; + if (finfo.ftype == VIOAPND2_INDEX) { + if (force) { + /* delete all index file and rebuild them if force to recovery */ + close(fd); + unlink(filename); + } else { + cdb_ht_insert2(myio->idxmeta, &finfo.fid, SI4, &finfo, sizeof(VIOAPND2FINFO)); + idxorders[idxpos].fid = finfo.fid; + idxorders[idxpos].oidf = finfo.oidf; + if (++idxpos == idxlimit) { + VIOAPND2SREORDER *tmp = (VIOAPND2SREORDER *)malloc(idxlimit * 2 * sizeof(VIOAPND2SREORDER)); + memcpy(tmp, idxorders, idxlimit * sizeof(VIOAPND2SREORDER)); + idxlimit *= 2; + free(idxorders); + idxorders = tmp; + } + if(finfo.fstatus == VIOAPND2_WRITING) { + myio->ibuf.fid = finfo.fid; + myio->ibuf.off = OFFALIGNED(fsize); + myio->ibuf.pos = 0; + myio->ibuf.fd = fd; + } else + close(fd); + if (finfo.fid > imaxfid) + imaxfid = finfo.fid; + myio->ifnum++; + } + } else if (finfo.ftype == VIOAPND2_DATA) { + /* no information about nearest expire record time, make a fake one(non zero) */ + finfo.nexpire = finfo.lcktime = time(NULL); + cdb_ht_insert2(myio->datmeta, &finfo.fid, SI4, &finfo, sizeof(VIOAPND2FINFO)); + datorders[datpos].fid = finfo.fid; + datorders[datpos].oidf = finfo.oidf; + if (++datpos == datlimit) { + VIOAPND2SREORDER *tmp = (VIOAPND2SREORDER *)malloc(datlimit * 2 * sizeof(VIOAPND2SREORDER)); + memcpy(tmp, datorders, datlimit * sizeof(VIOAPND2SREORDER)); + datlimit *= 2; + free(datorders); + datorders = tmp; + } + if (finfo.fstatus == VIOAPND2_WRITING) { + myio->dbuf.fid = finfo.fid; + myio->dbuf.off = OFFALIGNED(fsize); + myio->dbuf.pos = 0; + myio->dbuf.fd = fd; + } else + close(fd); + if (finfo.fid > dmaxfid) + dmaxfid = finfo.fid; + myio->dfnum++; + } else + close(fd); + } /* end of else */ + } /* end of for */ + + + /* fix recycled size */ + _vio_apnd2_readmeta(vio, true); + closedir(dir); + + if (!gotmindex) { + /* recovery failed */ + /* 
return */ + goto ERRRET; + } else { + if (_vio_apnd2_readhead(vio, false) < 0) + goto ERRRET; + } + + if (myio->mfd < 0) { + snprintf(filename, MAX_PATH_LEN, "%s/mainmeta.cdb", myio->filepath); + myio->mfd = open(filename, O_RDWR | O_CREAT, 0644); + if (myio->mfd < 0) { + cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__); + goto ERRRET; + } + } + + /* index file complele broken, replay all records to build the index */ + if (myio->ifnum == 0 || force) + db->roid = 0; + /* re-count records num */ + db->rnum = 0; + + /* fix index/data file meta relation */ + qsort(datorders, datpos, sizeof(VIOAPND2SREORDER), _vio_apnd2_cmpfuncsreorder); + qsort(idxorders, idxpos, sizeof(VIOAPND2SREORDER), _vio_apnd2_cmpfuncsreorder); + + VIOAPND2FINFO *lfinfo = NULL; + for(int i = 0; i < datpos; i++) { + VIOAPND2FINFO *cfinfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->datmeta, &datorders[i].fid, SI4, false); + if (cfinfo == NULL) + continue; + if (lfinfo) + lfinfo->fnext = cfinfo; + else { + myio->datfhead = cfinfo; + } + cfinfo->fprev = lfinfo; + lfinfo = cfinfo; + } + myio->datftail = lfinfo; + if (lfinfo) + lfinfo->fnext = NULL; + lfinfo = NULL; + for(int i = 0; i < idxpos; i++) { + VIOAPND2FINFO *cfinfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->idxmeta, &idxorders[i].fid, SI4, false); + if (cfinfo == NULL) + continue; + if (lfinfo) + lfinfo->fnext = cfinfo; + else { + myio->idxfhead = cfinfo; + } + cfinfo->fprev = lfinfo; + lfinfo = cfinfo; + } + myio->idxftail = lfinfo; + if (lfinfo) + lfinfo->fnext = NULL; + lfinfo = NULL; + + if (myio->ibuf.fid == -1) { + myio->ibuf.fid = 0; + _vio_apnd2_shiftnew(vio, VIOAPND2_INDEX); + } + if (myio->dbuf.fid == -1) { + myio->dbuf.fid = 0; + _vio_apnd2_shiftnew(vio, VIOAPND2_DATA); + } + + /* fix offsets in main index table */ + db->mtable = (FOFF *)malloc(db->hsize * sizeof(FOFF)); + memset(db->mtable, 0, db->hsize * sizeof(FOFF)); + void *it = _vio_apnd2_pageiterfirst(vio, 0); + if (it) { + char sbuf[SBUFSIZE]; + CDBPAGE *page = (CDBPAGE *)sbuf; + 
/* need not use iterator since don't care about contents in page */ + /* I'm just lazy, cpu time is cheap */ + while(_vio_apnd2_pageiternext(vio, &page, it) == 0) { + if (OFFNOTNULL(db->mtable[page->bid])) { + /* recalculate the space to be recycled */ + uint32_t ofid, roff; + char sbuf[SBUFSIZE]; + CDBPAGE *opage = (CDBPAGE *)sbuf; + _vio_apnd2_readpage(vio, &opage, db->mtable[page->bid]); + if (OFFNOTNULL(opage->ooff)) { + VOFF2ROFF(opage->ooff, ofid, roff); + VIOAPND2FINFO *finfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->idxmeta, &ofid, SI4, false); + if (finfo) + finfo->rcyled += opage->osize; + } + /* fix impaction of old page */ + db->rnum -= opage->num; + if (opage != (CDBPAGE *)sbuf) + free(opage); + } + db->mtable[page->bid] = page->ooff; + db->rnum += page->num; + if (page != (CDBPAGE *)sbuf) { + free(page); + page = (CDBPAGE *)sbuf; + } + } + _vio_apnd2_pageiterdestory(vio, it); + } + + /* like what was did just now */ + it = _vio_apnd2_reciterfirst(vio, db->roid); + if (it) { + char sbuf[SBUFSIZE]; + CDBREC *rec = (CDBREC *)sbuf; + while(_vio_apnd2_reciternext(vio, &rec, it) == 0) { + FOFF soffs[SFOFFNUM]; + FOFF *soff = soffs, ooff; + char sbuf2[SBUFSIZE]; + OFFZERO(ooff); + CDBREC *rrec = (CDBREC*)sbuf2; + uint64_t hash = CDBHASH64(rec->buf, rec->ksize); + + /* check record with duplicate key(old version/overwritten maybe */ + int retnum = cdb_getoff(db, hash, &soff, CDB_NOTLOCKED); + for(int i = 0; i < retnum; i++) { + if (rrec != (CDBREC*)sbuf2) { + free(rrec); + rrec = (CDBREC*)sbuf2; + } + + int cret = _vio_apnd2_readrec(db->vio, &rrec, soff[i], false); + if (cret < 0) + continue; + + if (rec->ksize == rrec->ksize && memcmp(rrec->key, rec->key, rec->ksize) == 0) { + ooff = rrec->ooff; + break; + } + } + if (soff != soffs) + free(soff); + if (rrec != (CDBREC*)sbuf2) + free(rrec); + + if (OFFNOTNULL(ooff)) + /* replace offset in index */ + cdb_replaceoff(db, hash, ooff, rec->ooff, CDB_NOTLOCKED); + else + cdb_updatepage(vio->db, hash, rec->ooff, 
CDB_PAGEINSERTOFF, CDB_NOTLOCKED); + + if (rec->oid > db->oid) + db->oid = rec->oid; + if (rec != (CDBREC *)sbuf) { + free(rec); + rec = (CDBREC *)sbuf; + } + } + _vio_apnd2_reciterdestory(vio, it); + } + + /* replay deletion logs */ + FOFF delitems[1024]; + for(; myio->dfd > 0;) { + int ret = read(myio->dfd, delitems, 1024 * sizeof(FOFF)); + if (ret > 0) { + for(int j = 0; j * sizeof(FOFF) < ret; j++) { + char sbuf[SBUFSIZE]; + uint32_t ofid, roff; + CDBREC *rec = (CDBREC *)sbuf; + if (_vio_apnd2_readrec(vio, &rec, delitems[j], false) < 0) + continue; + if (cdb_updatepage(db, CDBHASH64(rec->key, rec->ksize), + delitems[j], CDB_PAGEDELETEOFF, CDB_NOTLOCKED) == 0) + VOFF2ROFF(delitems[j], ofid, roff); + VIOAPND2FINFO *finfo = (VIOAPND2FINFO *)cdb_ht_get2(myio->datmeta, &ofid, SI4, false); + if (finfo) + finfo->rcyled += rec->osize; + if (rec != (CDBREC *)sbuf) + free(rec); + } + } else { + close(myio->dfd); + myio->dfd = -1; + } + } + + cdb_flushalldpage(db); + _vio_apnd2_writemeta(vio); + _vio_apnd2_writehead(vio, true); + cdb_ht_clean(myio->idxmeta); + cdb_ht_clean(myio->datmeta); + free(idxorders); + free(datorders); + /* mfd / dfd will be opened again after this function, but hfd won't be */ + myio->datfhead = myio->datftail = myio->idxfhead = myio->idxftail = NULL; + if (myio->ibuf.fd > 0) + close(myio->ibuf.fd); + if (myio->dbuf.fd > 0) + close(myio->dbuf.fd); + if (myio->mfd > 0) + close(myio->mfd); + if (myio->dfd > 0) + close(myio->dfd); + return 0; + +ERRRET: + closedir(dir); + if (myio->hfd > 0) + close(myio->hfd); + if (myio->mfd > 0) + close(myio->mfd); + if (myio->dfd > 0) + close(myio->dfd); + free(datorders); + free(idxorders); + return -1; +}


/*
 * Select the next file to iterate over.
 *
 * Scans the metadata table for the requested kind (VIOAPND2_INDEX or
 * VIOAPND2_DATA) and picks the entry whose `oidf` is the smallest value that
 * is still >= `oid`.  The chosen entry's `ref` counter is incremented before
 * it is returned, so the caller owns one reference.
 *
 * Returns the selected file info, or NULL when `dtype` is unknown or no entry
 * qualifies.  The scan and the ref increment happen under myio->lock.
 */
static VIOAPND2FINFO* _vio_apnd2_fileiternext(CDBVIO *vio, int dtype, uint64_t oid)
{
    VIOAPND2 *myio = (VIOAPND2 *)vio->iometa;
    CDBHASHTABLE *ht;
    CDBHTITEM *item;
    VIOAPND2FINFO *best = NULL;
    uint64_t bestoid = (uint64_t)-1;

    if (dtype == VIOAPND2_INDEX)
        ht = myio->idxmeta;
    else if (dtype == VIOAPND2_DATA)
        ht = myio->datmeta;
    else
        return NULL;

    cdb_lock_lock(myio->lock);
    for (item = cdb_ht_iterbegin(ht); item != NULL; item = cdb_ht_iternext(ht, item)) {
        VIOAPND2FINFO *cand = (VIOAPND2FINFO *)cdb_ht_itemval(ht, item);
        if (cand->oidf >= oid && cand->oidf < bestoid) {
            bestoid = cand->oidf;
            best = cand;
        }
    }
    if (best)
        best->ref++;
    cdb_lock_unlock(myio->lock);
    return best;
}


/*
 * Give up the iterator's reference on its current file.
 *
 * When this was the last reference and the file is flagged `unlink`, the file
 * is removed through _vio_apnd2_unlink() and its entry is deleted from `meta`.
 * Takes and releases myio->lock itself.
 */
static void _vio_apnd2_iterdropfile(CDBVIO *vio, VIOAPND2ITOR *it, int dtype, CDBHASHTABLE *meta)
{
    VIOAPND2 *myio = (VIOAPND2 *)vio->iometa;

    cdb_lock_lock(myio->lock);
    it->finfo->ref--;
    if (it->finfo->ref == 0 && it->finfo->unlink) {
        /* unlink the file */
        _vio_apnd2_unlink(vio, it->finfo, dtype);
        cdb_ht_del2(meta, &it->finfo->fid, SI4);
    }
    cdb_lock_unlock(myio->lock);
}


/*
 * Open and map the iterator's current file and position it on the first
 * page/record whose oid is >= `oid`.
 *
 * If the iterator has no current file yet, one is chosen with
 * _vio_apnd2_fileiternext().  Offsets that do not carry a valid magic are
 * skipped ALIGNBYTES at a time.
 *
 * Returns 0 with it->fd / it->mmap / it->fsize / it->off set up, or -1 when
 * `dtype` is unknown, no file is available, the file cannot be opened or
 * mapped, or the file holds nothing at or after `oid`.  On every failure after
 * a file was selected the reference on it is dropped and it->mmap is left
 * NULL, so that a later _vio_apnd2_iterfree() does not unmap, close and
 * un-reference the same file a second time.
 */
static int _vio_apnd2_iterfirst(CDBVIO *vio, VIOAPND2ITOR *it, int dtype, int64_t oid)
{
    VIOAPND2 *myio = (VIOAPND2 *)vio->iometa;
    CDBHASHTABLE *tmpcache;
    char filename[MAX_PATH_LEN];
    const char *pfx;
    off_t fsize;
    void *map;

    if (dtype == VIOAPND2_INDEX) {
        pfx = "idx";
        tmpcache = myio->idxmeta;
    } else if (dtype == VIOAPND2_DATA) {
        pfx = "dat";
        tmpcache = myio->datmeta;
    } else
        return -1;

    if (it->finfo == NULL)
        it->finfo = _vio_apnd2_fileiternext(vio, dtype, oid);
    if (it->finfo == NULL)
        return -1;

    snprintf(filename, MAX_PATH_LEN, "%s/%s%08d.cdb", myio->filepath, pfx, it->finfo->fid);
    it->fd = open(filename, O_RDONLY, 0644);
    if (it->fd < 0) {
        _vio_apnd2_iterdropfile(vio, it, dtype, tmpcache);
        cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__);
        return -1;
    }

    /* a failed lseek() or an empty file cannot be mapped; treat both like a
     * file that could not be opened */
    fsize = lseek(it->fd, 0, SEEK_END);
    map = MAP_FAILED;
    if (fsize > 0)
        map = mmap(NULL, (size_t)fsize, PROT_READ, MAP_PRIVATE, it->fd, 0);
    if (map == MAP_FAILED) {
        close(it->fd);
        it->mmap = NULL;
        _vio_apnd2_iterdropfile(vio, it, dtype, tmpcache);
        cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__);
        return -1;
    }

    it->fsize = fsize;
    it->mmap = map;
    it->off = FILEMETASIZE;
    it->oid = oid;

    while(it->off < it->fsize) {
        if (dtype == VIOAPND2_INDEX) {
            /* the struct pointer is shifted back so that `magic` lands on it->off */
            CDBPAGE *page = (CDBPAGE *)(it->mmap + it->off -(sizeof(CDBPAGE) - PAGEHSIZE));
            if (page->magic != PAGEMAGIC) {
                it->off += ALIGNBYTES;
                continue;
            }
            if (page->oid >= oid)
                break;
            it->off += OFFALIGNED(PAGESIZE(page));
        } else if (dtype == VIOAPND2_DATA) {
            CDBREC *rec = (CDBREC *)(it->mmap + it->off -(sizeof(CDBREC) - RECHSIZE));
            if (rec->magic != RECMAGIC && rec->magic != DELRECMAGIC) {
                it->off += ALIGNBYTES;
                continue;
            }
            if (rec->oid >= oid)
                break;
            it->off += OFFALIGNED(RECSIZE(rec));
        }
    }

    if (it->off >= it->fsize) {
        /* nothing at or after `oid` in this file */
        munmap(it->mmap, it->fsize);
        it->mmap = NULL;
        close(it->fd);
        _vio_apnd2_iterdropfile(vio, it, dtype, tmpcache);
        return -1;
    }
    return 0;
}


/*
 * Fetch the next index page from the iterator.
 *
 * On entry *page points at a caller-provided buffer of SBUFSIZE bytes.  A page
 * that fits is copied into that buffer; a larger page is copied into a freshly
 * malloc'ed CDBPAGE and *page is redirected to it (the caller can detect this
 * by comparing *page with its own buffer and must free() it).
 *
 * Returns 0 on success, -1 when the iteration is exhausted or memory for an
 * oversized page cannot be allocated (in which case *page is left untouched).
 */
static int _vio_apnd2_pageiternext(CDBVIO *vio, CDBPAGE **page, void *iter)
{
    VIOAPND2ITOR *it = (VIOAPND2ITOR *)iter;
    CDBPAGE *cpage;
    uint32_t fixbufsize = SBUFSIZE - (sizeof(CDBPAGE) - PAGEHSIZE);

    for(;;) {
        if (it->off >= it->fsize) {
            /* current file is done: release it and move on to the next one */
            it->oid = CDBMAX(it->oid, it->finfo->oidl);
            _vio_apnd2_iterfree(vio, VIOAPND2_INDEX, it);
            if (_vio_apnd2_iterfirst(vio, it, VIOAPND2_INDEX, it->oid) < 0)
                return -1;
        }
        cpage = (CDBPAGE *)(it->mmap + it->off -(sizeof(CDBPAGE) - PAGEHSIZE));
        if (cpage->magic != PAGEMAGIC) {
            it->off += ALIGNBYTES;
            continue;
        }
        if (PAGESIZE(cpage) > fixbufsize) {
            /* size the buffer from the page being read; the destination
             * buffer still holds whatever the previous call left in it */
            CDBPAGE *big = (CDBPAGE *)malloc(sizeof(CDBPAGE) + cpage->num * sizeof(PITEM));
            if (big == NULL)
                return -1;
            *page = big;
        }
        memcpy(&(*page)->magic, &cpage->magic, PAGESIZE(cpage));
        (*page)->osize = PAGESIZE(cpage);
        (*page)->cap = (*page)->num;
        ROFF2VOFF(it->finfo->fid, it->off, (*page)->ooff);
        /* set iterator to next one */
        it->oid = (*page)->oid + 1;
        it->off += OFFALIGNED(PAGESIZE(cpage));
        return 0;
    }
}


/*
 * Fetch the next data record from the iterator.
 *
 * Same buffer contract as _vio_apnd2_pageiternext(): a record that fits into
 * the caller's SBUFSIZE buffer is copied there, a larger one goes into a
 * malloc'ed CDBREC that the caller must free().  The record's key/val
 * pointers are set to point into its own `buf`.
 *
 * Returns 0 on success, -1 when the iteration is exhausted or memory for an
 * oversized record cannot be allocated (in which case *rec is left untouched).
 */
static int _vio_apnd2_reciternext(CDBVIO *vio, CDBREC **rec, void *iter)
{
    VIOAPND2ITOR *it = (VIOAPND2ITOR *)iter;
    CDBREC *crec;
    uint32_t fixbufsize = SBUFSIZE - (sizeof(CDBREC) - RECHSIZE);

    for(;;) {
        if (it->off >= it->fsize) {
            /* current file is done: release it and move on to the next one */
            it->oid = CDBMAX(it->oid, it->finfo->oidl);
            _vio_apnd2_iterfree(vio, VIOAPND2_DATA, it);
            if (_vio_apnd2_iterfirst(vio, it, VIOAPND2_DATA, it->oid) < 0)
                return -1;
        }
        crec = (CDBREC *)(it->mmap + it->off -(sizeof(CDBREC) - RECHSIZE));
        if (crec->magic != RECMAGIC && crec->magic != DELRECMAGIC) {
            it->off += ALIGNBYTES;
            continue;
        }
        if (RECSIZE(crec) > fixbufsize) {
            CDBREC *big = (CDBREC *)malloc(sizeof(CDBREC) + crec->ksize + crec->vsize);
            if (big == NULL)
                return -1;
            *rec = big;
        }
        memcpy(&(*rec)->magic, &crec->magic, RECSIZE(crec));

        (*rec)->osize = RECSIZE(crec);
        (*rec)->expire = crec->expire;
        ROFF2VOFF(it->finfo->fid, it->off, (*rec)->ooff);
        (*rec)->key = (*rec)->buf;
        (*rec)->val = (*rec)->buf + (*rec)->ksize;

        /* set iterator to next one */
        it->oid = (*rec)->oid + 1;
        it->off += OFFALIGNED(RECSIZE(crec));
        return 0;
    }
}


/*
 * Release the file the iterator currently has mapped and advance it->finfo to
 * the following file in the list (taking a reference on it when there is one).
 *
 * Does nothing when no file is mapped (it->mmap == NULL).  If the released
 * file had no other users and is flagged `unlink`, it is taken out of the
 * fprev/fnext chain, removed via _vio_apnd2_unlink() and deleted from the
 * matching metadata table.
 *
 * NOTE(review): only the neighbours' links are repaired here; any list
 * head/tail pointers kept elsewhere are not touched by this function.
 *
 * Always returns 0.
 */
static int _vio_apnd2_iterfree(CDBVIO *vio, int dtype, VIOAPND2ITOR *it)
{
    VIOAPND2 *myio = (VIOAPND2 *)vio->iometa;
    VIOAPND2FINFO *cur;
    VIOAPND2FINFO *next;

    if (it->mmap == NULL)
        return 0;

    munmap(it->mmap, it->fsize);
    close(it->fd);

    cdb_lock_lock(myio->lock);
    cur = it->finfo;
    next = cur->fnext;
    cur->ref--;
    if (cur->ref == 0 && cur->unlink) {
        /* unlink the file; either neighbour may be absent at the list ends */
        if (cur->fnext)
            cur->fnext->fprev = cur->fprev;
        if (cur->fprev)
            cur->fprev->fnext = cur->fnext;
        _vio_apnd2_unlink(vio, cur, dtype);
        if (dtype == VIOAPND2_INDEX)
            cdb_ht_del2(myio->idxmeta, &cur->fid, SI4);
        else if (dtype == VIOAPND2_DATA)
            cdb_ht_del2(myio->datmeta, &cur->fid, SI4);
    }
    it->finfo = next;
    if (it->finfo)
        it->finfo->ref++;
    cdb_lock_unlock(myio->lock);
    it->mmap = NULL;
    return 0;
}


/*
 * Allocate an iterator of the given kind and position it at `oid`.
 *
 * The write buffer of that kind is flushed first because the iterator only
 * reads what is in the files.  Returns NULL when memory is exhausted or
 * _vio_apnd2_iterfirst() finds nothing to iterate over.
 */
static void* _vio_apnd2_iternew(CDBVIO *vio, int dtype, uint64_t oid)
{
    VIOAPND2ITOR *it = (VIOAPND2ITOR *)malloc(sizeof(VIOAPND2ITOR));

    if (it == NULL)
        return NULL;

    /* iterator won't get to buffered data */
    _vio_apnd2_flushbuf(vio, dtype);
    it->mmap = NULL;
    it->finfo = NULL;
    if (_vio_apnd2_iterfirst(vio, it, dtype, oid) < 0) {
        free(it);
        return NULL;
    }
    return (void*)it;
}


/*
 * Create a data-record iterator starting at `oid`.
 * Returns an opaque iterator, or NULL when nothing can be iterated.
 */
static void* _vio_apnd2_reciterfirst(CDBVIO *vio, uint64_t oid)
{
    return _vio_apnd2_iternew(vio, VIOAPND2_DATA, oid);
}


/*
 * Destroy a data-record iterator: release its current file and free it.
 * A NULL iterator is ignored.
 */
static void _vio_apnd2_reciterdestory(CDBVIO *vio, void *iter)
{
    if (iter) {
        _vio_apnd2_iterfree(vio, VIOAPND2_DATA, (VIOAPND2ITOR *)iter);
        free(iter);
    }
}


/*
 * Create an index-page iterator starting at `oid`.
 * Returns an opaque iterator, or NULL when nothing can be iterated.
 */
static void* _vio_apnd2_pageiterfirst(CDBVIO *vio, uint64_t oid)
{
    return _vio_apnd2_iternew(vio, VIOAPND2_INDEX, oid);
}


/*
 * Destroy an index-page iterator: release its current file and free it.
 * A NULL iterator is ignored.
 */
static void _vio_apnd2_pageiterdestory(CDBVIO *vio, void *iter)
{
    if (iter) {
        _vio_apnd2_iterfree(vio, VIOAPND2_INDEX, (VIOAPND2ITOR *)iter);
        free(iter);
    }
}


/*
 * Walk one data file and account for (or reclaim) its dead space.
 *
 * Every record is checked against the index with cdb_checkoff().  Records
 * that are still referenced and not expired are "live"; with `rcyle` set they
 * are rewritten through _vio_apnd2_writerecinternal() and the index is pointed
 * at the new offset.  All other records count as reclaimable bytes; with
 * `rcyle` set, expired ones are additionally removed from their index page.
 *
 * Afterwards, under myio->lock, finfo->nexpire is set to the earliest non-zero
 * expire time among live records (0 if none), finfo->rcyled to the
 * reclaimable byte count, and, when `rcyle` is set, the file is flagged
 * `unlink`.
 *
 * Returns 0 on success, -1 when the file cannot be opened, sized or mapped.
 */
static int _vio_apnd2_rcyledatafile(CDBVIO *vio, VIOAPND2FINFO *finfo, bool rcyle)
{
    VIOAPND2 *myio = (VIOAPND2 *)vio->iometa;
    char filename[MAX_PATH_LEN];
    uint32_t nexpire = 0xffffffff;
    uint32_t frsize = 0;
    uint32_t pos = FILEMETASIZE;
    uint32_t fsize;
    uint32_t now;
    char *map = NULL;
    off_t end;
    int fd;

    snprintf(filename, MAX_PATH_LEN, "%s/dat%08d.cdb", myio->filepath, finfo->fid);
    fd = open(filename, O_RDONLY, 0644);
    if (fd < 0) {
        cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__);
        return -1;
    }

    end = lseek(fd, 0, SEEK_END);
    if (end < 0) {
        close(fd);
        cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__);
        return -1;
    }
    fsize = (uint32_t)end;

    /* a file with no payload behind the meta header needs no mapping at all */
    if (fsize > pos) {
        /* The recycle path below stores ooff/osize through `rec`, which
         * points into this mapping, so the pages must be writable.  The
         * mapping is MAP_PRIVATE: those stores never reach the file. */
        void *m = mmap(NULL, fsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (m == MAP_FAILED) {
            close(fd);
            cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__);
            return -1;
        }
        map = (char *)m;
    }

    now = time(NULL);
    while(pos < fsize) {
        CDBREC *rec = (CDBREC *)&map[pos-(sizeof(CDBREC) - RECHSIZE)];
        FOFF off;
        uint64_t hash;

        if (rec->magic != RECMAGIC && rec->magic != DELRECMAGIC) {
            pos += ALIGNBYTES;
            continue;
        }

        ROFF2VOFF(finfo->fid, pos, off);
        hash = CDBHASH64(rec->buf, rec->ksize);
        if (cdb_checkoff(vio->db, hash, off, CDB_NOTLOCKED)
                /* not expired */
                && (rec->expire > now || rec->expire == 0)) {
            /* nearest expire record in current file */
            if (rec->expire && rec->expire < nexpire)
                nexpire = rec->expire;

            /* record is still referenced by the index: keep it, and when
             * recycling move it to a new location */
            if (rcyle) {
                FOFF noff;
                rec->ooff = off;
                rec->osize = OFFALIGNED(RECSIZE(rec));
                _vio_apnd2_writerecinternal(vio, rec, &noff);
                cdb_replaceoff(vio->db, hash, off, noff, CDB_NOTLOCKED);
            }
        } else {
            if (rcyle && rec->expire && rec->expire < now) {
                /* expired record, delete from index page */
                cdb_updatepage(vio->db, hash, off, CDB_PAGEDELETEOFF, CDB_NOTLOCKED);
            }
            frsize += OFFALIGNED(RECSIZE(rec));
        }
        pos += OFFALIGNED(RECSIZE(rec));
    }
    if (map)
        munmap(map, fsize);
    close(fd);

    cdb_lock_lock(myio->lock);
    /* fix metainfo about nearest expire time in current data file */
    if (nexpire == 0xffffffff)
        finfo->nexpire = 0;
    else
        finfo->nexpire = nexpire;
    finfo->rcyled = frsize;
    if (rcyle) {
        /* unlink */
        finfo->unlink = true;
    }
    cdb_lock_unlock(myio->lock);
    return 0;
}


/*
 * Flush both write buffers, rewrite the head file and start a fresh, empty
 * deletion log ("dellog.cdb"), all under myio->lock.
 *
 * The function has no return value; a failure to reopen the deletion log is
 * only recorded through cdb_seterrno() and leaves myio->dfd negative.
 */
static void _vio_apnd2_cleanpoint(CDBVIO *vio)
{
    VIOAPND2 *myio = (VIOAPND2 *)vio->iometa;
    char filename[MAX_PATH_LEN];

    cdb_lock_lock(myio->lock);
    _vio_apnd2_flushbuf(vio, VIOAPND2_DATA);
    _vio_apnd2_flushbuf(vio, VIOAPND2_INDEX);
    _vio_apnd2_writehead(vio, false);
    if (myio->dfd > 0)
        close(myio->dfd);
    snprintf(filename, MAX_PATH_LEN, "%s/dellog.cdb", myio->filepath);
    /* clean the previous deletion log */
    myio->dfd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    /* open failed, whom to tell? */
    if (myio->dfd < 0)
        cdb_seterrno(vio->db, CDB_OPENERR, __FILE__, __LINE__);
    cdb_lock_unlock(myio->lock);
}


/*
 * Read the 4-byte "open signature" stored in the head file.
 *
 * The field sits behind the file magic, one 4-byte field and three 8-byte
 * fields.  Returns the stored value, or -1 when the head file is not open or
 * the read comes up short.
 */
static int _vio_apnd2_checkopensig(CDBVIO *vio)
{
    VIOAPND2 *myio = (VIOAPND2 *)vio->iometa;
    int pos = FILEMAGICLEN + SI4 + SI8 + SI8 + SI8;
    uint32_t ret;

    if (myio->hfd < 0)
        return -1;

    if (pread(myio->hfd, &ret, SI4, pos) != SI4)
        return -1;

    return ret;
}


/*
 * Store `sig` as the 4-byte "open signature" in the head file, at the same
 * offset _vio_apnd2_checkopensig() reads from.
 *
 * Returns 0 on success, -1 when the head file is not open or the write comes
 * up short.
 */
static int _vio_apnd2_setopensig(CDBVIO *vio, int sig)
{
    VIOAPND2 *myio = (VIOAPND2 *)vio->iometa;
    int pos = FILEMAGICLEN + SI4 + SI8 + SI8 + SI8;
    uint32_t val = sig;

    if (myio->hfd < 0)
        return -1;

    if (pwrite(myio->hfd, &val, SI4, pos) != SI4)
        return -1;
    return 0;
}


diff --git a/libdap-chain-global-db/libdap-cuttdb/src/vio_apnd2.h b/libdap-chain-global-db/libdap-cuttdb/src/vio_apnd2.h new file mode 100644 index 0000000000000000000000000000000000000000..cb47a7dbd18a0f40ad1f9a571b35b94feaeff6d5 --- /dev/null +++ b/libdap-chain-global-db/libdap-cuttdb/src/vio_apnd2.h @@ -0,0 +1,23 @@ +/* + * CuttDB - a fast key-value storage engine + * + * + * http://code.google.com/p/cuttdb/ + * + * Copyright (c) 2012, Siyuan Fu. All rights reserved. + * Use and distribution licensed under the BSD license. + * See the LICENSE file for full text + * + * Author: Siyuan Fu <fusiyuan2010@gmail.com> + * + */ + + +#ifndef _VIO_APND2_H_ +#define _VIO_APND2_H_ +#include "cdb_vio.h" + + +void vio_apnd2_init(CDBVIO *vio); + +#endif