git-lfs是git对于二进制文件管理的一种扩展,可以减小仓库体积。
背景 我的图片库 由于存放了大量的图片,发现仓库本身高达5G ,其中 .git 目录就占了一半,应该是每一个二进制文件就会有个备份,再加上被删除的文件。
尝试过重建commit历史,但是依然没减小。
后来了解到git-lfs ,github的lfs仓库就给1G ,等于没有,于是准备自建一个。
由于个人的兴趣爱好原因,准备使用C 来实现
项目搭建 经过一通搜索,找到了一个C的web框架facil.io ,正常情况下,应该没什么人会用纯C写web,起码也要用C++,所以能找到一个框架真不容易
C的编译可以命令行,本次选择使用CMake 。
开发软件使用VSCode
开发环境搭建使用之前搞出来的docker镜像 ,再用VSCode远程开发
由于我对CMake完全不熟悉,所以用了一个插件 来新建项目。
插件本身只是创建基础的目录结构,启动脚本之类的。源代码初始化需要使用facil 提供的脚本
1 bash <(curl -s https://raw.githubusercontent.com/boazsegev/facil.io/master/scripts/new/app) appname
上面的两个步骤需要两个单独的文件夹,之后把脚本创建的目录里的 .c 和 .h 复制到插件目录里。
为了引入facil 依赖,将其源码作为git子模块引入
1 git submodule add https://github.com/boazsegev/facil.io
最后目录结构如下
.
├── CMakeLists.txt
├── LICENSE
├── README.md
├── facil.io
├── include
│   ├── cli.h
│   ├── http_service.h
│   └── main.h
└── src
    ├── cli.c
    ├── http_service.c
    └── main.c
协议文档 本次要实现一个最简单的lfs服务器,相关文档可以查看lfs仓库
路由 facil是个基础的web框架,没有路由功能,不过没有正好,本来也不需要那些功能。
这里只需要实现一个 Batch 接口,只需要以下代码就能判断url
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 static void on_http_request (http_s *h) { fio_str_info_s path = fiobj_obj2cstr(h->path); fio_str_info_s method = fiobj_obj2cstr(h->method); fio_str_info_s body = fiobj_obj2cstr(h->body); if (strcmp (path.data, LFS_BATCH_URL_PATH) == 0 ) { batch_request(h); } else { } }
LFS_BATCH_URL_PATH是一个字符串宏,具体内容是:
1 #define LFS_BATCH_URL_PATH "/objects/batch"
c语言作为早期的高级语言,功能比较原始。比如字符串是用字符数组——实际上很多更高级的语言也是这样——实现的,所以很多库都需要自己用结构体来定义字符串,上面的fio_str_info_s是facil 定义的,之后还有别的库定义的字符串
逻辑 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 void _batch_request(http_s *h, FIOBJ jsonBody, lfs_item_each each) { printf ("request 1\n" ); FIOBJ objectsKey = fiobj_str_new("objects" , strlen ("objects" )); FIOBJ objects = fiobj_hash_get(jsonBody, objectsKey); if (!fiobj_type_is(objects, FIOBJ_T_ARRAY)) { printf ("not allowed json body " ); fio_free(objects); fio_free(objectsKey); return ; } printf ("request 2\n" ); FIOBJ res = fiobj_hash_new2(3 ); FIOBJ transferKey = fiobj_str_new("transfer" , strlen ("transfer" )); FIOBJ basic = fiobj_str_new("basic" , strlen ("basic" )); FIOBJ hash_algo_key = fiobj_str_new("hash_algo" , strlen ("hash_algo" )); FIOBJ sha256 = fiobj_str_new("sha256" , strlen ("sha256" )); fiobj_hash_set(res, transferKey, basic); fiobj_hash_set(res, hash_algo_key, sha256); size_t count = fiobj_ary_count(objects); printf ("request 3" ); int i = 0 ; for (i = 0 ; i < count; i++) { FIOBJ item = fiobj_ary_index(objects, i); printf ("request 4\n" ); each(item); printf ("request 5\n" ); } fiobj_hash_set(res, objectsKey, objects); FIOBJ f = fiobj_obj2json(res, 1 ); fio_str_info_s res_str = fiobj_obj2cstr(f); fiobj_free(f); printf ("res %s\n" , res_str.data); FIOBJ contentTypeKey = fiobj_str_new("Content-Type" , strlen ("Content-Type" )); FIOBJ contentType = fiobj_str_new("application/vnd.git-lfs+json" , strlen ("application/vnd.git-lfs+json" )); int r = http_set_header(h, contentTypeKey, contentType); printf ("set content type %d\n" , r); http_send_body(h, res_str.data, res_str.len); fiobj_free(objects); fiobj_free(objectsKey); fiobj_free(contentTypeKey); fiobj_free(contentType); fiobj_free(hash_algo_key); fiobj_free(sha256); fiobj_free(transferKey); fiobj_free(basic); fiobj_free(res); }
lfs_item_each是一个函数指针,upload和download有不同的实现,由于逻辑本身很简单,不再贴代码了,要看可以直接去仓库
存储 git-lfs 本身不负责存储,只是负责提供存储相关的API。本次采用腾讯云COS 作为存储
需要调用两个核心方法cos_gen_presigned_url 和cos_gen_object_url ;一个是上传用url,一个是下载用url。
COS库本身的编译安装这里略过不提
由于引入了第三方库,所以需要修改CMakeLists.txt ,参考cos提供的demo里的,直接把内容拷贝过来,最后就是这样
# Build script for the `lfs` server executable.
#
# Compiles every C file under src/ and links it against the facil.io
# submodule and the Tencent COS C SDK plus the libraries the SDK needs
# (APR, APR-util, Mini-XML, libcurl).
#
# Compared with the demo this was copied from, queries whose results were
# never used (apr/apu --cflags and --link-ld, curl-config --libs, the
# header_directories() helper) and include paths built from variables that
# were never set (MINIXML_INCLUDE_DIR, CURL_INCLUDE_DIR) have been dropped.
cmake_minimum_required(VERSION 3.14)

set(PROJECT_N lfs)
project(${PROJECT_N} VERSION 1.0 LANGUAGES C)

set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# CONFIGURE_DEPENDS re-runs the glob at build time so new files are noticed.
file(GLOB_RECURSE SRCS CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/src/*.c")

# lfs_query_config(<out_var> <tool> <flag>)
#
# Runs `<tool> <flag>` (for example `apr-1-config --includedir`) and stores
# the whitespace-stripped output in <out_var> in the caller's scope.
# Aborts the configure step if the tool exits with a non-zero status.
function(lfs_query_config out_var tool flag)
  execute_process(
    COMMAND "${tool}" "${flag}"
    OUTPUT_VARIABLE query_output
    RESULT_VARIABLE query_status
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
  if(NOT query_status EQUAL 0)
    message(FATAL_ERROR "${tool} ${flag} failed with status ${query_status}")
  endif()
  set(${out_var} "${query_output}" PARENT_SCOPE)
endfunction()

find_program(APR_CONFIG_BIN
  NAMES apr-config apr-1-config
  PATHS /usr/bin /usr/local/bin /usr/local/apr/bin/
)
find_program(APU_CONFIG_BIN
  NAMES apu-config apu-1-config
  PATHS /usr/bin /usr/local/bin /usr/local/apr/bin/
)

if(NOT APR_CONFIG_BIN)
  message(FATAL_ERROR "Could not find apr-config/apr-1-config")
endif()
if(NOT APU_CONFIG_BIN)
  message(FATAL_ERROR "Could not find apu-config/apu-1-config")
endif()

lfs_query_config(APR_INCLUDE_DIR "${APR_CONFIG_BIN}" --includedir)
lfs_query_config(APR_UTIL_INCLUDE_DIR "${APU_CONFIG_BIN}" --includedir)

# Location of the COS SDK headers; overridable with -DCOS_INCLUDE_DIR=...
set(COS_INCLUDE_DIR "/usr/local/include/cos_c_sdk"
  CACHE PATH "Directory containing the Tencent COS C SDK headers")

find_library(APR_LIBRARY apr-1 PATHS /usr/local/apr/lib/)
find_library(APR_UTIL_LIBRARY aprutil-1 PATHS /usr/local/apr/lib/)
find_library(MINIXML_LIBRARY mxml)
find_library(CURL_LIBRARY curl)
find_library(COS_LIBRARY cos_c_sdk PATHS /usr/local/lib/)

# Fail early with a readable message instead of a late generate-time error.
foreach(lib_var IN ITEMS
    COS_LIBRARY APR_UTIL_LIBRARY APR_LIBRARY MINIXML_LIBRARY CURL_LIBRARY)
  if(NOT ${lib_var})
    message(FATAL_ERROR "Required library not found: ${lib_var}")
  endif()
endforeach()

add_subdirectory(facil.io)

message(STATUS "Sources: ${SRCS}")
add_executable(${PROJECT_N} ${SRCS})

target_include_directories(${PROJECT_N} PRIVATE
  "${PROJECT_SOURCE_DIR}/include"
  "${APR_INCLUDE_DIR}"
  "${APR_UTIL_INCLUDE_DIR}"
  "${COS_INCLUDE_DIR}"
)

# The plain (keyword-less) signature is kept on purpose: further
# target_link_libraries() calls added to this file for the static build use
# the plain form too, and CMake refuses to mix both forms on one target.
target_link_libraries(${PROJECT_N}
  facil.io
  ${COS_LIBRARY}
  ${APR_UTIL_LIBRARY}
  ${APR_LIBRARY}
  ${MINIXML_LIBRARY}
  ${CURL_LIBRARY}
)
docker镜像 C程序过于依赖环境,所以最好使用docker镜像来运行构建产物。
采用docker多阶段构建 ,对docker版本要求较高,公司的测试服务器版本就很低,不支持该特性
# ---- Stage 1: build the lfs binary and collect its shared libraries ----
FROM ubuntu:20.04 AS builder

ENV TZ=Asia/Shanghai
# `export` inside a RUN only lasts for that one layer; a build ARG keeps apt
# non-interactive for every following RUN without leaking into the image.
ARG DEBIAN_FRONTEND=noninteractive

RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Switch to a nearby mirror, then update and install in ONE layer so a cached
# package index can never be paired with a newer install step.
RUN sed -i "s@http://.*archive.ubuntu.com@http://mirrors.huaweicloud.com@g" /etc/apt/sources.list \
    && sed -i "s@http://.*security.ubuntu.com@http://mirrors.huaweicloud.com@g" /etc/apt/sources.list \
    && apt-get update -y && apt-get upgrade -y \
    && apt-get install -y cmake g++ libaprutil1-dev libcurl4-openssl-dev curl wget git libssl-dev

# Mini-XML (dependency of the COS SDK)
RUN wget https://github.com/michaelrsweet/mxml/releases/download/v3.3.1/mxml-3.3.1.tar.gz \
    && tar -zxf mxml-3.3.1.tar.gz && cd mxml-3.3.1 && ./configure && make && make install

# APR; archive.apache.org keeps superseded releases, dlcdn.apache.org does not.
RUN wget https://archive.apache.org/dist/apr/apr-1.7.2.tar.gz \
    && tar -zxf apr-1.7.2.tar.gz && cd apr-1.7.2 && ./configure && make && make install

# Tencent COS C SDK
RUN wget https://github.com/tencentyun/cos-c-sdk-v5/archive/refs/tags/v5.0.16.tar.gz \
    && tar -zxf v5.0.16.tar.gz && cd cos-c-sdk-v5-5.0.16 && cmake . && make && make install

# libcurl built from source without LDAP support
RUN wget https://curl.se/download/curl-7.88.1.tar.gz \
    && tar -zxf curl-7.88.1.tar.gz && cd curl-7.88.1/ \
    && ./configure --disable-ldap --disable-ldaps --with-openssl && make && make install

# Project sources (COPY is the plain-copy instruction; ADD has extra semantics)
COPY . /data
RUN cd /data && git submodule update --init --recursive \
    && mkdir /data/build && cd /data/build && cmake .. && make && chmod +x lfs \
    && cp ../entrypoint.sh ./ && chmod +x entrypoint.sh \
    && cp ../pack.sh ./ && mkdir lib && sh pack.sh

# ---- Stage 2: runtime image containing only the build output ----
FROM ubuntu:20.04
COPY --from=builder /data/build/ /lfs
EXPOSE 3000
ENTRYPOINT ["/lfs/entrypoint.sh"]
有几点需要注意,现在都是动态编译的,单个文件无法运行,需要一堆.so动态库,所以需要使用pack.sh来拷贝依赖,entrypoint.sh来运行程序
pack.sh
#!/bin/sh
# Copies every shared library the executable is linked against (as reported
# by ldd) into ./lib, so the binary can be shipped together with them.
# Must be run from the directory that contains the executable.
set -eu

exe="lfs"            # name of the final executable
des="$(pwd)/lib"     # destination directory for the collected libraries

mkdir -p "$des"
echo "$des"

# ldd prints "name => /abs/path (addr)"; keep only entries whose third field
# is a path, which skips the vdso, the loader line and "not found" entries.
ldd "$exe" | awk '$3 ~ /\// { print $3 }' | while IFS= read -r dep; do
    cp "$dep" "$des"
done
entrypoint.sh
#!/bin/sh
# Launcher for the lfs binary: points the dynamic loader at the lib/
# directory next to this script and then starts the server, forwarding all
# command-line arguments.

dirname=$(dirname "$0")

# Turn a relative script location into an absolute one.
case "$dirname" in
    /*) ;;
    *) dirname="$PWD/$dirname" ;;
esac

LD_LIBRARY_PATH="$dirname/lib"
export LD_LIBRARY_PATH
echo "$LD_LIBRARY_PATH"

# exec replaces the shell, so signals sent to the container's main process
# (e.g. SIGTERM from `docker stop`) are delivered to the server itself.
exec "$dirname/lfs" "$@"
基础原理就是告诉Linux系统去哪里找对应的动态库文件。
但是上面拷贝的动态库实际上是不全的,缺少glibc ,这一部分一般由操作系统提供,所以最终使用的运行镜像是Ubuntu:20.04 ,不能使用诸如alpine 、busybox 之类的精简镜像,因为没有匹配的环境
静态编译 由于动态库下最终镜像体积高达87MB ,实在是太大了,所以尝试使用静态编译,然后更新精简镜像来缩小体积
想要实现静态编译,就要告诉CMake去查找 .a 文件,可以添加以下代码
# Make find_library() look only for static archives so that the executable
# gets linked statically. This has to run before the find_library() calls
# it is meant to affect.
if(WIN32 OR MSVC)
  set(CMAKE_FIND_LIBRARY_SUFFIXES ".lib")
elseif(UNIX)
  set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
endif()
注意 cos 库的 .a 文件名与常规命名规则不同,修改一下
1 find_library (COS_LIBRARY libcos_c_sdk_static.a PATHS /usr/local/lib/)
由于我不知道的原因,静态编译需要把动态编译下不需要关心的依赖的依赖也给加进来,这里主要是 libcurl 的一些依赖
# Transitive dependencies that must be listed explicitly for a static link
# (a static archive does not record the libraries it depends on).
find_library(IDN_LIBRARY idn)
find_library(SSL_LIBRARY ssl)
find_library(C_LIBRARY crypto)
find_library(DL_LIBRARY dl)
# Provides the Threads::Threads imported target for the platform's thread
# library; the previously referenced THREAD_LIBRARY variable was never set.
find_package(Threads REQUIRED)

# The executable target is named by PROJECT_N; LIBRARY_N is not defined in
# this build script. Plain signature, matching the other
# target_link_libraries() calls on this target.
target_link_libraries(${PROJECT_N}
  ${IDN_LIBRARY}
  ${SSL_LIBRARY}
  ${C_LIBRARY}
  ${DL_LIBRARY}
  Threads::Threads
)
Docker镜像可以使用busybox ,我还调整curl编译参数,去掉一些不需要的功能。最终产物就只有12.1MB 了,比较完美
其他 可以使用github actions来构建docker镜像,并发布到github packages,这里不再赘述
参考资料